In [56]:
import os
import pandas as pd
import re
import string
from nltk.corpus import stopwords
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.feature_extraction.text import CountVectorizer
from textblob import TextBlob



In [57]:
# Fungsi untuk membaca semua CSV dalam direktori
def load_reviews_from_directory(directory):
    data_frames = []
    for root, dirs, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith('.csv'):
                csv_path = os.path.join(root, file_name)
                df = pd.read_csv(csv_path)
                if not df.empty:
                    data_frames.append(df)
    if data_frames:
        return pd.concat(data_frames, ignore_index=True)
    else:
        raise ValueError("Tidak ada file CSV valid di direktori:", directory)

# Stopwords dan stemmer
stop_words = set(stopwords.words('indonesian'))
factory = StemmerFactory()
stemmer = factory.create_stemmer()



In [58]:
# Fungsi untuk preprocessing teks
def preprocess_text(text):
    text = re.sub(r"\d+", "", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
    words = text.split()
    words = list(dict.fromkeys(words))
    words = [word for word in words if word not in stop_words]
    words = [stemmer.stem(word) for word in words]
    return " ".join(words)



In [59]:
# Direktori dataset
directories = [
    'C:/Users/acer/Downloads/Tugas 3/Dataset/kamera instan',
    'C:/Users/acer/Downloads/Tugas 3/Dataset/kamera mirrorless',
    'C:/Users/acer/Downloads/Tugas 3/Dataset/kamera pocket'
]



In [60]:
# Proses setiap direktori untuk analisis
for directory in directories:
    # Memuat ulasan dari direktori
    df = load_reviews_from_directory(directory)
    
    # Preprocessing teks
    df['processed_text'] = df['review'].astype(str).apply(preprocess_text)
    
    # Analisis sentimen
    sentiment_scores = df['processed_text'].apply(lambda text: TextBlob(text).sentiment.polarity)
    df['sentiment'] = sentiment_scores.apply(lambda score: 'positif' if score > 0 else 'negatif')
    
    # Pisahkan ulasan positif dan negatif
    positive_reviews = df[df['sentiment'] == 'positif']['processed_text']
    negative_reviews = df[df['sentiment'] == 'negatif']['processed_text']
    
    # Ekstraksi kata kunci dari ulasan positif dan negatif
    vectorizer = CountVectorizer(max_features=10)
    
    # Kata kunci dari ulasan positif
    positive_keywords = vectorizer.fit_transform(positive_reviews).toarray().sum(axis=0)
    positive_keyword_freq = dict(zip(vectorizer.get_feature_names_out(), positive_keywords))
    positive_keyword_freq = sorted(positive_keyword_freq.items(), key=lambda x: x[1], reverse=True)
    
    # Kata kunci dari ulasan negatif
    negative_keywords = vectorizer.fit_transform(negative_reviews).toarray().sum(axis=0)
    negative_keyword_freq = dict(zip(vectorizer.get_feature_names_out(), negative_keywords))
    negative_keyword_freq = sorted(negative_keyword_freq.items(), key=lambda x: x[1], reverse=True)
    
    # Tampilkan rangkuman untuk tiap direktori
    print(f"\nKamera: {os.path.basename(directory)}")
    print("Kelebihan (Kata Kunci Positif):")
    for keyword, freq in positive_keyword_freq:
        print(f"- {keyword}: {freq} kali")
    
    print("\nKekurangan (Kata Kunci Negatif):")
    for keyword, freq in negative_keyword_freq:
        print(f"- {keyword}: {freq} kali")

    # Simpan hasil rangkuman ke file
    output_path = f'C:/Users/acer/Downloads/Tugas 3/Dataset/rangkuman_{os.path.basename(directory)}.txt'
    with open(output_path, 'w') as file:
        file.write(f"Kamera: {os.path.basename(directory)}\n")
        file.write("Kelebihan (Kata Kunci Positif):\n")
        for keyword, freq in positive_keyword_freq:
            file.write(f"- {keyword}: {freq} kali\n")
        
        file.write("\nKekurangan (Kata Kunci Negatif):\n")
        for keyword, freq in negative_keyword_freq:
            file.write(f"- {keyword}: {freq} kali\n")
    
    print("\nHasil rangkuman disimpan di:", output_path)



Kamera: kamera instan
Kelebihan (Kata Kunci Positif):
- good: 5 kali
- barang: 4 kali
- ok: 4 kali
- bagus: 3 kali
- cepat: 3 kali
- cepet: 3 kali
- kasih: 3 kali
- original: 3 kali
- top: 3 kali
- sesuai: 2 kali

Kekurangan (Kata Kunci Negatif):
- bagus: 33 kali
- barang: 24 kali
- banget: 16 kali
- kamera: 13 kali
- suka: 11 kali
- cepat: 10 kali
- nya: 9 kali
- sesuai: 7 kali
- terima: 7 kali
- kurir: 6 kali

Hasil rangkuman disimpan di: C:/Users/acer/Downloads/Tugas 3/Dataset/rangkuman_kamera instan.txt

Kamera: kamera mirrorless
Kelebihan (Kata Kunci Positif):
- barang: 12 kali
- kirim: 9 kali
- original: 7 kali
- good: 5 kali
- seller: 5 kali
- thanks: 5 kali
- bonus: 4 kali
- lazada: 4 kali
- sesuai: 4 kali
- ok: 3 kali

Kekurangan (Kata Kunci Negatif):
- barang: 22 kali
- sesuai: 16 kali
- cepat: 14 kali
- terima: 14 kali
- bagus: 13 kali
- kirim: 12 kali
- deskripsi: 8 kali
- kamera: 8 kali
- mantap: 8 kali
- kasih: 7 kali

Hasil rangkuman disimpan di: C:/Users/acer/Downloads