In [None]:
!pip install Sastrawi
!pip install transformers
!pip install torch
!pip install nltk



In [None]:
from transformers import pipeline
import torch
import pandas as pd
import re
from tqdm import tqdm
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import nltk
from nltk.corpus import stopwords

# Make sure the NLTK stopword corpus is downloaded before it is read below.
nltk.download('stopwords')

# Select the hardware device handed to the pipeline.
import torch
device = 0 if torch.cuda.is_available() else -1  # 0 when CUDA is available (use the GPU), otherwise -1

# Build the sentiment-analysis pipeline (text classification with the
# nlptown multilingual BERT sentiment checkpoint) on the selected device.
sentiment_pipeline = pipeline(
    "text-classification",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
    device=device
)

# Create the Sastrawi stemmer for Indonesian text; `stemmer` is used by stem_text().
factory = StemmerFactory()
stemmer = factory.create_stemmer()

# English and Indonesian stopword sets (NLTK word lists converted to sets).
stop_words_en = set(stopwords.words('english'))
stop_words_id = set(stopwords.words('indonesian'))

# Load the review data; the processing loop further down reads its "ulasan" column.
file_path = "Data ulasan.csv"
data = pd.read_csv(file_path)

# Aspect dictionary: maps each aspect name to the list of keywords associated
# with it. aspect_based_analysis_with_transformers() walks the entries in the
# order written here, so earlier aspects are tried first. "Umum" is also the
# value that function returns when no aspect is selected.
aspect_keywords = {
    # Note: "fungsi" is listed here and again under "Pengalaman".
    "Fitur": [
        "fitur", "fungsi", "tajwid", "aplikasi", "audio", "belajar", "al-qur'an",
        "menu", "mode", "pengaturan", "highlight", "tema", "pengingat", "offline",
        "rekam", "ulang", "notifikasi", "bookmark", "suara", "update", "akses",
        "pencarian", "hasil", "otomatis"
    ],
    "Pelayanan": [
        "respons", "layanan", "customer", "bantuan", "support", "balasan", "admin",
        "kecepatan", "respon cepat", "tim", "komunikasi", "dukungan", "tanggapan",
        "hubungi", "pelanggan", "pertanyaan", "jawaban", "helpdesk", "chat", "care"
    ],
    "Pengalaman": [
        "bug", "error", "mudah", "sulit", "cepat", "lambat", "pengalaman", "loading",
        "crash", "interaksi", "feedback", "intuitif", "kesan", "smooth", "lag",
        "pemakaian", "kenyamanan", "hambatan", "responsif", "fungsi"
    ],
    "Religiusitas": [
        "alhamdulillah", "iman", "ibadah", "syukur", "doa", "shalat", "qur'an",
        "tafsir", "ayat", "spiritual", "motivasi", "dzikir", "hidayah", "berkah",
        "taubat", "ikhlas", "istiqomah", "amalan", "fadhilah", "wirid"
    ],
    "Umum": [
        "bagus", "praktis", "desain", "grafik", "tampilan", "baik", "terbaik",
        "puas", "serbaguna", "modern", "user-friendly", "sederhana", "cantik",
        "estetis", "kompatibel", "ringan", "inovatif", "fleksibel", "efisien", "stabil"
    ],
}

# Stopword removal helper.
def remove_stopwords(text, stop_words):
    """Drop every whitespace-separated word of `text` whose lowercase form is in `stop_words`.

    Args:
        text: String to filter; it is split on whitespace.
        stop_words: Container of lowercase words to remove.

    Returns:
        The surviving words, in their original casing and order, joined by
        single spaces.
    """
    kept = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept.append(token)
    return ' '.join(kept)

# Stemming helper.
def stem_text(text):
    """Return `text` after running it through the module-level `stemmer`.

    Args:
        text: String to stem.

    Returns:
        Whatever `stemmer.stem(text)` produces.
    """
    stemmed = stemmer.stem(text)
    return stemmed

# Memo of keyword -> tuple of stemmed tokens, filled lazily so that each
# keyword is stemmed once instead of once per review.
_KEYWORD_TOKEN_CACHE = {}


def _stemmed_tokens(text):
    """Stem `text` via `stem_text`, lowercase it and split it into word tokens."""
    return re.findall(r'\w+', stem_text(text).lower())


def _keyword_tokens(keyword):
    """Return the stemmed tokens of `keyword` as a tuple (memoised per keyword)."""
    tokens = _KEYWORD_TOKEN_CACHE.get(keyword)
    if tokens is None:
        tokens = tuple(_stemmed_tokens(keyword))
        _KEYWORD_TOKEN_CACHE[keyword] = tokens
    return tokens


def _contains_run(tokens, run):
    """Return True if the tuple `run` occurs as a contiguous slice of the list `tokens`."""
    width = len(run)
    return any(
        tuple(tokens[start:start + width]) == run
        for start in range(len(tokens) - width + 1)
    )


# Check whether an aspect is relevant to a review.
def is_aspect_relevant(review, keywords):
    """Return True if any of `keywords` occurs in `review`.

    The review has its stopwords removed, is stemmed, and is tokenised into
    word tokens. Each keyword goes through the same stemming and tokenisation
    before comparison, so that:

    * a keyword carrying affixes is compared in its stemmed form, i.e. the same
      form the review tokens are in;
    * a keyword that tokenises into several tokens (it contains a space, a
      hyphen or an apostrophe) is matched as a contiguous run of review tokens
      instead of being compared with a single token, which could never succeed.

    Words that belong to `keywords` are exempted from stopword removal, so a
    keyword that also appears in one of the stopword lists can still match.

    Args:
        review: Review text. Anything that is not a non-blank string (for
            example a NaN produced by pandas for an empty cell) is treated as
            matching nothing.
        keywords: Iterable of keyword strings for one aspect.

    Returns:
        bool: True when at least one keyword is found, otherwise False.
    """
    if not isinstance(review, str) or not review.strip():
        return False

    keywords = list(keywords)
    # Keep keyword words out of the stopword filter (see docstring).
    protected = {word for keyword in keywords for word in keyword.lower().split()}
    stop_words = (stop_words_en | stop_words_id) - protected

    review_tokens = _stemmed_tokens(remove_stopwords(review, stop_words))
    token_set = set(review_tokens)  # O(1) lookups for single-token keywords

    for keyword in keywords:
        wanted = _keyword_tokens(keyword)
        if not wanted:
            continue  # keyword produced no tokens; nothing to look for
        if len(wanted) == 1:
            if wanted[0] in token_set:
                return True
        elif _contains_run(review_tokens, wanted):
            return True
    return False

# Pipeline labels (compared lowercased) that count as a neutral sentiment.
# The nlptown star-rating checkpoint configured for `sentiment_pipeline`
# labels its output "1 star" .. "5 stars" and never emits "Neutral"; the
# middle rating "3 stars" is interpreted here as neutral. "neutral" is kept so
# a model that does emit that label keeps working.
_NEUTRAL_SENTIMENT_LABELS = frozenset({"neutral", "3 stars"})


# Aspect detection gated by sentiment.
def aspect_based_analysis_with_transformers(review, aspect_keywords, default_aspect="Umum"):
    """Return the first aspect that is relevant to `review` with a non-neutral sentiment.

    Aspects are tried in the iteration order of `aspect_keywords`. For each
    aspect whose keywords match the review (see `is_aspect_relevant`), the text
    "<aspect>: <review>" is classified with `sentiment_pipeline`; the aspect is
    returned unless the predicted label is neutral.

    Args:
        review: Review text. A value that is not a non-blank string (e.g. NaN
            from an empty CSV cell) yields `default_aspect`.
        aspect_keywords: Mapping of aspect name -> list of keywords.
        default_aspect: Aspect returned when no aspect qualifies.

    Returns:
        str: The selected aspect name, or `default_aspect`.
    """
    if not isinstance(review, str) or not review.strip():
        return default_aspect

    candidates = list(aspect_keywords.items())
    last_index = len(candidates) - 1

    for index, (aspect, keywords) in enumerate(candidates):
        # If the fallback aspect is the last candidate, the result is
        # `default_aspect` whatever the classifier says, so skip the
        # (expensive) model call altogether.
        if aspect == default_aspect and index == last_index:
            break
        if not is_aspect_relevant(review, keywords):
            continue
        input_text = f"{aspect}: {review}"
        # truncation=True keeps over-long reviews within the model's maximum
        # input length instead of raising inside the model.
        sentiment = sentiment_pipeline(input_text, truncation=True)[0]["label"]
        if sentiment.strip().lower() not in _NEUTRAL_SENTIMENT_LABELS:
            return aspect
    return default_aspect

# Process every review in the data, with a progress bar.
# Only the "ulasan" column is needed, so iterate over that column directly
# instead of materialising a full row Series per record with iterrows().
# Make sure the column name matches the CSV.
results = []
for review in tqdm(data["ulasan"], total=len(data), desc="Processing Reviews"):
    # An empty CSV cell is read as NaN; analyse it as empty text (which matches
    # no keyword) rather than letting the string helpers fail on a float.
    review_text = "" if pd.isna(review) else str(review)
    identified_aspect = aspect_based_analysis_with_transformers(review_text, aspect_keywords)
    # The original cell value is stored, so the export mirrors the input.
    results.append({"Ulasan": review, "Aspek Teridentifikasi": identified_aspect})

# Save the results to a new file.
output_path = "Hasil_ABSA.csv"
output_df = pd.DataFrame(results)
output_df.to_csv(output_path, index=False)

print(f"Hasil telah disimpan ke {output_path}")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Processing Reviews:   0%|          | 10/9938 [00:04<58:16,  2.84it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing Reviews: 100%|██████████| 9938/9938 [12:47<00:00, 12.94it/s]

Hasil telah disimpan ke Hasil_ABSA.csv





In [None]:
# Show the results: review text, identified aspect, then a separator line.
# Requires `results` from the processing cell to exist in the kernel.
separator = "-" * 20
for entry in results:
    print(
        f"Ulasan: {entry['Ulasan']}",
        f"Aspek Teridentifikasi: {entry['Aspek Teridentifikasi']}",
        separator,
        sep="\n",
    )

NameError: name 'results' is not defined