<a href="https://colab.research.google.com/github/Andriansyah2501/Proyek_Analisis_Sentimen/blob/main/Analisis_Sentiment_Skema_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Mount Google Drive so the dataset stored there is reachable from Colab.
from google.colab import drive
drive.mount('/content/drive')

# Load the review dataset (CSV) from the mounted drive.
dataset_path = '/content/drive/MyDrive/Colab Notebooks/dataset sentimen/dataset_review_makanan_bergizi_3200.csv'
df = pd.read_csv(dataset_path)

# Show the available column names as a quick sanity check.
print("Kolom yang tersedia:", df.columns)

# Derive a sentiment label from the numeric rating:
# rating 1-2 = negative, 3 = neutral, 4-5 = positive.
def assign_sentiment(rating):
    """Map a numeric review rating to a sentiment label.

    Args:
        rating: Numeric rating of the review (the file treats 1-2 as
            negative, 3 as neutral and anything above as positive).

    Returns:
        'negatif' when rating <= 2, 'netral' when rating == 3, 'positif'
        for any other number, or None when the rating is missing
        (None or NaN).
    """
    # NaN compares False against every number, so without this guard a
    # missing rating would fall through both branches and be labelled
    # 'positif'. `rating != rating` is True only for NaN.
    if rating is None or rating != rating:
        return None
    if rating <= 2:
        return 'negatif'
    if rating == 3:
        return 'netral'
    return 'positif'

# Clean first: drop rows missing the review text or the rating. A NaN rating
# is not a usable score (NaN compares False against every number), so it must
# not reach the labelling step. copy() gives an independent frame so the
# column assignment below does not write into a slice of the loaded data.
df = df.dropna(subset=['review', 'rating']).copy()

# Label every remaining row from its rating.
df['sentiment'] = df['rating'].apply(assign_sentiment)

# Keep only rows whose label is one of the expected sentiment classes.
sentiments = ['positif', 'negatif', 'netral']
df = df[df['sentiment'].isin(sentiments)].copy()

# Text normalisation applied to every review before vectorisation.
def preprocess(text):
    """Return a lower-cased copy of *text* with noise removed.

    The removals run in order: URLs starting with "http", anything between
    angle brackets, ASCII punctuation, then digit runs. Matches are deleted
    (not replaced by a space) and the result is stripped of leading and
    trailing whitespace.
    """
    removal_patterns = (
        r"http\S+",
        r"<.*?>",
        f"[{re.escape(string.punctuation)}]",
        r"\d+",
    )
    cleaned = text.lower()
    # Order matters: URLs and tags must go before punctuation is stripped,
    # otherwise their delimiters would already be gone.
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned.strip()

df['cleaned_text'] = df['review'].apply(preprocess)
y = df['sentiment']

# Split the cleaned text BEFORE vectorising. Fitting TF-IDF on the full
# dataset would let test-set vocabulary and IDF statistics leak into training
# and inflate the reported score.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# TF-IDF vectorisation: learn vocabulary/IDF from the training text only,
# then apply that fitted transform to the test text.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-dataset matrix in the training-fitted feature space; kept under the
# original name `X` in case a later cell still refers to it.
X = vectorizer.transform(df['cleaned_text'])

# NOTE(review): the recorded run reports 100% test accuracy, which may point
# to duplicated reviews shared between train and test - consider checking
# df['cleaned_text'].duplicated() before trusting the score. TODO confirm.

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate the trained model on the held-out split.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
accuracy_percent = round(accuracy * 100, 2)

print("\n🎯 Akurasi Testing:", accuracy_percent, "%")
print("\n📊 Classification Report:\n", report)

# Inference on one hand-written example sentence, pushed through the same
# cleaning and vectorisation steps as the training data.
contoh_kalimat = "Aplikasi ini sangat jelek dan sering error"
contoh_clean = preprocess(contoh_kalimat)
contoh_vector = vectorizer.transform([contoh_clean])
prediksi = model.predict(contoh_vector)
hasil_prediksi = prediksi[0]

print("\n🧠 Inference Contoh:")
print("Kalimat:", contoh_kalimat)
print("Hasil Prediksi Sentimen:", hasil_prediksi)


Mounted at /content/drive
Kolom yang tersedia: Index(['review', 'rating'], dtype='object')

🎯 Akurasi Testing: 100.0 %

📊 Classification Report:
               precision    recall  f1-score   support

     negatif       1.00      1.00      1.00       117
      netral       1.00      1.00      1.00       116
     positif       1.00      1.00      1.00       407

    accuracy                           1.00       640
   macro avg       1.00      1.00      1.00       640
weighted avg       1.00      1.00      1.00       640


🧠 Inference Contoh:
Kalimat: Aplikasi ini sangat jelek dan sering error
Hasil Prediksi Sentimen: positif
