In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report

In [2]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Analisis_Sentimen_DBS/Analisis-Sentimen/cleaned_dataset.csv")
df

Unnamed: 0,stemmed_review,label
0,bagus,positif
1,shopi banget,positif
2,nang cepat santun trimakasih,positif
3,puas belanja shopee,positif
4,akun paylater nonaktif ga alas pakai tahun skr...,negatif
...,...,...
9995,oke,positif
9996,lumayan,positif
9997,aplikasi bagus,positif
9998,amana,positif


In [3]:
df.dropna(subset=['stemmed_review', 'label'], inplace=True)

In [6]:
# TF-IDF vectorization
tfidf = TfidfVectorizer(ngram_range=(1,2), max_features=10000)
X = tfidf.fit_transform(df['stemmed_review'])
y = df['label']

In [7]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [8]:
# Train Random Forest
rf_model = RandomForestClassifier(n_estimators=150, random_state=42)
rf_model.fit(X_train, y_train)

# Predict
y_pred = rf_model.predict(X_test)

In [9]:
# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9922440537745605
Classification Report:
               precision    recall  f1-score   support

     negatif       0.99      0.99      0.99       360
      netral       0.91      0.98      0.94        88
     positif       1.00      0.99      0.99      1486

    accuracy                           0.99      1934
   macro avg       0.97      0.99      0.98      1934
weighted avg       0.99      0.99      0.99      1934



In [15]:
def predict_sentiment(text):
    # Ubah teks menjadi lowercase
    text = text.lower()

    # Bisa tambahkan preprocessing seperti stemming/tokenizing jika perlu
    # Contoh simple tanpa preprocessing lanjut
    text_vector = tfidf.transform([text])

    # Prediksi label
    pred_label = rf_model.predict(text_vector)[0]

    # Tampilkan hasil
    print(f"Teks: \"{text}\"")
    print(f"Prediksi Sentimen: {pred_label}")
    return pred_label

sample_text = "Aplikasi ini sangat membantu dan mudah digunakan"
predict_sentiment(sample_text)

Teks: "aplikasi ini sangat membantu dan mudah digunakan"
Prediksi Sentimen: positif


'positif'