In [None]:
# 1. Install dependensi
%pip install langchain_community replicate pandas scikit-learn matplotlib

In [None]:
# 2. Import library
import os
import re
from getpass import getpass

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from langchain_community.llms import Replicate
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
)

In [None]:
# 3. Set up the Replicate API token
# Prefer a token already exported in the environment; when it is missing or
# empty, ask for it with hidden input and export it for the rest of the session.
api_token = os.environ.get("REPLICATE_API_TOKEN", "")
if api_token == "":
    api_token = getpass("Masukkan API Token Replicate (disembunyikan): ").strip()
    os.environ["REPLICATE_API_TOKEN"] = api_token

In [None]:
# 4. Load the dataset from a local CSV file
csv_path = "IMDB Dataset.csv"
df = pd.read_csv(csv_path)

# Report the number of rows, then preview the first five rows.
print("Jumlah data:", df.shape[0])
print(df.head(5))

In [None]:
# 4.1. Mount Google Drive (for CSV access) when running on Google Colab
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# 4.2. Load the dataset from Google Drive (when using Google Colab)
# csv_path = "/content/drive/MyDrive/IMDB Dataset.csv"
# df = pd.read_csv(csv_path)

# print("Jumlah data:", len(df))
# print(df.head())

In [None]:
# 5. Set up the Granite model
# `model` is the Replicate model identifier; `client` is the LangChain wrapper
# that is later invoked with prompts, authenticated with `api_token`.
model = "ibm-granite/granite-3.3-8b-instruct"
client = Replicate(
    replicate_api_token=api_token,
    model=model,
)

def classify_review(text: str) -> str:
    """Classify the sentiment of a movie review with the Granite model.

    Builds a prompt around ``text``, sends it through the module-level
    ``client`` and maps the reply onto a sentiment label.

    Args:
        text: The raw review text to classify.

    Returns:
        ``"Positive"`` or ``"Negative"`` when the reply mentions exactly one of
        those words, regardless of case, surrounding punctuation or extra text
        (e.g. ``"Positive."`` or ``"Sentiment: negative"``). If the reply
        mentions neither or both, the stripped and capitalized reply is
        returned as-is so ambiguous answers remain visible. If anything raises
        while calling the model or processing the reply, the string
        ``"[ERROR] <exception message>"`` is returned instead of propagating.
    """
    prompt = f"""
    Klasifikasikan ulasan film berikut sebagai Positive atau Negative.
    Hanya jawab dengan salah satu kata: Positive atau Negative.

    Review:
    {text}
    """
    try:
        response = client.invoke(prompt)
        cleaned = response.strip()
        # Replies are not always the bare label (trailing period, markdown
        # emphasis, a short sentence). Look for the label words themselves so
        # such replies still compare equal to the ground-truth labels.
        mentioned = set(re.findall(r"\b(positive|negative)\b", cleaned.lower()))
        if len(mentioned) == 1:
            return mentioned.pop().capitalize()
        # Neither or both labels present: fall back to the raw reply.
        return cleaned.capitalize()
    except Exception as e:
        # Best effort: one failed call must not abort a whole evaluation run.
        return f"[ERROR] {e}"

In [None]:
# 6. Draw a sample of reviews and classify each one
# A fixed random_state keeps the 100-row sample the same on every run.
sample_df = df.sample(n=100, random_state=42).reset_index(drop=True)

y_true = []
y_pred = []

# Walk the review/sentiment columns in lockstep; numbering starts at 1 so the
# printed counter matches the row's position in the sample.
labelled_reviews = zip(sample_df["review"], sample_df["sentiment"])
for position, (review, sentiment) in enumerate(labelled_reviews, start=1):
    true_label = sentiment.capitalize()
    pred_label = classify_review(review)

    y_true.append(true_label)
    y_pred.append(pred_label)

    print(f"[{position}] True: {true_label} | Pred: {pred_label}")

In [None]:
# 7. Evaluation: confusion matrix, accuracy and classification report
eval_labels = ["Positive", "Negative"]

# Predictions that are not one of the two labels (for example the
# "[ERROR] ..." strings returned on a failed call) are left out of the
# confusion matrix, so report how many there are instead of hiding them.
n_unexpected = sum(pred not in eval_labels for pred in y_pred)
if n_unexpected:
    print(f"Warning: {n_unexpected} prediction(s) outside {eval_labels}")

cm = confusion_matrix(y_true, y_pred, labels=eval_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=eval_labels)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix Granite")
plt.show()

# Accuracy is computed over every sample, so unexpected predictions count as misses.
print("\nAccuracy:", accuracy_score(y_true, y_pred))

print("\nClassification Report:\n")
# Restrict the report to the evaluated labels; zero_division=0 avoids warnings
# when a label is never predicted.
print(classification_report(y_true, y_pred, labels=eval_labels, zero_division=0))

In [None]:
# 8. Distribution of predicted labels
# Count how often each predicted label occurs and draw the counts as bars.
pred_counts = pd.Series(y_pred).value_counts()

fig, ax = plt.subplots()
sns.barplot(x=pred_counts.index, y=pred_counts.values, ax=ax)
ax.set_title("Distribusi Prediksi Sentimen (Granite)")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Jumlah")
plt.show()