In [1]:
import pandas as pd
from transformers import pipeline
from tqdm.auto import tqdm

# Muat model pre-trained IndoBERT
indoBERT = "mdhugol/indonesia-bert-sentiment-classification"
sentiment_pipeline = pipeline("sentiment-analysis", model=indoBERT)


Device set to use cpu


In [2]:
# Path file clean
df = pd.read_csv("../data/processed/gojek_cleaned.csv")

print("Jumlah data:", len(df))
df.head()


Jumlah data: 282


Unnamed: 0,clean_text
0,pembayaran harian yang praktis ada pexx card y...
1,aplikasi online yg ada fitur mobil pick up nya...
2,aku lagi survey data tentang seberapa sering k...
3,bukan ide baru bahkan lahirnya gojek juga awal...
4,pesan taksi listrik green sm kini bisa via apl...


In [3]:
# Prediksi sentimen (gunakan tqdm untuk progress bar)
clean_texts = df["clean_text"].astype(str).tolist()

results = []
for text in tqdm(clean_texts):
    try:
        prediction = sentiment_pipeline(text[:512])[0]  # potong agar tidak > max token
        results.append(prediction["label"])
    except:
        results.append("neutral")  # fallback jika error


  0%|          | 0/282 [00:00<?, ?it/s]

In [6]:
# Mapping label
label_map = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive"
}

# Ubah label ke bentuk string
df["sentiment"] = df["sentiment"].map(label_map)

# Cek hasil
df[["clean_text", "sentiment"]].head()


Unnamed: 0,clean_text,sentiment
0,pembayaran harian yang praktis ada pexx card y...,negative
1,aplikasi online yg ada fitur mobil pick up nya...,neutral
2,aku lagi survey data tentang seberapa sering k...,negative
3,bukan ide baru bahkan lahirnya gojek juga awal...,positive
4,pesan taksi listrik green sm kini bisa via apl...,neutral


In [7]:
import os

# Pastikan folder output ada
output_dir = "../data/processed"
os.makedirs(output_dir, exist_ok=True)

# Simpan ke CSV
df.to_csv(os.path.join(output_dir, "gojek_cleaned_labeled.csv"), index=False)

print("✅ File tersimpan: data/processed/gojek_cleaned_labeled.csv")


✅ File tersimpan: data/processed/gojek_cleaned_labeled.csv
