<a href="https://colab.research.google.com/github/Syamabbas/Logistics-Customer-Insights-Tool/blob/main/Analysis_Sentiment_Review_Logistik.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas Sastrawi openpyxl

Collecting Sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl.metadata (909 bytes)
Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Sastrawi
Successfully installed Sastrawi-1.0.1


In [2]:
import numpy
import pandas as pd
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from collections import defaultdict

In [3]:
# ===============================
# 1. LOAD DATA
# ===============================

# Source workbook on Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive — this
# notebook never mounts it itself; confirm before a fresh "Run all".
INPUT_PATH = "/content/drive/MyDrive/Google Collab/Analysis Sentiment/Blended data/Blend Jan 2026.xlsx"

df = pd.read_excel(INPUT_PATH)
TEXT_COL = "clean_text"   # adjust if the column has a different name

# Cast real values to str but leave missing values missing. A plain
# astype(str) turns NaN into the literal text "nan", which would then be
# treated as a real review downstream and written to the output workbook.
df[TEXT_COL] = df[TEXT_COL].map(
    lambda value: value if pd.isna(value) else str(value)
)
df.head()

Unnamed: 0,Region,Username,Waktu,Komentar,tanggal_review,clean_text,label,confidence,positive_word,negative_word
0,"Bali, Gianyar",sherly marco,3 bulan lalu,"barang saya kurang 1 koli, katanya dikirim bes...",2025-09-10,barang saya kurang 1 koli katanya dikirim beso...,negatif,2,,"tidak sampai, nunggu"
1,"Bali, Gianyar",Coco Gates,4 minggu lalu,"Barang sudah 4 hari ngak diantar antar,ngak ad...",2025-11-12,barang sudah 4 hari tidak diantar antar tidak ...,negatif,2,,"tidak becus, tidak diantar"
2,"Bali, Gianyar",Air Bander,10 bulan lalu,"Ekspedisi amburadul, sistem kacau, tanya posis...",2025-02-10,ekspedisi amburadul sistem kacau tanya posisi ...,negatif,4,,"ambil sendiri, amburadul, kacau, kapok"
3,"Bali, Gianyar",Ayu Juniati,11 bulan lalu,"padahal udah langganan sama sentral,,paket dar...",2025-01-10,padahal udah langganan sama sentral paket dari...,positif,1,langganan,
4,"Bali, Gianyar",duwi marjiono,2 bulan lalu,Bagus baik ramah fast response,2025-10-10,bagus baik ramah fast response,positif,3,"bagus, baik, bagus",


In [4]:
# ===============================
# 2. Aspek
# ===============================

# The aspect dictionary is read as CSV from one tab of a Google Sheet.
# The sheet id and tab name are kept separate so either can be changed
# without editing the URL by hand.
SHEET_ID = "12ypDLePiU0f-50wsfsQ8N21aWLjmeb8q5k9L8tn4YIU"
SHEET_NAME = "Kamus_Aspek"
url = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}"

aspect_keywords = pd.read_csv(url)

# Fail here with a clear message instead of a KeyError several cells later:
# the rest of the notebook indexes these two columns by name.
missing_columns = {"Aspect", "Keyword"} - set(aspect_keywords.columns)
if missing_columns:
    raise ValueError(
        f"Sheet '{SHEET_NAME}' is missing expected column(s): {sorted(missing_columns)}"
    )

aspect_keywords

Unnamed: 0,Aspect,Keyword
0,Waktu Pengiriman,cepat
1,Waktu Pengiriman,cepet
2,Waktu Pengiriman,lama
3,Waktu Pengiriman,lambat
4,Waktu Pengiriman,telat
...,...,...
316,Layanan Bisnis,hitungan
317,Komplain,cukup
318,Waktu Pengiriman,belum ada diantar
319,Waktu Pengiriman,tidak datang


In [5]:
# ===============================
# 3. PRIORITY ORDER
# ===============================

# Tie-break order for the classifier: when several aspects reach the same
# top score, the one listed earliest here is reported as the priority aspect.
ASPECT_PRIORITY_ORDER = [
    "Waktu Pengiriman", "Penanganan Barang",
    "Layanan SDM", "Layanan Sistem",
    "Layanan Bisnis", "Komplain",
]

In [6]:
# ===============================
# 5. STEMMING
# ===============================

# Build the stemmer once at cell level; stem_text() reuses this single
# instance on every call instead of constructing a new one.
factory = StemmerFactory()
stemmer = factory.create_stemmer()


def stem_text(text):
    """Return `text` after running it through the shared `stemmer`."""
    stemmed = stemmer.stem(text)
    return stemmed


In [7]:
# ===============================
# 6. PREPROCESS KEYWORDS
# ===============================
def df_to_aspect_dict(df):
    """Turn the aspect dictionary table into ``{aspect: [keyword, ...]}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``Aspect`` column and a ``Keyword`` column, one
        keyword per row.

    Returns
    -------
    dict
        Aspect name mapped to its keywords, in row order. Keywords are
        lower-cased with surrounding/repeated whitespace collapsed; aspect
        names are stripped. Rows where either cell is missing or blank are
        skipped.
    """
    aspect_dict = {}
    for aspect, keyword in zip(df["Aspect"], df["Keyword"]):
        if pd.isna(aspect) or pd.isna(keyword):
            continue

        # str() guards against cells that were parsed as numbers (which have
        # no .lower()); split/join removes stray spaces that would otherwise
        # stop the keyword from ever matching as a whole word.
        aspect_name = str(aspect).strip()
        keyword_text = " ".join(str(keyword).lower().split())
        if not aspect_name or not keyword_text:
            continue

        aspect_dict.setdefault(aspect_name, []).append(keyword_text)
    return aspect_dict

# Convert only while `aspect_keywords` is still the raw table. The converter
# expects a DataFrame, so re-running this cell after a successful conversion
# would otherwise feed it the dict and raise.
if isinstance(aspect_keywords, pd.DataFrame):
    aspect_keywords = df_to_aspect_dict(aspect_keywords)

In [8]:
# ===============================
# 7. ASPECT CLASSIFIER + PRIORITY
# ===============================

def classify_aspect_with_priority(text, aspect_dict, priority_order, stem_fn=None):
    """Score a review against every aspect and pick one priority aspect.

    Each keyword that occurs as a whole word/phrase in the review adds one
    point to its aspect (a keyword counts once, however often it occurs).

    Parameters
    ----------
    text : str
        Review text. Missing values (NaN/None) are reported as unclassified.
    aspect_dict : dict
        ``{aspect: [keyword, ...]}`` with lower-case keywords.
    priority_order : sequence of str
        Aspect names, most important first; used to break ties between
        aspects that share the highest score.
    stem_fn : callable, optional
        Function mapping a string to its stemmed form. Defaults to the
        notebook-level ``stem_text``.

    Returns
    -------
    tuple of (str, str)
        ``(all_aspects, priority_aspect)``. ``all_aspects`` is every matched
        aspect, sorted and joined with ``"; "``. Both values are
        ``"Tidak Terklasifikasi"`` when nothing matches.
    """
    unclassified = "Tidak Terklasifikasi"

    if pd.isna(text):
        return unclassified, unclassified

    if stem_fn is None:
        stem_fn = stem_text

    text = str(text)
    # Keywords are stored as written in the dictionary, not stemmed. Searching
    # only the stemmed review would therefore miss any keyword whose surface
    # form changes under stemming (for example a stemmer reducing "diantar"
    # to "antar"), so both the original wording and the stemmed form are
    # searched.
    haystacks = (text.lower(), stem_fn(text))

    aspect_scores = {}
    for aspect, keywords in aspect_dict.items():
        score = 0
        for kw in keywords:
            pattern = r"\b" + re.escape(kw) + r"\b"
            # any() keeps a keyword worth one point even if it is found in
            # both the raw and the stemmed text.
            if any(re.search(pattern, haystack) for haystack in haystacks):
                score += 1
        if score > 0:
            aspect_scores[aspect] = score

    if not aspect_scores:
        return unclassified, unclassified

    # multi-aspect
    all_aspects = "; ".join(sorted(aspect_scores))

    # priority logic: highest score wins, ties resolved by priority_order
    max_score = max(aspect_scores.values())
    top_aspects = [a for a, s in aspect_scores.items() if s == max_score]

    for aspect in priority_order:
        if aspect in top_aspects:
            return all_aspects, aspect

    # The winning aspect is not listed in priority_order (e.g. a new aspect
    # was added to the dictionary). Report it rather than labelling a review
    # that clearly matched something as unclassified; sorted() keeps the
    # choice deterministic.
    return all_aspects, sorted(top_aspects)[0]


In [9]:
# ===============================
# 8. APPLY KE DATAFRAME
# ===============================
# Classify every review in one pass and collect plain lists. This avoids
# building a pandas Series per row and does not rely on apply() inferring a
# two-column result from the rows it happens to see.
aspect_all_values = []
aspect_priority_values = []
for review_text in df[TEXT_COL]:
    all_aspects, priority_aspect = classify_aspect_with_priority(
        review_text,
        aspect_keywords,
        ASPECT_PRIORITY_ORDER,
    )
    aspect_all_values.append(all_aspects)
    aspect_priority_values.append(priority_aspect)

# Lists are assigned positionally, so the row order of df is preserved.
df["Aspek"] = aspect_all_values
df["Aspek_Priority"] = aspect_priority_values

In [10]:
# ===============================
# 9. SAVE OUTPUT
# ===============================
# Destination workbook; the frame is written without its index column.
OUTPUT_PATH = "/content/drive/MyDrive/Google Collab/Analysis Sentiment/Final/Hasil Analysis Jan 2026.xlsx"
df.to_excel(OUTPUT_PATH, index=False)

print("✅ Aspect & Aspect Priority berhasil ditambahkan")

✅ Aspect & Aspect Priority berhasil ditambahkan
