In [1]:
import pickle
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Locations of the trained artifacts on disk.
MODEL_PATH = r'C:\Latihan_Python\casptone-project-dicoding_nihongonavigator\best_model_lstm.h5'
TOKENIZER_PATH = r'C:\Latihan_Python\casptone-project-dicoding_nihongonavigator\tokenizer_lstm.pkl'
LABEL_ENCODER_PATH = r'C:\Latihan_Python\casptone-project-dicoding_nihongonavigator\label_encoder.pkl'

# Restore the saved Keras model.
model = load_model(MODEL_PATH)

# Restore the pickled tokenizer.
# NOTE(review): pickle.load executes arbitrary code; only load trusted files.
with open(TOKENIZER_PATH, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Restore the pickled label encoder (optional; used to map predicted class
# indices back to labels such as 'positive').
with open(LABEL_ENCODER_PATH, 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)



In [3]:
def predict_lstm_sentiment(df, model, tokenizer, max_len=200, encoder=None):
    """Predict a sentiment label for every row of ``df``.

    The texts in ``df['text_final_stemmed']`` are converted to integer
    sequences with ``tokenizer``, post-padded to ``max_len``, scored with
    ``model`` and the arg-max class index of each row is mapped back to a
    label through the label encoder.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'text_final_stemmed'`` column.
    model : object
        Exposes ``predict(padded)`` returning one score vector per row.
    tokenizer : object
        Exposes ``texts_to_sequences(texts)``.
    max_len : int, default 200
        Length every sequence is padded/truncated to.
        NOTE(review): presumably has to match the length used at training
        time (same for ``padding='post'``) -- confirm against the training
        notebook.
    encoder : object, optional
        Exposes ``inverse_transform(indices)``. When omitted, the
        module-level ``label_encoder`` is used, which keeps existing
        three-argument calls working.

    Returns
    -------
    pandas.DataFrame
        The *same* ``df`` object, modified in place with a new
        ``'sentiment_pred'`` column.
    """
    if encoder is None:
        # Backward-compatible fallback to the notebook-level encoder.
        encoder = label_encoder

    # Missing or non-string cells would make the tokenizer fail, so normalise
    # every entry to a plain string first (missing text becomes '').
    texts = df['text_final_stemmed'].fillna('').astype(str).tolist()

    if not texts:
        # Nothing to score: skip the model call and add an empty column.
        df['sentiment_pred'] = np.array([], dtype=object)
        return df

    # Convert texts to integer sequences of uniform length.
    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=max_len, padding='post')

    # Score and pick the highest-scoring class per row.
    probs = model.predict(padded)
    pred_indices = np.argmax(probs, axis=1)

    # Map class indices back to their labels and attach them to the frame.
    df['sentiment_pred'] = encoder.inverse_transform(pred_indices)
    return df


In [4]:
# Read the feature-annotated review CSV into a DataFrame.
kanjistudy_features_csv = r"C:\Latihan_Python\casptone-project-dicoding_nihongonavigator\preprocessed_with_features\review_kanjistudy_processedtext_with_features.csv"
df_RevKanjistudy_featured = pd.read_csv(kanjistudy_features_csv)

In [5]:
# Label every review with the LSTM model; the returned frame carries the
# predictions in its 'sentiment_pred' column.
df_RevKanjistudy_featured_labeled = predict_lstm_sentiment(
    df=df_RevKanjistudy_featured,
    model=model,
    tokenizer=tokenizer,
)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 408ms/step


In [6]:
# Persist the labelled reviews (without the index column).
kanjistudy_labeled_csv = r"C:\Latihan_Python\casptone-project-dicoding_nihongonavigator\featured_labeled_lstm_data\review_kanjistudy_processedtext_with_features_labeled.csv"
df_RevKanjistudy_featured_labeled.to_csv(kanjistudy_labeled_csv, index=False)

In [7]:
from collections import defaultdict
import ast

In [8]:
def safe_parse_fitur(x):
    """Coerce a raw feature cell into a list of features.

    Parameters
    ----------
    x : object
        A list (returned unchanged), a string, or anything else
        (e.g. ``None`` / NaN).

    Returns
    -------
    list
        * ``x`` itself when it already is a list;
        * ``[]`` for non-string values and for blank strings;
        * the parsed elements when the string is a Python list/tuple/set
          literal;
        * ``[value]`` when the string is a quoted string literal;
        * ``[x]`` (the raw string as a single element) for anything else,
          including text that is not a valid Python literal.
    """
    if isinstance(x, list):
        return x
    if not isinstance(x, str):
        return []

    text = x.strip()
    if not text:
        # A blank cell carries no feature at all.
        return []

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal: treat the whole string as one feature.
        # Only the errors literal_eval is documented to raise are caught, so
        # KeyboardInterrupt and genuine bugs still propagate.
        return [x]

    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)
    if isinstance(parsed, str):
        return [parsed]
    # Numbers, dicts, None, ...: keep the original text as a single feature so
    # the caller always receives a list.
    return [x]

# Apply the parser to the raw feature column and store the result as 'fitur'.
raw_feature_column = df_RevKanjistudy_featured_labeled['fitur_terdeteksi']
df_RevKanjistudy_featured_labeled['fitur'] = raw_feature_column.apply(safe_parse_fitur)

In [9]:
def _split_feature_cell(raw):
    """Normalise one 'fitur_terdeteksi' cell into a list of feature names."""
    if isinstance(raw, str):
        text = raw.strip()
        items = None
        if text.startswith('[') and text.endswith(']'):
            # The cell looks like a stringified Python list: parse it rather
            # than splitting on commas, which would leave brackets and quotes
            # inside the keys.
            try:
                parsed = ast.literal_eval(text)
            except (ValueError, SyntaxError):
                parsed = None
            if isinstance(parsed, (list, tuple)):
                items = parsed
        if items is None:
            # Plain comma-separated text.
            items = text.split(',')
    elif isinstance(raw, (list, tuple, set)):
        items = raw
    else:
        # None / NaN / other scalars carry no features.
        return []

    cleaned = []
    for item in items:
        if isinstance(item, str):
            item = item.strip()
            if not item:
                # Skip empty names such as those produced by "a,,b" or "".
                continue
        cleaned.append(item)
    return cleaned


def aggregate_sentiment_by_feature_split(df):
    """Count positive and negative predictions per detected feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``'sentiment_pred'`` column and a ``'fitur_terdeteksi'``
        column. A feature cell may be a list, a comma-separated string, a
        stringified list literal, or missing.

    Returns
    -------
    collections.defaultdict
        ``{feature: {'positive': int, 'negative': int}}``. Rows whose
        sentiment is neither ``'positive'`` nor ``'negative'`` are ignored,
        as are rows without any feature.
    """
    result = defaultdict(lambda: {'positive': 0, 'negative': 0})

    for sentiment, fitur_raw in zip(df['sentiment_pred'], df['fitur_terdeteksi']):
        if sentiment not in ('positive', 'negative'):
            # Other labels are not tallied and must not create empty entries.
            continue
        for fitur in _split_feature_cell(fitur_raw):
            result[fitur][sentiment] += 1

    return result


In [10]:
import json

# Tally the positive/negative predictions per feature.
result_dict = aggregate_sentiment_by_feature_split(df_RevKanjistudy_featured_labeled)

# Write the aggregate to a JSON file.
aggregate_json_path = r'C:\Latihan_Python\casptone-project-dicoding_nihongonavigator\final_labeled_agregat_data\hasil_sentimen_kanjistudy_agregat.json'
with open(aggregate_json_path, 'w', encoding='utf-8') as json_file:
    json.dump(result_dict, json_file, ensure_ascii=False, indent=2)
