# 1. Import library

In [1]:
# 1. Import libraries (sama seperti sebelumnya)
from empath import Empath
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import re
import string
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import contractions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout, Layer
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from sklearn.metrics import classification_report, multilabel_confusion_matrix, hamming_loss
from tensorflow.keras.metrics import AUC
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from deep_translator import GoogleTranslator
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Inisialisasi
lexicon = Empath()
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

custom_stopwords = {'like', 'get', 'go', 'know', 'would', 'could', 'also'}
stop_words.update(custom_stopwords)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\laila\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\laila\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\laila\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


# 2. Data Loading

In [3]:
df = pd.read_csv("dataset.csv")
df.head()

Unnamed: 0,statement,label
0,"Final doctor appointment tomorrow, tired of co...",Anxiety
1,Anyone have bone or muscle pain that was stres...,Anxiety
2,Listening to your body? I'm curious how those ...,Anxiety
3,Weekly /r/HealthAnxiety Challenge - Exercise A...,Anxiety
4,This is killing me So i had a bacterial stomac...,Anxiety


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4810 entries, 0 to 4809
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   statement  4810 non-null   object
 1   label      4810 non-null   object
dtypes: object(2)
memory usage: 75.3+ KB


# 3. Preprocessing

## 3.1. Cleaning n lemmetizing

In [5]:
def clean_text(text):
    text = text.lower()  # Mengubah teks menjadi huruf kecil 
    text = contractions.fix(text)  # Memperbaiki kontraksi 
    text = re.sub(r'http\S+|www\S+', '', text)  # Menghapus URL 
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # Menghapus karakter non-ASCII 
    text = re.sub(r'\d+', '', text)  # Menghapus angka 
    text = re.sub(rf"[{re.escape(string.punctuation)}]", '', text)  # Menghapus tanda baca 
    text = re.sub(r'\s+', ' ', text).strip()  # Ganti multiple spasi dengan 1 spasi)
    return text  # Mengembalikan teks yang sudah dibersihkan

def preprocess_text(text):
    text = clean_text(text) 
    words = text.split()  # Memisahkan teks menjadi list kata 
    processed_words = []  # List untuk menyimpan kata yang sudah diproses
    for word in words:
        if word not in stop_words and len(word) > 2:  # Filter: hapus stopword dan kata dengan panjang ≤ 2
            lemma = lemmatizer.lemmatize(word, pos='v')  # Lemmatisasi sebagai verb (e.g., "running" → "run")
            lemma = lemmatizer.lemmatize(lemma, pos='n')  # Lemmatisasi sebagai noun (e.g., "wolves" → "wolf")
            lemma = lemmatizer.lemmatize(lemma, pos='a')  # Lemmatisasi sebagai adjective (e.g., "better" → "good")
            lemma = lemmatizer.lemmatize(lemma, pos='r')  # Lemmatisasi sebagai adverb (e.g., "quickly" → "quick")
            processed_words.append(lemma)  # Tambahkan kata yang sudah dilematisasi ke list
    return ' '.join(processed_words)  # Gabungkan list kata menjadi teks dengan spasi

df['statement'] = df['statement'].astype(str)
df['cleaned_statement'] = df['statement'].apply(preprocess_text)


## 3.2. Emotion Extraction with Emapth

In [54]:
# Daftar emosi yang digunakan
emotions = ['anxiety', 'fear', 'nervousness', 'sadness', 'suffering', 'shame']

# Mapping keyword per emosi
keyword_emotion_map = {
    'anxiety': ['anxious', 'nervous', 'overwhelmed', 'restless', 'panic', 'worried'],
    'fear': ['fear', 'scared', 'terrified', 'afraid', 'panic'],
    'nervousness': ['nervous', 'dizzy', 'shaky', 'jittery', 'restless'],
    'sadness': ['sad', 'cry', 'heartbroken', 'hopeless', 'misery', 'despair', 'alone'],
    'suffering': ['suffering', 'pain', 'hurt', 'agony', 'tired'],
    'shame': ['shame', 'guilt', 'embarrassed', 'worthless', 'regret']
}

In [55]:
# Fungsi ambil skor Empath
def label_from_empath(text):
    scores = lexicon.analyze(text, categories=emotions, normalize=True)
    return scores

# Fungsi boosting skor Empath berdasarkan keyword
def boost_empath_scores_with_keywords(text, empath_scores, keyword_emotion_map):
    text_lower = text.lower()
    boosted_scores = empath_scores.copy()

    for emotion, keywords in keyword_emotion_map.items():
        for kw in keywords:
            if re.search(rf'\b{kw}\b', text_lower):
                boosted_scores[emotion] = boosted_scores.get(emotion, 0) + 0.15  # nilai boosting bisa diatur

    return boosted_scores



In [56]:
# 1. Skor awal dari Empath
df['empath_scores'] = df['cleaned_statement'].apply(label_from_empath)

# 2. Boost dengan keyword
df['boosted_scores'] = df.apply(
    lambda row: boost_empath_scores_with_keywords(
        row['cleaned_statement'], row['empath_scores'], keyword_emotion_map
    ), axis=1
)



In [57]:
df

Unnamed: 0,statement,label,cleaned_statement,anxiety,fear,nervousness,sadness,suffering,shame,empath_scores,boosted_scores
0,"Final doctor appointment tomorrow, tired of co...",Anxiety,final doctor appointment tomorrow tire constan...,2,2,2,0,0,0,"{'anxiety': 0.0, 'fear': 0.02097902097902098, ...","{'anxiety': 0.15, 'fear': 0.17097902097902098,..."
1,Anyone have bone or muscle pain that was stres...,Anxiety,anyone bone muscle pain stressanxiety induce j...,0,2,0,0,3,2,"{'anxiety': 0.0, 'fear': 0.14285714285714285, ...","{'anxiety': 0.0, 'fear': 0.14285714285714285, ..."
2,Listening to your body? I'm curious how those ...,Anxiety,listen body curious health anxiety listen body...,0,1,1,1,0,0,"{'anxiety': 0.0, 'fear': 0.047619047619047616,...","{'anxiety': 0.0, 'fear': 0.047619047619047616,..."
3,Weekly /r/HealthAnxiety Challenge - Exercise A...,Anxiety,weekly rhealthanxiety challenge exercise littl...,0,2,2,2,0,0,"{'anxiety': 0.0, 'fear': 0.07317073170731707, ...","{'anxiety': 0.0, 'fear': 0.22317073170731705, ..."
4,This is killing me So i had a bacterial stomac...,Anxiety,kill bacterial stomach infectionpain right sid...,0,1,0,2,2,0,"{'anxiety': 0.0, 'fear': 0.029850746268656716,...","{'anxiety': 0.0, 'fear': 0.029850746268656716,..."
...,...,...,...,...,...,...,...,...,...,...,...
4805,Nobody takes me seriously I’ve (24M) dealt wit...,Anxiety,nobody take seriously deal depressionanxiety y...,0,0,1,2,0,1,"{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0...","{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0..."
4806,"selfishness ""I don't feel very good, it's lik...",Anxiety,selfishness feel good belong world think ever ...,0,0,0,1,2,2,"{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0...","{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0..."
4807,Is there any way to sleep better? I can't slee...,Anxiety,way sleep good cannot sleep night med help,0,0,0,0,0,0,"{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0...","{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0..."
4808,"Public speaking tips? Hi, all. I have to give ...",Anxiety,public speak tip give presentation work next w...,2,2,2,0,0,0,"{'anxiety': 0.0, 'fear': 0.08823529411764706, ...","{'anxiety': 0.15, 'fear': 0.23823529411764705,..."


# 4. Modeling

## 4.1. Preprocessing data

In [58]:
# 4. Modeling
# 4.1. Preprocessing data untuk Model
X = df['cleaned_statement'].values

# --- PERUBAHAN PENTING DI SINI ---
# Target y adalah skor kontinu dari 'boosted_scores'
y_df_continuous = pd.DataFrame(df['boosted_scores'].tolist())
y = y_df_continuous[emotions].values # Ini adalah target kontinu Anda!

# Parameter Tokenizer dan Padding
vocab_size = 10000
oov_token = '<OOV>'
maxlen = 100

# Tokenisasi
print("Melakukan tokenisasi...")
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)

# Padding
print("Melakukan padding...")
X_pad = pad_sequences(X_seq, maxlen=maxlen, padding='post', truncating='post')

# Pembagian Data (Train, Validation, Test)
X_train_full, X_test, y_train_full, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.25, random_state=42)

print(f"Ukuran training set: {X_train.shape[0]} sampel")
print(f"Ukuran validation set: {X_val.shape[0]} sampel")
print(f"Ukuran test set: {X_test.shape[0]} sampel")

num_emotions = y.shape[1]

Melakukan tokenisasi...
Melakukan padding...
Ukuran training set: 2886 sampel
Ukuran validation set: 962 sampel
Ukuran test set: 962 sampel


In [59]:


# --- Definisi Model (Tidak Berubah dari sebelumnya, sudah cocok untuk regresi) ---
embedding_dim = 128

input_layer = Input(shape=(maxlen,))
embedding_layer = Embedding(input_dim=vocab_size,
                            output_dim=embedding_dim,
                            input_length=maxlen)(input_layer)

bilstm_1 = Bidirectional(LSTM(128, return_sequences=True))(embedding_layer)
dropout_1 = Dropout(0.5)(bilstm_1)

bilstm_2 = Bidirectional(LSTM(64))(dropout_1)
dropout_2 = Dropout(0.5)(bilstm_2)

dense_hidden = Dense(64, activation='relu')(dropout_2)

# Output Layer: activation='linear' dan loss=MeanSquaredError() adalah BENAR untuk regresi
output_layer = Dense(num_emotions, activation='linear')(dense_hidden)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile Model
model.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=1e-3),
    metrics=[MeanSquaredError(), 'mae']
)

print("\n--- Model Summary ---")
model.summary()





--- Model Summary ---


In [60]:

# --- Callbacks untuk Pelatihan Optimal ---
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

model_checkpoint = ModelCheckpoint(
    'basic_emotion_regression_model.h5',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1
)



In [61]:
# --- Pelatihan Model ---
print("\n--- Mulai Pelatihan Model ---")
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, model_checkpoint]
)



--- Mulai Pelatihan Model ---
Epoch 1/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step - loss: 0.0086 - mae: 0.0660 - mean_squared_error: 0.0086
Epoch 1: val_loss improved from inf to 0.00558, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 319ms/step - loss: 0.0085 - mae: 0.0659 - mean_squared_error: 0.0085 - val_loss: 0.0056 - val_mae: 0.0539 - val_mean_squared_error: 0.0065
Epoch 2/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 269ms/step - loss: 0.0051 - mae: 0.0513 - mean_squared_error: 0.0051
Epoch 2: val_loss improved from 0.00558 to 0.00445, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 309ms/step - loss: 0.0051 - mae: 0.0513 - mean_squared_error: 0.0051 - val_loss: 0.0045 - val_mae: 0.0477 - val_mean_squared_error: 0.0053
Epoch 3/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step - loss: 0.0034 - mae: 0.0415 - mean_squared_error: 0.0034
Epoch 3: val_loss improved from 0.00445 to 0.00425, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 296ms/step - loss: 0.0034 - mae: 0.0415 - mean_squared_error: 0.0034 - val_loss: 0.0042 - val_mae: 0.0469 - val_mean_squared_error: 0.0049
Epoch 4/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step - loss: 0.0029 - mae: 0.0386 - mean_squared_error: 0.0029
Epoch 4: val_loss improved from 0.00425 to 0.00385, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 293ms/step - loss: 0.0029 - mae: 0.0386 - mean_squared_error: 0.0029 - val_loss: 0.0038 - val_mae: 0.0418 - val_mean_squared_error: 0.0044
Epoch 5/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274ms/step - loss: 0.0022 - mae: 0.0334 - mean_squared_error: 0.0022
Epoch 5: val_loss improved from 0.00385 to 0.00345, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 315ms/step - loss: 0.0022 - mae: 0.0334 - mean_squared_error: 0.0022 - val_loss: 0.0035 - val_mae: 0.0407 - val_mean_squared_error: 0.0041
Epoch 6/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step - loss: 0.0018 - mae: 0.0305 - mean_squared_error: 0.0018
Epoch 6: val_loss improved from 0.00345 to 0.00341, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 298ms/step - loss: 0.0018 - mae: 0.0305 - mean_squared_error: 0.0018 - val_loss: 0.0034 - val_mae: 0.0376 - val_mean_squared_error: 0.0041
Epoch 7/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step - loss: 0.0017 - mae: 0.0295 - mean_squared_error: 0.0017
Epoch 7: val_loss improved from 0.00341 to 0.00330, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 293ms/step - loss: 0.0017 - mae: 0.0295 - mean_squared_error: 0.0017 - val_loss: 0.0033 - val_mae: 0.0378 - val_mean_squared_error: 0.0041
Epoch 8/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step - loss: 0.0015 - mae: 0.0271 - mean_squared_error: 0.0015
Epoch 8: val_loss improved from 0.00330 to 0.00323, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 307ms/step - loss: 0.0015 - mae: 0.0271 - mean_squared_error: 0.0015 - val_loss: 0.0032 - val_mae: 0.0374 - val_mean_squared_error: 0.0041
Epoch 9/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step - loss: 0.0014 - mae: 0.0263 - mean_squared_error: 0.0014
Epoch 9: val_loss did not improve from 0.00323
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 293ms/step - loss: 0.0014 - mae: 0.0263 - mean_squared_error: 0.0014 - val_loss: 0.0033 - val_mae: 0.0363 - val_mean_squared_error: 0.0040
Epoch 10/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step - loss: 0.0012 - mae: 0.0247 - mean_squared_error: 0.0012
Epoch 10: val_loss improved from 0.00323 to 0.00319, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 293ms/step - loss: 0.0012 - mae: 0.0247 - mean_squared_error: 0.0012 - val_loss: 0.0032 - val_mae: 0.0358 - val_mean_squared_error: 0.0040
Epoch 11/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step - loss: 0.0011 - mae: 0.0237 - mean_squared_error: 0.0011
Epoch 11: val_loss did not improve from 0.00319
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 290ms/step - loss: 0.0011 - mae: 0.0238 - mean_squared_error: 0.0011 - val_loss: 0.0034 - val_mae: 0.0381 - val_mean_squared_error: 0.0042
Epoch 12/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 255ms/step - loss: 0.0011 - mae: 0.0244 - mean_squared_error: 0.0011
Epoch 12: val_loss did not improve from 0.00319
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 289ms/step - loss: 0.0011 - mae: 0.0244 - mean_squared_error: 0.0011 - val_loss: 0.0032 - val_mae: 0.0362 - val_mean_squared_error: 0.0040
Ep



[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 294ms/step - loss: 9.9118e-04 - mae: 0.0226 - mean_squared_error: 9.9140e-04 - val_loss: 0.0031 - val_mae: 0.0345 - val_mean_squared_error: 0.0039
Epoch 15/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 255ms/step - loss: 9.9652e-04 - mae: 0.0224 - mean_squared_error: 9.9651e-04
Epoch 15: val_loss did not improve from 0.00308
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 290ms/step - loss: 9.9635e-04 - mae: 0.0224 - mean_squared_error: 9.9634e-04 - val_loss: 0.0032 - val_mae: 0.0354 - val_mean_squared_error: 0.0039
Epoch 16/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step - loss: 9.9407e-04 - mae: 0.0223 - mean_squared_error: 9.9423e-04
Epoch 16: val_loss did not improve from 0.00308
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 288ms/step - loss: 9.9367e-04 - mae: 0.0223 - mean_squared_error: 9.9400e-04 - val_loss: 0.0032 - val_mae: 0.

In [62]:
# --- Evaluasi Model ---
print("\n--- Evaluasi Model pada Test Set ---")
try:
    # Memuat model terbaik
    # Perhatikan: Keras 3.x dan TensorFlow terbaru mungkin lebih suka format .keras
    # Jika ada error, coba ganti menjadi model = tf.keras.models.load_model('basic_emotion_regression_model.keras')
    # dan saat menyimpan: model_checkpoint = ModelCheckpoint('basic_emotion_regression_model.keras', ...)
    best_model = tf.keras.models.load_model('basic_emotion_regression_model.h5', compile=False)
    best_model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=1e-3), metrics=[MeanSquaredError(), 'mae'])
except Exception as e:
    print(f"Gagal memuat model terbaik, menggunakan model yang terakhir dilatih: {e}")
    best_model = model

loss, mse, mae = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {mse:.4f}")
print(f"Test MAE: {mae:.4f}")




--- Evaluasi Model pada Test Set ---
Test MSE: 0.0031
Test MAE: 0.0351


In [63]:

# --- Contoh Prediksi pada Test Set (dengan pasca-pemrosesan ke level) ---
print("\n--- Contoh Prediksi pada Test Set ---")
emotion_labels = ['anxiety', 'fear', 'nervousness', 'sadness', 'suffering', 'shame']

num_samples_to_show = min(5, X_test.shape[0])
for i in range(num_samples_to_show):
    sample_text_input = X_test[i:i+1]
    true_raw_scores = y_test[i] # Ground truth adalah skor kontinu

    # --- Pasca-pemrosesan untuk mendapatkan 3 emosi dominan dalam level ---
    predicted_raw_scores = best_model.predict(sample_text_input, verbose=0)[0]

    # Dapatkan 3 emosi dominan dan levelnya
    final_emotion_levels = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, predicted_raw_scores)}
    )
    # Untuk true emotions, kita juga perlu mengonversi dari raw score ke level 0-3 dengan logika top 3
    true_emotion_levels = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, true_raw_scores)}
    )

    # Dapatkan teks asli dari dataframe (ini butuh mapping indeks)
    original_text_idx = np.where((X_pad == sample_text_input).all(axis=1))[0]
    original_text = df['cleaned_statement'].iloc[original_text_idx[0]] if len(original_text_idx) > 0 else "N/A"

    print(f"\n--- Sampel {i+1} ---")
    print(f"Teks Asli: '{original_text}'")
    print(f"Emosi Sebenarnya (Level 0-3): {true_emotion_levels}") # Kini true_emotion_levels
    print(f"Prediksi Emosi (Level 0-3): {final_emotion_levels}")
    # print(f"Raw Prediksi (Float): {predicted_raw_scores}") # Opsional: lihat skor mentah




--- Contoh Prediksi pada Test Set ---

--- Sampel 1 ---
Teks Asli: 'dream one move new unit anjrit worry'
Emosi Sebenarnya (Level 0-3): {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 2}
Prediksi Emosi (Level 0-3): {'anxiety': 1, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 2}

--- Sampel 2 ---
Teks Asli: 'symptom without everyone think possible people symptom long time headache fatigue muscle ache without'
Emosi Sebenarnya (Level 0-3): {'anxiety': 0, 'fear': 0, 'nervousness': 0, 'sadness': 0, 'suffering': 2, 'shame': 0}
Prediksi Emosi (Level 0-3): {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 1, 'suffering': 2, 'shame': 0}

--- Sampel 3 ---
Teks Asli: 'brain tumor concern feel super anxious lately think worth vent moment year old feel anxious idea brain tumor headache presentish minor call bad headache life feel see tiny streak light huge glare anything mess vision feel see tiny streak sometimes sometimes feel double 

In [64]:
# --- Fungsi untuk Prediksi Teks Baru (di luar dataset) ---
def predict_emotion_for_new_text(text, model, tokenizer, maxlen, emotion_labels):
    processed_text = preprocess_text(text) # Gunakan preprocessing yang sama
    seq = tokenizer.texts_to_sequences([processed_text])
    padded_seq = pad_sequences(seq, maxlen=maxlen, padding='post', truncating='post')

    raw_prediction = model.predict(padded_seq, verbose=0)[0]

    # Pasca-pemrosesan untuk mendapatkan 3 emosi dominan dalam level
    predicted_dict = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, raw_prediction)}
    )
    return predicted_dict

print("\n--- Contoh Prediksi untuk Teks Baru ---")
new_texts = [
    "I am truly happy today, life is great!",
    "This situation makes me feel so helpless and trapped.",
    "I have an exam soon, feeling a mix of nerves and excitement.",
    "The news today filled me with deep sorrow.",
    "I endured a lot, but I am recovering.",
    "My mistake from yesterday still causes me a lot of shame."
]

for text in new_texts:
    predicted_new_text = predict_emotion_for_new_text(text, best_model, tokenizer, maxlen, emotion_labels)
    print(f"Teks: '{text}'")
    print(f"Prediksi: {predicted_new_text}\n")


--- Contoh Prediksi untuk Teks Baru ---
Teks: 'I am truly happy today, life is great!'
Prediksi: {'anxiety': 2, 'fear': 0, 'nervousness': 2, 'sadness': 2, 'suffering': 0, 'shame': 0}

Teks: 'This situation makes me feel so helpless and trapped.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 2, 'shame': 2}

Teks: 'I have an exam soon, feeling a mix of nerves and excitement.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 1, 'shame': 2}

Teks: 'The news today filled me with deep sorrow.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 2, 'suffering': 0, 'shame': 2}

Teks: 'I endured a lot, but I am recovering.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 1, 'shame': 2}

Teks: 'My mistake from yesterday still causes me a lot of shame.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 1, 'suffering': 0, 'shame': 2}



In [65]:
# Pastikan deep_translator sudah terinstal: pip install deep-translator
from deep_translator import GoogleTranslator # Import library GoogleTranslator
import tensorflow as tf # Pastikan TensorFlow sudah terinstal


In [67]:
# Muat kembali model terbaik jika belum di-load
try:
    best_model = tf.keras.models.load_model('basic_emotion_regression_model.h5', compile=False)
    best_model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=[tf.keras.metrics.MeanSquaredError(), 'mae'])
except Exception as e:
    print(f"Gagal memuat model terbaik, menggunakan model yang terakhir dilatih: {e}")
    best_model = model

print("\n--- Konversi Model ke TensorFlow Lite ---")
# Buat TFLite converter
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)

# --- Tambahkan konfigurasi ini untuk mengatasi ConverterError ---
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # Mengizinkan operasi built-in TFLite
    tf.lite.OpsSet.SELECT_TF_OPS    # Mengizinkan operasi TensorFlow yang tidak memiliki built-in TFLite
]
# Ini adalah flag eksperimental yang disarankan untuk dinonaktifkan
converter._experimental_lower_tensor_list_ops = False
# Opsional: Jika Anda ingin mengizinkan operasi TF yang tidak standar, bisa juga tambahkan:
# converter.allow_custom_ops = True

# Konversi model
tflite_model = converter.convert()

# Simpan model TFLite ke file
tflite_model_path = 'emotion_regression_model.tflite'
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)
print(f"Model TensorFlow Lite berhasil disimpan di: {tflite_model_path}")


--- Konversi Model ke TensorFlow Lite ---
INFO:tensorflow:Assets written to: C:\Users\laila\AppData\Local\Temp\tmp_dwu_xyf\assets


INFO:tensorflow:Assets written to: C:\Users\laila\AppData\Local\Temp\tmp_dwu_xyf\assets


Saved artifact at 'C:\Users\laila\AppData\Local\Temp\tmp_dwu_xyf'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100), dtype=tf.float32, name='input_layer_3')
Output Type:
  TensorSpec(shape=(None, 6), dtype=tf.float32, name=None)
Captures:
  2224029409232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029409424: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029408464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029412880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029410576: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029411728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029406352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029406160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029410960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2224029405200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  222402941115

In [72]:
# --- Fungsi untuk Prediksi Menggunakan Model TFLite dengan Terjemahan ---
def predict_emotion_tflite_with_translation(text, tflite_model_path, tokenizer, maxlen, emotion_labels):
    # Inisialisasi interpreter TFLite
    interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
    interpreter.allocate_tensors()

    # Dapatkan detail input dan output
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # 1. Terjemahkan teks jika bukan bahasa Inggris
    # Anda mungkin perlu logika deteksi bahasa di sini,
    # atau secara eksplisit meminta pengguna memilih bahasa input.
    # Untuk contoh ini, kita asumsikan input bisa Bahasa Indonesia dan langsung diterjemahkan.
    try:
        # Menentukan source language secara otomatis atau eksplisit 'id'
        # Target language adalah 'en' (English)
        translated_text = GoogleTranslator(source='auto', target='en').translate(text)
        print(f"Teks asli: '{text}' -> Diterjemahkan: '{translated_text}'")
    except Exception as e:
        print(f"Gagal menerjemahkan teks: {e}. Menggunakan teks asli.")
        translated_text = text # Fallback ke teks asli jika terjemahan gagal

    # 2. Pra-proses teks yang sudah diterjemahkan (atau teks asli jika gagal diterjemahkan)
    processed_text = preprocess_text(translated_text) # Gunakan preprocessing yang sama

     # 3. Tokenisasi dan Padding
    seq = tokenizer.texts_to_sequences([processed_text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=maxlen, padding='post', truncating='post')

    # --- PERUBAHAN DI SINI ---
    # Convert input data to the type expected by the TFLite model
    # Ganti dtype=np.int32 menjadi dtype=np.float32
    input_data = np.array(padded_seq, dtype=np.float32) # <-- Perubahan ini

    # 4. Set input tensor dan jalankan inferensi
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # 5. Dapatkan hasil prediksi
    raw_prediction = interpreter.get_tensor(output_details[0]['index'])[0]

    # 6. Pasca-pemrosesan untuk mendapatkan 3 emosi dominan dalam level (fungsi yang sama dari Keras model)
    predicted_dict = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, raw_prediction)}
    )
    return predicted_dict

In [73]:
# --- Contoh Penggunaan Prediksi dengan TFLite dan Terjemahan ---
print("\n--- Contoh Prediksi untuk Teks Baru (dengan TFLite dan Terjemahan) ---")
new_texts_with_id = [
    "Saya sangat bahagia hari ini, hidup ini indah!", # Indonesian
    "Situasi ini membuat saya merasa sangat tidak berdaya dan terperangkap.", # Indonesian
    "Saya akan ujian sebentar lagi, merasa gugup sekaligus bersemangat.", # Indonesian
    "Berita hari ini membuat saya sedih sekali.", # Indonesian
    "Saya sudah banyak menderita, tapi saya pulih.", # Indonesian
    "Kesalahan saya kemarin masih menyebabkan banyak rasa malu.", # Indonesian
    "I am very worried about my health.", # English (to show it still works for English)
    "Saya merasa cemas dengan masa depan.", # Indonesian
    "Sungguh menyedihkan apa yang terjadi kemarin." # Indonesian
]

# Ambil best_model dari proses sebelumnya (yang sudah di-load atau fall-back)
# emotion_labels dan maxlen juga dari definisi sebelumnya

for text in new_texts_with_id:
    predicted_emotion = predict_emotion_tflite_with_translation(text, tflite_model_path, tokenizer, maxlen, emotion_labels)
    print(f"Prediksi: {predicted_emotion}\n")


--- Contoh Prediksi untuk Teks Baru (dengan TFLite dan Terjemahan) ---
Teks asli: 'Saya sangat bahagia hari ini, hidup ini indah!' -> Diterjemahkan: 'I am very happy today, life is beautiful!'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 2, 'suffering': 0, 'shame': 1}

Teks asli: 'Situasi ini membuat saya merasa sangat tidak berdaya dan terperangkap.' -> Diterjemahkan: 'This situation made me feel very helpless and trapped.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 2, 'shame': 2}

Teks asli: 'Saya akan ujian sebentar lagi, merasa gugup sekaligus bersemangat.' -> Diterjemahkan: 'I will take an exam soon, feel nervous and excited.'
Prediksi: {'anxiety': 2, 'fear': 0, 'nervousness': 3, 'sadness': 0, 'suffering': 0, 'shame': 2}

Teks asli: 'Berita hari ini membuat saya sedih sekali.' -> Diterjemahkan: 'Today's news makes me so sad.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 2, 'suffering': 0, 'shame': 2}

T