# 1. Import library

In [33]:
# 1. Import libraries (sama seperti sebelumnya)
from empath import Empath
import pandas as pd
import numpy as np
import re
import string
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import contractions
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model # Menggunakan Model API
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dense, Dropout # Pastikan Input ada di sini
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError # Untuk regression
from tensorflow.keras.metrics import MeanAbsoluteError, MeanSquaredError as KerasMSE # Opsional: import metrik secara eksplisit
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf # Untuk tf.keras.models.load_model

In [34]:
# NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Inisialisasi
lexicon = Empath()
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

custom_stopwords = {'like', 'get', 'go', 'know', 'would', 'could', 'also'}
stop_words.update(custom_stopwords)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\laila\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\laila\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\laila\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


# 2. Data Loading

In [35]:
df = pd.read_csv("dataset.csv")
df.head()

Unnamed: 0,statement,label
0,"Final doctor appointment tomorrow, tired of co...",Anxiety
1,Anyone have bone or muscle pain that was stres...,Anxiety
2,Listening to your body? I'm curious how those ...,Anxiety
3,Weekly /r/HealthAnxiety Challenge - Exercise A...,Anxiety
4,This is killing me So i had a bacterial stomac...,Anxiety


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4810 entries, 0 to 4809
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   statement  4810 non-null   object
 1   label      4810 non-null   object
dtypes: object(2)
memory usage: 75.3+ KB


# 3. Preprocessing

## 3.1. Cleaning n lemmetizing

In [37]:
def clean_text(text):
    text = text.lower()  # Mengubah teks menjadi huruf kecil 
    text = contractions.fix(text)  # Memperbaiki kontraksi 
    text = re.sub(r'http\S+|www\S+', '', text)  # Menghapus URL 
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # Menghapus karakter non-ASCII 
    text = re.sub(r'\d+', '', text)  # Menghapus angka 
    text = re.sub(rf"[{re.escape(string.punctuation)}]", '', text)  # Menghapus tanda baca 
    text = re.sub(r'\s+', ' ', text).strip()  # Ganti multiple spasi dengan 1 spasi)
    return text  # Mengembalikan teks yang sudah dibersihkan

def preprocess_text(text):
    text = clean_text(text) 
    words = text.split()  # Memisahkan teks menjadi list kata 
    processed_words = []  # List untuk menyimpan kata yang sudah diproses
    for word in words:
        if word not in stop_words and len(word) > 2:  # Filter: hapus stopword dan kata dengan panjang ≤ 2
            lemma = lemmatizer.lemmatize(word, pos='v')  # Lemmatisasi sebagai verb (e.g., "running" → "run")
            lemma = lemmatizer.lemmatize(lemma, pos='n')  # Lemmatisasi sebagai noun (e.g., "wolves" → "wolf")
            lemma = lemmatizer.lemmatize(lemma, pos='a')  # Lemmatisasi sebagai adjective (e.g., "better" → "good")
            lemma = lemmatizer.lemmatize(lemma, pos='r')  # Lemmatisasi sebagai adverb (e.g., "quickly" → "quick")
            processed_words.append(lemma)  # Tambahkan kata yang sudah dilematisasi ke list
    return ' '.join(processed_words)  # Gabungkan list kata menjadi teks dengan spasi

df['statement'] = df['statement'].astype(str)
df['cleaned_statement'] = df['statement'].apply(preprocess_text)


## 3.2. Emotion Extraction with Emapth

In [38]:
# Daftar emosi yang digunakan
emotions = ['anxiety', 'fear', 'nervousness', 'sadness', 'suffering', 'shame']

# Mapping keyword per emosi
keyword_emotion_map = {
    'anxiety': ['anxious', 'nervous', 'overwhelmed', 'restless', 'panic', 'worried'],
    'fear': ['fear', 'scared', 'terrified', 'afraid', 'panic'],
    'nervousness': ['nervous', 'dizzy', 'shaky', 'jittery', 'restless'],
    'sadness': ['sad', 'cry', 'heartbroken', 'hopeless', 'misery', 'despair', 'alone'],
    'suffering': ['suffering', 'pain', 'hurt', 'agony', 'tired'],
    'shame': ['shame', 'guilt', 'embarrassed', 'worthless', 'regret']
}

In [39]:
# --- 3. Fungsi-fungsi Empath dan Boosting ---
def label_from_empath(text):
    scores = lexicon.analyze(text, categories=emotions, normalize=True)
    return scores

def boost_empath_scores_with_keywords(text, empath_scores, keyword_emotion_map):
    text_lower = text.lower()
    boosted_scores = empath_scores.copy()

    for emotion, keywords in keyword_emotion_map.items():
        for kw in keywords:
            if re.search(rf'\b{kw}\b', text_lower):
                boosted_scores[emotion] = boosted_scores.get(emotion, 0) + 0.15

    return boosted_scores

# Fungsi konversi skor ke level (HANYA untuk pasca-pemrosesan dan visualisasi)
def score_to_level(score):
    if score == 0:
        return 0
    elif score <= 0.05:
        return 1
    elif score <= 0.25:
        return 2
    else:
        return 3

# Ambil 3 emosi teratas, ubah ke level, sisanya level 0 (HANYA untuk pasca-pemrosesan dan visualisasi)
def top_3_level_emotions(empath_scores_dict):
    if not isinstance(empath_scores_dict, dict):
        return {emotion: 0 for emotion in emotions}

    top3_items = sorted([item for item in empath_scores_dict.items() if item[0] in emotions],
                        key=lambda x: x[1],
                        reverse=True)[:3]

    result = {emotion: 0 for emotion in emotions}
    for emo, score in top3_items:
        result[emo] = score_to_level(score)
    return result



In [40]:
# --- 4. Penerapan Empath dan Boosting pada DataFrame ---
# 1. Skor awal dari Empath
df['empath_scores'] = df['cleaned_statement'].apply(label_from_empath)

# 2. Boost dengan keyword
df['boosted_scores'] = df.apply(
    lambda row: boost_empath_scores_with_keywords(
        row['cleaned_statement'], row['empath_scores'], keyword_emotion_map
    ), axis=1
)

In [41]:
df

Unnamed: 0,statement,label,cleaned_statement,empath_scores,boosted_scores
0,"Final doctor appointment tomorrow, tired of co...",Anxiety,final doctor appointment tomorrow tire constan...,"{'anxiety': 0.0, 'fear': 0.02097902097902098, ...","{'anxiety': 0.15, 'fear': 0.17097902097902098,..."
1,Anyone have bone or muscle pain that was stres...,Anxiety,anyone bone muscle pain stressanxiety induce j...,"{'anxiety': 0.0, 'fear': 0.14285714285714285, ...","{'anxiety': 0.0, 'fear': 0.14285714285714285, ..."
2,Listening to your body? I'm curious how those ...,Anxiety,listen body curious health anxiety listen body...,"{'anxiety': 0.0, 'fear': 0.047619047619047616,...","{'anxiety': 0.0, 'fear': 0.047619047619047616,..."
3,Weekly /r/HealthAnxiety Challenge - Exercise A...,Anxiety,weekly rhealthanxiety challenge exercise littl...,"{'anxiety': 0.0, 'fear': 0.07317073170731707, ...","{'anxiety': 0.0, 'fear': 0.22317073170731705, ..."
4,This is killing me So i had a bacterial stomac...,Anxiety,kill bacterial stomach infectionpain right sid...,"{'anxiety': 0.0, 'fear': 0.029850746268656716,...","{'anxiety': 0.0, 'fear': 0.029850746268656716,..."
...,...,...,...,...,...
4805,Nobody takes me seriously I’ve (24M) dealt wit...,Anxiety,nobody take seriously deal depressionanxiety y...,"{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0...","{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0..."
4806,"selfishness ""I don't feel very good, it's lik...",Anxiety,selfishness feel good belong world think ever ...,"{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0...","{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0..."
4807,Is there any way to sleep better? I can't slee...,Anxiety,way sleep good cannot sleep night med help,"{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0...","{'anxiety': 0.0, 'fear': 0.0, 'nervousness': 0..."
4808,"Public speaking tips? Hi, all. I have to give ...",Anxiety,public speak tip give presentation work next w...,"{'anxiety': 0.0, 'fear': 0.08823529411764706, ...","{'anxiety': 0.15, 'fear': 0.23823529411764705,..."


# 4. Modeling

## 4.1. Preprocessing data

In [42]:
# 4. Modeling
# 4.1. Preprocessing data untuk Model
X = df['cleaned_statement'].values

# --- PERUBAHAN PENTING DI SINI ---
# Target y adalah skor kontinu dari 'boosted_scores'
y_df_continuous = pd.DataFrame(df['boosted_scores'].tolist())
y = y_df_continuous[emotions].values # Ini adalah target kontinu Anda!

# Parameter Tokenizer dan Padding
vocab_size = 10000
oov_token = '<OOV>'
maxlen = 100

# Tokenisasi
print("Melakukan tokenisasi...")
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)

# Padding
print("Melakukan padding...")
X_pad = pad_sequences(X_seq, maxlen=maxlen, padding='post', truncating='post')

# Pembagian Data (Train, Validation, Test)
X_train_full, X_test, y_train_full, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.25, random_state=42)

print(f"Ukuran training set: {X_train.shape[0]} sampel")
print(f"Ukuran validation set: {X_val.shape[0]} sampel")
print(f"Ukuran test set: {X_test.shape[0]} sampel")

num_emotions = y.shape[1]

Melakukan tokenisasi...
Melakukan padding...
Ukuran training set: 2886 sampel
Ukuran validation set: 962 sampel
Ukuran test set: 962 sampel


In [None]:
# --- Definisi Model (Tidak Berubah dari sebelumnya, sudah cocok untuk regresi) ---
embedding_dim = 128

input_layer = Input(shape=(maxlen,)) # Input layer tetap didefinisikan dengan maxlen
embedding_layer = Embedding(input_dim=vocab_size,
                            output_dim=embedding_dim)(input_layer)  

bilstm_1 = Bidirectional(LSTM(128, return_sequences=True))(embedding_layer)
dropout_1 = Dropout(0.5)(bilstm_1)

bilstm_2 = Bidirectional(LSTM(64))(dropout_1)
dropout_2 = Dropout(0.5)(bilstm_2)

dense_hidden = Dense(64, activation='relu')(dropout_2)

# Output Layer: activation='linear' dan loss=MeanSquaredError() adalah BENAR untuk regresi
output_layer = Dense(num_emotions, activation='linear')(dense_hidden)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile Model
model.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=1e-3),
    metrics=[MeanSquaredError(), 'mae']
)

print("\n--- Model Summary ---")
model.summary()


--- Model Summary ---


In [44]:

# --- Callbacks untuk Pelatihan Optimal ---
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

model_checkpoint = ModelCheckpoint(
    'basic_emotion_regression_model.h5',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1
)



In [45]:
# --- Pelatihan Model ---
print("\n--- Mulai Pelatihan Model ---")
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, model_checkpoint]
)



--- Mulai Pelatihan Model ---
Epoch 1/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step - loss: 0.0085 - mae: 0.0658 - mean_squared_error: 0.0085
Epoch 1: val_loss improved from inf to 0.00565, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 335ms/step - loss: 0.0085 - mae: 0.0658 - mean_squared_error: 0.0085 - val_loss: 0.0057 - val_mae: 0.0546 - val_mean_squared_error: 0.0064
Epoch 2/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step - loss: 0.0051 - mae: 0.0513 - mean_squared_error: 0.0051
Epoch 2: val_loss improved from 0.00565 to 0.00438, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 324ms/step - loss: 0.0051 - mae: 0.0513 - mean_squared_error: 0.0051 - val_loss: 0.0044 - val_mae: 0.0455 - val_mean_squared_error: 0.0049
Epoch 3/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step - loss: 0.0034 - mae: 0.0420 - mean_squared_error: 0.0034
Epoch 3: val_loss improved from 0.00438 to 0.00428, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 319ms/step - loss: 0.0034 - mae: 0.0420 - mean_squared_error: 0.0034 - val_loss: 0.0043 - val_mae: 0.0438 - val_mean_squared_error: 0.0048
Epoch 4/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 268ms/step - loss: 0.0028 - mae: 0.0379 - mean_squared_error: 0.0028
Epoch 4: val_loss improved from 0.00428 to 0.00403, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 306ms/step - loss: 0.0028 - mae: 0.0378 - mean_squared_error: 0.0028 - val_loss: 0.0040 - val_mae: 0.0429 - val_mean_squared_error: 0.0047
Epoch 5/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step - loss: 0.0023 - mae: 0.0349 - mean_squared_error: 0.0023
Epoch 5: val_loss improved from 0.00403 to 0.00394, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 315ms/step - loss: 0.0023 - mae: 0.0349 - mean_squared_error: 0.0023 - val_loss: 0.0039 - val_mae: 0.0436 - val_mean_squared_error: 0.0044
Epoch 6/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 307ms/step - loss: 0.0019 - mae: 0.0314 - mean_squared_error: 0.0019
Epoch 6: val_loss improved from 0.00394 to 0.00349, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 349ms/step - loss: 0.0019 - mae: 0.0314 - mean_squared_error: 0.0019 - val_loss: 0.0035 - val_mae: 0.0393 - val_mean_squared_error: 0.0041
Epoch 7/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296ms/step - loss: 0.0016 - mae: 0.0289 - mean_squared_error: 0.0016
Epoch 7: val_loss did not improve from 0.00349
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 328ms/step - loss: 0.0016 - mae: 0.0289 - mean_squared_error: 0.0016 - val_loss: 0.0035 - val_mae: 0.0395 - val_mean_squared_error: 0.0041
Epoch 8/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step - loss: 0.0014 - mae: 0.0271 - mean_squared_error: 0.0014
Epoch 8: val_loss improved from 0.00349 to 0.00337, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 313ms/step - loss: 0.0014 - mae: 0.0271 - mean_squared_error: 0.0014 - val_loss: 0.0034 - val_mae: 0.0375 - val_mean_squared_error: 0.0040
Epoch 9/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step - loss: 0.0015 - mae: 0.0273 - mean_squared_error: 0.0015
Epoch 9: val_loss improved from 0.00337 to 0.00332, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 299ms/step - loss: 0.0015 - mae: 0.0273 - mean_squared_error: 0.0015 - val_loss: 0.0033 - val_mae: 0.0373 - val_mean_squared_error: 0.0041
Epoch 10/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 269ms/step - loss: 0.0013 - mae: 0.0255 - mean_squared_error: 0.0013
Epoch 10: val_loss did not improve from 0.00332
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 301ms/step - loss: 0.0013 - mae: 0.0255 - mean_squared_error: 0.0013 - val_loss: 0.0034 - val_mae: 0.0368 - val_mean_squared_error: 0.0041
Epoch 11/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 260ms/step - loss: 0.0012 - mae: 0.0244 - mean_squared_error: 0.0012
Epoch 11: val_loss improved from 0.00332 to 0.00325, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 291ms/step - loss: 0.0012 - mae: 0.0244 - mean_squared_error: 0.0012 - val_loss: 0.0033 - val_mae: 0.0362 - val_mean_squared_error: 0.0039
Epoch 12/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step - loss: 0.0011 - mae: 0.0235 - mean_squared_error: 0.0011
Epoch 12: val_loss did not improve from 0.00325
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 308ms/step - loss: 0.0011 - mae: 0.0235 - mean_squared_error: 0.0011 - val_loss: 0.0034 - val_mae: 0.0362 - val_mean_squared_error: 0.0042
Epoch 13/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step - loss: 0.0011 - mae: 0.0236 - mean_squared_error: 0.0011
Epoch 13: val_loss did not improve from 0.00325
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 300ms/step - loss: 0.0011 - mae: 0.0236 - mean_squared_error: 0.0011 - val_loss: 0.0034 - val_mae: 0.0366 - val_mean_squared_error: 0.0041
Ep



[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 320ms/step - loss: 9.8957e-04 - mae: 0.0225 - mean_squared_error: 9.8952e-04 - val_loss: 0.0032 - val_mae: 0.0348 - val_mean_squared_error: 0.0039
Epoch 19/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 291ms/step - loss: 8.4432e-04 - mae: 0.0204 - mean_squared_error: 8.4488e-04
Epoch 19: val_loss did not improve from 0.00323
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 328ms/step - loss: 8.4478e-04 - mae: 0.0204 - mean_squared_error: 8.4588e-04 - val_loss: 0.0033 - val_mae: 0.0356 - val_mean_squared_error: 0.0040
Epoch 20/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step - loss: 9.3857e-04 - mae: 0.0214 - mean_squared_error: 9.3837e-04
Epoch 20: val_loss did not improve from 0.00323
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 320ms/step - loss: 9.3874e-04 - mae: 0.0214 - mean_squared_error: 9.3834e-04 - val_loss: 0.0033 - val_mae: 0.



[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 287ms/step - loss: 7.9119e-04 - mae: 0.0198 - mean_squared_error: 7.9144e-04 - val_loss: 0.0032 - val_mae: 0.0343 - val_mean_squared_error: 0.0039
Epoch 26/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step - loss: 7.0474e-04 - mae: 0.0190 - mean_squared_error: 7.0497e-04
Epoch 26: val_loss did not improve from 0.00322
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 290ms/step - loss: 7.0533e-04 - mae: 0.0190 - mean_squared_error: 7.0578e-04 - val_loss: 0.0033 - val_mae: 0.0341 - val_mean_squared_error: 0.0039
Epoch 27/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 255ms/step - loss: 7.4873e-04 - mae: 0.0191 - mean_squared_error: 7.4871e-04
Epoch 27: val_loss improved from 0.00322 to 0.00319, saving model to basic_emotion_regression_model.h5




[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 289ms/step - loss: 7.4805e-04 - mae: 0.0191 - mean_squared_error: 7.4800e-04 - val_loss: 0.0032 - val_mae: 0.0341 - val_mean_squared_error: 0.0038
Epoch 28/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step - loss: 6.8741e-04 - mae: 0.0183 - mean_squared_error: 6.8723e-04
Epoch 28: val_loss did not improve from 0.00319
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 285ms/step - loss: 6.8757e-04 - mae: 0.0183 - mean_squared_error: 6.8722e-04 - val_loss: 0.0032 - val_mae: 0.0344 - val_mean_squared_error: 0.0039
Epoch 29/50
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step - loss: 6.8688e-04 - mae: 0.0182 - mean_squared_error: 6.8687e-04
Epoch 29: val_loss did not improve from 0.00319
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 283ms/step - loss: 6.8769e-04 - mae: 0.0182 - mean_squared_error: 6.8767e-04 - val_loss: 0.0033 - val_mae: 0.

In [46]:
# --- Evaluasi Model ---
print("\n--- Evaluasi Model pada Test Set ---")
try:
    # Memuat model terbaik
    best_model = tf.keras.models.load_model('basic_emotion_regression_model.h5', compile=False)
    best_model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=1e-3), metrics=[KerasMSE(), MeanAbsoluteError()])
except Exception as e:
    print(f"Gagal memuat model terbaik, menggunakan model yang terakhir dilatih: {e}")
    best_model = model

loss, mse, mae = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {mse:.4f}")
print(f"Test MAE: {mae:.4f}")


--- Evaluasi Model pada Test Set ---
Test MSE: 0.0033
Test MAE: 0.0342


In [53]:

# --- Contoh Prediksi pada Test Set (dengan pasca-pemrosesan ke level) ---
print("\n--- Contoh Prediksi pada Test Set ---")
emotion_labels = ['anxiety', 'fear', 'nervousness', 'sadness', 'suffering', 'shame']

num_samples_to_show = min(5, X_test.shape[0])
for i in range(num_samples_to_show):
    sample_text_input = X_test[i:i+1]
    true_raw_scores = y_test[i] # Ground truth adalah skor kontinu

    # --- Pasca-pemrosesan untuk mendapatkan 3 emosi dominan dalam level ---
    predicted_raw_scores = best_model.predict(sample_text_input, verbose=0)[0]

    # Dapatkan 3 emosi dominan dan levelnya
    final_emotion_levels = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, predicted_raw_scores)}
    )
    # Untuk true emotions, kita juga perlu mengonversi dari raw score ke level 0-3 dengan logika top 3
    true_emotion_levels = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, true_raw_scores)}
    )

    # Dapatkan teks asli dari dataframe (ini butuh mapping indeks)
    original_text_idx = np.where((X_pad == sample_text_input).all(axis=1))[0]
    original_text = df['cleaned_statement'].iloc[original_text_idx[0]] if len(original_text_idx) > 0 else "N/A"

    print(f"\n--- Sampel {i+1} ---")
    print(f"Teks Asli: '{original_text}'")
    print(f"Emosi Sebenarnya (Level 0-3): {true_emotion_levels}") # Kini true_emotion_levels
    print(f"Prediksi Emosi (Level 0-3): {final_emotion_levels}")
    # print(f"Raw Prediksi (Float): {predicted_raw_scores}") # Opsional: lihat skor mentah




--- Contoh Prediksi pada Test Set ---

--- Sampel 1 ---
Teks Asli: 'dream one move new unit anjrit worry'
Emosi Sebenarnya (Level 0-3): {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 2}
Prediksi Emosi (Level 0-3): {'anxiety': 2, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 2}

--- Sampel 2 ---
Teks Asli: 'symptom without everyone think possible people symptom long time headache fatigue muscle ache without'
Emosi Sebenarnya (Level 0-3): {'anxiety': 0, 'fear': 0, 'nervousness': 0, 'sadness': 0, 'suffering': 2, 'shame': 0}
Prediksi Emosi (Level 0-3): {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 2, 'shame': 2}

--- Sampel 3 ---
Teks Asli: 'brain tumor concern feel super anxious lately think worth vent moment year old feel anxious idea brain tumor headache presentish minor call bad headache life feel see tiny streak light huge glare anything mess vision feel see tiny streak sometimes sometimes feel double 

In [54]:
# --- Fungsi untuk Prediksi Teks Baru (di luar dataset) ---
def predict_emotion_for_new_text(text, model, tokenizer, maxlen, emotion_labels):
    processed_text = preprocess_text(text) # Gunakan preprocessing yang sama
    seq = tokenizer.texts_to_sequences([processed_text])
    padded_seq = pad_sequences(seq, maxlen=maxlen, padding='post', truncating='post')

    raw_prediction = model.predict(padded_seq, verbose=0)[0]

    # Pasca-pemrosesan untuk mendapatkan 3 emosi dominan dalam level
    predicted_dict = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, raw_prediction)}
    )
    return predicted_dict

print("\n--- Contoh Prediksi untuk Teks Baru ---")
new_texts = [
    "I am truly happy today, life is great!",
    "This situation makes me feel so helpless and trapped.",
    "I have an exam soon, feeling a mix of nerves and excitement.",
    "The news today filled me with deep sorrow.",
    "I endured a lot, but I am recovering.",
    "My mistake from yesterday still causes me a lot of shame."
]

for text in new_texts:
    predicted_new_text = predict_emotion_for_new_text(text, best_model, tokenizer, maxlen, emotion_labels)
    print(f"Teks: '{text}'")
    print(f"Prediksi: {predicted_new_text}\n")


--- Contoh Prediksi untuk Teks Baru ---
Teks: 'I am truly happy today, life is great!'
Prediksi: {'anxiety': 2, 'fear': 0, 'nervousness': 2, 'sadness': 1, 'suffering': 0, 'shame': 0}

Teks: 'This situation makes me feel so helpless and trapped.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 2, 'shame': 2}

Teks: 'I have an exam soon, feeling a mix of nerves and excitement.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 1, 'suffering': 0, 'shame': 2}

Teks: 'The news today filled me with deep sorrow.'
Prediksi: {'anxiety': 2, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 1}

Teks: 'I endured a lot, but I am recovering.'
Prediksi: {'anxiety': 2, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 2}

Teks: 'My mistake from yesterday still causes me a lot of shame.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 1, 'suffering': 0, 'shame': 2}



In [55]:
# Pastikan deep_translator sudah terinstal: pip install deep-translator
from deep_translator import GoogleTranslator # Import library GoogleTranslator
import tensorflow as tf # Pastikan TensorFlow sudah terinstal


In [56]:
# Muat kembali model terbaik jika belum di-load
try:
    best_model = tf.keras.models.load_model('basic_emotion_regression_model.h5', compile=False)
    best_model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=[tf.keras.metrics.MeanSquaredError(), 'mae'])
except Exception as e:
    print(f"Gagal memuat model terbaik, menggunakan model yang terakhir dilatih: {e}")
    best_model = model

print("\n--- Konversi Model ke TensorFlow Lite ---")
# Buat TFLite converter
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)

# --- Tambahkan konfigurasi ini untuk mengatasi ConverterError ---
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # Mengizinkan operasi built-in TFLite
    tf.lite.OpsSet.SELECT_TF_OPS    # Mengizinkan operasi TensorFlow yang tidak memiliki built-in TFLite
]
# Ini adalah flag eksperimental yang disarankan untuk dinonaktifkan
converter._experimental_lower_tensor_list_ops = False
# Opsional: Jika Anda ingin mengizinkan operasi TF yang tidak standar, bisa juga tambahkan:
# converter.allow_custom_ops = True

# Konversi model
tflite_model = converter.convert()

# Simpan model TFLite ke file
tflite_model_path = 'emotion_regression_model_v2.tflite'
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)
print(f"Model TensorFlow Lite berhasil disimpan di: {tflite_model_path}")


--- Konversi Model ke TensorFlow Lite ---
INFO:tensorflow:Assets written to: C:\Users\laila\AppData\Local\Temp\tmp95br8jzj\assets


INFO:tensorflow:Assets written to: C:\Users\laila\AppData\Local\Temp\tmp95br8jzj\assets


Saved artifact at 'C:\Users\laila\AppData\Local\Temp\tmp95br8jzj'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 6), dtype=tf.float32, name=None)
Captures:
  2094224563600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224561872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224567632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224561488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224568976: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224568784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224564368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224563792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224564176: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2094224562640: TensorSpec(shape=(), dtype=tf.resource, name=None)
  209422456974

In [57]:
# --- Fungsi untuk Prediksi Menggunakan Model TFLite dengan Terjemahan ---
def predict_emotion_tflite_with_translation(text, tflite_model_path, tokenizer, maxlen, emotion_labels):
    # Inisialisasi interpreter TFLite
    interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
    interpreter.allocate_tensors()

    # Dapatkan detail input dan output
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # 1. Terjemahkan teks jika bukan bahasa Inggris
    # Anda mungkin perlu logika deteksi bahasa di sini,
    # atau secara eksplisit meminta pengguna memilih bahasa input.
    # Untuk contoh ini, kita asumsikan input bisa Bahasa Indonesia dan langsung diterjemahkan.
    try:
        # Menentukan source language secara otomatis atau eksplisit 'id'
        # Target language adalah 'en' (English)
        translated_text = GoogleTranslator(source='auto', target='en').translate(text)
        print(f"Teks asli: '{text}' -> Diterjemahkan: '{translated_text}'")
    except Exception as e:
        print(f"Gagal menerjemahkan teks: {e}. Menggunakan teks asli.")
        translated_text = text # Fallback ke teks asli jika terjemahan gagal

    # 2. Pra-proses teks yang sudah diterjemahkan (atau teks asli jika gagal diterjemahkan)
    processed_text = preprocess_text(translated_text) # Gunakan preprocessing yang sama

     # 3. Tokenisasi dan Padding
    seq = tokenizer.texts_to_sequences([processed_text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=maxlen, padding='post', truncating='post')

    # --- PERUBAHAN DI SINI ---
    # Convert input data to the type expected by the TFLite model
    # Ganti dtype=np.int32 menjadi dtype=np.float32
    input_data = np.array(padded_seq, dtype=np.float32) # <-- Perubahan ini

    # 4. Set input tensor dan jalankan inferensi
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # 5. Dapatkan hasil prediksi
    raw_prediction = interpreter.get_tensor(output_details[0]['index'])[0]

    # 6. Pasca-pemrosesan untuk mendapatkan 3 emosi dominan dalam level (fungsi yang sama dari Keras model)
    predicted_dict = top_3_level_emotions(
        {label: score for label, score in zip(emotion_labels, raw_prediction)}
    )
    return predicted_dict

In [58]:
# --- Contoh Penggunaan Prediksi dengan TFLite dan Terjemahan ---
print("\n--- Contoh Prediksi untuk Teks Baru (dengan TFLite dan Terjemahan) ---")
new_texts_with_id = [
    "Saya sangat bahagia hari ini, hidup ini indah!", # Indonesian
    "Situasi ini membuat saya merasa sangat tidak berdaya dan terperangkap.", # Indonesian
    "Saya akan ujian sebentar lagi, merasa gugup sekaligus bersemangat.", # Indonesian
    "Berita hari ini membuat saya sedih sekali.", # Indonesian
    "Saya sudah banyak menderita, tapi saya pulih.", # Indonesian
    "Kesalahan saya kemarin masih menyebabkan banyak rasa malu.", # Indonesian
    "I am very worried about my health.", # English (to show it still works for English)
    "Saya merasa cemas dengan masa depan.", # Indonesian
    "Sungguh menyedihkan apa yang terjadi kemarin." # Indonesian
]

# Ambil best_model dari proses sebelumnya (yang sudah di-load atau fall-back)
# emotion_labels dan maxlen juga dari definisi sebelumnya

for text in new_texts_with_id:
    predicted_emotion = predict_emotion_tflite_with_translation(text, tflite_model_path, tokenizer, maxlen, emotion_labels)
    print(f"Prediksi: {predicted_emotion}\n")


--- Contoh Prediksi untuk Teks Baru (dengan TFLite dan Terjemahan) ---
Teks asli: 'Saya sangat bahagia hari ini, hidup ini indah!' -> Diterjemahkan: 'I am very happy today, life is beautiful!'


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


Prediksi: {'anxiety': 0, 'fear': 1, 'nervousness': 2, 'sadness': 0, 'suffering': 0, 'shame': 2}

Teks asli: 'Situasi ini membuat saya merasa sangat tidak berdaya dan terperangkap.' -> Diterjemahkan: 'This situation made me feel very helpless and trapped.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'suffering': 2, 'shame': 2}

Teks asli: 'Saya akan ujian sebentar lagi, merasa gugup sekaligus bersemangat.' -> Diterjemahkan: 'I will take an exam soon, feel nervous and excited.'
Prediksi: {'anxiety': 2, 'fear': 0, 'nervousness': 3, 'sadness': 0, 'suffering': 0, 'shame': 2}

Teks asli: 'Berita hari ini membuat saya sedih sekali.' -> Diterjemahkan: 'Today's news makes me so sad.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 2, 'suffering': 0, 'shame': 2}

Teks asli: 'Saya sudah banyak menderita, tapi saya pulih.' -> Diterjemahkan: 'I have suffered a lot, but I recovered.'
Prediksi: {'anxiety': 0, 'fear': 0, 'nervousness': 2, 'sadness': 0, 'sufferi