In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from gensim.models import Word2Vec
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
from nltk.corpus import stopwords
import nltk
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import pathlib
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input
import matplotlib.pyplot as plt
# Membuat objek stemmer dari Sastrawi
factory = StemmerFactory()
stemmer = factory.create_stemmer()


nltk.download('stopwords')

df = pd.read_csv('data.csv')

stop_words = set(stopwords.words('indonesian'))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\adisu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
def preprocess_text(text):
    # Menghapus karakter non-kata dan menurunkan huruf
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()

    # Menghapus stopwords
    text = ' '.join([word for word in text.split() if word not in stop_words])

    # Melakukan stemming dengan Sastrawi
    text = ' '.join([stemmer.stem(word) for word in text.split()])

    return text.split()

In [3]:
df['processed_text'] = df['text'].apply(preprocess_text)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['processed_text'])

X = tokenizer.texts_to_sequences(df['processed_text'])
X = pad_sequences(X, padding='post')

In [4]:
model_word2vec = Word2Vec(df['processed_text'], vector_size=100, window=5, min_count=1, workers=4)

In [5]:
embedding_dim = 100
embedding_matrix = np.zeros((len(tokenizer.word_index) + 1, embedding_dim))

In [6]:
for word, i in tokenizer.word_index.items():
    if word in model_word2vec.wv:
        embedding_matrix[i] = model_word2vec.wv[word]

In [7]:
y = df['label']
train_size = int(len(X) * 0.8)

X_train = X[:train_size]
X_test = X[train_size:]
y_train = y[:train_size]
y_test = y[train_size:]

In [8]:
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1,
              output_dim=embedding_dim,
              weights=[embedding_matrix],
              trainable=True),
    GRU(64, return_sequences=True, activation='relu'),
    GRU(32, return_sequences=False, activation='relu'),
    Dropout(0.5),
    Dense(8, activation='softmax')  # 8 kelas emosi
])

In [9]:
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
    )

In [10]:
model.fit(
    X_train,
    y_train,
    epochs=15,
    batch_size=32,
    validation_data=(X_test, y_test)
    )

Epoch 1/15
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.1378 - loss: 2.0543 - val_accuracy: 0.2587 - val_loss: 1.7959
Epoch 2/15
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.3577 - loss: 1.6112 - val_accuracy: 0.7613 - val_loss: 0.7452
Epoch 3/15
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.6897 - loss: 0.8350 - val_accuracy: 0.8875 - val_loss: 0.4243
Epoch 4/15
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8391 - loss: 0.5058 - val_accuracy: 0.9112 - val_loss: 0.3507
Epoch 5/15
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8663 - loss: 0.4054 - val_accuracy: 0.9212 - val_loss: 0.3274
Epoch 6/15
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.9163 - loss: 0.2764 - val_accuracy: 0.9287 - val_loss: 0.2790
Epoch 7/15
[1m100/100

<keras.src.callbacks.history.History at 0x159a8c02c90>

In [11]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Model Accuracy: {accuracy:.4f}')
model.save("emotion_prediction.keras")

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9088 - loss: 0.6156
Model Accuracy: 0.9312


In [12]:
import numpy as np

def predict_emotion(text, top_n=3):
    # Preprocess the input text
    processed_text = preprocess_text(text)

    # Convert the text into a sequence of integers
    sequence = tokenizer.texts_to_sequences([processed_text])
    padded_sequence = pad_sequences(sequence, maxlen=X.shape[1], padding='post')

    # Get the model's predictions
    prediction = model.predict(padded_sequence)

    # Get the indices of the top N emotions based on prediction probabilities
    top_indices = np.argsort(prediction[0])[::-1][:top_n]

    # Define the possible emotions
    emotions = ['Waspada', 'Marah', 'Jijik', 'Takut', 'Senang', 'Sedih', 'Terkejut', 'Percaya']

    # Get the top N emotions with their corresponding probabilities
    top_emotions = [(emotions[i], prediction[0][i]) for i in top_indices]

    return top_emotions


In [13]:
teks_emosi=['Tiba-tiba aku menerima panggilan dari teman lama yang sudah lama tidak berhubungan, itu sangat mengejutkan!', # Waspada
    'Saya sangat marah karena pertemuan tadi pagi sangat tidak produktif. Semua ide yang saya usulkan ditolak begitu saja!', # Marah
    'Aku merasa jijik ketika melihat sampah berserakan di lantai rumah makan.', #Jijik
    'Aku merasa takut berjalan sendirian di jalan sepi malam ini.', #Takut
    'Hari ini sangat menyenankan! Saya bertemu teman lama dan kami menghabiskan waktu bersama di kafe. Rasanya seperti kembali ke masa-masa indah!', #Senang
    'Hari ini aku merasa sangat sedih, rasanya seperti dunia ini tidak adil', #Sedih
    'Aku sangat terkejut ketika tahu bahwa teman dekatku sudah menikah tanpa memberitahuku.', #Terkejut
    'Aku percaya bahwa dengan kerja keras dan doa, aku akan berhasil mencapai impian.' #Percaya
]

for text in teks_emosi:
    emotion = predict_emotion(text, top_n=1)
    print(f"Teks: {text}")
    print(f"Emosi yang diprediksi: {emotion}\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673ms/step
Teks: Tiba-tiba aku menerima panggilan dari teman lama yang sudah lama tidak berhubungan, itu sangat mengejutkan!
Emosi yang diprediksi: [('Terkejut', 1.0)]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Teks: Saya sangat marah karena pertemuan tadi pagi sangat tidak produktif. Semua ide yang saya usulkan ditolak begitu saja!
Emosi yang diprediksi: [('Marah', 1.0)]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Teks: Aku merasa jijik ketika melihat sampah berserakan di lantai rumah makan.
Emosi yang diprediksi: [('Jijik', 1.0)]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Teks: Aku merasa takut berjalan sendirian di jalan sepi malam ini.
Emosi yang diprediksi: [('Takut', 0.9999558)]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Teks: Hari ini sangat menyenankan! Saya bertemu teman lama dan kami menghabiskan w

In [14]:
export_dir = 'saved_model/'
tf.saved_model.save(model, export_dir)


INFO:tensorflow:Assets written to: saved_model/assets


INFO:tensorflow:Assets written to: saved_model/assets


In [15]:
model.save("emotion_prediction.keras")

In [16]:
import pickle

# Simpan Tokenizer ke dalam file dengan Pickle
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

print("Tokenizer telah disimpan!")


Tokenizer telah disimpan!


In [17]:
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()


In [18]:
tflite_model_file = pathlib.Path('model.tflite')
tflite_model_file.write_bytes(tflite_model)


126492