IMPLEMENTASI MODEL

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
file_path = 'hasil_deteksi_emosi.xlsx'  # Replace with the correct path
df = pd.read_excel(file_path)

# Step 2: Text preprocessing
# The vocabulary size is shared by the Tokenizer and the Embedding layer, so it
# is defined once to keep the two from drifting apart.
vocab_size = 10000  # Keep only the most frequent words in the dataset

# Cells read from Excel can be NaN or numeric; the Keras Tokenizer calls string
# methods on every entry, so coerce the column to plain strings up front.
texts = df['comment_text'].fillna('').astype(str)

# Tokenize the text
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad the sequences
maxlen = 150  # Fixed sequence length fed to the model
X = pad_sequences(sequences, maxlen=maxlen)

# Step 3: Label preparation
# Every column from position 3 onwards is joined with '-' into one combined
# label per row. The column list is captured once, before 'combined_emotions'
# is added to the frame.
# NOTE: empty cells end up as the literal text 'nan' inside the combined label
# because of astype(str).
label_columns = df.columns[3:]
df['combined_emotions'] = df.apply(
    lambda row: '-'.join(row[label_columns].astype(str)), axis=1
)

# Encode the combined labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['combined_emotions'])

# One-hot encode the labels
y = to_categorical(y)

# Step 4: Split the data
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Build the CNN model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=maxlen),
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(units=y.shape[1], activation='softmax')  # One unit per unique combined label
])

# Step 6: Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Step 7: Train the model
# NOTE(review): only the train/test split is seeded (random_state=42); nothing
# here seeds the model itself, so the reported accuracy may vary between runs.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Step 8: Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))

# Step 9: Show the accuracy
print(f"Accuracy: {accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.9340282035561005


In [9]:
def predict_emotion(text, tokenizer, model, label_encoder, maxlen=150):
    """Predict the emotion label for one piece of text.

    Args:
        text: Raw text to classify; it is wrapped in a one-element list
            before being tokenized.
        tokenizer: Object exposing ``texts_to_sequences``.
        model: Object exposing ``predict``; its output is arg-maxed over
            axis 1.
        label_encoder: Object exposing ``inverse_transform``.
        maxlen: Length passed on to ``pad_sequences``.

    Returns:
        The result of ``label_encoder.inverse_transform`` for the winning
        class index (a single entry, since a single text is scored).
    """
    # Turn the text into a padded batch of size one.
    padded = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=maxlen)

    # Score the batch and keep the index of the most probable class.
    probabilities = model.predict(padded)
    best_class = np.argmax(probabilities, axis=1)

    # Map the class index back to its original label.
    return label_encoder.inverse_transform(best_class)

# Sample text to classify
sample_text = "Saya sangat senang hari ini!"

# Call predict_emotion on the sample text. The padding length is taken from the
# notebook-level `maxlen` (the value used to build X) instead of repeating the
# literal, so inference padding cannot drift from training padding.
predicted_emotion = predict_emotion(sample_text, tokenizer, model, label_encoder, maxlen=maxlen)

predicted_emotion  # Display the predicted emotion



array(['Joy-Surprise-nan-nan-nan-nan-nan-nan'], dtype=object)

In [12]:
# Score every row of X. X is the full padded matrix, so this includes the rows
# the model was trained on.
predictions = model.predict(X)

# Take the most probable class per row, then map the indices back to labels.
predicted_class_indices = predictions.argmax(axis=1)
predicted_labels = label_encoder.inverse_transform(predicted_class_indices)

# Attach the predicted labels to the DataFrame
df['Predicted_Emotion'] = predicted_labels

# Write the updated DataFrame to a new Excel file
output_file_path = 'path_to_exported_predictions.xlsx'
df.to_excel(output_file_path, index=False)

print(f"File saved to {output_file_path}")

File saved to path_to_exported_predictions.xlsx
