In [3]:
# Hybrid CNN + LSTM Model for Emotion and Topic Classification
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, LSTM, Bidirectional, Dense, Dropout, Concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Load the dataset
data_path = 'F:\\Masters\\Thesis\\BanglaEmotion\\Train.csv'  # Update with your path
val_data_path = 'F:\\Masters\\Thesis\\BanglaEmotion\\Val.csv'  # Validation data path
test_data_path = 'F:\\Masters\\Thesis\\BanglaEmotion\\Test.csv'  # Test data path

# Load all datasets
df = pd.read_csv(data_path)
df_val = pd.read_csv(val_data_path)
df_test = pd.read_csv(test_data_path)

# Combine emotion columns into labels for training, validation, and test data
emotion_columns = ['Love', 'Joy', 'Surprise', 'Anger', 'Sadness', 'Fear']
df['Emotion_Label'] = df[emotion_columns].idxmax(axis=1)
df_val['Emotion_Label'] = df_val[emotion_columns].idxmax(axis=1)
df_test['Emotion_Label'] = df_test[emotion_columns].idxmax(axis=1)

# Encode labels for emotion and topic
emotion_label_encoder = LabelEncoder()
df['Emotion_Label'] = emotion_label_encoder.fit_transform(df['Emotion_Label'])
df_val['Emotion_Label'] = emotion_label_encoder.transform(df_val['Emotion_Label'])
df_test['Emotion_Label'] = emotion_label_encoder.transform(df_test['Emotion_Label'])

topic_label_encoder = LabelEncoder()
df['Topic'] = topic_label_encoder.fit_transform(df['Topic'])
df_val['Topic'] = topic_label_encoder.transform(df_val['Topic'])
df_test['Topic'] = topic_label_encoder.transform(df_test['Topic'])

# Tokenization and Padding for Text Data
max_words = 5000  # Maximum number of words to consider
max_len = 100  # Maximum sequence length
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['Data'])

X_train = pad_sequences(tokenizer.texts_to_sequences(df['Data']), maxlen=max_len)
X_val = pad_sequences(tokenizer.texts_to_sequences(df_val['Data']), maxlen=max_len)
X_test = pad_sequences(tokenizer.texts_to_sequences(df_test['Data']), maxlen=max_len)

# Convert labels to categorical
y_train_emotion = to_categorical(df['Emotion_Label'])
y_val_emotion = to_categorical(df_val['Emotion_Label'])
y_test_emotion = to_categorical(df_test['Emotion_Label'])

y_train_topic = to_categorical(df['Topic'])
y_val_topic = to_categorical(df_val['Topic'])
y_test_topic = to_categorical(df_test['Topic'])

# Hybrid CNN + LSTM Model
embedding_dim = 128
num_filters = 64
kernel_size = 3
lstm_units = 64

# Input Layer
input_layer = Input(shape=(max_len,))

# Embedding Layer
embedding_layer = Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len)(input_layer)

# CNN Layer
conv_layer = Conv1D(filters=num_filters, kernel_size=kernel_size, activation='relu')(embedding_layer)
pool_layer = GlobalMaxPooling1D()(conv_layer)

# LSTM Layer
lstm_layer = Bidirectional(LSTM(lstm_units, return_sequences=False))(embedding_layer)

# Concatenate CNN and LSTM Features
concat_layer = Concatenate()([pool_layer, lstm_layer])
dropout_layer = Dropout(0.5)(concat_layer)

# Dense layers for separate outputs
emotion_output = Dense(y_train_emotion.shape[1], activation='softmax', name='emotion_output')(dropout_layer)
topic_output = Dense(y_train_topic.shape[1], activation='softmax', name='topic_output')(dropout_layer)

# Model
model = Model(inputs=input_layer, outputs=[emotion_output, topic_output])
model.compile(
    optimizer='adam',
    loss={'emotion_output': 'categorical_crossentropy', 'topic_output': 'categorical_crossentropy'},
    metrics={'emotion_output': 'accuracy', 'topic_output': 'accuracy'}
)

# Model Summary
model.summary()

# Training the Model
history = model.fit(
    X_train, 
    {'emotion_output': y_train_emotion, 'topic_output': y_train_topic},
    validation_data=(X_val, {'emotion_output': y_val_emotion, 'topic_output': y_val_topic}),
    epochs=10,
    batch_size=32
)

# Evaluate on Test Data
test_loss, test_emotion_loss, test_topic_loss, test_emotion_acc, test_topic_acc = model.evaluate(
    X_test, 
    {'emotion_output': y_test_emotion, 'topic_output': y_test_topic}
)

print(f"Test Emotion Accuracy: {test_emotion_acc:.4f}")
print(f"Test Topic Accuracy: {test_topic_acc:.4f}")

# Predictions
emotion_pred = np.argmax(model.predict(X_test)[0], axis=1)
topic_pred = np.argmax(model.predict(X_test)[1], axis=1)

# Classification Reports
print("Emotion Classification Report on Test Data:")
print(classification_report(np.argmax(y_test_emotion, axis=1), emotion_pred, target_names=emotion_label_encoder.classes_))

print("Topic Classification Report on Test Data:")
print(classification_report(np.argmax(y_test_topic, axis=1), topic_pred, target_names=topic_label_encoder.classes_))




Epoch 1/10
[1m576/576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 119ms/step - emotion_output_accuracy: 0.3856 - emotion_output_loss: 1.4430 - loss: 3.4392 - topic_output_accuracy: 0.2819 - topic_output_loss: 1.9962 - val_emotion_output_accuracy: 0.5681 - val_emotion_output_loss: 1.0724 - val_loss: 2.5296 - val_topic_output_accuracy: 0.5217 - val_topic_output_loss: 1.4569
Epoch 2/10
[1m576/576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 117ms/step - emotion_output_accuracy: 0.6095 - emotion_output_loss: 1.0103 - loss: 2.3681 - topic_output_accuracy: 0.5624 - topic_output_loss: 1.3578 - val_emotion_output_accuracy: 0.5965 - val_emotion_output_loss: 1.0083 - val_loss: 2.2977 - val_topic_output_accuracy: 0.5779 - val_topic_output_loss: 1.2891
Epoch 3/10
[1m576/576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 119ms/step - emotion_output_accuracy: 0.6709 - emotion_output_loss: 0.8526 - loss: 1.9456 - topic_output_accuracy: 0.6458 - topic_output_loss: 1.092

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
