# 🔥 BiLSTM Model Training: Emotion Classification

This notebook trains a Bidirectional LSTM model on preprocessed emotion data.


## 📚 Import Libraries


In [None]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')


## 📂 Load Preprocessed Data


In [None]:
# Read the pickled train / validation DataFrames from ./data.
# NOTE(review): unpickling executes arbitrary code, so only load files that
# were produced by a trusted preprocessing run.
train_path = './data/train_preprocessed.pkl'
val_path = './data/validation_preprocessed.pkl'

train_df = pd.read_pickle(train_path)
val_df = pd.read_pickle(val_path)

# Quick sanity check: sizes, column names, and a peek at the first rows.
train_shape = train_df.shape
val_shape = val_df.shape
column_names = train_df.columns.tolist()

print(f"Training data shape: {train_shape}")
print(f"Validation data shape: {val_shape}")
print(f"\nColumns: {column_names}")
print(f"\nFirst few rows:")
print(train_df.head())


## 📊 Prepare Data

Split the data into features (X) and labels (y).


In [None]:
# Pull the text column (features) and label column (targets) out of each split.
X_train, y_train = train_df['Text'], train_df['Label']
X_val, y_val = val_df['Text'], val_df['Label']

# Report split sizes and the per-class counts of the training labels,
# ordered by label value.
n_train = len(X_train)
n_val = len(X_val)
train_label_counts = y_train.value_counts().sort_index()

print(f"Training samples: {n_train}")
print(f"Validation samples: {n_val}")
print(f"\nLabel distribution in training set:")
print(train_label_counts)


## 🔠 Tokenization

Convert text to sequences of integers.


In [None]:
# Build the vocabulary from the training texts only; at most the 60000
# most frequent words are kept when texts are converted to sequences.
tokenizer = Tokenizer(num_words=60000)
tokenizer.fit_on_texts(X_train)

# Map every text to its list of integer word indices.
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_val_sequences = tokenizer.texts_to_sequences(X_val)

# The longest training sequence defines the fixed input length.
maxlen = max(map(len, X_train_sequences))
print(f"Maximum sequence length: {maxlen}")

# Zero-pad at the end of each sequence so both splits share the same width.
pad_kwargs = {'maxlen': maxlen, 'padding': 'post'}
X_train_padded = pad_sequences(X_train_sequences, **pad_kwargs)
X_val_padded = pad_sequences(X_val_sequences, **pad_kwargs)

print(f"\nX_train_padded shape: {X_train_padded.shape}")
print(f"X_val_padded shape: {X_val_padded.shape}")

# The embedding table needs one row per index, 0 (padding) through the
# largest index that occurs in the padded training matrix.
input_size = X_train_padded.max() + 1
print(f"Vocabulary size (input_size): {input_size}")


## 🛠️ Build BiLSTM Model

**Model Architecture:**
1. **Embedding Layer**: Converts word indices to dense vectors
2. **Bidirectional LSTM**: Processes sequences in both directions (forward and backward)
3. **Batch Normalization**: Normalizes activations for stable training
4. **Dropout**: Prevents overfitting (50% dropout rate; applied after batch normalization and again after the dense layer)
5. **Dense Layer**: Fully connected layer with ReLU activation
6. **Output Layer**: 6 units with softmax for emotion classification


In [None]:
# Assemble the BiLSTM classifier as a single ordered layer stack.
model = Sequential([
    # Word indices -> 100-dimensional dense vectors
    Embedding(input_dim=input_size, output_dim=100, input_shape=(maxlen,)),
    # 128-unit LSTM wrapped so the sequence is read in both directions
    Bidirectional(LSTM(128)),
    # Normalize the recurrent output
    BatchNormalization(),
    # Regularization: drop 50% of activations
    Dropout(0.5),
    # Fully connected hidden layer with ReLU
    Dense(64, activation='relu'),
    # Second 50% dropout, ahead of the classifier head
    Dropout(0.5),
    # 6-way softmax output (one unit per emotion class)
    Dense(6, activation='softmax'),
])

# Labels are passed to fit() as a single column of class ids rather than
# one-hot vectors, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()


## 🚀 Train the Model

Train with early stopping to prevent overfitting.


In [None]:
# Train the model
history = model.fit(
    X_train_padded, y_train,
    epochs=15,
    batch_size=32,
    validation_data=(X_val_padded, y_val),
    callbacks=[EarlyStopping(patience=3, restore_best_weights=True)]
)


## 📊 Visualize Training Progress


In [None]:
# Identify the best epoch as the one with the LOWEST validation loss.
# The EarlyStopping callback used for training keeps its default monitor
# ('val_loss') together with restore_best_weights=True, so this is the epoch
# whose weights the model actually ends up with. Picking the epoch with the
# highest validation accuracy instead (as this cell did before) can highlight,
# and later save to the metadata, an epoch whose weights were discarded.
# `.index(min(...))` returns the first occurrence, and best_epoch is 1-based.
val_loss_history = history.history['val_loss']
best_epoch = val_loss_history.index(min(val_loss_history)) + 1

# One row, two panels: accuracy on the left, loss on the right
fig, axs = plt.subplots(1, 2, figsize=(16, 5))

# (history key, display name) for each panel; the validation curve is stored
# under the same key prefixed with 'val_'.
panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]

for ax, (metric_key, metric_name) in zip(axs, panels):
    train_curve = history.history[metric_key]
    val_curve = history.history[f'val_{metric_key}']

    ax.plot(train_curve, label=f'Training {metric_name}', color='blue')
    ax.plot(val_curve, label=f'Validation {metric_name}', color='red')
    # The x axis is the 0-based epoch index, hence best_epoch - 1
    ax.scatter(best_epoch - 1, val_curve[best_epoch - 1],
               color='green', label=f'Best Epoch: {best_epoch}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.set_title(f'Training and Validation {metric_name}')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()


## 📈 Evaluate Model Performance


In [None]:
# Evaluate on validation set
val_loss, val_accuracy = model.evaluate(X_val_padded, y_val)
print(f"\nValidation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")


## 🎯 Confusion Matrix


In [None]:
# Get predictions
y_val_pred = model.predict(X_val_padded)
y_val_pred = np.argmax(y_val_pred, axis=1)

# Create confusion matrix
cm = confusion_matrix(y_val, y_val_pred)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Greens', 
            xticklabels=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'],
            yticklabels=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'])
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('BiLSTM Model - Confusion Matrix')
plt.tight_layout()
plt.show()


## 📝 Classification Report


In [None]:
# Print classification report
# Per-class precision / recall / F1 on the validation split.
# NOTE(review): assumes label ids 0..5 map to these names in this order.
emotion_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

# Passing `labels` explicitly keeps the report aligned with `target_names`
# even if some class is absent from both y_val and the predictions; without
# it, a mismatch between observed classes and the 6 names raises an error.
report = classification_report(
    y_val,
    y_val_pred,
    labels=list(range(len(emotion_names))),
    target_names=emotion_names,
)

print("\nClassification Report:")
print(report)


## 💾 Save Model and Tokenizer

Save the trained model and tokenizer for later use.


In [None]:
# Persist the trained network in the native Keras format
model.save('./data/lstm_model.keras')
print("‚úÖ Model saved to: ./data/lstm_model.keras")

# Persist the fitted tokenizer so inference uses the same word -> index map
with open('./data/lstm_tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)
print("‚úÖ Tokenizer saved to: ./data/lstm_tokenizer.pkl")

# `label_mapping` was referenced below without being defined in any visible
# cell, so a fresh Restart & Run All failed here with NameError after the
# model and tokenizer had already been written. Define it in this cell.
# NOTE(review): assumes label ids 0..5 correspond to these emotion names in
# this order (the same order used for the confusion-matrix tick labels), and
# that consumers expect an id -> name dict. Confirm against the preprocessing
# notebook.
label_mapping = dict(enumerate(['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']))

# Training metadata needed to rebuild the inference pipeline. Numeric values
# are cast to built-in int/float so the pickle holds plain Python numbers
# rather than NumPy scalars.
metadata = {
    'maxlen': int(maxlen),
    'input_size': int(input_size),
    'label_mapping': label_mapping,
    'val_accuracy': float(val_accuracy),
    'val_loss': float(val_loss),
    'best_epoch': int(best_epoch)
}

with open('./data/lstm_metadata.pkl', 'wb') as f:
    pickle.dump(metadata, f)
print("‚úÖ Metadata saved to: ./data/lstm_metadata.pkl")

# Final summary banner
print(f"\n{'='*60}")
print("BiLSTM MODEL TRAINING COMPLETE!")
print(f"{'='*60}")
print(f"Final Validation Accuracy: {val_accuracy:.4f}")
print(f"Final Validation Loss: {val_loss:.4f}")
