In [1]:
# 📌 STEP 1: Imports
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# 📌 STEP 2: Load Data
# Make sure you upload these files to Colab or mount from Drive
train_embeddings = np.load('final_training_embeddings2.npy')  # shape: (samples, timesteps, features) /content/final_training_embeddings2.npy
test_embeddings = np.load('final_testing_embeddings2.npy')    # shape: (samples, timesteps, features) /content/final_testing_embeddings2.npy

train_embeddings = np.expand_dims(train_embeddings, axis=1)  # Now (samples, 1, features)
test_embeddings = np.expand_dims(test_embeddings, axis=1)

# Load CSV with labels
train_labels_df = pd.read_csv('final_testing_dataset.csv') #/content/final_testing_dataset.csv

# 📌 STEP 3: Extract multi-label targets
emotion_labels = [
    'anger', 'brain dysfunction (forget)', 'emptiness', 'hopelessness',
    'loneliness', 'sadness', 'suicide intent', 'worthlessness'
]
y_train = train_labels_df[emotion_labels].values  # shape: (samples, 8)

# 📌 STEP 4: Build the LSTM Model
model = Sequential([
    LSTM(64, input_shape=(train_embeddings.shape[1], train_embeddings.shape[2])),
    Dense(32, activation='relu'),
    Dense(8, activation='sigmoid')  # one sigmoid output per emotion
])

# 📌 STEP 5: Compile
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 📌 STEP 6: Train
model.fit(train_embeddings, y_train, epochs=60, batch_size=32)

# 📌 STEP 7: Predict on test set
predictions = model.predict(test_embeddings)  # shape: (test_samples, 8)

# Convert to binary 0/1 labels
predicted_labels = (predictions > 0.5).astype(int)

# 📌 STEP 8: Show sample predictions
print("Predictions for first 5 test samples:")
for i in range(5):
    print(f"Sample {i+1}: {predicted_labels[i]}")

Epoch 1/60


  super().__init__(**kwargs)


[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.0939 - loss: 0.6462
Epoch 2/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.1327 - loss: 0.5168
Epoch 3/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2031 - loss: 0.4741
Epoch 4/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2478 - loss: 0.4452
Epoch 5/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2770 - loss: 0.4447
Epoch 6/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2831 - loss: 0.4323
Epoch 7/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2660 - loss: 0.4318
Epoch 8/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2838 - loss: 0.4318
Epoch 9/60
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━

In [2]:
from sklearn.metrics import classification_report

# 📌 True labels for test set (assuming same CSV used for test labels)
test_labels_df = pd.read_csv('final_testing_dataset.csv')  # load true test labels
y_test = test_labels_df[emotion_labels].values  # shape: (samples, 8)

# 📌 Generate classification report
report = classification_report(y_test, predicted_labels, target_names=emotion_labels)
print(report)


                            precision    recall  f1-score   support

                     anger       0.87      0.81      0.84      1754
brain dysfunction (forget)       0.79      0.62      0.70       813
                 emptiness       0.90      0.78      0.84      1573
              hopelessness       0.92      0.94      0.93      2919
                loneliness       0.89      0.90      0.89      1929
                   sadness       0.91      0.96      0.93      3260
            suicide intent       0.82      0.84      0.83      1035
             worthlessness       0.87      0.84      0.85      2095

                 micro avg       0.89      0.87      0.88     15378
                 macro avg       0.87      0.83      0.85     15378
              weighted avg       0.89      0.87      0.88     15378
               samples avg       0.87      0.85      0.84     15378



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
