<a href="https://colab.research.google.com/github/JasperAD11/Sentiment-Across-Signals-Neural-Networks-vs.-LLMs/blob/main/notebook_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Part 1

## Libraries

In [1]:
import tensorflow as tf
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow import keras
from tensorflow.keras import layers, models, initializers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import TextVectorization, Input, Embedding, LSTM, Dropout, Dense
from tensorflow.keras.initializers import Constant
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

## Binary model

### Dataset

In [2]:
# Download the aclImdb sentiment archive from Stanford and unpack it in the current
# working directory; the following cell reads the extracted data from `aclImdb/`.
!curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xzf aclImdb_v1.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 80.2M  100 80.2M    0     0  6494k      0  0:00:12  0:00:12 --:--:-- 15.5M


In [3]:
# Root directory produced by extracting aclImdb_v1.tar.gz
dataset_dir = "aclImdb"

# Remove the unlabeled ("unsup") reviews so only labeled class folders remain under
# train/. ignore_errors=True keeps this cell re-runnable once the folder is gone
# (a plain rmtree raises FileNotFoundError on the second run).
shutil.rmtree(os.path.join(dataset_dir, 'train', 'unsup'), ignore_errors=True)

# Load training and test sets
batch_size = 32
seed = 42

# The training and validation subsets are carved out of the same directory; both
# calls must share `validation_split` and `seed` so the two subsets do not overlap.
train_ds = tf.keras.utils.text_dataset_from_directory(
    os.path.join(dataset_dir, "train"),
    batch_size=batch_size,
    validation_split=0.2,
    subset="training",
    seed=seed
)

val_ds = tf.keras.utils.text_dataset_from_directory(
    os.path.join(dataset_dir, "train"),
    batch_size=batch_size,
    validation_split=0.2,
    subset="validation",
    seed=seed
)

test_ds = tf.keras.utils.text_dataset_from_directory(
    os.path.join(dataset_dir, "test"),
    batch_size=batch_size
)

# To train the Final Model: recombine the training and validation batches.
# The datasets are already batched, so this shuffle reorders whole batches.
full_train_ds = train_ds.concatenate(val_ds).shuffle(10000)

Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.
Found 25000 files belonging to 2 classes.


In [4]:
# Vocabulary cap and fixed token-sequence length used by the vectorizer
max_vocab = 20000
sequence_len = 300

# Turns raw strings into integer token ids with a fixed output length
vectorizer = TextVectorization(
    output_sequence_length=sequence_len,
    output_mode='int',
    max_tokens=max_vocab,
)

# Build the vocabulary from the training texts only (labels are dropped first)
text_only_train = train_ds.map(lambda review, _label: review)
vectorizer.adapt(text_only_train)

In [5]:
def vectorize_dataset(ds, shuffle_buffer=None):
    """Tokenize the text of a ``(text, label)`` dataset and make it cheap to re-iterate.

    Args:
        ds: A ``tf.data.Dataset`` yielding ``(text, label)`` pairs.
        shuffle_buffer: Optional buffer size for a shuffle applied *after* the
            cache. ``cache()`` replays elements in the order of the first pass,
            so any shuffle placed upstream of it is frozen after epoch one;
            pass a buffer here for datasets that should be reshuffled each epoch.
            ``None`` (default) keeps the cached order.

    Returns:
        A dataset of ``(token_ids, label)`` pairs, cached and prefetched.
    """
    ds = ds.map(lambda text, label: (vectorizer(text), label)).cache()
    if shuffle_buffer:
        ds = ds.shuffle(shuffle_buffer)
    return ds.prefetch(buffer_size=tf.data.AUTOTUNE)

train_ds = vectorize_dataset(train_ds)
val_ds = vectorize_dataset(val_ds)
test_ds = vectorize_dataset(test_ds)
# 25000 reviews / batch size 32 -> 782 batches, so a 1000-element buffer
# reshuffles the complete training set on every epoch.
full_train_ds = vectorize_dataset(full_train_ds, shuffle_buffer=1000)


### Final Binary Model (model 2 in notebook1)

In [18]:
# Binary sentiment classifier: embedded tokens are averaged into one vector per
# review, passed through two small ReLU layers and a single sigmoid output unit.
model_binary = keras.Sequential()
model_binary.add(layers.Embedding(input_dim=max_vocab, output_dim=128))
model_binary.add(layers.GlobalAveragePooling1D())  # Sequence -> single vector

# Two hidden dense layers of 8 units each
for hidden_units in (8, 8):
    model_binary.add(layers.Dense(hidden_units, activation='relu'))

# Output layer: one sigmoid unit for binary classification
model_binary.add(layers.Dense(1, activation='sigmoid'))

model_binary.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [19]:
# Stop once val_accuracy has not improved for 10 epochs (restoring the best weights)
# and keep the best-scoring model on disk as 'best_model_binary.h5'.
# NOTE(review): test_ds is used as the validation set, so it drives both early
# stopping and checkpoint selection; val_accuracy is therefore not a held-out score.
binary_callbacks = [
    EarlyStopping(monitor='val_accuracy', mode='max', patience=10, restore_best_weights=True),
    ModelCheckpoint('best_model_binary.h5', monitor='val_accuracy', mode='max', save_best_only=True),
]

history = model_binary.fit(
    full_train_ds,
    epochs=100,
    validation_data=test_ds,
    callbacks=binary_callbacks,
)

model_binary.summary()

Epoch 1/100
[1m769/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.5439 - loss: 0.6824



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 6ms/step - accuracy: 0.5454 - loss: 0.6816 - val_accuracy: 0.7534 - val_loss: 0.5195
Epoch 2/100
[1m779/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.8328 - loss: 0.4588



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.8330 - loss: 0.4586 - val_accuracy: 0.8500 - val_loss: 0.3996
Epoch 3/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8778 - loss: 0.3518 - val_accuracy: 0.8351 - val_loss: 0.4165
Epoch 4/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8995 - loss: 0.2918 - val_accuracy: 0.8452 - val_loss: 0.4131
Epoch 5/100
[1m780/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9116 - loss: 0.2552



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.9116 - loss: 0.2552 - val_accuracy: 0.8717 - val_loss: 0.3546
Epoch 6/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9251 - loss: 0.2243 - val_accuracy: 0.8624 - val_loss: 0.3676
Epoch 7/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9303 - loss: 0.2072 - val_accuracy: 0.8677 - val_loss: 0.3708
Epoch 8/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9445 - loss: 0.1722 - val_accuracy: 0.8674 - val_loss: 0.3821
Epoch 9/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9447 - loss: 0.1702 - val_accuracy: 0.8679 - val_loss: 0.4033
Epoch 10/100
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9463 - loss: 0.1653 

## Multi-class model

### Dataset

In [20]:
# Unzip to a folder
!unzip emotions-goemotions.zip -d emotions_data

Archive:  emotions-goemotions.zip
replace emotions_data/goemotions/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [21]:
# Load the GoEmotions annotations extracted into emotions_data/.
# NOTE(review): only goemotions_1.csv is read here — confirm whether the archive
# ships further parts that should be concatenated.
dataset = pd.read_csv('emotions_data/goemotions/data/full_dataset/goemotions_1.csv')

In [22]:
# Drop the metadata columns; what remains is the text plus the columns used as
# labels. Reassigning (instead of inplace=True) with errors="ignore" keeps the
# cell re-runnable: a second run no longer raises KeyError for missing columns.
dataset = dataset.drop(
    columns=[
        "id",
        "author",
        "subreddit",
        "link_id",
        "parent_id",
        "created_utc",
        "rater_id",
        "example_very_unclear",
    ],
    errors="ignore",
)

In [23]:
# Features: token ids produced by the existing `vectorizer` for every comment
raw_texts = dataset['text'].values
X = vectorizer(raw_texts)

# Targets: every remaining column except the raw text
y = dataset.drop(columns='text')

In [24]:
# Hand scikit-learn a NumPy array rather than a TensorFlow tensor
X_numpy = X if not isinstance(X, tf.Tensor) else X.numpy()

# First split: hold out 20% of the rows as the test set
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_numpy, y, random_state=42, test_size=0.2
)

# Second split: 20% of the remaining training rows become the validation set
# (16% of the original data)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, random_state=42, test_size=0.2
)

### Final Multi-class Model (model 5 in notebook1)

In [25]:
# Emotion classifier: averaged token embeddings feed two ReLU blocks with dropout.
# The 28 output units use independent sigmoids with binary cross-entropy, so each
# label is scored on its own.
model_multi_class = keras.Sequential()
model_multi_class.add(layers.Embedding(input_dim=max_vocab, output_dim=128))
model_multi_class.add(layers.GlobalAveragePooling1D())

# Hidden blocks: Dense(128) and Dense(64), each followed by 20% dropout
for width in (128, 64):
    model_multi_class.add(layers.Dense(width, activation='relu'))
    model_multi_class.add(layers.Dropout(0.2))

model_multi_class.add(layers.Dense(28, activation='sigmoid'))

model_multi_class.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['AUC'],
)

In [26]:
# Both callbacks track validation AUC, which must be maximised. mode='max' is
# stated on both so the direction never depends on Keras inferring it from the
# metric name 'val_AUC' (the original EarlyStopping left it on 'auto').
# NOTE(review): the test split is used as validation data, so it drives early
# stopping and checkpoint selection; val_AUC is not a held-out score.
history = model_multi_class.fit(
    X_train_full,
    y_train_full,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[
        EarlyStopping(monitor='val_AUC', patience=10, restore_best_weights=True, mode='max'),
        ModelCheckpoint('best_model_multi.h5', monitor='val_AUC', save_best_only=True, mode='max'),
    ],
)
model_multi_class.summary()

Epoch 1/100
[1m1742/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.6719 - loss: 0.1836



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - AUC: 0.6720 - loss: 0.1835 - val_AUC: 0.7307 - val_loss: 0.1576
Epoch 2/100
[1m1739/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.7145 - loss: 0.1589



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 4ms/step - AUC: 0.7145 - loss: 0.1589 - val_AUC: 0.7330 - val_loss: 0.1576
Epoch 3/100
[1m1739/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.7222 - loss: 0.1583



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.7222 - loss: 0.1583 - val_AUC: 0.7330 - val_loss: 0.1580
Epoch 4/100
[1m1742/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.7232 - loss: 0.1575



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - AUC: 0.7233 - loss: 0.1575 - val_AUC: 0.7474 - val_loss: 0.1552
Epoch 5/100
[1m1744/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.7361 - loss: 0.1565



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.7361 - loss: 0.1565 - val_AUC: 0.7569 - val_loss: 0.1532
Epoch 6/100
[1m1742/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.7490 - loss: 0.1537



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.7490 - loss: 0.1537 - val_AUC: 0.7732 - val_loss: 0.1510
Epoch 7/100
[1m1736/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.7576 - loss: 0.1522



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - AUC: 0.7576 - loss: 0.1522 - val_AUC: 0.7812 - val_loss: 0.1487
Epoch 8/100
[1m1743/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.7694 - loss: 0.1501



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.7694 - loss: 0.1501 - val_AUC: 0.7855 - val_loss: 0.1487
Epoch 9/100
[1m1745/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.7768 - loss: 0.1487



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.7768 - loss: 0.1487 - val_AUC: 0.7897 - val_loss: 0.1475
Epoch 10/100
[1m1737/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.7815 - loss: 0.1478



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.7815 - loss: 0.1478 - val_AUC: 0.7902 - val_loss: 0.1467
Epoch 11/100
[1m1744/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.7858 - loss: 0.1467



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.7859 - loss: 0.1467 - val_AUC: 0.8012 - val_loss: 0.1445
Epoch 12/100
[1m1739/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.7940 - loss: 0.1453



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - AUC: 0.7940 - loss: 0.1453 - val_AUC: 0.8035 - val_loss: 0.1440
Epoch 13/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - AUC: 0.7953 - loss: 0.1447 - val_AUC: 0.7958 - val_loss: 0.1451
Epoch 14/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8037 - loss: 0.1433 - val_AUC: 0.8023 - val_loss: 0.1442
Epoch 15/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - AUC: 0.8071 - loss: 0.1430 - val_AUC: 0.7690 - val_loss: 0.1555
Epoch 16/100
[1m1740/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.8062 - loss: 0.1429



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8062 - loss: 0.1429 - val_AUC: 0.8082 - val_loss: 0.1436
Epoch 17/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8081 - loss: 0.1422 - val_AUC: 0.7895 - val_loss: 0.1475
Epoch 18/100
[1m1745/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.8118 - loss: 0.1414



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - AUC: 0.8118 - loss: 0.1414 - val_AUC: 0.8232 - val_loss: 0.1394
Epoch 19/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8128 - loss: 0.1411 - val_AUC: 0.8152 - val_loss: 0.1417
Epoch 20/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8180 - loss: 0.1398 - val_AUC: 0.8232 - val_loss: 0.1396
Epoch 21/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8199 - loss: 0.1398 - val_AUC: 0.8171 - val_loss: 0.1410
Epoch 22/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8180 - loss: 0.1399 - val_AUC: 0.8225 - val_loss: 0.1403
Epoch 23/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8269 - loss: 0.1380 - val_AUC: 0.8079 - val_loss: 0.1443
Epo



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8293 - loss: 0.1375 - val_AUC: 0.8252 - val_loss: 0.1400
Epoch 26/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8320 - loss: 0.1362 - val_AUC: 0.8071 - val_loss: 0.1436
Epoch 27/100
[1m1747/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.8285 - loss: 0.1371



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8285 - loss: 0.1371 - val_AUC: 0.8333 - val_loss: 0.1370
Epoch 28/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - AUC: 0.8340 - loss: 0.1365 - val_AUC: 0.8246 - val_loss: 0.1391
Epoch 29/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8360 - loss: 0.1357 - val_AUC: 0.8278 - val_loss: 0.1381
Epoch 30/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8349 - loss: 0.1359 - val_AUC: 0.8274 - val_loss: 0.1385
Epoch 31/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8398 - loss: 0.1346 - val_AUC: 0.8253 - val_loss: 0.1412
Epoch 32/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8383 - loss: 0.1348 - val_AUC: 0.8252 - val_loss: 0.1390
Ep



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8453 - loss: 0.1332 - val_AUC: 0.8368 - val_loss: 0.1367
Epoch 36/100
[1m1739/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.8475 - loss: 0.1323



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8475 - loss: 0.1323 - val_AUC: 0.8386 - val_loss: 0.1358
Epoch 37/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8482 - loss: 0.1319 - val_AUC: 0.8302 - val_loss: 0.1380
Epoch 38/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8495 - loss: 0.1316 - val_AUC: 0.8321 - val_loss: 0.1365
Epoch 39/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8524 - loss: 0.1308 - val_AUC: 0.8150 - val_loss: 0.1421
Epoch 40/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - AUC: 0.8534 - loss: 0.1307 - val_AUC: 0.8213 - val_loss: 0.1410
Epoch 41/100
[1m1743/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - AUC: 0.8551 - loss: 0.1299



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8551 - loss: 0.1299 - val_AUC: 0.8482 - val_loss: 0.1329
Epoch 42/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8560 - loss: 0.1302 - val_AUC: 0.8470 - val_loss: 0.1329
Epoch 43/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - AUC: 0.8581 - loss: 0.1290 - val_AUC: 0.8457 - val_loss: 0.1338
Epoch 44/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8590 - loss: 0.1289 - val_AUC: 0.8461 - val_loss: 0.1333
Epoch 45/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8599 - loss: 0.1286 - val_AUC: 0.8427 - val_loss: 0.1338
Epoch 46/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - AUC: 0.8591 - loss: 0.1292 - val_AUC: 0.8375 - val_loss: 0.1353
Ep



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8612 - loss: 0.1279 - val_AUC: 0.8486 - val_loss: 0.1329
Epoch 48/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8648 - loss: 0.1270 - val_AUC: 0.8479 - val_loss: 0.1325
Epoch 49/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8634 - loss: 0.1277 - val_AUC: 0.8265 - val_loss: 0.1392
Epoch 50/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8634 - loss: 0.1276 - val_AUC: 0.8475 - val_loss: 0.1343
Epoch 51/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - AUC: 0.8657 - loss: 0.1268 - val_AUC: 0.8449 - val_loss: 0.1337
Epoch 52/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8657 - loss: 0.1268 - val_AUC: 0.8447 - val_loss: 0.1343
E



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8663 - loss: 0.1267 - val_AUC: 0.8524 - val_loss: 0.1320
Epoch 54/100
[1m1743/1750[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - AUC: 0.8666 - loss: 0.1264



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8666 - loss: 0.1264 - val_AUC: 0.8529 - val_loss: 0.1314
Epoch 55/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8688 - loss: 0.1261 - val_AUC: 0.8432 - val_loss: 0.1340
Epoch 56/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8680 - loss: 0.1263 - val_AUC: 0.8522 - val_loss: 0.1317
Epoch 57/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8705 - loss: 0.1253 - val_AUC: 0.8461 - val_loss: 0.1342
Epoch 58/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8694 - loss: 0.1257 - val_AUC: 0.8254 - val_loss: 0.1377
Epoch 59/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8702 - loss: 0.1251 - val_AUC: 0.8504 - val_loss: 0.1332
Ep



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8725 - loss: 0.1246 - val_AUC: 0.8536 - val_loss: 0.1321
Epoch 61/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8726 - loss: 0.1246 - val_AUC: 0.8320 - val_loss: 0.1392
Epoch 62/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8712 - loss: 0.1247 - val_AUC: 0.8515 - val_loss: 0.1316
Epoch 63/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8710 - loss: 0.1250 - val_AUC: 0.8526 - val_loss: 0.1314
Epoch 64/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8757 - loss: 0.1235 - val_AUC: 0.8464 - val_loss: 0.1330
Epoch 65/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8736 - loss: 0.1242 - val_AUC: 0.8453 - val_loss: 0.1351
Ep



[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8797 - loss: 0.1218 - val_AUC: 0.8556 - val_loss: 0.1314
Epoch 71/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8796 - loss: 0.1220 - val_AUC: 0.8476 - val_loss: 0.1336
Epoch 72/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8801 - loss: 0.1221 - val_AUC: 0.8026 - val_loss: 0.1473
Epoch 73/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8808 - loss: 0.1213 - val_AUC: 0.8533 - val_loss: 0.1320
Epoch 74/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - AUC: 0.8804 - loss: 0.1219 - val_AUC: 0.8465 - val_loss: 0.1337
Epoch 75/100
[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - AUC: 0.8823 - loss: 0.1212 - val_AUC: 0.8420 - val_loss: 0.1349
Ep

## Merging the **Binary** and **Multi-class** models

In [27]:
def create_ensemble_model(sentiment_model_path, emotion_model_path):
    """Combine two saved models into one frozen model with two inputs and two outputs.

    Args:
        sentiment_model_path: Path of the saved sentiment model.
        emotion_model_path: Path of the saved emotion model.

    Returns:
        A ``Model`` whose inputs are named ``"sentiment_input"`` and
        ``"emotion_input"`` and whose outputs are
        ``[sentiment_output, emotion_output]``.
    """
    def _load_frozen(path):
        # Restore a saved model and mark it non-trainable
        restored = load_model(path)
        restored.trainable = False
        return restored

    sentiment_net = _load_frozen(sentiment_model_path)
    emotion_net = _load_frozen(emotion_model_path)

    # One fresh input per branch, shaped like the wrapped model's input (batch axis dropped)
    sentiment_in = Input(shape=sentiment_net.input_shape[1:], name="sentiment_input")
    emotion_in = Input(shape=emotion_net.input_shape[1:], name="emotion_input")

    # Each input is routed through its own sub-model; outputs stay separate
    return Model(
        inputs=[sentiment_in, emotion_in],
        outputs=[sentiment_net(sentiment_in), emotion_net(emotion_in)],
    )

In [28]:
# Assemble the two-headed model from the two checkpoint files
joint_model = create_ensemble_model(
    sentiment_model_path='best_model_binary.h5',
    emotion_model_path='best_model_multi.h5',
)

joint_model.summary()



In [30]:
def predict_ensemble_model(model, texts, vectorizer, max_length=300, neutral_threshold=0.3, emotion_threshold=0.15):
    """Predict the sentiment and the dominant emotions of the first text in ``texts``.

    Args:
        model: Object exposing ``predict``. It is called with a dict keyed by
            ``'sentiment_input'`` and ``'emotion_input'`` and must return
            ``[sentiment_scores, emotion_scores]`` with one row per text.
        texts: A single string or a sequence of strings. A bare string is
            wrapped in a list. Only the prediction for the FIRST text is returned.
        vectorizer: Callable that converts ``texts`` into the model input.
        max_length: Unused; kept so existing callers keep working.
        neutral_threshold: If the 'neutral' score is >= this value, the emotion
            result is exactly ``['neutral']``.
        emotion_threshold: Non-neutral emotions scoring strictly above this
            value are returned.

    Returns:
        ``{'sentiment': 'positive' | 'negative', 'emotion': [label, ...]}``.
        When no non-neutral emotion exceeds ``emotion_threshold``, the single
        highest-scoring non-neutral emotion is returned instead.

    Raises:
        ValueError: If ``texts`` is an empty list/tuple, or the model does not
            return one score per emotion label.
    """
    # A bare string would otherwise be vectorized as a scalar instead of a batch of one
    if isinstance(texts, str):
        texts = [texts]
    if isinstance(texts, (list, tuple)) and not texts:
        raise ValueError("texts must contain at least one string")

    # Both branches of the joint model consume the same token ids
    token_ids = vectorizer(texts)
    predictions = model.predict({
        'sentiment_input': token_ids,
        'emotion_input': token_ids
    })

    # First row of the sentiment head, reduced to a plain float; 0.5 is the decision boundary
    sentiment_score = float(np.ravel(predictions[0][0])[0])
    sentiment_label = "positive" if sentiment_score > 0.5 else "negative"

    # Label order must match the column order of the emotion model's training targets
    emotion_labels = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity',
                      'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
                      'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief',
                      'remorse', 'sadness', 'surprise', 'neutral']

    # First row of the emotion head
    emotion_scores = predictions[1][0]
    if len(emotion_scores) != len(emotion_labels):
        raise ValueError(
            f"expected {len(emotion_labels)} emotion scores, got {len(emotion_scores)}"
        )
    emotion_results = dict(zip(emotion_labels, emotion_scores))

    # A 'neutral' score at or above the threshold overrides every other emotion
    if emotion_results['neutral'] >= neutral_threshold:
        return {
            'sentiment': sentiment_label,
            'emotion': ['neutral']
        }

    # Keep every non-neutral emotion scoring strictly above emotion_threshold
    filtered_emotions = [
        emotion for emotion, score in emotion_results.items()
        if emotion != 'neutral' and score > emotion_threshold
    ]

    # Nothing cleared the threshold: fall back to the best non-neutral emotion
    if not filtered_emotions:
        _, best_emotion = max(
            (score, emotion) for emotion, score in emotion_results.items() if emotion != 'neutral'
        )
        filtered_emotions = [best_emotion]

    return {
        'sentiment': sentiment_label,
        'emotion': filtered_emotions
    }


In [31]:
# Smoke test of the joint model on a single sentence
predict_ensemble_model(
    model=joint_model,
    texts=["I am so excited!"],
    vectorizer=vectorizer,
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455ms/step


{'sentiment': 'negative', 'emotion': ['admiration', 'excitement', 'joy']}