In [1]:
HIDDEN_SIZE = [512, 256, 128, 128, 64]
BATCH_SIZE = 64
LEARING_RATE = 0.001
EPOCHS = 10

In [2]:
import copy

import pandas as pd
from sklearn.preprocessing import OneHotEncoder

import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Try to use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Read Data

In [4]:
# Load the emotions dataset.
sentenceDf = pd.read_csv("data/emotion-sentences.zip")
sentenceDf.head()

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [5]:
# Load the transformer model.
sentenceTransformer = SentenceTransformer("all-mpnet-base-v2")
sentenceTransformer = sentenceTransformer.to(device)

In [6]:
# Sample the first 5 sentences.
sentences = sentenceDf["Text"].sample(5).tolist()
sentences

['i feel very blessed and loved by the people around me',
 'i am not feeling well or grouchy or lazy ill sometimes forego my bed in favor of our futon couch for a little shut eye',
 'i can really decode but im sorry i have to vomit my feelings out because i am so cranky and everything is getting on my nerves',
 'i tried hard to avoid kim and her insults i tried hard not to feel as though i wasnt really respected by anyone or perhaps i wasnt at all welcome',
 'i get the feeling shes amused by all of this']

In [7]:
# Embed the sentences.
embeddings = sentenceTransformer.encode(sentences)
# Print all the sentences and their embeddings.
for sentence, embedding in zip(sentences, embeddings):
    print(f"Sentence: {sentence}")
    print(f"Embedding: {embedding}")
    print()
embeddings.shape

Sentence: i feel very blessed and loved by the people around me
Embedding: [-2.99173873e-02  9.80187673e-03  2.18353122e-02 -3.14175263e-02
 -1.87817775e-02  8.62663891e-03 -1.38587788e-01 -2.90057622e-02
  2.08032541e-02 -1.31846620e-02 -1.54521447e-02  4.20852900e-02
 -3.81782763e-02 -6.57491684e-02  4.28736992e-02 -2.49728616e-02
  4.60016653e-02  1.47707285e-02 -5.62297776e-02  8.24862905e-03
  2.10896600e-03 -1.47537189e-03  3.66545445e-03  6.82017580e-03
 -2.18335469e-03 -3.52241732e-02  1.74709316e-02  2.54005734e-02
  3.44339311e-02  2.05507074e-02 -6.57962561e-02  5.84634114e-03
  1.68537889e-02 -1.78419761e-02  1.51318488e-06  1.52261986e-03
  1.22014685e-02 -2.38633323e-02 -1.02552325e-02  1.57486834e-02
  2.77551543e-02 -4.38556001e-02  1.76269449e-02 -1.59209166e-02
  8.85003153e-03  3.35256173e-03 -7.51060294e-03  1.48529867e-02
 -1.49118546e-02 -3.64043266e-02 -2.13797558e-02 -3.68046761e-02
 -1.03383057e-01 -2.08260417e-02  5.08377105e-02  2.91156005e-02
  3.07593122e-0

(5, 768)

# Construct Dataset

In [8]:
# Construct the dataset.
class EmotionDataset(Dataset):
    def __init__(self, df: pd.DataFrame, sentenceTransformer: SentenceTransformer, oneHotEncoder: OneHotEncoder):
        sentences = df["Text"].tolist()
        labels = df["Emotion"].tolist()
        # Embed the sentences.
        self.sentences = sentenceTransformer.encode(sentences)
        # One-hot encode the labels.
        self.labels = oneHotEncoder.transform(pd.DataFrame(labels)).toarray()


    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, item):
        sentence = self.sentences[item]
        label = self.labels[item]

        return sentence, label

In [9]:
# Construct the OneHotEncoder.
oneHotEncoder = OneHotEncoder()
emotions = sentenceDf["Emotion"].values.reshape(-1, 1)
oneHotEncoder.fit(emotions)
# Construct the dataset.
dataset = EmotionDataset(sentenceDf, sentenceTransformer, oneHotEncoder)

In [10]:
# Shuffle and split the dataset.
trainSet, valSet = random_split(dataset, [int(len(dataset) * 0.8), len(dataset) - int(len(dataset) * 0.8)])
len(trainSet), len(valSet)

(17167, 4292)

In [11]:
# Prepare the dataloaders.
trainLoader = DataLoader(trainSet, batch_size=BATCH_SIZE, shuffle=True)
valLoader = DataLoader(valSet, batch_size=BATCH_SIZE, shuffle=True)

# Model

In [12]:
# Emotion Classifier Model.
class EmotionClassifier(nn.Module):
    def __init__(
        self,
        inputSize: int,
        h1Size: int,
        h2Size: int,
        h3Size: int,
        h4Size:int,
        h5Size:int,
        outputSize: int
    ):
        super(EmotionClassifier, self).__init__()
        self.fc1 = nn.Linear(inputSize, h1Size)
        self.fc2 = nn.Linear(h1Size, h2Size)
        self.fc3 = nn.Linear(h2Size, h3Size)
        self.fc4 = nn.Linear(h3Size, h4Size)
        self.fc5 = nn.Linear(h4Size, h5Size)
        self.fc6 = nn.Linear(h5Size, outputSize)

    def forward(self, x):
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = self.fc6(x)
        return x

# Training

In [13]:
# Construct the model.
model = EmotionClassifier(embeddings.shape[1], *HIDDEN_SIZE, len(oneHotEncoder.categories_[0]))
# Define the loss function.
lossFunction = nn.BCEWithLogitsLoss()
# Define the optimizer.
optimizer = optim.Adam(model.parameters(), lr=LEARING_RATE)

# Send the model to the device.
model = model.to(device)

In [14]:
# Train the model.
bestModel = copy.deepcopy(model.state_dict())
bestValLoss = float("inf")
for i in range(EPOCHS):
    # Set the model to training mode.
    model.train()
    trainLoss = 0
    for batch in trainLoader:
        # Get the batch.
        sentences, labels = batch
        # Send the batch to the device.
        sentences = sentences.to(device)
        labels = labels.to(device)
        # Reset the gradients.
        optimizer.zero_grad()
        # Perform the forward pass.
        outputs = model(sentences)
        # Compute the loss.
        loss = lossFunction(outputs, labels)
        # Perform the backward pass.
        loss.backward()
        trainLoss += loss.item()
        # Update the parameters.
        optimizer.step()

    # Set the model to evaluation mode.
    model.eval()
    # Compute the validation loss.
    with torch.no_grad():
        valLoss = 0
        for batch in valLoader:
            # Get the batch.
            sentences, labels = batch
            # Send the batch to the device.
            sentences = sentences.to(device)
            labels = labels.to(device)
            # Perform the forward pass.
            outputs = model(sentences)
            # Compute the loss.
            loss = lossFunction(outputs, labels)
            valLoss += loss.item()
        valLoss /= len(valLoader)
    
    # Check if the validation loss is the best so far.
    if valLoss < bestValLoss:
        bestValLoss = valLoss
        bestModel = copy.deepcopy(model.state_dict())

    print(f"Epoch {i + 1}/{EPOCHS}, Training Loss: {trainLoss}, Validation Loss: {valLoss}")

bestValLoss

Epoch 1/10, Training Loss: 107.82735751016666, Validation Loss: 0.337141580809627
Epoch 2/10, Training Loss: 82.14100502746112, Validation Loss: 0.2757789905095802
Epoch 3/10, Training Loss: 69.3303262920363, Validation Loss: 0.24422984171846426
Epoch 4/10, Training Loss: 58.06548050282443, Validation Loss: 0.23033028509682715
Epoch 5/10, Training Loss: 47.603933027463576, Validation Loss: 0.2359621715941926
Epoch 6/10, Training Loss: 37.41871262988912, Validation Loss: 0.25388282219803693
Epoch 7/10, Training Loss: 27.93452502894636, Validation Loss: 0.26518791567668376
Epoch 8/10, Training Loss: 19.95767183901514, Validation Loss: 0.3264584401785353
Epoch 9/10, Training Loss: 15.67672925761458, Validation Loss: 0.34258848831129995
Epoch 10/10, Training Loss: 11.094100746753321, Validation Loss: 0.3748548755875058


0.23033028509682715

In [15]:
# Get the first 5 sentences.
sampleSentences = sentenceDf["Text"].sample(5).tolist()
sampleLabels = sentenceDf["Emotion"].sample(5).tolist()
# Embed the sentences.
sentenceEmbeddings = sentenceTransformer.encode(sampleSentences)
# Convert the embeddings to tensors.
sentenceEmbeddings = torch.tensor(sentenceEmbeddings, dtype=torch.float32)
sentenceEmbeddings = sentenceEmbeddings.to(device)
# Set the model to evaluation mode.
model.eval()
# Perform the forward pass.
modelOutputs = model(sentenceEmbeddings)
# Get the predictions.
_, predictions = torch.max(modelOutputs, 1)
# Get the labels.
labels = oneHotEncoder.inverse_transform(modelOutputs.cpu().detach().numpy())
# Print the results.
for sentence, sampleLabel, label in zip(sampleSentences, sampleLabels, labels):
    print(f"Sentence: {sentence}")
    print(f"Sample Label: {sampleLabel}")
    print(f"Label: {label[0]}")
    print()

Sentence: i like to watch people do horrible things so i can be outraged at them and feel superior
Sample Label: fear
Label: happy

Sentence: i feel pathetic and the desolation is beyond consolation
Sample Label: fear
Label: sadness

Sentence: i dont want to deny what i feel my body aching for
Sample Label: happy
Label: sadness

Sentence: i hate these feelings in my heart i hate that work stressed me out i hate that cornelius wont let me get my way im frustrated lord
Sample Label: happy
Label: sadness

Sentence: She was offended and took it all persona
Sample Label: anger
Label: anger



In [16]:
# Save the best model.
torch.save(bestModel, "models/emotion-classifier.pt")