In [5]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader

from functions import net, MyDataset, train

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Prep the dataset

In [6]:
# Load the table from the "clean" data directory. Later cells read its "split"
# column (values "train" / "test" / "dev") together with the feature and class
# columns listed below. The file name suggests EmoBank / AffectiveText rows
# with lexicon scores attached -- TODO confirm provenance.
eb = pd.read_csv("../../data/clean/emobank_affectivetext_lex.csv")

# Ekman classes: the six emotion columns selected as labels for the datasets.
classes = ["anger", "disgust", "fear", "joy", "sadness", "surprise"]

# Sentiment-score columns selected as model inputs (one list entry per column).
features = [
    "VADER_neg",
    "VADER_neu",
    "VADER_pos",
    "VADER_compound",
    "TextBlob_polarity",
    "TextBlob_subjectivity",
    "AFINN",
]
print("Number of features: ", len(features))
print("Number of classes: ", len(classes))

# Class index -> emotion name. Derived from `classes` instead of being typed
# out a second time, so the mapping cannot drift out of sync with the list.
labels_map = dict(enumerate(classes))

# Partition the rows by the value of the "split" column (train / test / dev).
eb_train, eb_test, eb_dev = (
    eb.loc[eb["split"].eq(split_name)]
    for split_name in ("train", "test", "dev")
)

# Wrap each partition in a MyDataset, keeping only the feature columns
# followed by the class columns (same column order for all three).
train_dataset, test_dataset, dev_dataset = (
    MyDataset(partition[features + classes])
    for partition in (eb_train, eb_test, eb_dev)
)

Number of features:  7
Number of classes:  6


In [7]:
# Seed PyTorch's global RNG so that weight initialisation and batch shuffling
# are repeatable under "Restart Kernel -> Run All".
torch.manual_seed(0)

# Network sized from the column lists: len(features) inputs and len(classes)
# outputs. The values 100 and num_hidden=3 are passed straight to `net`
# (presumably hidden width and number of hidden layers -- confirm in
# functions.py).
model = net(len(features), 100, len(classes), num_hidden=3)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=1000, shuffle=True)
# Evaluation gains nothing from shuffling; keep the dev order fixed so that
# validation runs are deterministic.
dev_loader = DataLoader(dev_dataset, batch_size=1000, shuffle=False)

# Pull a single batch to sanity-check tensor shapes and label values.
train_features, train_labels = next(iter(train_loader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
print(train_labels)

Feature batch shape: torch.Size([920, 7])
Labels batch shape: torch.Size([920, 6])
tensor([[44.,  8., 78.,  0., 38.,  8.],
        [38., 17., 33.,  0., 14.,  7.],
        [ 0., 14.,  0., 21.,  0., 58.],
        ...,
        [ 0.,  0.,  5., 59.,  0., 15.],
        [ 0.,  0.,  0., 39.,  0.,  7.],
        [ 0.,  0.,  0., 80.,  2., 24.]])


In [9]:
# Train `model` for up to 1000 epochs, passing both the train and dev loaders;
# the log reports a "Valid Loss" every 10 epochs (presumably computed on the
# dev loader -- confirm in functions.py).
# NOTE(review): in the recorded run the train loss plateaus around 145 after
# roughly 50 epochs and the run was interrupted by hand. The sample batch
# printed earlier shows label values on a 0-100 scale, and the warning emitted
# here points at a softmax in the model's forward pass. If the network output
# is a softmax (bounded to [0, 1]), the targets may need rescaling -- TODO
# confirm against functions.py before training further.
train(model, train_loader, dev_loader, epochs=1000)

  x = self.softmax(x)


Epoch: 10, Train Loss: 148.0979, Valid Loss: 156.4469
Epoch: 20, Train Loss: 147.1180, Valid Loss: 156.2113
Epoch: 30, Train Loss: 146.2793, Valid Loss: 154.2727
Epoch: 40, Train Loss: 145.4611, Valid Loss: 155.9730
Epoch: 50, Train Loss: 145.1783, Valid Loss: 155.0597
Epoch: 60, Train Loss: 145.3470, Valid Loss: 156.2179
Epoch: 70, Train Loss: 145.2746, Valid Loss: 155.6121
Epoch: 80, Train Loss: 145.2495, Valid Loss: 155.5777
Epoch: 90, Train Loss: 145.1883, Valid Loss: 155.4424
Epoch: 100, Train Loss: 145.1555, Valid Loss: 155.3582
Epoch: 110, Train Loss: 145.2264, Valid Loss: 155.7528
Epoch: 120, Train Loss: 145.0911, Valid Loss: 155.3884
Epoch: 130, Train Loss: 145.0830, Valid Loss: 155.2634
Epoch: 140, Train Loss: 145.2202, Valid Loss: 155.9046
Epoch: 150, Train Loss: 145.0477, Valid Loss: 155.5220
Epoch: 160, Train Loss: 144.9859, Valid Loss: 155.2039
Epoch: 170, Train Loss: 145.0800, Valid Loss: 155.5700
Epoch: 180, Train Loss: 145.0245, Valid Loss: 155.6092
Epoch: 190, Train L

KeyboardInterrupt: 