In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
NUM_CLASSES = 6
CLASSES = ["sadnesss", "joy", "love", "anger", "fear"]

In [3]:
# Load all data
train_data = pd.read_csv("data/training.csv")
test_data = pd.read_csv("data/test.csv")
validation_data = pd.read_csv("data/validation.csv")

# Separate X's and y's from each other
FEATURE_COLUMNS = ["text"]
LABEL_COLUMN = "label"

X_train = train_data[FEATURE_COLUMNS]
Y_train = train_data[LABEL_COLUMN]

X_test = test_data[FEATURE_COLUMNS]
Y_test = test_data[LABEL_COLUMN]

X_val = validation_data[FEATURE_COLUMNS]
Y_val = validation_data[LABEL_COLUMN]

# These are used to run cross validation
X_train_val = pd.concat([X_train, X_val]) 
Y_train_val = pd.concat([Y_train, Y_test])

# These are used to run val and test for Neural Nets
X_val_test = pd.concat([X_val, X_test])
Y_val_test = pd.concat([Y_val, Y_test])

In [9]:
X_train_maxtoks = X_train['text'].str.len().max()
X_test_maxtoks = X_test['text'].str.len().max()
X_val_maxtoks = X_val['text'].str.len().max() 

max_toks = max(X_train_maxtoks, X_test_maxtoks, X_val_maxtoks)
print(max_toks)

300


# Data Loaders

In [110]:
# Pytorch specific constants
from torch.utils.data import TensorDataset, DataLoader
from extractors.chartok import  CharTokenDataset
from utils.neuralnet import NeuralNetwork
from utils.rnn import LSTMNetwork
from utils.transformer import TransformerEncoder
from utils.trainer import training_loop, evaluate
import torch

BATCH_SIZE = 1
LEARNING_RATE = 1e-3

# Standard MLP

In [117]:
# Because an MLP operates on fixed size inputs, we will use the entire fixed size input for this
train_data = CharTokenDataset(X_train, Y_train, tokenizer=None, max_seq_length=max_toks)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

val_data = CharTokenDataset(X_val_test, Y_val_test, tokenizer=None, max_seq_length=max_toks)
val_loader = DataLoader(val_data, batch_size=1, shuffle=True)

In [92]:
mlp = NeuralNetwork(max_toks, [], NUM_CLASSES)
training_loop(mlp, train_loader, val_loader, epochs=10, learning_rate=LEARNING_RATE)

Epoch 1
train_loss = 2.1793, val_loss = 1.6152
Epoch 2
train_loss = 0.8812, val_loss = 1.5902
Epoch 3
train_loss = 2.0608, val_loss = 1.5839
Epoch 4
train_loss = 1.0625, val_loss = 1.5943
Epoch 5
train_loss = 2.3476, val_loss = 1.5831
Epoch 6
train_loss = 0.8281, val_loss = 1.6215
Epoch 7
train_loss = 1.1063, val_loss = 1.5998
Epoch 8
train_loss = 1.3402, val_loss = 1.6421
Epoch 9
train_loss = 1.5472, val_loss = 1.6038
Epoch 10
train_loss = 0.5425, val_loss = 1.6014


In [93]:
evaluate(model=mlp, val_dl=val_loader)

loss = 1.6014
accuracy = 0.3417
f1 = 0.2614


(tensor(1.6014), 0.34175, 0.261355887646227)

# RNN

In [119]:
lstm = LSTMNetwork(10, 20, 10, NUM_CLASSES)
training_loop(mlp, train_loader, val_loader, epochs=10, learning_rate=LEARNING_RATE)

Epoch 1


KeyboardInterrupt: 

# Transformer

In [132]:
tarnsformer = TransformerEncoder(30, NUM_CLASSES, 40, 5, 4, 0.1)

In [136]:
for dat in train_loader:
    x, y = dat
    x = x.long()
    print(tarnsformer.forward(x).shape)

torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Size([6])
torch.Si

KeyboardInterrupt: 

In [None]:
training_loop(mlp, train_loader, val_loader, epochs=10, learning_rate=LEARNING_RATE)