In [1]:
import nltk
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Imports - our files
import utils
import models

# Global definitions - data
DATA_FN = 'data/crowdflower_data.csv'
LABEL_NAMES = ["happiness", "worry", "neutral", "sadness"]

# Global definitions - architecture
EMBEDDING_DIM = 100  # We will use pretrained 100-dimensional GloVe
BATCH_SIZE = 128
NUM_CLASSES = 4
USE_CUDA = torch.cuda.is_available()  # CUDA will be available if you are using the GPU image for this homework


In [2]:
train, dev, test = utils.get_data(DATA_FN)

train_generator, dev_generator, test_generator, embeddings, train_data = utils.vectorize_data(train, dev, test, BATCH_SIZE, 
                                      EMBEDDING_DIM,extension=True)


Random samples from train set
       sentiment                                            content
11774    neutral  @GracieAlbernaz I know told you, and it tells ...
23703    neutral     Making egg tart &amp; Chinese egg pudding. Yum
2516       worry  K now I'm really going to bed even tho I ain't...
10040      worry  @Bensue aww at when she was cryin tho...i want...
2511     neutral  @christianedward it's a good sized family room...
24601  happiness             Back from hospital. Doc says I'll live
4413       worry         5 hours to go and no football again for me
5348       worry  @michellej They don't let the queer boys donat...
14862    neutral  The ballerina family is super nice, albeit a l...
15681      worry  is sitting in her nan's bathroom dying her hai...



Labels are encoded in the following class order:
['happiness' 'neutral' 'sadness' 'worry']



Train: 21976 
Dev: 2747 
Test: 2748





In [3]:
class DenseNetwork(nn.Module):
    def __init__(self, embed_dim, output_dim, hidden_dim, weight):
        super(DenseNetwork, self).__init__()

        ########## YOUR CODE HERE ##########
        # TODO: Here, create any layers and attributes your network needs.
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.dense1 = nn.Linear(embed_dim, hidden_dim) 
        self.dense2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()     

    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        x = self.embedding(x)
        # TODO: 2) Take the sum of all word embeddings in a sentence
        x = torch.sum(x,dim=1).float()
        # TODO: 3) Feed the result into 2-layer feedforward network which produces a 4-vector of values,
        # TODO: one for each class
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        return x
        
net = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, 64, embeddings)
print(net)

DenseNetwork(
  (embedding): Embedding(17426, 100)
  (dense1): Linear(in_features=100, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=4, bias=True)
  (relu): ReLU()
)


In [4]:
HIDDEN_DIM = 64
model = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, HIDDEN_DIM, embeddings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
  
EPOCHS = 20
losses = []
for iepoch in range(EPOCHS): 
    for train_batch, train_label in train_generator:
        # Compute the loss
        loss = criterion(model(train_batch),train_label)

        # Zero the gradients
        model.zero_grad()

        # perform a backward pass (backpropagation)
        loss.backward()

        # Update the parameters
        optimizer.step()

    total_loss = 0
    for ibatch, ilabel in dev_generator:
        dev_loss = criterion(model(ibatch), ilabel)
        total_loss += dev_loss
    print(iepoch, total_loss)
    losses.append(total_loss)
    if iepoch > 1 and losses[-2]-total_loss < 0.01:
        break

0 tensor(27.3214, grad_fn=<AddBackward0>)
1 tensor(26.6059, grad_fn=<AddBackward0>)
2 tensor(26.2968, grad_fn=<AddBackward0>)
3 tensor(26.1510, grad_fn=<AddBackward0>)
4 tensor(26.0410, grad_fn=<AddBackward0>)
5 tensor(25.9272, grad_fn=<AddBackward0>)
6 tensor(25.8584, grad_fn=<AddBackward0>)
7 tensor(25.8214, grad_fn=<AddBackward0>)
8 tensor(25.8470, grad_fn=<AddBackward0>)


In [6]:
def test_model(model, loss_fn, test_generator):
    """
    Evaluate the performance of a model on the development set, providing the loss and macro F1 score.
    :param model: a model that performs 4-way emotion classification
    :param loss_fn: a function that can calculate loss between the predicted and gold labels
    :param test_generator: a DataLoader that provides batches of the testing set
    """
    gold = []
    predicted = []

    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()

    model.eval()

    # Iterate over batches in the test dataset
    with torch.no_grad():
        for X_b, y_b in test_generator:
            # Predict
            y_pred = model(X_b)

            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())

            loss += loss_fn(y_pred.double(), y_b.long()).data

    # Print total loss and macro F1 score
    print("Test loss: ")
    print(loss)
    print("F-score: ")
    print(f1_score(gold, predicted, average='macro'))

test_model(model, criterion, test_generator)

Test loss: 
tensor([25.9882])
F-score: 
0.42962728871157085


In [6]:
# test average
TEMP_FILE = "temporary_data.pkl"  # if you set FRESH_START to false, the program will look here for your data, etc.

In [7]:
# load the data and embeddings from file
try:
    with open(TEMP_FILE, "rb") as f:
        print("Loading DataLoaders and embeddings from file....")
        train_generator, dev_generator, test_generator, embeddings, train_data = pickle.load(f)
except FileNotFoundError:
    raise FileNotFoundError("You need to have saved your data with FRESH_START=True once in order to load it!")

Loading DataLoaders and embeddings from file....


In [10]:
class DenseNetwork(nn.Module):
    def __init__(self, embed_dim, output_dim, hidden_dim, weight):
        super(DenseNetwork, self).__init__()

        ########## YOUR CODE HERE ##########
        # TODO: Here, create any layers and attributes your network needs.
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.dense1 = nn.Linear(embed_dim, hidden_dim) 
        self.dense2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()     

    def get_len(self, x):
        x_len = []
        for ix in x:
            if ix[-1] != 0:
                x_len.append(len(ix))
            else:
                x_len.append((ix==0).nonzero()[0])           
        return x_len
    
    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        x_lengths = self.get_len(x)
        x = self.embedding(x)
        # TODO: 2) Take the average of all non-zero word embeddings in a sentence
        x = torch.sum(x,dim=1).float()
        print(np.shape(x),x[0][0])
#         for i, l in enumerate(x_lengths):
#             selected[i,:] = out[i,l-1,:]
        # TODO: 3) Feed the result into 2-layer feedforward network which produces a 4-vector of values,
        # TODO: one for each class
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        return x
        
net = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, 64, embeddings)
print(net)

DenseNetwork(
  (embedding): Embedding(17635, 100)
  (dense1): Linear(in_features=100, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=4, bias=True)
  (relu): ReLU()
)


In [11]:
HIDDEN_DIM = 64
model = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, HIDDEN_DIM, embeddings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
  
EPOCHS = 20
losses = []
for iepoch in range(EPOCHS): 
    for train_batch, train_label in train_generator:
        # Compute the loss
        loss = criterion(model(train_batch),train_label)

        # Zero the gradients
        model.zero_grad()

        # perform a backward pass (backpropagation)
        loss.backward()

        # Update the parameters
        optimizer.step()

    total_loss = 0
    for ibatch, ilabel in dev_generator:
        dev_loss = criterion(model(ibatch), ilabel)
        total_loss += dev_loss
    print(iepoch, total_loss)
    losses.append(total_loss)
    if iepoch > 1 and losses[-2]-total_loss < 0.01:
        break

torch.Size([128, 100]) tensor(0.1555)
torch.Size([128, 100]) tensor(-1.2391)
torch.Size([128, 100]) tensor(-0.2113)
torch.Size([128, 100]) tensor(1.9578)
torch.Size([128, 100]) tensor(5.0507)
torch.Size([128, 100]) tensor(1.8431)
torch.Size([128, 100]) tensor(0.5287)
torch.Size([128, 100]) tensor(-1.3350)
torch.Size([128, 100]) tensor(1.1977)
torch.Size([128, 100]) tensor(5.1284)
torch.Size([128, 100]) tensor(1.6595)
torch.Size([128, 100]) tensor(-0.9256)
torch.Size([128, 100]) tensor(0.1800)
torch.Size([128, 100]) tensor(2.1997)
torch.Size([128, 100]) tensor(0.5544)
torch.Size([128, 100]) tensor(6.5173)
torch.Size([128, 100]) tensor(0.0321)
torch.Size([128, 100]) tensor(1.7648)
torch.Size([128, 100]) tensor(2.3302)
torch.Size([128, 100]) tensor(1.7088)
torch.Size([128, 100]) tensor(-0.8299)
torch.Size([128, 100]) tensor(1.4307)
torch.Size([128, 100]) tensor(3.3567)
torch.Size([128, 100]) tensor(0.8609)
torch.Size([128, 100]) tensor(-0.0458)
torch.Size([128, 100]) tensor(2.2149)
torch.

torch.Size([128, 100]) tensor(-0.0458)
torch.Size([128, 100]) tensor(2.2149)
torch.Size([128, 100]) tensor(0.5178)
torch.Size([128, 100]) tensor(3.0948)
torch.Size([128, 100]) tensor(1.3340)
torch.Size([128, 100]) tensor(1.4519)
torch.Size([128, 100]) tensor(6.7984)
torch.Size([128, 100]) tensor(1.4171)
torch.Size([128, 100]) tensor(1.0816)
torch.Size([128, 100]) tensor(0.5460)
torch.Size([128, 100]) tensor(2.5361)
torch.Size([128, 100]) tensor(-0.7157)
torch.Size([128, 100]) tensor(0.2548)
torch.Size([128, 100]) tensor(1.5103)
torch.Size([128, 100]) tensor(-1.4412)
torch.Size([128, 100]) tensor(-0.7461)
torch.Size([128, 100]) tensor(0.8173)
torch.Size([128, 100]) tensor(1.4540)
torch.Size([128, 100]) tensor(1.6138)
torch.Size([128, 100]) tensor(2.6057)
torch.Size([128, 100]) tensor(0.1431)
torch.Size([128, 100]) tensor(-0.7390)
torch.Size([128, 100]) tensor(0.2321)
torch.Size([128, 100]) tensor(0.0519)
torch.Size([128, 100]) tensor(-0.7285)
torch.Size([128, 100]) tensor(6.4575)
torch.

torch.Size([128, 100]) tensor(-1.2797)
torch.Size([128, 100]) tensor(4.1692)
torch.Size([128, 100]) tensor(0.7871)
torch.Size([128, 100]) tensor(1.5960)
torch.Size([128, 100]) tensor(3.8636)
torch.Size([128, 100]) tensor(4.2217)
torch.Size([128, 100]) tensor(2.0985)
torch.Size([128, 100]) tensor(-0.1306)
torch.Size([128, 100]) tensor(0.4450)
torch.Size([128, 100]) tensor(0.1065)
torch.Size([128, 100]) tensor(1.7230)
torch.Size([128, 100]) tensor(3.0205)
torch.Size([128, 100]) tensor(0.0032)
torch.Size([128, 100]) tensor(2.8376)
torch.Size([128, 100]) tensor(-0.6622)
torch.Size([128, 100]) tensor(-0.6913)
torch.Size([128, 100]) tensor(3.5930)
torch.Size([128, 100]) tensor(0.5037)
torch.Size([128, 100]) tensor(1.9366)
torch.Size([128, 100]) tensor(-0.2682)
torch.Size([128, 100]) tensor(-0.3279)
torch.Size([128, 100]) tensor(1.1681)
torch.Size([128, 100]) tensor(-0.3278)
torch.Size([128, 100]) tensor(-0.2357)
torch.Size([128, 100]) tensor(1.2907)
torch.Size([128, 100]) tensor(2.1214)
torc

torch.Size([128, 100]) tensor(-0.2357)
torch.Size([128, 100]) tensor(1.2907)
torch.Size([128, 100]) tensor(2.1214)
torch.Size([128, 100]) tensor(0.5922)
torch.Size([128, 100]) tensor(0.3316)
torch.Size([128, 100]) tensor(3.4774)
torch.Size([128, 100]) tensor(5.6904)
torch.Size([128, 100]) tensor(2.5100)
torch.Size([128, 100]) tensor(0.5353)
torch.Size([128, 100]) tensor(-0.1916)
torch.Size([128, 100]) tensor(1.4908)
torch.Size([128, 100]) tensor(1.3715)
torch.Size([128, 100]) tensor(-0.1371)
torch.Size([128, 100]) tensor(0.0859)
torch.Size([128, 100]) tensor(3.4906)
torch.Size([128, 100]) tensor(-0.6579)
torch.Size([128, 100]) tensor(1.3288)
torch.Size([128, 100]) tensor(-0.8159)
torch.Size([128, 100]) tensor(0.8912)
torch.Size([128, 100]) tensor(1.6187)
torch.Size([128, 100]) tensor(1.3635)
torch.Size([128, 100]) tensor(1.0613)
torch.Size([128, 100]) tensor(1.2692)
torch.Size([128, 100]) tensor(1.5282)
torch.Size([128, 100]) tensor(-1.0156)
torch.Size([128, 100]) tensor(1.8170)
torch.

torch.Size([128, 100]) tensor(1.4807)
torch.Size([128, 100]) tensor(1.9581)
torch.Size([128, 100]) tensor(0.5381)
torch.Size([128, 100]) tensor(3.2582)
torch.Size([128, 100]) tensor(-0.2429)
torch.Size([128, 100]) tensor(0.3783)
torch.Size([128, 100]) tensor(5.0983)
torch.Size([128, 100]) tensor(3.1755)
torch.Size([128, 100]) tensor(1.7397)
torch.Size([128, 100]) tensor(0.6821)
torch.Size([128, 100]) tensor(0.3759)
torch.Size([128, 100]) tensor(0.4635)
torch.Size([128, 100]) tensor(2.1699)
torch.Size([128, 100]) tensor(4.4448)
torch.Size([128, 100]) tensor(-0.9911)
torch.Size([128, 100]) tensor(1.7119)
torch.Size([128, 100]) tensor(-0.1443)
torch.Size([128, 100]) tensor(0.1495)
torch.Size([128, 100]) tensor(-1.3001)
torch.Size([128, 100]) tensor(1.2222)
torch.Size([128, 100]) tensor(1.6302)
torch.Size([128, 100]) tensor(2.7703)
torch.Size([128, 100]) tensor(1.6650)
torch.Size([128, 100]) tensor(4.0752)
torch.Size([128, 100]) tensor(2.4281)
torch.Size([128, 100]) tensor(-1.4641)
torch.S

torch.Size([128, 100]) tensor(-1.5595)
torch.Size([128, 100]) tensor(0.4473)
torch.Size([128, 100]) tensor(2.2759)
torch.Size([128, 100]) tensor(2.4283)
torch.Size([128, 100]) tensor(0.9154)
torch.Size([128, 100]) tensor(1.8619)
torch.Size([128, 100]) tensor(3.6206)
torch.Size([128, 100]) tensor(2.1592)
torch.Size([128, 100]) tensor(1.6432)
torch.Size([128, 100]) tensor(0.6801)
torch.Size([128, 100]) tensor(-1.4367)
torch.Size([128, 100]) tensor(1.2752)
torch.Size([128, 100]) tensor(0.5567)
torch.Size([128, 100]) tensor(0.7263)
torch.Size([128, 100]) tensor(-0.1889)
torch.Size([128, 100]) tensor(2.2312)
torch.Size([128, 100]) tensor(1.5403)
torch.Size([128, 100]) tensor(0.9936)
torch.Size([128, 100]) tensor(4.8920)
torch.Size([128, 100]) tensor(1.3912)
torch.Size([128, 100]) tensor(3.9290)
torch.Size([128, 100]) tensor(2.3986)
torch.Size([128, 100]) tensor(3.4365)
torch.Size([128, 100]) tensor(0.4652)
torch.Size([128, 100]) tensor(1.6598)
torch.Size([128, 100]) tensor(-0.3277)
torch.Si

torch.Size([128, 100]) tensor(1.1831)
torch.Size([128, 100]) tensor(1.7043)
torch.Size([128, 100]) tensor(2.8121)
torch.Size([128, 100]) tensor(2.3103)
torch.Size([128, 100]) tensor(1.9119)
torch.Size([128, 100]) tensor(-1.1777)
torch.Size([128, 100]) tensor(0.8612)
torch.Size([128, 100]) tensor(3.8812)
torch.Size([128, 100]) tensor(1.5911)
torch.Size([128, 100]) tensor(6.6012)
torch.Size([128, 100]) tensor(2.9386)
torch.Size([128, 100]) tensor(1.2754)
torch.Size([128, 100]) tensor(5.7955)
torch.Size([128, 100]) tensor(4.3996)
torch.Size([128, 100]) tensor(1.0782)
torch.Size([128, 100]) tensor(-0.3753)
torch.Size([128, 100]) tensor(1.0525)
torch.Size([128, 100]) tensor(1.2198)
torch.Size([128, 100]) tensor(1.4162)
torch.Size([59, 100]) tensor(3.7453)
6 tensor(25.7366, grad_fn=<AddBackward0>)
torch.Size([128, 100]) tensor(0.1555)
torch.Size([128, 100]) tensor(-1.2391)
torch.Size([128, 100]) tensor(-0.2113)
torch.Size([128, 100]) tensor(1.9578)
torch.Size([128, 100]) tensor(5.0507)
torch

torch.Size([128, 100]) tensor(0.1555)
torch.Size([128, 100]) tensor(-1.2391)
torch.Size([128, 100]) tensor(-0.2113)
torch.Size([128, 100]) tensor(1.9578)
torch.Size([128, 100]) tensor(5.0507)
torch.Size([128, 100]) tensor(1.8431)
torch.Size([128, 100]) tensor(0.5287)
torch.Size([128, 100]) tensor(-1.3350)
torch.Size([128, 100]) tensor(1.1977)
torch.Size([128, 100]) tensor(5.1284)
torch.Size([128, 100]) tensor(1.6595)
torch.Size([128, 100]) tensor(-0.9256)
torch.Size([128, 100]) tensor(0.1800)
torch.Size([128, 100]) tensor(2.1997)
torch.Size([128, 100]) tensor(0.5544)
torch.Size([128, 100]) tensor(6.5173)
torch.Size([128, 100]) tensor(0.0321)
torch.Size([128, 100]) tensor(1.7648)
torch.Size([128, 100]) tensor(2.3302)
torch.Size([128, 100]) tensor(1.7088)
torch.Size([128, 100]) tensor(-0.8299)
torch.Size([128, 100]) tensor(1.4307)
torch.Size([128, 100]) tensor(3.3567)
torch.Size([128, 100]) tensor(0.8609)
torch.Size([128, 100]) tensor(-0.0458)
torch.Size([128, 100]) tensor(2.2149)
torch.

torch.Size([128, 100]) tensor(1.4519)
torch.Size([128, 100]) tensor(6.7984)
torch.Size([128, 100]) tensor(1.4171)
torch.Size([128, 100]) tensor(1.0816)
torch.Size([128, 100]) tensor(0.5460)
torch.Size([128, 100]) tensor(2.5361)
torch.Size([128, 100]) tensor(-0.7157)
torch.Size([128, 100]) tensor(0.2548)
torch.Size([128, 100]) tensor(1.5103)
torch.Size([128, 100]) tensor(-1.4412)
torch.Size([128, 100]) tensor(-0.7461)
torch.Size([128, 100]) tensor(0.8173)
torch.Size([128, 100]) tensor(1.4540)
torch.Size([128, 100]) tensor(1.6138)
torch.Size([128, 100]) tensor(2.6057)
torch.Size([128, 100]) tensor(0.1431)
torch.Size([128, 100]) tensor(-0.7390)
torch.Size([128, 100]) tensor(0.2321)
torch.Size([128, 100]) tensor(0.0519)
torch.Size([128, 100]) tensor(-0.7285)
torch.Size([128, 100]) tensor(6.4575)
torch.Size([128, 100]) tensor(1.7679)
torch.Size([128, 100]) tensor(1.5290)
torch.Size([128, 100]) tensor(5.0665)
torch.Size([128, 100]) tensor(-0.2796)
torch.Size([128, 100]) tensor(3.5752)
torch.