In [1]:
import nltk
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Imports - our files
import utils
import models

# Global definitions - data
DATA_FN = 'data/crowdflower_data.csv'
LABEL_NAMES = ["happiness", "worry", "neutral", "sadness"]

# Global definitions - architecture
EMBEDDING_DIM = 100  # We will use pretrained 100-dimensional GloVe
BATCH_SIZE = 128
NUM_CLASSES = 4
USE_CUDA = torch.cuda.is_available()  # CUDA will be available if you are using the GPU image for this homework
TEMP_FILE = "temporary_data.pkl"

In [2]:
# load the data and embeddings from file
try:
    with open(TEMP_FILE, "rb") as f:
        print("Loading DataLoaders and embeddings from file....")
        train_generator, dev_generator, test_generator, embeddings, train_data = pickle.load(f)
except FileNotFoundError:
    raise FileNotFoundError("You need to have saved your data with FRESH_START=True once in order to load it!")

Loading DataLoaders and embeddings from file....


In [4]:
class DenseNetwork(nn.Module):
    def __init__(self, embed_dim, output_dim, hidden_dim, weight):
        super(DenseNetwork, self).__init__()

        ########## YOUR CODE HERE ##########
        # TODO: Here, create any layers and attributes your network needs.
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.dense1 = nn.Linear(embed_dim, hidden_dim) 
        self.dense2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()     

    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        x = self.embedding(x)
        # TODO: 2) Take the sum of all word embeddings in a sentence
        x = torch.sum(x,dim=1).float()
        # TODO: 3) Feed the result into 2-layer feedforward network which produces a 4-vector of values,
        # TODO: one for each class
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        return x
        
net = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, 64, embeddings)
print(net)

DenseNetwork(
  (embedding): Embedding(17635, 100)
  (dense1): Linear(in_features=100, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=4, bias=True)
  (relu): ReLU()
)


In [5]:
HIDDEN_DIM = 64
model = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, HIDDEN_DIM, embeddings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
  
EPOCHS = 20
losses = []
for iepoch in range(EPOCHS): 
    for train_batch, train_label in train_generator:
        # Compute the loss
        loss = criterion(model(train_batch),train_label)

        # Zero the gradients
        model.zero_grad()

        # perform a backward pass (backpropagation)
        loss.backward()

        # Update the parameters
        optimizer.step()

    total_loss = 0
    for ibatch, ilabel in dev_generator:
        dev_loss = criterion(model(ibatch), ilabel)
        total_loss += dev_loss
    print(iepoch, total_loss)
    losses.append(total_loss)
    if iepoch > 1 and losses[-2]-total_loss < 0.01:
        break

0 tensor(26.8893, grad_fn=<AddBackward0>)
1 tensor(26.2983, grad_fn=<AddBackward0>)
2 tensor(25.9858, grad_fn=<AddBackward0>)
3 tensor(25.8591, grad_fn=<AddBackward0>)
4 tensor(25.7640, grad_fn=<AddBackward0>)
5 tensor(25.6948, grad_fn=<AddBackward0>)
6 tensor(25.6656, grad_fn=<AddBackward0>)
7 tensor(25.6230, grad_fn=<AddBackward0>)
8 tensor(25.6038, grad_fn=<AddBackward0>)
9 tensor(25.6043, grad_fn=<AddBackward0>)


In [6]:
def test_model(model, loss_fn, test_generator):
    """
    Evaluate the performance of a model on the development set, providing the loss and macro F1 score.
    :param model: a model that performs 4-way emotion classification
    :param loss_fn: a function that can calculate loss between the predicted and gold labels
    :param test_generator: a DataLoader that provides batches of the testing set
    """
    gold = []
    predicted = []

    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()

    model.eval()

    # Iterate over batches in the test dataset
    with torch.no_grad():
        for X_b, y_b in test_generator:
            # Predict
            y_pred = model(X_b)

            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())

            loss += loss_fn(y_pred.double(), y_b.long()).data

    # Print total loss and macro F1 score
    print("Test loss: ")
    print(loss)
    print("F-score: ")
    print(f1_score(gold, predicted, average='macro'))

test_model(model, criterion, test_generator)

Test loss: 
tensor([25.9882])
F-score: 
0.42962728871157085


In [65]:
class DenseNetwork(nn.Module):
    def __init__(self, embed_dim, output_dim, hidden_dim, weight):
        super(DenseNetwork, self).__init__()

        ########## YOUR CODE HERE ##########
        # TODO: Here, create any layers and attributes your network needs.
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.dense1 = nn.Linear(embed_dim, hidden_dim) 
        self.dense2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()     

    def get_len(self, x):
        x_len = []
        for ix in x:
            if ix[-1] != 0:
                x_len.append(len(ix)*1.0)
            else:
                x_len.append((ix==0).nonzero()[0])           
        return x_len
    
    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        x_lengths = self.get_len(x)
        x = self.embedding(x)
        # TODO: 2) Take the average of all non-zero word embeddings in a sentence
        y = torch.zeros(x.size(0), x.size(2), dtype=torch.float)
        for i in range(x.size(0)):
            for j in range(x.size(2)):
                y[i][j] = x[i,:,j].sum()/x_lengths[i]
#         for i, l in enumerate(x_lengths):
#             selected[i,:] = out[i,l-1,:]
        # TODO: 3) Feed the result into 2-layer feedforward network which produces a 4-vector of values,
        # TODO: one for each class
        y = self.dense1(y)
        y = self.relu(y)
        y = self.dense2(y)
        return y
        
net = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, 64, embeddings)
print(net)

DenseNetwork(
  (embedding): Embedding(17635, 100)
  (dense1): Linear(in_features=100, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=4, bias=True)
  (relu): ReLU()
)


In [66]:
HIDDEN_DIM = 64
model = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, HIDDEN_DIM, embeddings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
  
EPOCHS = 20
losses = []
for iepoch in range(EPOCHS): 
    for train_batch, train_label in train_generator:
        # Compute the loss
        loss = criterion(model(train_batch),train_label)

        # Zero the gradients
        model.zero_grad()

        # perform a backward pass (backpropagation)
        loss.backward()

        # Update the parameters
        optimizer.step()

    total_loss = 0
    print('yes')
    for ibatch, ilabel in dev_generator:
        dev_loss = criterion(model(ibatch), ilabel)
        total_loss += dev_loss
    print(iepoch, total_loss)
    losses.append(total_loss)
    if iepoch > 1 and losses[-2]-total_loss < 0.01:
        break

yes
0 tensor(29.8221, grad_fn=<AddBackward0>)
yes
1 tensor(29.6444, grad_fn=<AddBackward0>)
yes
2 tensor(29.5903, grad_fn=<AddBackward0>)
yes
3 tensor(29.5680, grad_fn=<AddBackward0>)
yes
4 tensor(29.5561, grad_fn=<AddBackward0>)
yes
5 tensor(29.5489, grad_fn=<AddBackward0>)


In [67]:
def test_model(model, loss_fn, test_generator):
    """
    Evaluate the performance of a model on the development set, providing the loss and macro F1 score.
    :param model: a model that performs 4-way emotion classification
    :param loss_fn: a function that can calculate loss between the predicted and gold labels
    :param test_generator: a DataLoader that provides batches of the testing set
    """
    gold = []
    predicted = []

    # Keep track of the loss
    loss = torch.zeros(1)  # requires_grad = False by default; float32 by default
    if USE_CUDA:
        loss = loss.cuda()

    model.eval()

    # Iterate over batches in the test dataset
    with torch.no_grad():
        for X_b, y_b in test_generator:
            # Predict
            y_pred = model(X_b)

            # Save gold and predicted labels for F1 score - take the argmax to convert to class labels
            gold.extend(y_b.cpu().detach().numpy())
            predicted.extend(y_pred.argmax(1).cpu().detach().numpy())

            loss += loss_fn(y_pred.double(), y_b.long()).data

    # Print total loss and macro F1 score
    print("Test loss: ")
    print(loss)
    print("F-score: ")
    print(f1_score(gold, predicted, average='macro'))

test_model(model, criterion, test_generator)

Test loss: 
tensor([29.4282])
F-score: 
0.22087838416284739


  'precision', 'predicted', average, warn_for)


In [59]:
def get_len(x):
    x_len = []
    for ix in x:
        if ix[-1] != 0:
            x_len.append(len(ix)*1.0)
        else:
            x_len.append((ix==0).nonzero()[0])           
    return x_len

In [64]:
np.shape(train_batch)
x_len = get_len(train_batch)
embedding = nn.Embedding.from_pretrained(embeddings)
x = embedding(train_batch)
print(np.shape(x))
y = torch.zeros(x.size(0), x.size(2), dtype=torch.float)

for i in range(x.size(0)):
    for j in range(x.size(2)):
        #print(i,j)
        y[i][j] = x[i,:,j].sum()/x_len[i]

torch.Size([128, 91, 100])


In [55]:
x[40,:,0]

tensor([-6.2256e-01,  2.1085e-01,  4.5296e-01,  1.8764e-01,  9.8591e-02,
         1.8764e-01,  1.9244e-01,  1.8764e-01,  1.4220e-01,  1.8764e-01,
        -2.0246e-01,  1.8764e-01,  1.8764e-01, -2.2716e-01,  1.8764e-01,
         1.8764e-01,  8.2029e-02,  1.8764e-01,  1.8764e-01,  1.8764e-01,
         2.3903e-01,  5.3169e-01,  1.8764e-01,  1.8764e-01,  1.8764e-01,
         9.8591e-02,  1.8764e-01,  1.8764e-01,  1.8764e-01,  1.8764e-01,
         1.8764e-01,  1.8764e-01,  1.8764e-01,  1.8764e-01,  1.8764e-01,
         1.8764e-01,  1.8764e-01, -6.6233e-04,  1.4220e-01,  1.8764e-01,
         1.8764e-01,  1.8764e-01,  5.1249e-01,  1.8764e-01,  1.4220e-01,
         1.8764e-01,  1.8764e-01,  1.8764e-01,  1.8764e-01, -1.6737e-01,
         1.8764e-01,  1.8764e-01,  9.8591e-02,  1.8764e-01,  9.8591e-02,
         1.8764e-01,  2.0315e-01,  1.8764e-01,  1.8764e-01,  7.7639e-01,
         1.8764e-01,  5.1249e-01,  1.8764e-01,  1.4220e-01,  1.8764e-01,
         9.8591e-02,  1.8764e-01,  1.8764e-01,  5.1

In [62]:
x_len[40]#.float()

91.0

In [51]:
y = torch.sum(x,dim=1).float()
y[0][0],y[0][1],y[0][2]#/19

(tensor(1.3715), tensor(0.4451), tensor(-0.4311))