In [1]:
#%run hw2.py

In [1]:
import nltk
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Imports - our files
import utils
import models

# Global definitions - data
DATA_FN = 'data/crowdflower_data.csv'
LABEL_NAMES = ["happiness", "worry", "neutral", "sadness"]

# Global definitions - architecture
EMBEDDING_DIM = 100  # We will use pretrained 100-dimensional GloVe
BATCH_SIZE = 128
NUM_CLASSES = 4
USE_CUDA = torch.cuda.is_available()  # CUDA will be available if you are using the GPU image for this homework

# Global definitions - saving and loading data
FRESH_START = True  # set this to false after running once with True to just load your preprocessed data from file
#                     (good for debugging)
TEMP_FILE = "temporary_data.pkl"  # if you set FRESH_START to false, the program will look here for your data, etc.

In [2]:
# load the data and embeddings from file
try:
    with open(TEMP_FILE, "rb") as f:
        print("Loading DataLoaders and embeddings from file....")
        train_generator, dev_generator, test_generator, embeddings, train_data = pickle.load(f)
except FileNotFoundError:
    raise FileNotFoundError("You need to have saved your data with FRESH_START=True once in order to load it!")

Loading DataLoaders and embeddings from file....


In [19]:
# test embeddings
# ebs = nn.Embedding.from_pretrained(embeddings)
# train1 = ebs(train_batch)
# print(np.shape(train1))
# train11 = torch.sum(train1,dim=1)
# print(np.shape(train11))

torch.Size([128, 91, 100])
torch.Size([128, 100])


In [46]:
class DenseNetwork(nn.Module):
    def __init__(self, embed_dim, output_dim, hidden_dim, weight):
        super(DenseNetwork, self).__init__()

        ########## YOUR CODE HERE ##########
        # TODO: Here, create any layers and attributes your network needs.
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.dense1 = nn.Linear(embed_dim, hidden_dim) 
        self.dense2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()     

    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        # TODO: 2) Take the sum of all word embeddings in a sentence; and
        # TODO: 3) Feed the result into 2-layer feedforward network which produces a 4-vector of values,
        # TODO: one for each class
        x = self.embedding(x)
        x = torch.sum(x,dim=1).float()
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        return self.relu(x)
        
net = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, 64, embeddings)
print(net)

DenseNetwork(
  (embedding): Embedding(17635, 100)
  (dense1): Linear(in_features=100, out_features=64, bias=True)
  (dense2): Linear(in_features=64, out_features=4, bias=True)
  (relu): ReLU()
)


In [37]:
np.shape(net(train_batch))

torch.Size([128, 100])


torch.Size([128, 4])

In [51]:
HIDDEN_DIM = 64
model = DenseNetwork(EMBEDDING_DIM, NUM_CLASSES, HIDDEN_DIM, embeddings)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
  
EPOCHS = 20
for iepoch in range(EPOCHS): 
    for train_batch, train_label in train_generator:
        # Compute and print loss
        loss = criterion(model(train_batch),train_label)
        #print(loss.item()) 

        # Zero the gradients
        model.zero_grad()

        # perform a backward pass (backpropagation)
        loss.backward()

        # Update the parameters
        optimizer.step()

    total_loss = 0
    for ibatch, ilabel in dev_generator:
        dev_loss = criterion(model(ibatch), ilabel)
        total_loss += dev_loss
    print(iepoch, total_loss)
    

0 tensor(29.6667, grad_fn=<AddBackward0>)
1 tensor(29.2319, grad_fn=<AddBackward0>)
2 tensor(29.0510, grad_fn=<AddBackward0>)
3 tensor(28.9368, grad_fn=<AddBackward0>)
4 tensor(28.9302, grad_fn=<AddBackward0>)
5 tensor(28.1167, grad_fn=<AddBackward0>)
6 tensor(27.2309, grad_fn=<AddBackward0>)
7 tensor(26.8562, grad_fn=<AddBackward0>)
8 tensor(26.7246, grad_fn=<AddBackward0>)
9 tensor(26.7146, grad_fn=<AddBackward0>)
10 tensor(26.6744, grad_fn=<AddBackward0>)
11 tensor(26.7139, grad_fn=<AddBackward0>)
12 tensor(26.6655, grad_fn=<AddBackward0>)
13 tensor(26.7404, grad_fn=<AddBackward0>)
14 tensor(26.7706, grad_fn=<AddBackward0>)
15 tensor(26.7183, grad_fn=<AddBackward0>)
16 tensor(26.8233, grad_fn=<AddBackward0>)
17 tensor(26.7668, grad_fn=<AddBackward0>)
18 tensor(26.7847, grad_fn=<AddBackward0>)
19 tensor(26.7913, grad_fn=<AddBackward0>)


In [17]:
np.shape(embeddings)

torch.Size([17635, 100])

In [15]:
train_generator

<torch.utils.data.dataloader.DataLoader at 0x106df65f8>

In [None]:
def train_model(model, loss_fn, optimizer, train_generator, dev_generator):
    """
    Perform the actual training of the model based on the train and dev sets.
    :param model: one of your models, to be trained to perform 4-way emotion classification
    :param loss_fn: a function that can calculate loss between the predicted and gold labels
    :param optimizer: a created optimizer you will use to update your model weights
    :param train_generator: a DataLoader that provides batches of the training set
    :param dev_generator: a DataLoader that provides batches of the development set
    :return model, the trained model
    """
    
    
    for epoch in range(50):
        # Forward Propagation
        y_pred = model(x)
        # Compute and print loss
        loss = criterion(y_pred, y)
        print('epoch: ', epoch,' loss: ', loss.item())
        # Zero the gradients
        optimizer.zero_grad()

        # perform a backward pass (backpropagation)
        loss.backward()

        # Update the parameters
        optimizer.step()

In [56]:
class RecurrentNetwork(nn.Module):
    def __init__(self, embed_dim, output_dim, hidden_dim, weight):
        super(RecurrentNetwork, self).__init__()
        
        ########## YOUR CODE HERE ##########
        # TODO: Here, create any layers and attributes your network needs.
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.lstm1 = nn.LSTM(embed_dim, hidden_dim) 
        self.lstm2 = nn.LSTM(hidden_dim, output_dim)
        self.relu = nn.ReLU() 

    # x is a PaddedSequence for an RNN
    def forward(self, x):
        ########## YOUR CODE HERE ##########
        # TODO: Fill in the forward pass of your neural network.
        # TODO: (The backward pass will be performed by PyTorch magic for you!)
        # TODO: Your architecture should...
        # TODO: 1) Put the words through an Embedding layer (which was initialized with the pretrained embeddings);
        # TODO: 2) Feed the sequence of embeddings through a 2-layer RNN; and
        # TODO: 3) Feed the last output state into a dense layer to become a 4-vector of values, one for each class
        x = self.embedding(x).float()
        x = self.lstm1(x)
        x = self.relu(x)
        x = self.lstm2(x)
        return self.relu(x)
        
net = RecurrentNetwork(EMBEDDING_DIM, NUM_CLASSES, 64, embeddings)
print(net)

RecurrentNetwork(
  (embedding): Embedding(17635, 100)
  (lstm1): LSTM(100, 64)
  (lstm2): LSTM(64, 4)
  (relu): ReLU()
)


In [57]:
np.shape(net(train_batch))

TypeError: relu(): argument 'input' (position 1) must be Tensor, not tuple