# 1. Preparation and Preprocessing 
- Install pytoch 0.4.1
- Cuda version: 9.2
- Load data and preprocessing for training

In [1]:
# NVIDIA profiling tool for the available GPU
!nvidia-smi

Mon Oct 29 18:18:09 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.44                 Driver Version: 396.44                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8    27W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

tcmalloc: large alloc 1073750016 bytes == 0x57dc6000 @  0x7fae79e6f2a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070


In [3]:
# Use PyTorch to check versions, CUDA version and cuDNN

import torch

print("PyTorch version: ")
print(torch.__version__)
print("CUDA Version: ")
print(torch.version.cuda)
print("cuDNN version is: ")
print(torch.backends.cudnn.version())

PyTorch version: 
0.4.1
CUDA Version: 
9.2.148
cuDNN version is: 
7104


In [4]:
!nvidia-smi

Thu Oct 25 16:46:11 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.44                 Driver Version: 396.44                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    70W / 149W |    280MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [4]:
#connect with google drive to load pre-trained embedding 
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
#load necessary library
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle as pkl

In [0]:
# Load pre-trained embedding and add <pad> and <unk>
PAD_IDX = 0
UNK_IDX = 1
def load_ft(words_to_load):
    with open('/content/drive/My Drive/Colab Notebooks/wiki-news-300d-1M.vec') as f:
        loaded_embeddings = np.zeros((words_to_load+2, 300))
        token2id = {}
        token2id['<pad>'] = PAD_IDX 
        token2id['<unk>'] = UNK_IDX
        id2token = []
        
        for i, line in enumerate(f):
            if i >= words_to_load: 
                break
            s = line.split()
            loaded_embeddings[i+2, :] = np.asarray(s[1:])
            id2token.append(s[0])
            token2id[s[0]] = i+2
    id2token = ['<pad>', '<unk>'] + id2token
    return token2id, id2token, loaded_embeddings

In [0]:
# call load_ft to load fast_text embedding
token2id, id2token,ft_emb = load_ft(500000)

In [17]:
token2id['<unk>']

1

In [19]:
token2id['UNK']

77811

In [16]:
token2id['unk']

307965

In [15]:
token2id['UNK']

77811

In [0]:
def df2idx(fname):
    df = pd.read_csv(fname, sep="\t", index_col=False )
    # change the label to numerical value
    df.loc[df['label'] == 'entailment', 'label'] = 0
    df.loc[df['label'] == 'contradiction', 'label'] = 1
    df.loc[df['label'] == 'neutral', 'label'] = 2
    # convert token to idx
    df['sent1_idx']  = df.apply (lambda row:[token2id[token] if token in token2id else token2id['UNK'] for token in row.sentence1.split()],axis=1)
    df['sent2_idx']  = df.apply (lambda row:[token2id[token] if token in token2id else token2id['UNK'] for token in row.sentence2.split()],axis=1)
    # convert df to data list and label list
    indexed_data = list(zip(df.sent1_idx, df.sent2_idx))
    label = np.array(df.label)
    return indexed_data, label, df

In [0]:
val_data, val_targets, val_df = df2idx("/content/snli_val.tsv")
train_data, train_targets, train_df = df2idx("/content/snli_train.tsv")

In [0]:
# Get a subset of training set to calculate the training accuracy
import random
subset_idx = random.sample(range(100000), 10000)
subset_train_data = [train_data[i] for i in subset_idx]
subset_train_targets = [train_targets[i] for i in subset_idx]

In [11]:
# Get the max length of sentence in training set
print('The max length of sentence 1 is {}'.format(max([len(train_data[i][0]) for i in range(len(train_data))])))
print('The max length of sentence 2 is {}'.format(max([len(train_data[i][1]) for i in range(len(train_data))])))


The max length of sentence 1 is 82
The max length of sentence 2 is 41


In [0]:
MAX_SENTENCE1_LENGTH = 82
MAX_SENTENCE2_LENGTH = 41

import numpy as np
import torch
from torch.utils.data import Dataset

class NewsGroupDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """
    
    def __init__(self, data_list, target_list):
        """
        @param data_list: list of newsgroup tokens 
        @param target_list: list of newsgroup targets 

        """
        self.data_list = data_list
        self.target_list = target_list
        assert (len(self.data_list) == len(self.target_list))

    def __len__(self):
        return len(self.data_list)
        
    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        
        token1_idx = self.data_list[key][0][:MAX_SENTENCE1_LENGTH]
        token2_idx = self.data_list[key][1][:MAX_SENTENCE2_LENGTH]
        label = self.target_list[key]
        return [token1_idx, token2_idx, len(token1_idx),len(token2_idx),label]

def newsgroup_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    token1_data_list = []
    token2_data_list = []
    label_list = []
    token1_length_list = []
    token2_length_list = []
    #print("collate batch: ", batch[0][0])
    #batch[0][0] = batch[0][0][:MAX_SENTENCE_LENGTH]
    for datum in batch:
        label_list.append(datum[4])
        token1_length_list.append(datum[2])
        token2_length_list.append(datum[3])
    # padding
    for datum in batch:
        token1_padded_vec = np.pad(np.array(datum[0]), 
                                pad_width=((0,MAX_SENTENCE1_LENGTH-datum[2])), 
                                mode="constant", constant_values=0)
        token2_padded_vec = np.pad(np.array(datum[1]), 
                                pad_width=((0,MAX_SENTENCE2_LENGTH-datum[3])), 
                                mode="constant", constant_values=0)
        token1_data_list.append(token1_padded_vec)
        token2_data_list.append(token2_padded_vec)
    return [torch.from_numpy(np.array(token1_data_list)), torch.LongTensor(token1_length_list),
            torch.from_numpy(np.array(token2_data_list)), torch.LongTensor(token2_length_list),
            torch.LongTensor(label_list)]


In [0]:
BATCH_SIZE = 32
val_dataset = NewsGroupDataset(val_data, val_targets)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=True)

train_dataset =  NewsGroupDataset(train_data, train_targets)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=True)
subset_train_dataset =  NewsGroupDataset(subset_train_data, subset_train_targets)
subset_train_loader = torch.utils.data.DataLoader(dataset=subset_train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=True)

In [0]:
class RNN(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes, pre_trained_emb):
        # RNN Accepts the following hyperparams:
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # pre_trained_emb : pre_trained embedding matrix. The shape of it can provide the embedding size and vocabulay size
        super(RNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        # embedding module
        self.embedding = nn.Embedding(pre_trained_emb.shape[0], pre_trained_emb.shape[1], padding_idx=PAD_IDX)
        # create bi-directional GRU in pytorch(batch_first: the first dim is batch, 2nd is sequence dim, 3rd is embedding dim)
        self.rnn = nn.GRU(pre_trained_emb.shape[1], hidden_size,num_layers, bidirectional=True, batch_first = True) 
        # create decoder layer 
        self.linear1 = nn.Linear(hidden_size*4,hidden_size )
        self.linear2 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
        
    def init_weights(self, is_static=True):
        self.embedding.weight = nn.Parameter(torch.from_numpy(pre_trained_emb).float())
        if is_static:
            self.embedding.weight.requires_grad = False
    
    
    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        hidden = torch.randn(2 * self.num_layers, batch_size, self.hidden_size)
        
        return hidden

    def forward(self, token1_data, token1_lengths, token2_data, token2_lengths):
        # reset hidden state

        batch_size, token1_seq_len = token1_data.size()
        token2_seq_len = token2_data.size()[1]
#### main part of RNN ###########################
        self.hidden = self.init_hidden(batch_size)
        #get the sorted index based on sentent length
        _, token1_idx_sort = torch.sort(token1_lengths, dim=0, descending=True)
        _, token1_idx_unsort = torch.sort(token1_idx_sort, dim=0)
        token1_lengths = token1_lengths[token1_idx_sort]
        _, token2_idx_sort = torch.sort(token2_lengths, dim=0, descending=True)
        _, token2_idx_unsort = torch.sort(token2_idx_sort, dim=0)
        token2_lengths = token2_lengths[token2_idx_sort]
        # Sort input data
        token1_rnn = token1_data.index_select(0, token1_idx_sort)
        token2_rnn = token2_data.index_select(0, token2_idx_sort)
        
        # get embedding of two sentences
        embed_sent1 = self.embedding(token1_rnn)
        embed_sent2 = self.embedding(token2_rnn)
        
        # pack padded sequence
        # transform the tensor in pytorch into the padded sequence . pytorch want the sequence in the descending order
        embed_sent1 = torch.nn.utils.rnn.pack_padded_sequence(embed_sent1, token1_lengths, batch_first=True)
        embed_sent2 = torch.nn.utils.rnn.pack_padded_sequence(embed_sent2, token2_lengths, batch_first=True)
        use_cuda = True
        if use_cuda and torch.cuda.is_available():
            self.hidden = self.hidden.cuda()
            
        # fprop though RNN # the rnn_out varaible is size of batch size by the sequence length by the hidden dimension
#         rnn_out1, _ = self.rnn(embed_sent1, self.hidden) 
#         rnn_out2, _ = self.rnn(embed_sent2, self.hidden) 
        _, hidden_out1 = self.rnn(embed_sent1, self.hidden) 
        _, hidden_out2 = self.rnn(embed_sent2, self.hidden) 
        hidden_out1 = torch.cat((hidden_out1[0], hidden_out1[1]),dim = 1)
        hidden_out2 = torch.cat((hidden_out2[0], hidden_out2[1]),dim = 1)
        #unsort 
        hidden_out1 = hidden_out1.index_select(0, token1_idx_unsort)
        hidden_out2 = hidden_out2.index_select(0, token2_idx_unsort)
        # undo packing
#         rnn_out1, _ = torch.nn.utils.rnn.pad_packed_sequence(rnn_out1, batch_first=True)
#         rnn_out2, _ = torch.nn.utils.rnn.pad_packed_sequence(rnn_out2, batch_first=True)
#         #unsort
#         rnn_out1 = rnn_out1.index_select(0, token1_idx_unsort)
#         rnn_out2 = rnn_out2.index_select(0, token2_idx_unsort)
        # concatenate two encoded sentences
        out_cat = torch.cat((hidden_out1, hidden_out2), dim = 1)
        # sum hidden activations of RNN across time
        #out_cat = torch.sum(out_cat, dim=1)
####### main part #########################
        hidden1 = self.linear1(out_cat)
        hidden1 = self.relu(hidden1)
        out = self.linear2(hidden1)
        preds = F.log_softmax(out, 1)
        return preds


In [0]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for data1, lengths1,data2, lengths2, labels in loader:
#         data1 = Variable(data1)  
#         lengths1 = Variable(lengths1)
#         data2 = Variable(data2)  
#         lengths2 = Variable(lengths2)# Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
#         labels = Variable(labels)
        if use_cuda and torch.cuda.is_available():
            data1 = data1.cuda()
            lengths1  = lengths1.cuda()
            data2 = data2.cuda()
            lengths2  = lengths2.cuda()
            labels = labels.cuda()
        data1_batch, lengths1_batch,data2_batch, lengths2_batch, label_batch = data1, lengths1, data2, lengths2,labels
        outputs =model(data1_batch, lengths1_batch,data2_batch, lengths2_batch)
        predicted = outputs.max(1, keepdim=True)[1]

        total += labels.size(0)
        correct += predicted.eq(labels.view_as(predicted)).sum().item()
    return (100 * correct / total)

In [0]:
def train_model(loader, model):
  criterion = torch.nn.NLLLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  total_step = len(loader)
  #train_loss_ls = []
  val_acc_ls = []
  train_acc_ls = []
  for epoch in range(num_epochs):
      #loss_batch = []
      for i, (data1, lengths1, data2, lengths2, labels) in enumerate(loader):
  #         data1 = Variable(data1)  
  #         lengths1 = Variable(lengths1)
  #         data2 = Variable(data2)  
  #         lengths2 = Variable(lengths2)# Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
  #         labels = Variable(labels)
          if use_cuda and torch.cuda.is_available():
              data1 = data1.cuda()
              lengths1  = lengths1.cuda()
              data2 = data2.cuda()
              lengths2  = lengths2.cuda()
              labels = labels.cuda()
          model.train()
          optimizer.zero_grad()
          # Forward pass
          outputs = model(data1, lengths1, data2, lengths2)
          predicted = outputs.max(1, keepdim=True)[1]
          loss = criterion(outputs, labels)
          #loss_batch.append(loss.item())
          # Backward and optimize
          loss.backward()
          optimizer.step()
          # validate every 100 iterations
          if i > 0 and i % 400 == 0:
              # validate
              #train_loss = loss_batch[i]
              val_acc = test_model(val_loader, model)
              train_acc = test_model(subset_train_loader, model)
              train_acc_ls.append(train_acc)
              #train_loss_ls.append(train_loss)
              val_acc_ls.append(val_acc)
              print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}, Training Acc: {}'.format(
                         epoch+1, num_epochs, i+1, len(loader), val_acc, train_acc))
  #torch.save(model_object.state_dict(), 'params_{}.pkl'.format())
#model_object.load_state_dict(torch.load('params.pkl'))
  return  val_acc_ls, train_acc_ls
  #return train_loss_ls, val_acc_ls, train_acc_ls



# 2. RNN Tuning

## 2.1 Hidden Size Tuning for RNN
- Ways of interacting two sentences: Concatenation 
- Learning_rate 3e-4
- Number of Epochs: 10
- Embedding Weights: Freeze All
- hidden size list (50, 100, 200, 300, 400 )

In [0]:
learning_rate = 3e-4
num_epochs = 10 # number epoch to train
use_cuda = True

def find_best_hiddensize_RNN(hidden_ls):
  performance = {}
  for hidden_size in hidden_ls:
    print('---------RNN_HIDDEN_SIZE: {}-------------'.format(hidden_size))
    model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
    use_cuda = True
    if use_cuda and torch.cuda.is_available():
      model.cuda()
     # Criterion and Optimizer
    #
    
    train_loss, val_acc = train_model(train_loader,model)
    performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)
    torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
  return performance

In [0]:
# Can call this function directly while it takes some time. 
# Since the runtime on google colab is often disconnected for a long-time run, I chose to run them one by one just in case. 

hidden_ls = [50,100,200,300,400]
hidden_record = find_best_hiddensize_RNN(hidden_ls)


---------RNN_HIDDEN_SIZE: 100-------------
Epoch: [1/10], Step: [401/3125], Validation Acc: 51.2, Training Loss: 0.9001179337501526
Epoch: [1/10], Step: [801/3125], Validation Acc: 54.6, Training Loss: 1.1211072206497192
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.3, Training Loss: 0.8554650545120239
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.7, Training Loss: 0.9426496624946594
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.3, Training Loss: 0.9723080992698669
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.0, Training Loss: 0.8321115374565125
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.9, Training Loss: 0.8341527581214905
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.1, Training Loss: 0.7639567255973816
Epoch: [2/10], Step: [801/3125], Validation Acc: 62.1, Training Loss: 0.6325594782829285
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.0, Training Loss: 0.7023106217384338
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.6, Train

In [53]:
hidden_size = 50
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
trainacc_performance['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'RNN_hidden_size_50.pkl' 'drive/My Drive/Colab Notebooks/RNN_hidden_size_50.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 46.9, Training Acc: 47.66
Epoch: [1/10], Step: [801/3125], Validation Acc: 53.2, Training Acc: 55.25
Epoch: [1/10], Step: [1201/3125], Validation Acc: 55.9, Training Acc: 59.03
Epoch: [1/10], Step: [1601/3125], Validation Acc: 57.6, Training Acc: 60.68
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.9, Training Acc: 61.47
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.3, Training Acc: 62.66
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.1, Training Acc: 64.06
Epoch: [2/10], Step: [401/3125], Validation Acc: 60.7, Training Acc: 65.81
Epoch: [2/10], Step: [801/3125], Validation Acc: 63.5, Training Acc: 66.63
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.5, Training Acc: 66.9
Epoch: [2/10], Step: [1601/3125], Validation Acc: 61.1, Training Acc: 67.35
Epoch: [2/10], Step: [2001/3125], Validation Acc: 61.5, Training Acc: 68.25
Epoch: [2/10], Step: [2401/3125], Validation Acc: 61.0, Training Acc: 68.86
Epoch: [2/10], St

In [19]:
performance = {}
hidden_size = 50
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 46.9, Training Loss: 1.0772716999053955
Epoch: [1/10], Step: [801/3125], Validation Acc: 52.7, Training Loss: 1.0260114669799805
Epoch: [1/10], Step: [1201/3125], Validation Acc: 56.8, Training Loss: 0.8532036542892456
Epoch: [1/10], Step: [1601/3125], Validation Acc: 56.2, Training Loss: 0.7398672103881836
Epoch: [1/10], Step: [2001/3125], Validation Acc: 58.2, Training Loss: 0.8313394784927368
Epoch: [1/10], Step: [2401/3125], Validation Acc: 59.8, Training Loss: 0.7919907569885254
Epoch: [1/10], Step: [2801/3125], Validation Acc: 59.1, Training Loss: 1.002533197402954
Epoch: [2/10], Step: [401/3125], Validation Acc: 60.4, Training Loss: 0.7861840724945068
Epoch: [2/10], Step: [801/3125], Validation Acc: 62.0, Training Loss: 0.6704171895980835
Epoch: [2/10], Step: [1201/3125], Validation Acc: 61.3, Training Loss: 0.5599439144134521
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.6, Training Loss: 0.8423176407814026
Epoch: [2/10], 

In [43]:
trainacc_performance = {}
hidden_size = 100
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
trainacc_performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc,train_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 51.0, Training Loss: 0.9422149062156677, Training Acc: 52.81
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.8, Training Loss: 0.827469527721405, Training Acc: 56.88
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.5, Training Loss: 1.1401127576828003, Training Acc: 60.38
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.2, Training Loss: 0.8711609244346619, Training Acc: 61.74
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.9, Training Loss: 0.8310251235961914, Training Acc: 63.7
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.5, Training Loss: 1.101127028465271, Training Acc: 64.72
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.4, Training Loss: 0.7982498407363892, Training Acc: 65.83
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.3, Training Loss: 0.8344654440879822, Training Acc: 67.8
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.7, Training Loss: 0.8579233884811401, Training Acc: 68.5
Epoch: [2/

In [0]:
!cp 'RNN_hidden_size_100.pkl' 'drive/My Drive/Colab Notebooks/RNN_hidden_size_100.pkl'

In [21]:
#performance = {}
hidden_size = 100
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 50.6, Training Loss: 1.0688800811767578
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.0, Training Loss: 0.9318221807479858
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.6, Training Loss: 0.7519932985305786
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.5, Training Loss: 1.0058008432388306
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.0, Training Loss: 0.7431007623672485
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.2, Training Loss: 0.8491858243942261
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.7, Training Loss: 0.5690383315086365
Epoch: [2/10], Step: [401/3125], Validation Acc: 62.4, Training Loss: 0.7876433730125427
Epoch: [2/10], Step: [801/3125], Validation Acc: 63.1, Training Loss: 0.6197104454040527
Epoch: [2/10], Step: [1201/3125], Validation Acc: 65.1, Training Loss: 0.6464472413063049
Epoch: [2/10], Step: [1601/3125], Validation Acc: 63.4, Training Loss: 0.7454239130020142
Epoch: [2/10],

In [46]:
hidden_size = 200
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
trainacc_performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc,train_acc)
!cp 'RNN_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks/RNN_hidden_size_200.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 56.4, Training Loss: 1.0357942581176758, Training Acc: 54.7
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.7, Training Loss: 0.8202587366104126, Training Acc: 58.87
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.6, Training Loss: 0.8594374060630798, Training Acc: 61.84
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.1, Training Loss: 0.9310346841812134, Training Acc: 63.12
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.1, Training Loss: 0.8988821506500244, Training Acc: 64.58
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.9, Training Loss: 0.9715330600738525, Training Acc: 66.64
Epoch: [1/10], Step: [2801/3125], Validation Acc: 63.1, Training Loss: 0.7604234218597412, Training Acc: 67.11
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.5, Training Loss: 0.8899407982826233, Training Acc: 69.59
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.8, Training Loss: 0.642221987247467, Training Acc: 70.25
Epoch: 

In [26]:
hidden_size = 200
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 52.9, Training Loss: 0.8373058438301086
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.3, Training Loss: 0.9558172225952148
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.7, Training Loss: 0.9582279324531555
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.2, Training Loss: 0.9013748168945312
Epoch: [1/10], Step: [2001/3125], Validation Acc: 63.1, Training Loss: 0.8715951442718506
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.1, Training Loss: 0.8525784015655518
Epoch: [1/10], Step: [2801/3125], Validation Acc: 63.4, Training Loss: 0.7277835607528687
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.6, Training Loss: 0.5997782349586487
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.7, Training Loss: 0.6879258155822754
Epoch: [2/10], Step: [1201/3125], Validation Acc: 64.1, Training Loss: 0.6123311519622803
Epoch: [2/10], Step: [1601/3125], Validation Acc: 65.1, Training Loss: 0.5418102145195007
Epoch: [2/10],

In [50]:
hidden_size = 300
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
trainacc_performance['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'RNN_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks/RNN_hidden_size_300.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 52.3, Training Acc: 53.19
Epoch: [1/10], Step: [801/3125], Validation Acc: 54.2, Training Acc: 58.33
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.4, Training Acc: 60.96
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.1, Training Acc: 62.99
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.5, Training Acc: 64.47
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.1, Training Acc: 65.6
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.6, Training Acc: 66.84
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.3, Training Acc: 69.19
Epoch: [2/10], Step: [801/3125], Validation Acc: 63.8, Training Acc: 69.9
Epoch: [2/10], Step: [1201/3125], Validation Acc: 65.0, Training Acc: 70.17
Epoch: [2/10], Step: [1601/3125], Validation Acc: 63.9, Training Acc: 71.52
Epoch: [2/10], Step: [2001/3125], Validation Acc: 65.0, Training Acc: 72.42
Epoch: [2/10], Step: [2401/3125], Validation Acc: 65.6, Training Acc: 73.37
Epoch: [2/10], Ste

In [30]:
hidden_size = 300
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 52.7, Training Loss: 0.9874268174171448
Epoch: [1/10], Step: [801/3125], Validation Acc: 54.8, Training Loss: 1.0813761949539185
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.5, Training Loss: 0.8890181183815002
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.4, Training Loss: 0.8045153617858887
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.7, Training Loss: 0.9623076319694519
Epoch: [1/10], Step: [2401/3125], Validation Acc: 59.7, Training Loss: 0.8783605694770813
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.7, Training Loss: 0.8318474292755127
Epoch: [2/10], Step: [401/3125], Validation Acc: 62.1, Training Loss: 0.6349196434020996
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.0, Training Loss: 0.60161292552948
Epoch: [2/10], Step: [1201/3125], Validation Acc: 65.2, Training Loss: 0.8527363538742065
Epoch: [2/10], Step: [1601/3125], Validation Acc: 66.1, Training Loss: 0.6099053621292114
Epoch: [2/10], S

In [51]:
hidden_size = 400
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
trainacc_performance['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'RNN_hidden_size_400.pkl' 'drive/My Drive/Colab Notebooks/RNN_hidden_size_400.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 55.2, Training Acc: 55.72
Epoch: [1/10], Step: [801/3125], Validation Acc: 54.8, Training Acc: 59.27
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.0, Training Acc: 61.13
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.0, Training Acc: 63.1
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.1, Training Acc: 64.68
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.4, Training Acc: 66.29
Epoch: [1/10], Step: [2801/3125], Validation Acc: 63.7, Training Acc: 66.57
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.5, Training Acc: 68.92
Epoch: [2/10], Step: [801/3125], Validation Acc: 65.1, Training Acc: 69.58
Epoch: [2/10], Step: [1201/3125], Validation Acc: 64.9, Training Acc: 71.48
Epoch: [2/10], Step: [1601/3125], Validation Acc: 65.6, Training Acc: 71.96
Epoch: [2/10], Step: [2001/3125], Validation Acc: 66.3, Training Acc: 72.48
Epoch: [2/10], Step: [2401/3125], Validation Acc: 66.2, Training Acc: 73.63
Epoch: [2/10], St

In [0]:
import pickle
f = open("rnn_hidden_size_record_new.pkl","wb")
pickle.dump(trainacc_performance,f)
f.close()

In [0]:
from google.colab import files
files.download('rnn_hidden_size_record_new.pkl') 

In [31]:
hidden_size = 400
model = RNN(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
train_loss, val_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'RNN_hidden_size_{}.pkl'.format(hidden_size))
performance['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 53.1, Training Loss: 0.9983417987823486
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.5, Training Loss: 1.0401420593261719
Epoch: [1/10], Step: [1201/3125], Validation Acc: 60.4, Training Loss: 0.7093570828437805
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.6, Training Loss: 0.8327824473381042
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.7, Training Loss: 0.7499194741249084
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.1, Training Loss: 0.8026942014694214
Epoch: [1/10], Step: [2801/3125], Validation Acc: 64.5, Training Loss: 0.8780301809310913
Epoch: [2/10], Step: [401/3125], Validation Acc: 65.7, Training Loss: 0.5154721736907959
Epoch: [2/10], Step: [801/3125], Validation Acc: 66.0, Training Loss: 0.8337960243225098
Epoch: [2/10], Step: [1201/3125], Validation Acc: 64.4, Training Loss: 0.5293896198272705
Epoch: [2/10], Step: [1601/3125], Validation Acc: 63.7, Training Loss: 0.7270818948745728
Epoch: [2/10],

In [0]:
files.download('RNN_hidden_size_50.pkl')
files.download('RNN_hidden_size_100.pkl') 
files.download('RNN_hidden_size_200.pkl') 
files.download('RNN_hidden_size_300.pkl')
files.download('RNN_hidden_size_400.pkl')

## 2.2 Concatenate two encoded sentences with element-wise multiplication for RNN
- Instead of concatenation of two encoded sentences, do element-wise multiplication
- Freeze all embedding weights
- Also tuning hidden size
- Hidden size list (100, 200, 300, 400, 800)

In [0]:
class RNN_mul(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes, pre_trained_emb):
        # RNN Accepts the following hyperparams
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # pre_trained_emb: pre_trained fast text results
        super(RNN_mul, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        # embedding module
        self.embedding = nn.Embedding(pre_trained_emb.shape[0], pre_trained_emb.shape[1], padding_idx=PAD_IDX)
        # create RNN in pytorch(batch_first: the first dim is batch, 2nd is sequence dim, 3rd is embedding dim)
        self.rnn = nn.GRU(pre_trained_emb.shape[1], hidden_size,num_layers, bidirectional=True, batch_first = True) 
        
        self.linear1 = nn.Linear(hidden_size*2,hidden_size )
        self.linear2 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    def init_weights(self, is_static=True):
        self.embedding.weight = nn.Parameter(torch.from_numpy(pre_trained_emb).float())
        if is_static:
            self.embedding.weight.requires_grad = False
    
    
    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        hidden = torch.randn(2 * self.num_layers, batch_size, self.hidden_size)
        
        return hidden

    def forward(self, token1_data, token1_lengths, token2_data, token2_lengths):
        # reset hidden state

        batch_size, token1_seq_len = token1_data.size()
        token2_seq_len = token2_data.size()[1]
#### main part of RNN ###########################
        self.hidden = self.init_hidden(batch_size)
        #sort 
        _, token1_idx_sort = torch.sort(token1_lengths, dim=0, descending=True)
        _, token1_idx_unsort = torch.sort(token1_idx_sort, dim=0)
        token1_lengths = token1_lengths[token1_idx_sort]
        _, token2_idx_sort = torch.sort(token2_lengths, dim=0, descending=True)
        _, token2_idx_unsort = torch.sort(token2_idx_sort, dim=0)
        token2_lengths = token2_lengths[token2_idx_sort]
        # Sort x
        token1_rnn = token1_data.index_select(0, token1_idx_sort)
        token2_rnn = token2_data.index_select(0, token2_idx_sort)
        
        # get embedding of characters
        embed_sent1 = self.embedding(token1_rnn)
        embed_sent2 = self.embedding(token2_rnn)
        # pretrained_weight is a numpy matrix of shape (num_embeddings, embedding_dim)
        #embed.weight = nn.Parameter(torch.from_numpy(pre_trained_emb))
        
        #embed = m * embed + (1-m) * embed.clone().detch()
       # embed.weight.data.copy_(torch.from_numpy(pre_trained_emb))
        # pack padded sequence
        # transform the tensor in pytorch into the padded sequence . pytorch want the sequence in the descending order
        embed_sent1 = torch.nn.utils.rnn.pack_padded_sequence(embed_sent1, token1_lengths, batch_first=True)
        embed_sent2 = torch.nn.utils.rnn.pack_padded_sequence(embed_sent2, token2_lengths, batch_first=True)
        use_cuda = True
        if use_cuda and torch.cuda.is_available():
#           embed_sent1 = embed_sent1.cuda()
#           embed_sent2 = embed_sent1.cuda()
            self.hidden = self.hidden.cuda()
            
        # fprop though RNN # the rnn_out varaible is size of batch size by the sequence length by the hidden dimension
#         rnn_out1, _ = self.rnn(embed_sent1, self.hidden) 
#         rnn_out2, _ = self.rnn(embed_sent2, self.hidden) 
        _, hidden_out1 = self.rnn(embed_sent1, self.hidden) 
        _, hidden_out2 = self.rnn(embed_sent2, self.hidden) 
        hidden_out1 = torch.cat((hidden_out1[0], hidden_out1[1]),dim = 1)
        hidden_out2 = torch.cat((hidden_out2[0], hidden_out2[1]),dim = 1)
        #unsort 
        hidden_out1 = hidden_out1.index_select(0, token1_idx_unsort)
        hidden_out2 = hidden_out2.index_select(0, token2_idx_unsort)
       
        out_cat = torch.mul(hidden_out1, hidden_out2)
        # sum hidden activations of RNN across time
        #out_cat = torch.sum(out_cat, dim=1)
####### main part #########################
        hidden1 = self.linear1(out_cat)
        hidden1 = self.relu(hidden1)
        out = self.linear2(hidden1)
        preds = F.log_softmax(out, 1)
        return preds


In [58]:
rnn_mul_trainacc = {}
hidden_size = 300
model = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'mul_RNN_hidden_size_{}.pkl'.format(hidden_size))
rnn_mul_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'mul_RNN_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks/mul_RNN_hidden_size_300.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 49.7, Training Acc: 53.46
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.2, Training Acc: 59.21
Epoch: [1/10], Step: [1201/3125], Validation Acc: 60.9, Training Acc: 63.63
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.6, Training Acc: 65.59
Epoch: [1/10], Step: [2001/3125], Validation Acc: 63.3, Training Acc: 67.25
Epoch: [1/10], Step: [2401/3125], Validation Acc: 64.6, Training Acc: 70.18
Epoch: [1/10], Step: [2801/3125], Validation Acc: 63.8, Training Acc: 70.79
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.9, Training Acc: 74.25
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.5, Training Acc: 75.72
Epoch: [2/10], Step: [1201/3125], Validation Acc: 66.9, Training Acc: 77.26
Epoch: [2/10], Step: [1601/3125], Validation Acc: 65.3, Training Acc: 78.54
Epoch: [2/10], Step: [2001/3125], Validation Acc: 65.7, Training Acc: 79.24
Epoch: [2/10], Step: [2401/3125], Validation Acc: 67.3, Training Acc: 80.76
Epoch: [2/10], S

In [30]:
record_multiply_rnn = {}
hidden_size = 300
model_mul = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_mul.cuda()
train_loss, val_acc = train_model(train_loader,model_mul)
torch.save(model_mul.state_dict(), 'RNN_mul_hidden_size_{}.pkl'.format(hidden_size))
record_multiply_rnn['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 50.7, Training Loss: 1.0234849452972412
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.1, Training Loss: 0.9339804649353027
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.1, Training Loss: 0.950441837310791
Epoch: [1/10], Step: [1601/3125], Validation Acc: 63.1, Training Loss: 0.8755513429641724
Epoch: [1/10], Step: [2001/3125], Validation Acc: 64.7, Training Loss: 0.8076320886611938
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.8, Training Loss: 0.6440338492393494
Epoch: [1/10], Step: [2801/3125], Validation Acc: 67.3, Training Loss: 0.6421260833740234
Epoch: [2/10], Step: [401/3125], Validation Acc: 67.4, Training Loss: 0.6005825996398926
Epoch: [2/10], Step: [801/3125], Validation Acc: 66.9, Training Loss: 0.7194742560386658
Epoch: [2/10], Step: [1201/3125], Validation Acc: 67.8, Training Loss: 0.603550374507904
Epoch: [2/10], Step: [1601/3125], Validation Acc: 66.3, Training Loss: 0.39446747303009033
Epoch: [2/10], 

In [34]:
# The validation accuracy of RNN model with multipliation encoded sentences when hidden size 300
test_model(val_loader, model_mul)

66.9

In [0]:
g = open("rnn_mul_hidden_size300_record.pkl","wb")
pkl.dump(record_multiply_rnn,g)
g.close()

In [0]:
from google.colab import files
files.download('rnn_mul_hidden_size300_record.pkl')

In [0]:
!cp 'RNN_mul_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks'

In [19]:
rnn_mul_trainacc = {}
hidden_size = 400
model = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'mul_RNN_hidden_size_{}.pkl'.format(hidden_size))
rnn_mul_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'mul_RNN_hidden_size_400.pkl' 'drive/My Drive/Colab Notebooks/mul_RNN_hidden_size_400.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 53.0, Training Acc: 51.86
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.3, Training Acc: 58.62
Epoch: [1/10], Step: [1201/3125], Validation Acc: 61.9, Training Acc: 63.07
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.6, Training Acc: 64.89
Epoch: [1/10], Step: [2001/3125], Validation Acc: 63.0, Training Acc: 67.54
Epoch: [1/10], Step: [2401/3125], Validation Acc: 67.0, Training Acc: 69.63
Epoch: [1/10], Step: [2801/3125], Validation Acc: 66.4, Training Acc: 70.55
Epoch: [2/10], Step: [401/3125], Validation Acc: 68.2, Training Acc: 74.14
Epoch: [2/10], Step: [801/3125], Validation Acc: 67.1, Training Acc: 75.22
Epoch: [2/10], Step: [1201/3125], Validation Acc: 67.7, Training Acc: 76.43
Epoch: [2/10], Step: [1601/3125], Validation Acc: 66.5, Training Acc: 78.43
Epoch: [2/10], Step: [2001/3125], Validation Acc: 68.5, Training Acc: 79.79
Epoch: [2/10], Step: [2401/3125], Validation Acc: 70.1, Training Acc: 81.55
Epoch: [2/10], S

In [0]:
import pickle
f = open("rnn_mul_hidden_new1.pkl","wb")
pickle.dump(rnn_mul_trainacc,f)
f.close()

In [0]:
from google.colab import files
files.download('rnn_mul_hidden_new1.pkl')

In [63]:
rnn_mul_trainacc.keys()

dict_keys(['hidden_size_300'])

In [35]:
hidden_size = 400
model_mul = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_mul.cuda()
train_loss, val_acc = train_model(train_loader,model_mul)
torch.save(model_mul.state_dict(), 'RNN_mul_hidden_size_{}.pkl'.format(hidden_size))
record_multiply_rnn['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 53.1, Training Loss: 1.0472723245620728
Epoch: [1/10], Step: [801/3125], Validation Acc: 54.8, Training Loss: 0.9839993119239807
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.0, Training Loss: 0.8641213178634644
Epoch: [1/10], Step: [1601/3125], Validation Acc: 62.6, Training Loss: 0.8164116740226746
Epoch: [1/10], Step: [2001/3125], Validation Acc: 63.7, Training Loss: 0.7026777863502502
Epoch: [1/10], Step: [2401/3125], Validation Acc: 64.1, Training Loss: 0.697812020778656
Epoch: [1/10], Step: [2801/3125], Validation Acc: 65.7, Training Loss: 0.8754838109016418
Epoch: [2/10], Step: [401/3125], Validation Acc: 67.4, Training Loss: 0.8912612199783325
Epoch: [2/10], Step: [801/3125], Validation Acc: 68.6, Training Loss: 0.7119446992874146
Epoch: [2/10], Step: [1201/3125], Validation Acc: 69.4, Training Loss: 0.4001390337944031
Epoch: [2/10], Step: [1601/3125], Validation Acc: 69.7, Training Loss: 0.7165548801422119
Epoch: [2/10], 

In [39]:
#val acc of hiddensize 400 with multiplication concat
test_model(val_loader, model_mul)

68.1

In [0]:
g = open("rnn_mul_record2.pkl","wb")
pkl.dump(record_multiply_rnn,g)
g.close()
from google.colab import files
files.download('rnn_mul_record2.pkl')

In [0]:
!cp 'RNN_mul_hidden_size_400.pkl' 'drive/My Drive/Colab Notebooks'

In [24]:
hidden_size = 200
model = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'mul_RNN_hidden_size_{}.pkl'.format(hidden_size))
rnn_mul_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'mul_RNN_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks/mul_RNN_hidden_size_200.pkl'


Epoch: [1/10], Step: [401/3125], Validation Acc: 50.4, Training Acc: 49.4
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.7, Training Acc: 57.46
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.9, Training Acc: 60.49
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.7, Training Acc: 63.97
Epoch: [1/10], Step: [2001/3125], Validation Acc: 62.6, Training Acc: 66.23
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.7, Training Acc: 66.92
Epoch: [1/10], Step: [2801/3125], Validation Acc: 65.4, Training Acc: 69.13
Epoch: [2/10], Step: [401/3125], Validation Acc: 64.5, Training Acc: 72.41
Epoch: [2/10], Step: [801/3125], Validation Acc: 65.4, Training Acc: 73.62
Epoch: [2/10], Step: [1201/3125], Validation Acc: 67.4, Training Acc: 74.59
Epoch: [2/10], Step: [1601/3125], Validation Acc: 64.7, Training Acc: 76.63
Epoch: [2/10], Step: [2001/3125], Validation Acc: 67.8, Training Acc: 78.02
Epoch: [2/10], Step: [2401/3125], Validation Acc: 67.6, Training Acc: 78.73
Epoch: [2/10], St

In [0]:
import pickle
f = open("rnn_mul_hidden_new2.pkl","wb")
pickle.dump(rnn_mul_trainacc,f)
f.close()

In [0]:
files.download('rnn_mul_hidden_new2.pkl')

In [42]:
hidden_size = 200
model_mul_200= RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_mul_200.cuda()
train_loss, val_acc = train_model(train_loader,model_mul_200)
torch.save(model_mul_200.state_dict(), 'RNN_mul_hidden_size_{}.pkl'.format(hidden_size))
record_multiply_rnn['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 46.2, Training Loss: 1.0637015104293823
Epoch: [1/10], Step: [801/3125], Validation Acc: 53.4, Training Loss: 1.0498462915420532
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.6, Training Loss: 0.8259854912757874
Epoch: [1/10], Step: [1601/3125], Validation Acc: 62.2, Training Loss: 0.9250548481941223
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.1, Training Loss: 0.8727205395698547
Epoch: [1/10], Step: [2401/3125], Validation Acc: 63.6, Training Loss: 0.77269446849823
Epoch: [1/10], Step: [2801/3125], Validation Acc: 63.6, Training Loss: 0.9557252526283264
Epoch: [2/10], Step: [401/3125], Validation Acc: 66.1, Training Loss: 0.6130464673042297
Epoch: [2/10], Step: [801/3125], Validation Acc: 66.3, Training Loss: 0.6182383894920349
Epoch: [2/10], Step: [1201/3125], Validation Acc: 67.9, Training Loss: 0.798732578754425
Epoch: [2/10], Step: [1601/3125], Validation Acc: 66.7, Training Loss: 0.6599235534667969
Epoch: [2/10], St

In [43]:
#val acc of hiddensize 200 with multiplication concat
test_model(val_loader, model_mul_200)

65.4

In [0]:
g = open("rnn_mul_record3.pkl","wb")
pkl.dump(record_multiply_rnn,g)
g.close()
from google.colab import files
files.download('rnn_mul_record3.pkl')

In [0]:
!cp 'RNN_mul_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks'

In [0]:
files.download('RNN_mul_hidden_size_200.pkl')

In [27]:
hidden_size = 100
model = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'mul_RNN_hidden_size_{}.pkl'.format(hidden_size))
rnn_mul_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'mul_RNN_hidden_size_100.pkl' 'drive/My Drive/Colab Notebooks/mul_RNN_hidden_size_100.pkl'


Epoch: [1/10], Step: [401/3125], Validation Acc: 47.1, Training Acc: 47.24
Epoch: [1/10], Step: [801/3125], Validation Acc: 52.0, Training Acc: 51.82
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.0, Training Acc: 57.01
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.1, Training Acc: 59.94
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.1, Training Acc: 61.78
Epoch: [1/10], Step: [2401/3125], Validation Acc: 63.9, Training Acc: 64.51
Epoch: [1/10], Step: [2801/3125], Validation Acc: 63.4, Training Acc: 65.81
Epoch: [2/10], Step: [401/3125], Validation Acc: 64.1, Training Acc: 67.75
Epoch: [2/10], Step: [801/3125], Validation Acc: 65.4, Training Acc: 69.22
Epoch: [2/10], Step: [1201/3125], Validation Acc: 66.0, Training Acc: 69.1
Epoch: [2/10], Step: [1601/3125], Validation Acc: 65.3, Training Acc: 72.23
Epoch: [2/10], Step: [2001/3125], Validation Acc: 66.4, Training Acc: 73.0
Epoch: [2/10], Step: [2401/3125], Validation Acc: 64.8, Training Acc: 73.5
Epoch: [2/10], Step

In [0]:
import pickle
f = open("rnn_mul_hidden_new3.pkl","wb")
pickle.dump(rnn_mul_trainacc,f)
f.close()

In [0]:
files.download("rnn_mul_hidden_new3.pkl")

In [47]:
hidden_size = 100
model_mul_100= RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_mul_100.cuda()
train_loss, val_acc = train_model(train_loader,model_mul_100)
torch.save(model_mul_100.state_dict(), 'RNN_mul_hidden_size_{}.pkl'.format(hidden_size))
record_multiply_rnn['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 42.8, Training Loss: 0.9908033013343811
Epoch: [1/10], Step: [801/3125], Validation Acc: 52.2, Training Loss: 1.0574074983596802
Epoch: [1/10], Step: [1201/3125], Validation Acc: 55.0, Training Loss: 0.7142316102981567
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.3, Training Loss: 0.8137086033821106
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.8, Training Loss: 0.7809378504753113
Epoch: [1/10], Step: [2401/3125], Validation Acc: 60.2, Training Loss: 0.8366739749908447
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.7, Training Loss: 0.817129909992218
Epoch: [2/10], Step: [401/3125], Validation Acc: 63.6, Training Loss: 0.6973512768745422
Epoch: [2/10], Step: [801/3125], Validation Acc: 65.4, Training Loss: 0.5673294067382812
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.9, Training Loss: 0.9167680740356445
Epoch: [2/10], Step: [1601/3125], Validation Acc: 65.7, Training Loss: 0.686924934387207
Epoch: [2/10], S

In [48]:
#val acc of hiddensize 100 with multiplication concat
test_model(val_loader, model_mul_100)

65.7

In [0]:
g = open("rnn_mul_record4.pkl","wb")
pkl.dump(record_multiply_rnn,g)
g.close()
from google.colab import files
files.download('rnn_mul_record4.pkl')

In [0]:
!cp 'RNN_mul_hidden_size_100.pkl' 'drive/My Drive/Colab Notebooks'

In [0]:
files.download('RNN_mul_hidden_size_100.pkl')

In [0]:
learning_rate = 3e-4
num_epochs = 10 # number epoch to train
use_cuda = True

In [30]:
hidden_size = 800
model = RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'mul_RNN_hidden_size_{}.pkl'.format(hidden_size))
rnn_mul_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'mul_RNN_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks/mul_RNN_hidden_size_800.pkl'


Epoch: [1/10], Step: [401/3125], Validation Acc: 51.0, Training Acc: 52.17
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.3, Training Acc: 58.69
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.9, Training Acc: 62.34
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.7, Training Acc: 65.39
Epoch: [1/10], Step: [2001/3125], Validation Acc: 62.9, Training Acc: 66.22
Epoch: [1/10], Step: [2401/3125], Validation Acc: 63.8, Training Acc: 68.84
Epoch: [1/10], Step: [2801/3125], Validation Acc: 65.8, Training Acc: 70.7
Epoch: [2/10], Step: [401/3125], Validation Acc: 65.6, Training Acc: 74.06
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.8, Training Acc: 76.89
Epoch: [2/10], Step: [1201/3125], Validation Acc: 66.8, Training Acc: 78.56
Epoch: [2/10], Step: [1601/3125], Validation Acc: 67.3, Training Acc: 79.52
Epoch: [2/10], Step: [2001/3125], Validation Acc: 68.7, Training Acc: 80.45
Epoch: [2/10], Step: [2401/3125], Validation Acc: 68.4, Training Acc: 82.4
Epoch: [2/10], Ste

In [0]:
import pickle
f = open("rnn_mul_hidden_new4.pkl","wb")
pickle.dump(rnn_mul_trainacc,f)
f.close()

In [0]:
files.download("rnn_mul_hidden_new4.pkl")

In [22]:
record_mul_800 = {}
hidden_size = 800
model_mul_800= RNN_mul(hidden_size = hidden_size, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_mul_800.cuda()
train_loss, val_acc = train_model(train_loader,model_mul_800)
torch.save(model_mul_800.state_dict(), 'drive/My Drive/Colab Notebook/RNN_mul_hidden_size_{}.pkl'.format(hidden_size))
record_mul_800['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 50.9, Training Loss: 0.9593042731285095
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.7, Training Loss: 0.8576378226280212
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.1, Training Loss: 0.7772582173347473
Epoch: [1/10], Step: [1601/3125], Validation Acc: 64.3, Training Loss: 0.8571440577507019
Epoch: [1/10], Step: [2001/3125], Validation Acc: 65.5, Training Loss: 0.9116935133934021
Epoch: [1/10], Step: [2401/3125], Validation Acc: 66.8, Training Loss: 0.740634560585022
Epoch: [1/10], Step: [2801/3125], Validation Acc: 64.5, Training Loss: 0.8019828796386719
Epoch: [2/10], Step: [401/3125], Validation Acc: 66.9, Training Loss: 0.7925124764442444
Epoch: [2/10], Step: [801/3125], Validation Acc: 68.3, Training Loss: 0.6408252716064453
Epoch: [2/10], Step: [1201/3125], Validation Acc: 67.9, Training Loss: 0.5875497460365295
Epoch: [2/10], Step: [1601/3125], Validation Acc: 69.1, Training Loss: 0.7827160358428955
Epoch: [2/10], 

FileNotFoundError: ignored

In [0]:
torch.save(model_mul_800.state_dict(), 'RNN_mul_hidden_size_{}.pkl'.format(hidden_size))


In [0]:
!cp 'RNN_mul_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks'

In [0]:
record_mul_800['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

In [27]:
#val acc of hiddensize 800 with multiplication concat
val_acc_rnn_mul800 = test_model(val_loader, model_mul_800)
val_acc_rnn_mul800

68.7

In [29]:
rnn_mul_100 = RNN_mul(hidden_size = 100,  num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_mul_100.cuda()
rnn_mul_100.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_100.pkl'))
test_model(val_loader, rnn_mul_100)


64.6

In [0]:
g = open("rnn_mul_800_record.pkl","wb")
pkl.dump(record_mul_800,g)
g.close()
from google.colab import files
files.download('rnn_mul_800_record.pkl')

# 3. CNN Tuning

## 3.1 Hidden Size Tuning for CNN
- Ways of interacting two encoded sentences: concatenation
- Weights of embedding : freeze all
- Kernel size: 3
- Hidden size list: (50, 100, 200, 300, 400, 800)

In [0]:
class CNN(nn.Module):
    def __init__(self,  hidden_size, kernel_size, padding_size, num_layers, num_classes, pre_trained_emb):

        super(CNN, self).__init__()

        self.num_layers, self.hidden_size, self.kernel_size ,self.padding_size= num_layers, hidden_size,kernel_size,padding_size
        self.embedding = nn.Embedding(pre_trained_emb.shape[0], pre_trained_emb.shape[1], padding_idx=PAD_IDX)
        #emb_size is the size of imput, hidden_size is the size of output. kernel_size is like the window size, 
        # the kernel size 3 here means read 3 words/chars once
        self.conv1 = nn.Conv1d( pre_trained_emb.shape[1], hidden_size, kernel_size, padding=padding_size)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size, padding=padding_size)

        self.linear1 = nn.Linear(hidden_size*2, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)
        #self.maxpooling = nn.MaxPool1d()
    def init_weights(self, is_static=True):
        self.embedding.weight = nn.Parameter(torch.from_numpy(pre_trained_emb).float())
        if is_static:
            self.embedding.weight.requires_grad = False
            
    def forward(self, token1_data, token1_lengths,token2_data, token2_lengths):
        batch_size, token1_seq_len = token1_data.size()
        _,token2_seq_len = token2_data.size()

        embed_sent1 = self.embedding(token1_data)
        embed_sent2 = self.embedding(token2_data)
        # the convolusional module in pytorch expects the input of size  batch size by the hidden size by the sequence length
        hidden_sent1 = self.conv1(embed_sent1.transpose(1,2)).transpose(1,2)
        hidden_sent2 = self.conv1(embed_sent2.transpose(1,2)).transpose(1,2)
        # relu expect 2-d tensor as input , merging the 0th and 1st dim together
        hidden_sent1 = F.relu(hidden_sent1.contiguous().view(-1, hidden_sent1.size(-1))).view(batch_size, token1_seq_len, hidden_sent1.size(-1))
        hidden_sent2 = F.relu(hidden_sent2.contiguous().view(-1, hidden_sent2.size(-1))).view(batch_size, token2_seq_len, hidden_sent2.size(-1))
        hidden_sent1 = self.conv2(hidden_sent1.transpose(1,2)).transpose(1,2)
        hidden_sent2 = self.conv2(hidden_sent2.transpose(1,2)).transpose(1,2)
        hidden_sent1 = F.relu(hidden_sent1.contiguous().view(-1, hidden_sent1.size(-1))).view(batch_size, token1_seq_len, hidden_sent1.size(-1))
        hidden_sent2 = F.relu(hidden_sent2.contiguous().view(-1, hidden_sent2.size(-1))).view(batch_size, token2_seq_len, hidden_sent2.size(-1))
        # max-pooling over time
        hidden_sent1 = F.max_pool1d(hidden_sent1.transpose(1,2), kernel_size = token1_seq_len ).transpose(1,2)
        hidden_sent2 = F.max_pool1d(hidden_sent2.transpose(1,2), kernel_size = token2_seq_len ).transpose(1,2)
        hidden = torch.cat((hidden_sent1, hidden_sent2), dim=2).squeeze()
        fc1_out = self.linear1(hidden)
        fc1_out = self.relu(fc1_out)
        fc2_out = self.linear2(fc1_out)
        preds = F.log_softmax(fc2_out, 1)
        return preds
        

In [0]:
learning_rate = 3e-4
num_epochs = 10 # number epoch to train
use_cuda = True

In [35]:
cnn_hidden_trainacc = {}
hidden_size = 50
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'new_CNN_hidden_size_50.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_50.pkl'


Epoch: [1/10], Step: [401/3125], Validation Acc: 46.7, Training Acc: 48.26
Epoch: [1/10], Step: [801/3125], Validation Acc: 55.7, Training Acc: 54.09
Epoch: [1/10], Step: [1201/3125], Validation Acc: 56.7, Training Acc: 56.79
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.6, Training Acc: 59.12
Epoch: [1/10], Step: [2001/3125], Validation Acc: 57.2, Training Acc: 58.85
Epoch: [1/10], Step: [2401/3125], Validation Acc: 60.0, Training Acc: 61.27
Epoch: [1/10], Step: [2801/3125], Validation Acc: 60.1, Training Acc: 62.53
Epoch: [2/10], Step: [401/3125], Validation Acc: 59.9, Training Acc: 63.28
Epoch: [2/10], Step: [801/3125], Validation Acc: 60.2, Training Acc: 63.76
Epoch: [2/10], Step: [1201/3125], Validation Acc: 60.9, Training Acc: 64.54
Epoch: [2/10], Step: [1601/3125], Validation Acc: 58.9, Training Acc: 64.91
Epoch: [2/10], Step: [2001/3125], Validation Acc: 61.6, Training Acc: 65.93
Epoch: [2/10], Step: [2401/3125], Validation Acc: 61.7, Training Acc: 66.68
Epoch: [2/10], S

In [0]:
import pickle
f = open("cnn_cat_hidden50_trainacc.pkl","wb")
pickle.dump(cnn_hidden_trainacc,f)
f.close()

In [0]:
files.download('cnn_cat_hidden50_trainacc.pkl')

In [29]:
performance_CNN = {}
hidden_size = 50
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3,padding_size = 1, num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 49.5, Training Loss: 1.0595282316207886
Epoch: [1/10], Step: [801/3125], Validation Acc: 52.7, Training Loss: 1.0235836505889893
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.3, Training Loss: 0.9650046825408936
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.3, Training Loss: 0.7270699143409729
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.9, Training Loss: 1.0607911348342896
Epoch: [1/10], Step: [2401/3125], Validation Acc: 59.6, Training Loss: 0.9951475858688354
Epoch: [1/10], Step: [2801/3125], Validation Acc: 58.7, Training Loss: 0.9968999624252319
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.7, Training Loss: 0.7215070128440857
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.3, Training Loss: 0.880528450012207
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.1, Training Loss: 0.8688835501670837
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.2, Training Loss: 0.7485660910606384
Epoch: [2/10], 

In [0]:
import pickle
g = open("cnn_hidden_size_record.pkl","wb")
pickle.dump(performance_CNN,g)
g.close()

In [0]:
files.download('cnn_hidden_size_record.pkl') 

In [38]:
#cnn_hidden_trainacc = {}
hidden_size = 100
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'new_CNN_hidden_size_100.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_100.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 54.4, Training Acc: 50.87
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.0, Training Acc: 56.65
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.7, Training Acc: 59.14
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.0, Training Acc: 61.24
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.7, Training Acc: 62.72
Epoch: [1/10], Step: [2401/3125], Validation Acc: 58.9, Training Acc: 62.62
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.0, Training Acc: 63.9
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.9, Training Acc: 66.34
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.6, Training Acc: 66.81
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.0, Training Acc: 67.41
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.6, Training Acc: 67.9
Epoch: [2/10], Step: [2001/3125], Validation Acc: 63.2, Training Acc: 68.46
Epoch: [2/10], Step: [2401/3125], Validation Acc: 62.3, Training Acc: 69.51
Epoch: [2/10], Ste

In [24]:
#cnn_hidden_trainacc = {}
hidden_size = 100
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
#torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
#!cp 'new_CNN_hidden_size_100.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_100.pkl'


Epoch: [1/10], Step: [401/3125], Validation Acc: 52.4, Training Acc: 52.5
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.5, Training Acc: 55.94
Epoch: [1/10], Step: [1201/3125], Validation Acc: 59.1, Training Acc: 60.15
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.9, Training Acc: 61.05
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.0, Training Acc: 62.46
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.2, Training Acc: 63.42
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.1, Training Acc: 64.02
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.1, Training Acc: 65.4
Epoch: [2/10], Step: [801/3125], Validation Acc: 62.3, Training Acc: 66.78
Epoch: [2/10], Step: [1201/3125], Validation Acc: 60.5, Training Acc: 67.52
Epoch: [2/10], Step: [1601/3125], Validation Acc: 61.2, Training Acc: 68.4
Epoch: [2/10], Step: [2001/3125], Validation Acc: 62.7, Training Acc: 68.46
Epoch: [2/10], Step: [2401/3125], Validation Acc: 62.8, Training Acc: 69.23
Epoch: [2/10], Step

In [0]:
import pickle
g = open("cnn_hidden100_acc.pkl","wb")
pickle.dump(cnn_hidden_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_hidden100_acc.pkl') 

In [30]:
cnn_hidden_trainacc.keys()

dict_keys(['hidden_size_400'])

In [18]:
performance_CNN = {}
hidden_size = 100
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3,padding_size = 1, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 53.0, Training Loss: 1.0692249536514282
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.0, Training Loss: 0.8814099431037903
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.8, Training Loss: 0.8362947702407837
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.6, Training Loss: 0.9681096076965332
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.3, Training Loss: 0.8451114892959595
Epoch: [1/10], Step: [2401/3125], Validation Acc: 60.8, Training Loss: 1.1132365465164185
Epoch: [1/10], Step: [2801/3125], Validation Acc: 60.9, Training Loss: 0.8936682939529419
Epoch: [2/10], Step: [401/3125], Validation Acc: 62.2, Training Loss: 1.004610538482666
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.4, Training Loss: 0.8382100462913513
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.0, Training Loss: 0.9411917924880981
Epoch: [2/10], Step: [1601/3125], Validation Acc: 61.7, Training Loss: 0.8374106884002686
Epoch: [2/10], 

In [0]:
from google.colab import files
files.download('CNN_hidden_size_100.pkl') 

In [41]:
#cnn_hidden_trainacc = {}
hidden_size = 200
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'new_CNN_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_200.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 56.5, Training Acc: 55.09
Epoch: [1/10], Step: [801/3125], Validation Acc: 58.0, Training Acc: 58.7
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.0, Training Acc: 59.35
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.8, Training Acc: 61.11
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.1, Training Acc: 61.93
Epoch: [1/10], Step: [2401/3125], Validation Acc: 59.6, Training Acc: 63.65
Epoch: [1/10], Step: [2801/3125], Validation Acc: 60.1, Training Acc: 64.68
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.6, Training Acc: 66.44
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.0, Training Acc: 66.09
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.7, Training Acc: 68.66
Epoch: [2/10], Step: [1601/3125], Validation Acc: 64.6, Training Acc: 68.99
Epoch: [2/10], Step: [2001/3125], Validation Acc: 64.2, Training Acc: 69.58
Epoch: [2/10], Step: [2401/3125], Validation Acc: 63.9, Training Acc: 70.83
Epoch: [2/10], St

In [20]:
#cnn_hidden_trainacc = {}
hidden_size = 200
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
#torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
#!cp 'new_CNN_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_200.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 54.5, Training Acc: 53.86
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.6, Training Acc: 57.58
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.0, Training Acc: 60.43
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.4, Training Acc: 61.97
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.8, Training Acc: 63.04
Epoch: [1/10], Step: [2401/3125], Validation Acc: 60.3, Training Acc: 65.21
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.8, Training Acc: 65.16
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.5, Training Acc: 67.37
Epoch: [2/10], Step: [801/3125], Validation Acc: 63.7, Training Acc: 68.3
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.8, Training Acc: 68.74
Epoch: [2/10], Step: [1601/3125], Validation Acc: 63.6, Training Acc: 69.41
Epoch: [2/10], Step: [2001/3125], Validation Acc: 63.0, Training Acc: 69.75
Epoch: [2/10], Step: [2401/3125], Validation Acc: 63.6, Training Acc: 70.77
Epoch: [2/10], St

NameError: ignored

In [0]:
cnn_hidden_trainacc = {}
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)

In [0]:
import pickle
g = open("cnn_hidden200_acc.pkl","wb")
pickle.dump(cnn_hidden_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_hidden200_acc.pkl') 

In [20]:
hidden_size = 200
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3, padding_size = 1, num_layers=1, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 55.8, Training Loss: 0.858955979347229
Epoch: [1/10], Step: [801/3125], Validation Acc: 59.4, Training Loss: 1.11115562915802
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.4, Training Loss: 1.0620146989822388
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.1, Training Loss: 0.6764976978302002
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.3, Training Loss: 0.8525142669677734
Epoch: [1/10], Step: [2401/3125], Validation Acc: 59.0, Training Loss: 0.8522081971168518
Epoch: [1/10], Step: [2801/3125], Validation Acc: 60.8, Training Loss: 1.0003868341445923
Epoch: [2/10], Step: [401/3125], Validation Acc: 62.4, Training Loss: 0.6956936120986938
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.9, Training Loss: 0.9119006395339966
Epoch: [2/10], Step: [1201/3125], Validation Acc: 60.9, Training Loss: 0.6165597438812256
Epoch: [2/10], Step: [1601/3125], Validation Acc: 63.0, Training Loss: 0.6860286593437195
Epoch: [2/10], St

In [36]:
cnn_hidden_trainacc = {}
hidden_size = 300
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
#torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
#!cp 'new_CNN_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_300.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 54.7, Training Acc: 54.9
Epoch: [1/10], Step: [801/3125], Validation Acc: 58.5, Training Acc: 58.54
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.0, Training Acc: 61.23
Epoch: [1/10], Step: [1601/3125], Validation Acc: 61.8, Training Acc: 61.9
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.3, Training Acc: 63.78
Epoch: [1/10], Step: [2401/3125], Validation Acc: 62.0, Training Acc: 64.95
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.7, Training Acc: 65.8
Epoch: [2/10], Step: [401/3125], Validation Acc: 64.9, Training Acc: 67.56
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.7, Training Acc: 67.64
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.0, Training Acc: 68.88
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.9, Training Acc: 70.0
Epoch: [2/10], Step: [2001/3125], Validation Acc: 63.0, Training Acc: 69.84
Epoch: [2/10], Step: [2401/3125], Validation Acc: 64.8, Training Acc: 71.12
Epoch: [2/10], Step:

In [0]:
import pickle
g = open("cnn_hidden300_acc.pkl","wb")
pickle.dump(cnn_hidden_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_hidden300_acc.pkl')

In [31]:
cnn_hidden_trainacc.keys()

dict_keys(['hidden_size_400'])

In [21]:
performance_CNN = {}
hidden_size = 300
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3, padding_size = 1, num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 55.4, Training Loss: 0.9462689161300659
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.0, Training Loss: 0.9292481541633606
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.5, Training Loss: 1.0269328355789185
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.2, Training Loss: 0.7789084911346436
Epoch: [1/10], Step: [2001/3125], Validation Acc: 58.8, Training Loss: 0.8680815696716309
Epoch: [1/10], Step: [2401/3125], Validation Acc: 58.0, Training Loss: 0.8236707448959351
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.2, Training Loss: 0.8517476916313171
Epoch: [2/10], Step: [401/3125], Validation Acc: 62.3, Training Loss: 0.4673631489276886
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.5, Training Loss: 0.9887857437133789
Epoch: [2/10], Step: [1201/3125], Validation Acc: 61.3, Training Loss: 0.4829244315624237
Epoch: [2/10], Step: [1601/3125], Validation Acc: 63.1, Training Loss: 0.7256178855895996
Epoch: [2/10],

In [0]:
!cp 'CNN_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks/CNN_hidden_size_300.pkl'

In [23]:
test_model(val_loader, model_CNN)

63.3

In [19]:
performance_CNN = {}
hidden_size = 300
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3, padding_size = 1, num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 52.6, Training Loss: 0.957032322883606
Epoch: [1/10], Step: [801/3125], Validation Acc: 57.9, Training Loss: 0.8670604228973389
Epoch: [1/10], Step: [1201/3125], Validation Acc: 56.0, Training Loss: 0.9471521973609924
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.4, Training Loss: 1.1565369367599487
Epoch: [1/10], Step: [2001/3125], Validation Acc: 58.9, Training Loss: 0.9107176065444946
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.2, Training Loss: 0.8247095942497253
Epoch: [1/10], Step: [2801/3125], Validation Acc: 60.0, Training Loss: 0.7467164397239685
Epoch: [2/10], Step: [401/3125], Validation Acc: 60.7, Training Loss: 1.0081230401992798
Epoch: [2/10], Step: [801/3125], Validation Acc: 62.8, Training Loss: 0.7596130967140198
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.1, Training Loss: 0.8079662322998047
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.5, Training Loss: 0.6784083843231201
Epoch: [2/10], 

In [22]:
cnn_hidden_trainacc = {}
hidden_size = 400
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
!cp 'new_CNN_hidden_size_400.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_400.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 56.7, Training Acc: 55.46
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.6, Training Acc: 57.83
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.6, Training Acc: 61.93
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.0, Training Acc: 63.68
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.1, Training Acc: 62.72
Epoch: [1/10], Step: [2401/3125], Validation Acc: 63.0, Training Acc: 65.97
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.1, Training Acc: 66.7
Epoch: [2/10], Step: [401/3125], Validation Acc: 62.8, Training Acc: 68.86
Epoch: [2/10], Step: [801/3125], Validation Acc: 64.0, Training Acc: 69.41
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.1, Training Acc: 69.44
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.7, Training Acc: 70.78
Epoch: [2/10], Step: [2001/3125], Validation Acc: 62.5, Training Acc: 71.28
Epoch: [2/10], Step: [2401/3125], Validation Acc: 62.4, Training Acc: 72.15
Epoch: [2/10], St

In [0]:
import pickle
g = open("cnn_hidden_size400_new.pkl","wb")
pickle.dump(cnn_hidden_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_hidden_size400_new.pkl') 

In [20]:
hidden_size = 400
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3, padding_size = 1, num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 52.9, Training Loss: 0.9149646162986755
Epoch: [1/10], Step: [801/3125], Validation Acc: 58.2, Training Loss: 0.8825314044952393
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.9, Training Loss: 0.9381951689720154
Epoch: [1/10], Step: [1601/3125], Validation Acc: 60.6, Training Loss: 0.9207388162612915
Epoch: [1/10], Step: [2001/3125], Validation Acc: 62.2, Training Loss: 0.8168049454689026
Epoch: [1/10], Step: [2401/3125], Validation Acc: 63.7, Training Loss: 1.0177844762802124
Epoch: [1/10], Step: [2801/3125], Validation Acc: 60.0, Training Loss: 0.7530525922775269
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.4, Training Loss: 0.9100489020347595
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.5, Training Loss: 0.9704999923706055
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.1, Training Loss: 0.7635511755943298
Epoch: [2/10], Step: [1601/3125], Validation Acc: 64.0, Training Loss: 0.6446985006332397
Epoch: [2/10],

In [27]:
#cnn_hidden_trainacc = {}
hidden_size = 800
model = CNN(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
#torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
#!cp 'new_CNN_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_800.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 56.2, Training Acc: 56.14
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.5, Training Acc: 59.08
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.7, Training Acc: 59.51
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.6, Training Acc: 62.61
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.6, Training Acc: 63.21
Epoch: [1/10], Step: [2401/3125], Validation Acc: 60.6, Training Acc: 65.99
Epoch: [1/10], Step: [2801/3125], Validation Acc: 59.6, Training Acc: 65.73
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.2, Training Acc: 67.47
Epoch: [2/10], Step: [801/3125], Validation Acc: 60.5, Training Acc: 68.99
Epoch: [2/10], Step: [1201/3125], Validation Acc: 62.5, Training Acc: 69.76
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.2, Training Acc: 70.28
Epoch: [2/10], Step: [2001/3125], Validation Acc: 63.0, Training Acc: 71.54
Epoch: [2/10], Step: [2401/3125], Validation Acc: 63.0, Training Acc: 70.75
Epoch: [2/10], S

In [30]:
cnn_hidden_trainacc.keys()

dict_keys(['hidden_size_200', 'hidden_size_100', 'hidden_size_800'])

In [0]:
import pickle
g = open("cnn_hidden_size800_new.pkl","wb")
pickle.dump(cnn_hidden_trainacc,g)
g.close()

In [0]:
import pickle
g = open("cnn_hidden_size800_new.pkl","wb")
pickle.dump(cnn_hidden_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_hidden_size800_new.pkl') 

In [28]:
hidden_size = 800
model_CNN = CNN(hidden_size = hidden_size, kernel_size = 3, padding_size = 1, num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_hidden_size_{}.pkl'.format(hidden_size))
performance_CNN['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 56.0, Training Loss: 0.9951490759849548
Epoch: [1/10], Step: [801/3125], Validation Acc: 55.7, Training Loss: 0.8658526539802551
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.4, Training Loss: 0.8258790373802185
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.8, Training Loss: 0.9195840954780579
Epoch: [1/10], Step: [2001/3125], Validation Acc: 61.0, Training Loss: 0.8972375988960266
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.5, Training Loss: 0.7824311256408691
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.6, Training Loss: 0.7511696219444275
Epoch: [2/10], Step: [401/3125], Validation Acc: 61.7, Training Loss: 0.7476624846458435
Epoch: [2/10], Step: [801/3125], Validation Acc: 62.7, Training Loss: 0.7221182584762573
Epoch: [2/10], Step: [1201/3125], Validation Acc: 66.8, Training Loss: 0.4947746992111206
Epoch: [2/10], Step: [1601/3125], Validation Acc: 64.7, Training Loss: 0.9293842315673828
Epoch: [2/10],

In [0]:
model_cnn = CNN(hidden_size = 300, kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_cnn.cuda()
model_cnn.load_state_dict(torch.load('CNN_hidden_size_300.pkl'))

In [25]:
# VAL ACC for CNN with hidden_size 300 and kernel_size 3 :65.6
test_model(val_loader, model_cnn)

65.6

In [26]:
# VAL ACC for CNN with hidden_size 400 and kernel_size 3: 64.9
model_cnn = CNN(hidden_size = 400, kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_cnn.cuda()
model_cnn.load_state_dict(torch.load('CNN_hidden_size_400.pkl'))
test_model(val_loader, model_cnn)

64.9

In [29]:
# VAL ACC for CNN with hidden_size 800 and kernel_size 3: 65.0
model_cnn = CNN(hidden_size = 800, kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_cnn.cuda()
model_cnn.load_state_dict(torch.load('CNN_hidden_size_800.pkl'))
test_model(val_loader, model_cnn)

65.0

In [0]:
g = open("cnn_hidden_size_record3.pkl","wb")
pkl.dump(performance_CNN,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_hidden_size_record3.pkl') 

## 3.2  Kernel Size Tuning for CNN
- Hidden size 400
- Kernel size list (3, 5, 7)

In [25]:
cnn_kernel_trainacc = {}
kernel_size = 5
model = CNN(hidden_size = 400,  kernel_size = kernel_size,padding_size = 2,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'new_CNN_kernel_size_{}.pkl'.format(hidden_size))
cnn_kernel_trainacc['kernel_size_{}'.format(kernel_size)] = (val_acc,train_acc)


Epoch: [1/10], Step: [401/3125], Validation Acc: 52.7, Training Acc: 55.51
Epoch: [1/10], Step: [801/3125], Validation Acc: 54.2, Training Acc: 57.6
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.7, Training Acc: 61.96
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.6, Training Acc: 61.85
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.6, Training Acc: 63.64
Epoch: [1/10], Step: [2401/3125], Validation Acc: 61.4, Training Acc: 64.76
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.3, Training Acc: 65.74
Epoch: [2/10], Step: [401/3125], Validation Acc: 60.8, Training Acc: 68.41
Epoch: [2/10], Step: [801/3125], Validation Acc: 60.7, Training Acc: 69.43
Epoch: [2/10], Step: [1201/3125], Validation Acc: 61.0, Training Acc: 69.92
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.4, Training Acc: 71.25
Epoch: [2/10], Step: [2001/3125], Validation Acc: 63.9, Training Acc: 72.21
Epoch: [2/10], Step: [2401/3125], Validation Acc: 62.7, Training Acc: 72.06
Epoch: [2/10], St

In [0]:
g = open("cnn_kernel5_acc.pkl","wb")
pkl.dump(cnn_kernel_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_kernel5_acc.pkl')

In [0]:
!cp 'new_CNN_kernel_size_400.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_kernel_size_5.pkl'

In [38]:
cnn_kernel_record = {}
kernel_size = 5
model_CNN = CNN(hidden_size = 400, kernel_size = kernel_size, padding_size = 2,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_kernel_size_{}.pkl'.format(kernel_size))
cnn_kernel_record['kernel_size_{}'.format(kernel_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 53.2, Training Loss: 0.8358578681945801
Epoch: [1/10], Step: [801/3125], Validation Acc: 55.7, Training Loss: 0.8203444480895996
Epoch: [1/10], Step: [1201/3125], Validation Acc: 57.4, Training Loss: 0.7594694495201111
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.2, Training Loss: 0.8315712809562683
Epoch: [1/10], Step: [2001/3125], Validation Acc: 58.7, Training Loss: 0.8887002468109131
Epoch: [1/10], Step: [2401/3125], Validation Acc: 59.7, Training Loss: 0.8540970087051392
Epoch: [1/10], Step: [2801/3125], Validation Acc: 61.0, Training Loss: 0.7540554404258728
Epoch: [2/10], Step: [401/3125], Validation Acc: 59.4, Training Loss: 0.913895845413208
Epoch: [2/10], Step: [801/3125], Validation Acc: 61.2, Training Loss: 0.7809163928031921
Epoch: [2/10], Step: [1201/3125], Validation Acc: 61.2, Training Loss: 0.6057379841804504
Epoch: [2/10], Step: [1601/3125], Validation Acc: 60.0, Training Loss: 0.6565442681312561
Epoch: [2/10], 

In [44]:
# VAL ACC for CNN with hidden_size 400 and kernel_size 5: 62
model_cnn = CNN(hidden_size = 400, kernel_size = 5, padding_size = 2, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_cnn.cuda()
model_cnn.load_state_dict(torch.load('CNN_kernel_size_5.pkl'))
test_model(val_loader, model_cnn)

62.0

In [29]:
#cnn_kernel_trainacc = {}
kernel_size = 7
model = CNN(hidden_size = 400,  kernel_size = kernel_size,padding_size = 3,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
torch.save(model.state_dict(), 'new_CNN_kernel_size_{}.pkl'.format(kernel_size))
cnn_kernel_trainacc['kernel_size_{}'.format(kernel_size)] = (val_acc,train_acc)
!cp 'new_CNN_kernel_size_7.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_kernel_size_7.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 51.8, Training Acc: 53.61
Epoch: [1/10], Step: [801/3125], Validation Acc: 56.5, Training Acc: 57.82
Epoch: [1/10], Step: [1201/3125], Validation Acc: 56.4, Training Acc: 60.85
Epoch: [1/10], Step: [1601/3125], Validation Acc: 57.0, Training Acc: 61.7
Epoch: [1/10], Step: [2001/3125], Validation Acc: 57.3, Training Acc: 63.5
Epoch: [1/10], Step: [2401/3125], Validation Acc: 58.4, Training Acc: 65.04
Epoch: [1/10], Step: [2801/3125], Validation Acc: 59.7, Training Acc: 65.01
Epoch: [2/10], Step: [401/3125], Validation Acc: 59.5, Training Acc: 67.71
Epoch: [2/10], Step: [801/3125], Validation Acc: 59.8, Training Acc: 68.77
Epoch: [2/10], Step: [1201/3125], Validation Acc: 61.4, Training Acc: 70.16
Epoch: [2/10], Step: [1601/3125], Validation Acc: 60.7, Training Acc: 70.65
Epoch: [2/10], Step: [2001/3125], Validation Acc: 61.4, Training Acc: 71.64
Epoch: [2/10], Step: [2401/3125], Validation Acc: 62.5, Training Acc: 72.45
Epoch: [2/10], Ste

In [0]:
g = open("cnn_kernel7_acc.pkl","wb")
pkl.dump(cnn_kernel_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_kernel7_acc.pkl')

In [32]:
cnn_kernel_record1 = {}
kernel_size = 7
model_CNN = CNN(hidden_size = 400, kernel_size = kernel_size, padding_size = 3,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_CNN.cuda()
train_loss, val_acc = train_model(train_loader,model_CNN)
torch.save(model_CNN.state_dict(), 'CNN_kernel_size_{}.pkl'.format(kernel_size))
cnn_kernel_record1['kernel_size_{}'.format(kernel_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 54.9, Training Loss: 0.9650066494941711
Epoch: [1/10], Step: [801/3125], Validation Acc: 55.6, Training Loss: 0.9134538769721985
Epoch: [1/10], Step: [1201/3125], Validation Acc: 58.2, Training Loss: 1.0140962600708008
Epoch: [1/10], Step: [1601/3125], Validation Acc: 59.1, Training Loss: 0.8418033123016357
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.0, Training Loss: 0.7065867781639099
Epoch: [1/10], Step: [2401/3125], Validation Acc: 60.8, Training Loss: 0.8253891468048096
Epoch: [1/10], Step: [2801/3125], Validation Acc: 62.6, Training Loss: 0.9053332209587097
Epoch: [2/10], Step: [401/3125], Validation Acc: 60.8, Training Loss: 0.9090737104415894
Epoch: [2/10], Step: [801/3125], Validation Acc: 60.6, Training Loss: 0.6993919014930725
Epoch: [2/10], Step: [1201/3125], Validation Acc: 61.2, Training Loss: 0.8176190257072449
Epoch: [2/10], Step: [1601/3125], Validation Acc: 62.7, Training Loss: 0.5847613215446472
Epoch: [2/10],

In [0]:
!cp 'CNN_kernel_size_7.pkl' 'drive/My Drive/Colab Notebooks/CNN_kernel_size_7.pkl'

In [35]:
test_model(val_loader, model_CNN)

63.8

In [0]:
g = open("cnn_kernel_size_record1.pkl","wb")
pkl.dump(cnn_kernel_record1,g)
g.close()

In [0]:
files.download('cnn_kernel_size_record1.pkl')

In [27]:
# VAL ACC for CNN with hidden_size 400 and kernel_size 7: 65
model_cnn = CNN(hidden_size = 800, kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_cnn.cuda()
model_cnn.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/CNN_hidden_size_800.pkl'))
test_model(val_loader, model_cnn)

65.0

In [0]:
# VAL ACC for CNN with hidden_size 400 and kernel_size 5: 62
model_cnn = CNN(hidden_size = 400, kernel_size = 5, padding_size = 2, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model_cnn.cuda()
model_cnn.load_state_dict(torch.load('CNN_kernel_size_5.pkl'))
test_model(val_loader, model_cnn)

In [0]:
!cp 'CNN_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks'

In [0]:
g = open("cnn_kernel_size_record.pkl","wb")
pkl.dump(cnn_kernel_record,g)
g.close()

In [0]:
files.download('cnn_kernel_size_record.pkl')

## 3.3 Concatenate two encoded sentences with element-wise multiplication for CNN
- Kernel size: 3
- Hidden size list: (200, 300, 400)

In [0]:
class CNN_mul(nn.Module):
    def __init__(self,  hidden_size, kernel_size, padding_size, num_layers, num_classes, pre_trained_emb):

        super(CNN_mul, self).__init__()

        self.num_layers, self.hidden_size, self.kernel_size ,self.padding_size= num_layers, hidden_size,kernel_size,padding_size
        self.embedding = nn.Embedding(pre_trained_emb.shape[0], pre_trained_emb.shape[1], padding_idx=PAD_IDX)
        #emb_size is the size of imput, hidden_size is the size of output. kernel_size is like the window size, 
        # the kernel size 3 here means read 3 words/chars once
        self.conv1 = nn.Conv1d( pre_trained_emb.shape[1], hidden_size, kernel_size, padding=padding_size)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size, padding=padding_size)

        self.linear1 = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)
        #self.maxpooling = nn.MaxPool1d()
    def init_weights(self, is_static=True):
        self.embedding.weight = nn.Parameter(torch.from_numpy(pre_trained_emb).float())
        if is_static:
            self.embedding.weight.requires_grad = False
            
    def forward(self, token1_data, token1_lengths,token2_data, token2_lengths):
        batch_size, token1_seq_len = token1_data.size()
        _,token2_seq_len = token2_data.size()

        embed_sent1 = self.embedding(token1_data)
        embed_sent2 = self.embedding(token2_data)
        # the convolusional module in pytorch expects the input of size  batch size by the hidden size by the sequence length
        hidden_sent1 = self.conv1(embed_sent1.transpose(1,2)).transpose(1,2)
        hidden_sent2 = self.conv1(embed_sent2.transpose(1,2)).transpose(1,2)
        # relu expect 2-d tensor as input , merging the 0th and 1st dim together
        hidden_sent1 = F.relu(hidden_sent1.contiguous().view(-1, hidden_sent1.size(-1))).view(batch_size, token1_seq_len, hidden_sent1.size(-1))
        hidden_sent2 = F.relu(hidden_sent2.contiguous().view(-1, hidden_sent2.size(-1))).view(batch_size, token2_seq_len, hidden_sent2.size(-1))
        hidden_sent1 = self.conv2(hidden_sent1.transpose(1,2)).transpose(1,2)
        hidden_sent2 = self.conv2(hidden_sent2.transpose(1,2)).transpose(1,2)
        hidden_sent1 = F.relu(hidden_sent1.contiguous().view(-1, hidden_sent1.size(-1))).view(batch_size, token1_seq_len, hidden_sent1.size(-1))
        hidden_sent2 = F.relu(hidden_sent2.contiguous().view(-1, hidden_sent2.size(-1))).view(batch_size, token2_seq_len, hidden_sent2.size(-1))
        # max-pooling over time
        hidden_sent1 = F.max_pool1d(hidden_sent1.transpose(1,2), kernel_size = token1_seq_len ).transpose(1,2)
        hidden_sent2 = F.max_pool1d(hidden_sent2.transpose(1,2), kernel_size = token2_seq_len ).transpose(1,2)
        hidden = torch.mul(hidden_sent1, hidden_sent2).squeeze()
        fc1_out = self.linear1(hidden)
        fc1_out = self.relu(fc1_out)
        fc2_out = self.linear2(fc1_out)
        preds = F.log_softmax(fc2_out, 1)
        return preds

In [32]:
cnn_mul_hidden_trainacc = {}
hidden_size = 200
model = CNN_mul(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
#torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_mul_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
#!cp 'new_CNN_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_800.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 50.7, Training Acc: 48.94
Epoch: [1/10], Step: [801/3125], Validation Acc: 51.5, Training Acc: 53.75
Epoch: [1/10], Step: [1201/3125], Validation Acc: 54.2, Training Acc: 54.47
Epoch: [1/10], Step: [1601/3125], Validation Acc: 54.4, Training Acc: 57.73
Epoch: [1/10], Step: [2001/3125], Validation Acc: 57.3, Training Acc: 58.21
Epoch: [1/10], Step: [2401/3125], Validation Acc: 55.0, Training Acc: 58.61
Epoch: [1/10], Step: [2801/3125], Validation Acc: 56.4, Training Acc: 59.22
Epoch: [2/10], Step: [401/3125], Validation Acc: 58.3, Training Acc: 62.3
Epoch: [2/10], Step: [801/3125], Validation Acc: 59.3, Training Acc: 63.44
Epoch: [2/10], Step: [1201/3125], Validation Acc: 59.2, Training Acc: 65.45
Epoch: [2/10], Step: [1601/3125], Validation Acc: 59.2, Training Acc: 66.02
Epoch: [2/10], Step: [2001/3125], Validation Acc: 58.6, Training Acc: 65.25
Epoch: [2/10], Step: [2401/3125], Validation Acc: 59.7, Training Acc: 67.46
Epoch: [2/10], St

In [0]:
import pickle
g = open("cnn_mul_hidden200_acc.pkl","wb")
pickle.dump(cnn_mul_hidden_trainacc,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_mul_hidden200_acc.pkl')

In [67]:
# VAL ACC for CNN with hidden_size 200 and kernel_size 3:
cnn_mul_record = {}
hidden_size = 200
cnn_mul_200 = CNN_mul(hidden_size = hidden_size ,kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  cnn_mul_200.cuda()
train_loss, val_acc = train_model(train_loader,cnn_mul_200)
torch.save(cnn_mul_200.state_dict(), 'CNN_mul_hidden_size_{}.pkl'.format(hidden_size))
cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 48.7, Training Loss: 1.1032335758209229
Epoch: [1/10], Step: [801/3125], Validation Acc: 53.0, Training Loss: 0.9255911111831665
Epoch: [1/10], Step: [1201/3125], Validation Acc: 55.2, Training Loss: 0.9379119873046875
Epoch: [1/10], Step: [1601/3125], Validation Acc: 55.4, Training Loss: 0.9146648049354553
Epoch: [1/10], Step: [2001/3125], Validation Acc: 57.5, Training Loss: 1.0605806112289429
Epoch: [1/10], Step: [2401/3125], Validation Acc: 56.3, Training Loss: 0.9028449654579163
Epoch: [1/10], Step: [2801/3125], Validation Acc: 59.3, Training Loss: 1.19395911693573
Epoch: [2/10], Step: [401/3125], Validation Acc: 59.4, Training Loss: 0.732001006603241
Epoch: [2/10], Step: [801/3125], Validation Acc: 60.8, Training Loss: 0.7924373149871826
Epoch: [2/10], Step: [1201/3125], Validation Acc: 60.4, Training Loss: 0.9057833552360535
Epoch: [2/10], Step: [1601/3125], Validation Acc: 59.0, Training Loss: 0.5876357555389404
Epoch: [2/10], St

In [68]:
test_model(val_loader, cnn_mul_200)

63.2

In [35]:
#cnn_mul_hidden_trainacc = {}
hidden_size = 300
model = CNN_mul(hidden_size = hidden_size,  kernel_size = 3,padding_size = 1,num_layers=2, num_classes=3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  model.cuda()
val_acc,train_acc = train_model(train_loader,model)
#torch.save(model.state_dict(), 'new_CNN_hidden_size_{}.pkl'.format(hidden_size))
cnn_mul_hidden_trainacc['hidden_size_{}'.format(hidden_size)] = (val_acc,train_acc)
#!cp 'new_CNN_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks/new_CNN_hidden_size_800.pkl'

Epoch: [1/10], Step: [401/3125], Validation Acc: 48.1, Training Acc: 49.33
Epoch: [1/10], Step: [801/3125], Validation Acc: 50.1, Training Acc: 51.78
Epoch: [1/10], Step: [1201/3125], Validation Acc: 53.6, Training Acc: 55.51
Epoch: [1/10], Step: [1601/3125], Validation Acc: 58.0, Training Acc: 58.67
Epoch: [1/10], Step: [2001/3125], Validation Acc: 54.5, Training Acc: 58.84
Epoch: [1/10], Step: [2401/3125], Validation Acc: 58.7, Training Acc: 60.88
Epoch: [1/10], Step: [2801/3125], Validation Acc: 57.6, Training Acc: 61.97
Epoch: [2/10], Step: [401/3125], Validation Acc: 58.6, Training Acc: 64.06
Epoch: [2/10], Step: [801/3125], Validation Acc: 56.4, Training Acc: 62.9
Epoch: [2/10], Step: [1201/3125], Validation Acc: 60.6, Training Acc: 66.19
Epoch: [2/10], Step: [1601/3125], Validation Acc: 60.0, Training Acc: 65.24
Epoch: [2/10], Step: [2001/3125], Validation Acc: 60.6, Training Acc: 67.81
Epoch: [2/10], Step: [2401/3125], Validation Acc: 58.9, Training Acc: 66.96
Epoch: [2/10], St

In [0]:
import pickle
g = open("cnn_mul_hidden300_acc.pkl","wb")
pickle.dump(cnn_mul_hidden_trainacc,g)
g.close()

In [0]:
files.download('cnn_mul_hidden300_acc.pkl')

In [69]:
# VAL ACC for CNN with hidden_size 300 and kernel_size 3:
#cnn_mul_record = {}
hidden_size = 300
cnn_mul_300 = CNN_mul(hidden_size = hidden_size ,kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  cnn_mul_300.cuda()
train_loss, val_acc = train_model(train_loader,cnn_mul_300)
torch.save(cnn_mul_300.state_dict(), 'CNN_mul_hidden_size_{}.pkl'.format(hidden_size))
cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 45.7, Training Loss: 1.0349533557891846
Epoch: [1/10], Step: [801/3125], Validation Acc: 51.4, Training Loss: 1.0663421154022217
Epoch: [1/10], Step: [1201/3125], Validation Acc: 51.7, Training Loss: 1.0119922161102295
Epoch: [1/10], Step: [1601/3125], Validation Acc: 53.1, Training Loss: 0.9978107213973999
Epoch: [1/10], Step: [2001/3125], Validation Acc: 56.1, Training Loss: 0.9159250259399414
Epoch: [1/10], Step: [2401/3125], Validation Acc: 57.7, Training Loss: 0.9301472902297974
Epoch: [1/10], Step: [2801/3125], Validation Acc: 56.2, Training Loss: 0.7586804032325745
Epoch: [2/10], Step: [401/3125], Validation Acc: 58.8, Training Loss: 0.9810324311256409
Epoch: [2/10], Step: [801/3125], Validation Acc: 59.7, Training Loss: 0.8196980953216553
Epoch: [2/10], Step: [1201/3125], Validation Acc: 63.9, Training Loss: 0.7884206175804138
Epoch: [2/10], Step: [1601/3125], Validation Acc: 60.5, Training Loss: 0.8009560704231262
Epoch: [2/10],

In [70]:
test_model(val_loader, cnn_mul_300)

63.7

In [0]:
!cp 'CNN_mul_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks'
!cp 'CNN_mul_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks'

In [71]:
# VAL ACC for CNN with hidden_size 400 and kernel_size 3:
#cnn_mul_record = {}
hidden_size = 400
cnn_mul_400 = CNN_mul(hidden_size = hidden_size ,kernel_size = 3, padding_size = 1, num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  cnn_mul_400.cuda()
train_loss, val_acc = train_model(train_loader,cnn_mul_400)
torch.save(cnn_mul_400.state_dict(), 'CNN_mul_hidden_size_{}.pkl'.format(hidden_size))
cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)

Epoch: [1/10], Step: [401/3125], Validation Acc: 46.9, Training Loss: 1.0549224615097046
Epoch: [1/10], Step: [801/3125], Validation Acc: 52.8, Training Loss: 0.8810384273529053
Epoch: [1/10], Step: [1201/3125], Validation Acc: 52.8, Training Loss: 0.9858664870262146
Epoch: [1/10], Step: [1601/3125], Validation Acc: 57.3, Training Loss: 1.0246943235397339
Epoch: [1/10], Step: [2001/3125], Validation Acc: 55.5, Training Loss: 0.9618517160415649
Epoch: [1/10], Step: [2401/3125], Validation Acc: 54.5, Training Loss: 1.0884836912155151
Epoch: [1/10], Step: [2801/3125], Validation Acc: 59.2, Training Loss: 0.8771393299102783
Epoch: [2/10], Step: [401/3125], Validation Acc: 58.1, Training Loss: 0.8243652582168579
Epoch: [2/10], Step: [801/3125], Validation Acc: 58.1, Training Loss: 0.8500962257385254
Epoch: [2/10], Step: [1201/3125], Validation Acc: 59.9, Training Loss: 0.8621939420700073
Epoch: [2/10], Step: [1601/3125], Validation Acc: 59.9, Training Loss: 0.6971622705459595
Epoch: [2/10],

In [72]:
test_model(val_loader, cnn_mul_400)

62.1

In [0]:
test_model(val_loader, cnn_mul_800)

In [0]:
g = open("cnn_mul_record.pkl","wb")
pkl.dump(cnn_mul_record,g)
g.close()

In [0]:
from google.colab import files
files.download('cnn_mul_record.pkl')

In [0]:
!cp 'CNN_mul_hidden_size_200.pkl' 'drive/My Drive/Colab Notebooks'
!cp 'CNN_mul_hidden_size_300.pkl' 'drive/My Drive/Colab Notebooks'
!cp 'CNN_mul_hidden_size_400.pkl' 'drive/My Drive/Colab Notebooks'
#!cp 'CNN_mul_hidden_size_800.pkl' 'drive/My Drive/Colab Notebooks'

# 4. Pick the best model and find 3 correct and 3 incorrect examples

In [22]:
rnn_mul_800 = RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_mul_800.cuda()
rnn_mul_800.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
test_model(val_loader, rnn_mul_800)

69.9

In [0]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for data1, lengths1,data2, lengths2, labels in loader:
        data1_batch, lengths1_batch,data2_batch, lengths2_batch, label_batch = data1, lengths1, data2, lengths2,labels
        outputs =model(data1_batch, lengths1_batch,data2_batch, lengths2_batch)
        predicted = outputs.max(1, keepdim=True)[1]

        total += labels.size(0)
        correct += predicted.eq(labels.view_as(predicted)).sum().item()
    return (100 * correct / total)

In [0]:
#val_dataset_best = NewsGroupDataset(val_data_indices[0:200], y_val[0:200])
val_loader_best = torch.utils.data.DataLoader(dataset=val_dataset, 
                                           batch_size=200,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)


In [35]:
rnn_mul_800.eval()
for data1, lengths1,data2, lengths2, labels in val_loader_best:
  if use_cuda and torch.cuda.is_available():
    data1 = data1.cuda()
    lengths1  = lengths1.cuda()
    data2 = data2.cuda()
    lengths2  = lengths2.cuda()
    labels = labels.cuda()
  data1_batch, lengths1_batch,data2_batch, lengths2_batch, label_batch = data1, lengths1, data2, lengths2,labels
  outputs = rnn_mul_800(data1_batch, lengths1_batch,data2_batch, lengths2_batch)
  predicted = outputs.max(1, keepdim=True)[1]
  
  wrong_idx = [i[0] for i in (predicted.eq(labels.view_as(predicted))==0).nonzero().cpu().numpy()]
  wrong_predict = [predicted.flatten().cpu().numpy()[i] for i in wrong_idx]
  true_label = [label_batch.flatten().cpu().numpy()[i] for i in wrong_idx]
  break

print ('Wrong index :', wrong_idx)
print('Wrong predict: ', wrong_predict)
print('True label: ', true_label)
        

Wrong index : [1, 5, 7, 10, 15, 23, 25, 26, 30, 36, 39, 42, 48, 49, 52, 56, 58, 60, 65, 70, 73, 76, 89, 90, 91, 92, 95, 101, 105, 108, 109, 112, 116, 119, 120, 121, 124, 125, 129, 131, 137, 139, 142, 145, 147, 161, 163, 165, 166, 168, 170, 171, 174, 175, 177, 178, 180, 184, 185, 190, 191, 193, 196, 198]
Wrong predict:  [2, 2, 2, 2, 2, 1, 0, 0, 1, 0, 2, 2, 1, 0, 2, 0, 0, 0, 2, 1, 1, 0, 2, 1, 2, 1, 2, 0, 2, 1, 2, 2, 2, 2, 0, 2, 2, 0, 2, 1, 1, 1, 1, 2, 2, 2, 0, 1, 2, 0, 0, 1, 0, 0, 2, 0, 2, 2, 2, 1, 2, 0, 1, 0]
True label:  [0, 0, 1, 0, 0, 0, 2, 1, 2, 2, 1, 0, 2, 1, 0, 1, 1, 1, 1, 2, 0, 2, 1, 0, 1, 2, 1, 2, 1, 2, 0, 1, 1, 0, 1, 0, 0, 2, 0, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 1, 2, 0, 2, 0, 0, 1, 0, 0, 1, 2, 2]


In [36]:
len(wrong_idx)

64

### Incorrect 1

In [51]:
print('Sent1:\n ', ' '.join([id2token[i] for i in val_data[1][0] if i!=0]))
print('Sent2:\n ', ' '.join([id2token[i] for i in val_data[1][1] if i!=0]))
print('True Label:',  val_targets[1])
print('Wrong Predict:', wrong_predict[wrong_idx.index(1)])

Sent1:
  Four people sit on a subway two read books , one looks at a cellphone and is wearing knee high boots .
Sent2:
  Multiple people are on a subway together , with each of them doing their own thing .
True Label: 0
Wrong Predict: 2


### Incorrect 2

In [52]:
print('Sent1:\n ', ' '.join([id2token[i] for i in val_data[23][0] if i!=0]))
print('Sent2:\n ', ' '.join([id2token[i] for i in val_data[23][1] if i!=0]))
print('True Label:',  val_targets[23])
print('Wrong Predict:', wrong_predict[wrong_idx.index(23)])

Sent1:
  People walk amongst a traffic jam in a crowded city .
Sent2:
  The traffic is halted and people are walking between the cars .
True Label: 0
Wrong Predict: 1


### Incorrect 3

In [54]:
print('Sent1:\n ', ' '.join([id2token[i] for i in val_data[36][0] if i!=0]))
print('Sent2:\n ', ' '.join([id2token[i] for i in val_data[36][1] if i!=0]))
print('True Label:',  val_targets[36])
print('Wrong Predict:', wrong_predict[wrong_idx.index(36)])

Sent1:
  A woman walking a dog on a leash at the beach , trailing behind as a pug follows another unseen woman .
Sent2:
  a large woman walks a dog
True Label: 2
Wrong Predict: 0


### Correct 1

In [59]:
print('Sent1:\n ', ' '.join([id2token[i] for i in val_data[2][0] if i!=0]))
print('Sent2:\n ', ' '.join([id2token[i] for i in val_data[2][1] if i!=0]))
print('True Label:',  val_targets[2])
print('Predict:', predicted.flatten().cpu().numpy()[2])

Sent1:
  bicycles stationed while a group of people socialize .
Sent2:
  People get together near a stand of bicycles .
True Label: 0
Predict: 0


### Correct 2

In [60]:
print('Sent1:\n ', ' '.join([id2token[i] for i in val_data[27][0] if i!=0]))
print('Sent2:\n ', ' '.join([id2token[i] for i in val_data[27][1] if i!=0]))
print('True Label:',  val_targets[27])
print('Predict:', predicted.flatten().cpu().numpy()[27])

Sent1:
  A chef speaks into a microphone about a mixed salad contained in a bowl with a decorative lid .
Sent2:
  a chef speaks into a microphone near a bowl
True Label: 0
Predict: 0


### Correct 3

In [62]:
print('Sent1:\n ', ' '.join([id2token[i] for i in val_data[199][0] if i!=0]))
print('Sent2:\n ', ' '.join([id2token[i] for i in val_data[199][1] if i!=0]))
print('True Label:',  val_targets[199])
print('Predict:', predicted.flatten().cpu().numpy()[199])

Sent1:
  Children and adults swim in large pool with red staircase .
Sent2:
  Children and adults are standing outside of a gym .
True Label: 1
Predict: 1


# Evaluation on _mnli val_

In [38]:
test_df = pd.read_csv('mnli_val.tsv', sep="\t", index_col=False )
test_df.head()

Unnamed: 0,sentence1,sentence2,label,genre
0,"'Not entirely , ' I snapped , harsher than int...",I spoke more harshly than I wanted to .,entailment,fiction
1,cook and then the next time it would be my tur...,I would cook and then the next turn would be h...,contradiction,telephone
2,The disorder hardly seemed to exist before the...,The disorder did n't seem to be as common when...,entailment,slate
3,"The Report and Order , in large part , adopts ...",The Report and Order ignores recommendations f...,contradiction,government
4,"IDPA 's OIG 's mission is to prevent , detect ...",IDPA 's OIG 's mission is clear and cares abou...,entailment,government


In [42]:
test_df['genre'].unique()

array(['fiction', 'telephone', 'slate', 'government', 'travel'],
      dtype=object)

In [0]:
def df2idx_mnli(fname,genre):
    df = pd.read_csv(fname, sep="\t", index_col=False )
    df = df[df.genre == genre]
    # change the label to numerical value
    df.loc[df['label'] == 'entailment', 'label'] = 0
    df.loc[df['label'] == 'contradiction', 'label'] = 1
    df.loc[df['label'] == 'neutral', 'label'] = 2
    # convert token to idx
    df['sent1_idx']  = df.apply (lambda row:[token2id[token] if token in token2id else UNK_IDX for token in row.sentence1.split()],axis=1)
    df['sent2_idx']  = df.apply (lambda row:[token2id[token] if token in token2id else UNK_IDX for token in row.sentence2.split()],axis=1)
    # convert df to data list and label list
    indexed_data = list(zip(df.sent1_idx, df.sent2_idx))
    label = np.array(df.label)
    return indexed_data, label, df

In [0]:
fic_test_data, fic_test_targets,_= df2idx_mnli("mnli_val.tsv", 'fiction')
tel_test_data, tel_test_targets,_= df2idx_mnli("mnli_val.tsv", 'telephone')
sla_test_data, sla_test_targets,_= df2idx_mnli("mnli_val.tsv", 'slate')
gov_test_data, gov_test_targets,_= df2idx_mnli("mnli_val.tsv", 'government')
tra_test_data, tra_test_targets,_= df2idx_mnli("mnli_val.tsv", 'travel')

In [0]:
fic_train_data, fic_train_targets,_= df2idx_mnli("mnli_train.tsv", 'fiction')
tel_train_data, tel_train_targets,_= df2idx_mnli("mnli_train.tsv", 'telephone')
sla_train_data, sla_train_targets,_= df2idx_mnli("mnli_train.tsv", 'slate')
gov_train_data, gov_train_targets,_= df2idx_mnli("mnli_train.tsv", 'government')
tra_train_data, tra_train_targets,_= df2idx_mnli("mnli_train.tsv", 'travel')

In [0]:
fic_test_dataset =  NewsGroupDataset(fic_test_data, fic_test_targets)
fic_test_loader = torch.utils.data.DataLoader(dataset=fic_test_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
tel_test_dataset =  NewsGroupDataset(tel_test_data, tel_test_targets)
tel_test_loader = torch.utils.data.DataLoader(dataset=tel_test_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
sla_test_dataset =  NewsGroupDataset(sla_test_data, sla_test_targets)
sla_test_loader = torch.utils.data.DataLoader(dataset=sla_test_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
gov_test_dataset =  NewsGroupDataset(gov_test_data, gov_test_targets)
gov_test_loader = torch.utils.data.DataLoader(dataset=gov_test_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
tra_test_dataset =  NewsGroupDataset(tra_test_data, tra_test_targets)
tra_test_loader = torch.utils.data.DataLoader(dataset=tra_test_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)

In [0]:
fic_train_dataset =  NewsGroupDataset(fic_train_data, fic_train_targets)
fic_train_loader = torch.utils.data.DataLoader(dataset=fic_train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
tel_train_dataset =  NewsGroupDataset(tel_train_data, tel_train_targets)
tel_train_loader = torch.utils.data.DataLoader(dataset=tel_train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
sla_train_dataset =  NewsGroupDataset(sla_train_data, sla_train_targets)
sla_train_loader = torch.utils.data.DataLoader(dataset=sla_train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
gov_train_dataset =  NewsGroupDataset(gov_train_data, gov_train_targets)
gov_train_loader = torch.utils.data.DataLoader(dataset=gov_train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)
tra_train_dataset =  NewsGroupDataset(tra_train_data, tra_train_targets)
tra_train_loader = torch.utils.data.DataLoader(dataset=tra_train_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=newsgroup_collate_func,
                                           shuffle=False)

## 5.1 Evaluation of RNN

### 5.1.1 Load Best RNN Model
- hidden size :800
- concatenate two encoded sentences with element-wise multiplication

In [0]:
rnn_mul_800 = RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_mul_800.cuda()
rnn_mul_800.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
#test_model(val_loader, rnn_mul_800)

### 5.1.2 Evaluate across genres and generate val acc table 

In [0]:
rnn_val_acc_genre = []
for data_loader in [fic_test_loader, tel_test_loader, sla_test_loader, gov_test_loader, tra_test_loader]:
  rnn_val_acc_genre.append(test_model(data_loader, rnn_mul_800))
genres = ['fiction', 'telephone', 'slate', 'government', 'travel']
rnn_mnli_acc = pd.DataFrame(data = {'Genre': genres, 'Validation Accuracy':rnn_val_acc_genre})

In [84]:
rnn_mnli_acc

Unnamed: 0,Genre,Validation Accuracy
0,fiction,43.21608
1,telephone,42.885572
2,slate,40.219561
3,government,42.125984
4,travel,41.955193


In [0]:
rnn_mnli_acc.to_csv('MNLI_rnn_acc.csv')

## 5.2 Evaluation of CNN

###5.2.1 Load Best CNN Model 
- hidden size: 
- concatenate two encoded sentences with element-wise multiplication

In [0]:
cnn_300 = CNN(hidden_size = 300, kernel_size = 3, padding_size = 1,num_layers = 2, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  cnn_300.cuda()
cnn_300.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/CNN_hidden_size_300.pkl'))
#test_model(val_loader, cnn_300)

### 5.2.2 Evaluate across genres and generate val acc table

In [0]:
cnn_val_acc_genre = []
for data_loader in [fic_test_loader, tel_test_loader, sla_test_loader, gov_test_loader, tra_test_loader]:
  cnn_val_acc_genre.append(test_model(data_loader, cnn_300))
genres = ['fiction', 'telephone', 'slate', 'government', 'travel']
cnn_mnli_acc = pd.DataFrame(data = {'Genre': genres, 'Validation Accuracy':cnn_val_acc_genre})

In [30]:
cnn_mnli_acc

Unnamed: 0,Genre,Validation Accuracy
0,fiction,38.79397
1,telephone,41.293532
2,slate,41.417166
3,government,39.271654
4,travel,41.344196


In [0]:
cnn_mnli_acc.to_csv('MNLI_cnn_acc.csv')

# Fine-Tuning MultiNLI

In [0]:
def fine_tuning(train_loader,val_loader, model):
  criterion = torch.nn.NLLLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  total_step = len(train_loader)
  train_loss_ls = []
  val_acc_ls = []
  for epoch in range(num_epochs):
      loss_batch = []
      for i, (data1, lengths1, data2, lengths2, labels) in enumerate(train_loader):
  #         data1 = Variable(data1)  
  #         lengths1 = Variable(lengths1)
  #         data2 = Variable(data2)  
  #         lengths2 = Variable(lengths2)# Convert torch tensor to Variable: change image from a vector of size 784 to a matrix of 28 x 28
  #         labels = Variable(labels)
          if use_cuda and torch.cuda.is_available():
              data1 = data1.cuda()
              lengths1  = lengths1.cuda()
              data2 = data2.cuda()
              lengths2  = lengths2.cuda()
              labels = labels.cuda()
          model.train()
          optimizer.zero_grad()
          # Forward pass
          outputs = model(data1, lengths1, data2, lengths2)
          predicted = outputs.max(1, keepdim=True)[1]
          loss = criterion(outputs, labels)
          loss_batch.append(loss.item())
          # Backward and optimize
          loss.backward()
          optimizer.step()
          # validate every 100 iterations
          if i > 0 and i % 50 == 0:
              # validate
              train_loss = loss_batch[i]
              val_acc = test_model(val_loader, model)
              train_loss_ls.append(train_loss)
              val_acc_ls.append(val_acc)
              print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}, Training Loss: {}'.format(
                         epoch+1, num_epochs, i+1, len(train_loader), val_acc,train_loss))
  #torch.save(model_object.state_dict(), 'params_{}.pkl'.format())
#model_object.load_state_dict(torch.load('params.pkl'))
  return train_loss_ls, val_acc_ls



In [0]:
num_epochs = 10
fine_tuning_dic = {}

In [49]:
rnn_fic = RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_fic.cuda()
rnn_fic.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
fine_tuning_dic['fic'] = fine_tuning(fic_train_loader,fic_test_loader,rnn_fic )
torch.save(rnn_fic.state_dict(), 'RNN_finetuning_FIC.pkl')
#cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)
#!cp 'RNN_finetuning_FIC.pkl' 'drive/My Drive/Colab Notebooks/RNN_finetuning_FIC.pkl'

Epoch: [1/10], Step: [51/120], Validation Acc: 41.30653266331658, Training Loss: 1.3316197395324707
Epoch: [1/10], Step: [101/120], Validation Acc: 41.4070351758794, Training Loss: 1.5383548736572266
Epoch: [2/10], Step: [51/120], Validation Acc: 43.71859296482412, Training Loss: 0.49553167819976807
Epoch: [2/10], Step: [101/120], Validation Acc: 43.81909547738694, Training Loss: 0.7056248784065247
Epoch: [3/10], Step: [51/120], Validation Acc: 43.91959798994975, Training Loss: 0.261760950088501
Epoch: [3/10], Step: [101/120], Validation Acc: 42.61306532663317, Training Loss: 0.2711864709854126
Epoch: [4/10], Step: [51/120], Validation Acc: 45.42713567839196, Training Loss: 0.08623187243938446
Epoch: [4/10], Step: [101/120], Validation Acc: 44.120603015075375, Training Loss: 0.028541721403598785
Epoch: [5/10], Step: [51/120], Validation Acc: 44.120603015075375, Training Loss: 0.0163022018969059
Epoch: [5/10], Step: [101/120], Validation Acc: 44.321608040201006, Training Loss: 0.0212509

In [50]:
test_model(fic_test_loader,rnn_fic)

46.33165829145729

In [51]:
num_epochs = 5
rnn_tel = RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_tel.cuda()
rnn_tel.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
fine_tuning_dic['tel'] = fine_tuning(tel_train_loader,tel_test_loader,rnn_tel )
torch.save(rnn_tel.state_dict(), 'RNN_finetuning_TEL.pkl')
#cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)
!cp 'RNN_finetuning_TEL.pkl' 'drive/My Drive/Colab Notebooks/RNN_finetuning_TEL.pkl'

Epoch: [1/5], Step: [51/134], Validation Acc: 43.28358208955224, Training Loss: 1.8829325437545776
Epoch: [1/5], Step: [101/134], Validation Acc: 42.6865671641791, Training Loss: 1.1916170120239258
Epoch: [2/5], Step: [51/134], Validation Acc: 46.3681592039801, Training Loss: 0.5325435400009155
Epoch: [2/5], Step: [101/134], Validation Acc: 45.57213930348259, Training Loss: 0.7811086773872375
Epoch: [3/5], Step: [51/134], Validation Acc: 46.069651741293534, Training Loss: 0.33982160687446594
Epoch: [3/5], Step: [101/134], Validation Acc: 45.97014925373134, Training Loss: 0.15582451224327087
Epoch: [4/5], Step: [51/134], Validation Acc: 46.766169154228855, Training Loss: 0.26185742020606995
Epoch: [4/5], Step: [101/134], Validation Acc: 44.875621890547265, Training Loss: 0.03331920504570007
Epoch: [5/5], Step: [51/134], Validation Acc: 46.3681592039801, Training Loss: 0.021506931632757187
Epoch: [5/5], Step: [101/134], Validation Acc: 46.865671641791046, Training Loss: 0.008745975792407

In [52]:
test_model(tel_test_loader,rnn_tel)

45.57213930348259

In [53]:
num_epochs = 5
rnn_sla= RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_sla.cuda()
rnn_sla.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
fine_tuning_dic['sla'] = fine_tuning(sla_train_loader,sla_test_loader,rnn_sla )
torch.save(rnn_sla.state_dict(), 'RNN_finetuning_SLA.pkl')
#cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)
!cp 'RNN_finetuning_SLA.pkl' 'drive/My Drive/Colab Notebooks/RNN_finetuning_SLA.pkl'

Epoch: [1/5], Step: [51/126], Validation Acc: 43.41317365269461, Training Loss: 1.3365195989608765
Epoch: [1/5], Step: [101/126], Validation Acc: 39.52095808383233, Training Loss: 1.0557008981704712
Epoch: [2/5], Step: [51/126], Validation Acc: 43.41317365269461, Training Loss: 0.5040886402130127
Epoch: [2/5], Step: [101/126], Validation Acc: 42.51497005988024, Training Loss: 0.6487178206443787
Epoch: [3/5], Step: [51/126], Validation Acc: 43.213572854291414, Training Loss: 0.16986176371574402
Epoch: [3/5], Step: [101/126], Validation Acc: 42.315369261477045, Training Loss: 0.16872018575668335
Epoch: [4/5], Step: [51/126], Validation Acc: 42.21556886227545, Training Loss: 0.025206491351127625
Epoch: [4/5], Step: [101/126], Validation Acc: 42.21556886227545, Training Loss: 0.032765839248895645
Epoch: [5/5], Step: [51/126], Validation Acc: 41.417165668662676, Training Loss: 0.01117626577615738
Epoch: [5/5], Step: [101/126], Validation Acc: 42.315369261477045, Training Loss: 0.00666425377

In [55]:
test_model(sla_test_loader,rnn_sla)

41.01796407185629

In [60]:
num_epochs = 5
rnn_gov= RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_gov.cuda()
rnn_gov.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
fine_tuning_dic['gov'] = fine_tuning(gov_train_loader,gov_test_loader,rnn_gov )
torch.save(rnn_gov.state_dict(), 'RNN_finetuning_GOV.pkl')
#cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)
!cp 'RNN_finetuning_GOV.pkl' 'drive/My Drive/Colab Notebooks/RNN_finetuning_GOV.pkl'

Epoch: [1/5], Step: [51/122], Validation Acc: 45.57086614173228, Training Loss: 1.6344153881072998
Epoch: [1/5], Step: [101/122], Validation Acc: 46.06299212598425, Training Loss: 1.332634449005127
Epoch: [2/5], Step: [51/122], Validation Acc: 45.767716535433074, Training Loss: 0.4796903729438782
Epoch: [2/5], Step: [101/122], Validation Acc: 48.22834645669291, Training Loss: 0.9034083485603333
Epoch: [3/5], Step: [51/122], Validation Acc: 47.34251968503937, Training Loss: 0.18312059342861176
Epoch: [3/5], Step: [101/122], Validation Acc: 46.45669291338583, Training Loss: 0.22560104727745056
Epoch: [4/5], Step: [51/122], Validation Acc: 47.24409448818898, Training Loss: 0.033569324761629105
Epoch: [4/5], Step: [101/122], Validation Acc: 48.22834645669291, Training Loss: 0.02359027788043022
Epoch: [5/5], Step: [51/122], Validation Acc: 46.75196850393701, Training Loss: 0.011223532259464264
Epoch: [5/5], Step: [101/122], Validation Acc: 46.94881889763779, Training Loss: 0.013730734586715

In [59]:
test_model(gov_test_loader,rnn_gov)

48.326771653543304

In [61]:
num_epochs = 5
rnn_tra= RNN_mul(hidden_size = 800, num_layers = 1, num_classes = 3, pre_trained_emb = ft_emb)
use_cuda = True
if use_cuda and torch.cuda.is_available():
  rnn_tra.cuda()
rnn_tra.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/RNN_mul_hidden_size_800.pkl'))
fine_tuning_dic['tra'] = fine_tuning(tra_train_loader,tra_test_loader,rnn_gov )
torch.save(rnn_tra.state_dict(), 'RNN_finetuning_TRA.pkl')
#cnn_mul_record['hidden_size_{}'.format(hidden_size)] = (train_loss, val_acc)
!cp 'RNN_finetuning_TRA.pkl' 'drive/My Drive/Colab Notebooks/RNN_finetuning_TRA.pkl'

Epoch: [1/5], Step: [51/125], Validation Acc: 43.890020366598776, Training Loss: 1.2621415853500366
Epoch: [1/5], Step: [101/125], Validation Acc: 46.13034623217923, Training Loss: 0.9454455375671387
Epoch: [2/5], Step: [51/125], Validation Acc: 47.45417515274949, Training Loss: 0.35416141152381897
Epoch: [2/5], Step: [101/125], Validation Acc: 45.31568228105906, Training Loss: 0.5583660006523132
Epoch: [3/5], Step: [51/125], Validation Acc: 46.537678207739305, Training Loss: 0.09738357365131378
Epoch: [3/5], Step: [101/125], Validation Acc: 45.824847250509166, Training Loss: 0.04340731352567673
Epoch: [4/5], Step: [51/125], Validation Acc: 48.16700610997963, Training Loss: 0.012604370713233948
Epoch: [4/5], Step: [101/125], Validation Acc: 47.14867617107943, Training Loss: 0.0083855539560318
Epoch: [5/5], Step: [51/125], Validation Acc: 47.86150712830957, Training Loss: 0.008099891245365143
Epoch: [5/5], Step: [101/125], Validation Acc: 47.55600814663951, Training Loss: 0.004204504191

In [62]:
test_model(tra_test_loader,rnn_tra)

43.4826883910387