In [1]:
import csv 
import pandas as pd
import io

def load_vectors(fname):
    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
    n, d = map(int, fin.readline().split())
    data = {}
    for line in fin:
        tokens = line.rstrip().split(' ')
        data[tokens[0]] = [float(i) for i in tokens[1:]]
    return data

vecs = load_vectors('wiki-news-300d-1M.vec')

In [2]:
import numpy as np
vecs['<pad>'] = list(np.zeros((300)))
vecs['<unk>'] = list(np.random.normal(scale=0.4, size=(300, )))

In [3]:
train = pd.read_csv('snli_train.tsv', delimiter='\t')
premise = []
hyp = []
target = []
for i in range(len(train)):
    premise.append(train.iloc[i]['sentence1'].split())
    hyp.append(train.iloc[i]['sentence2'].split())
    target.append(train.iloc[i]['label'])
    
label_dcit = {'entailment': 0,  'neutral': 1, 'contradiction': 2}
labels = []
for label in target:
    labels.append(label_dcit[label])

In [4]:
valid = pd.read_csv('snli_val.tsv', delimiter='\t')
premise_val = []
hyp_val = []
target_val = []
for i in range(len(valid)):
    premise_val.append(valid.iloc[i]['sentence1'].split())
    hyp_val.append(valid.iloc[i]['sentence2'].split())
    target_val.append(valid.iloc[i]['label'])
    
label_dcit = {'entailment': 0,  'neutral': 1, 'contradiction': 2}
labels_val = []
for label in target_val:
    labels_val.append(label_dcit[label])

In [5]:
PAD_IDX = 0
UNK_IDX = 1
def build_vocab(sentences):
    # create vocab of words
    max_sent_len = max([len(sent[0]) for sent in sentences])
    word_dict = {}
    for sent in sentences:
        for word in sent:
            if word not in word_dict:
                word_dict[word] = ''
    word_set = list(set(word_dict))
    id2char = word_set
    char2id = dict(zip(word_set, range(2,2+len(word_set))))
    id2char = ['<pad>', '<unk>'] + id2char
    char2id['<pad>'] = PAD_IDX
    char2id['<unk>'] = UNK_IDX
    return char2id, id2char, max_sent_len

In [6]:
char2id, id2char, max_sent_len = build_vocab(sentences = premise + hyp)

In [7]:
from torch.utils.data import Dataset
class VocabDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, s1, s2, label, char2id):

        self.s1, self.s2, self.target_list = s1, s2, label
        assert (len(self.s1) == len(self.target_list) == len(self.s2))
        self.char2id = char2id


    def __len__(self):
        return len(self.s1)

    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        s1_idx = [self.char2id[c] if c in self.char2id.keys() else UNK_IDX  for c in self.s1[key][:max_sent_len]]
        s2_idx = [self.char2id[c] if c in self.char2id.keys() else UNK_IDX  for c in self.s2[key][:max_sent_len]]
        label = self.target_list[key]
        return [s1_idx, len(s1_idx),s2_idx, len(s2_idx), label]

In [8]:
def collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all
    data have the same length
    """
    label_list = []
    s1 = []
    s1_len = []
    s2 = []
    s2_len = []

    for datum in batch:
        label_list.append(datum[4])
        s1_len.append(datum[1])
        s2_len.append(datum[3])
    # padding
    for datum in batch:
        padded_vec_s1 = np.pad(np.array(datum[0]),
                                pad_width=((0,max_sent_len-datum[1])),
                                mode="constant", constant_values=0)
        padded_vec_s2 = np.pad(np.array(datum[2]),
                                pad_width=((0,max_sent_len-datum[3])),
                                mode="constant", constant_values=0)
        s1.append(padded_vec_s1)
        s2.append(padded_vec_s2)
    
    ind_s1 = np.argsort(s1_len)[::-1]
    ind_s2 = np.argsort(s2_len)[::-1]
    s1 = np.array(s1)[ind_s1]
    s1_len = np.array(s1_len)[ind_s1]
    s2 = np.array(s2)[ind_s2]
    s2_len = np.array(s2_len)[ind_s2]
    label_list = np.array(label_list)
    return [torch.from_numpy(np.array(s1)), torch.LongTensor(s1_len), ind_s1, torch.from_numpy(np.array(s2)), torch.LongTensor(s2_len), ind_s2, torch.LongTensor(label_list)]


In [9]:
train_set =VocabDataset(premise, hyp, labels, char2id)
val_set =VocabDataset(premise_val, hyp_val, labels_val, char2id)

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
BATCH_SIZE = 32
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=True)

In [11]:
def inverse(perm):
    inverse = [1] * len(perm)
    for index, perm in enumerate(perm):
        inverse[perm] = index
    return inverse

In [12]:
def prep_emb_layer(batch_data, cold_w2v, emb_size = 300):
    embed = np.zeros((len(batch_data),max_sent_len, emb_size))
    for i in range(len(batch_data)):
        for j in range(len(batch_data[i])):
            try:
                embed[i,j,:] = np.array(cold_w2v[id2char[batch_data[i][j]]])
            except KeyError:
                embed[i,j,:] = np.array(cold_w2v['<unk>'])
    return torch.from_numpy(embed).float()

In [13]:
class BiGRU(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes, emb_size=300):
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        super(BiGRU, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.gru = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True, bidirectional=True)

    def forward(self, data, lengths, sort_idx):

        # get embedding of characters
        embed = prep_emb_layer(data, vecs)
        embed = embed.to(device)
        # pack padded sequence
        sent_packed = torch.nn.utils.rnn.pack_padded_sequence(embed, lengths.numpy(), batch_first=True)
        # fprop though GRU
        _, hn = self.gru(sent_packed)
        cat = torch.cat((hn[0], hn[1]), 1)
        # unsort 
        reverse_idx = inverse(sort_idx)
        un_sort = cat.index_select(0, torch.LongTensor(reverse_idx).to(device))

        return un_sort

class Fc_GRU(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes, Fc_dim, dropout=False, interact=False):
        super(Fc_GRU, self).__init__()

        self.encoder = BiGRU(hidden_size, num_layers, num_classes, emb_size =300)
        self.interact = interact
        inputdim = 4*hidden_size
        if interact:
            inputdim = 6*hidden_size

        if not dropout:
            self.fc = nn.Sequential(
                    nn.Linear(inputdim, Fc_dim),
                    nn.ReLU(),
                    nn.Linear(Fc_dim, Fc_dim),
                    nn.ReLU(),
                    nn.Linear(Fc_dim, num_classes)
                    )
        else:
            self.fc = nn.Sequential(
                    nn.Dropout(),
                    nn.Linear(inputdim, Fc_dim),
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(Fc_dim, Fc_dim),
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(Fc_dim, num_classes)
                    )
    def forward(self, s1, l1, idx1, s2, l2, idx2):
        s1 = self.encoder(s1, l1, idx1)
        s2 = self.encoder(s2, l2, idx2)

        if not self.interact:
            cat = torch.cat((s1, s2), 1)
        else:
            cat = torch.cat((s1, s1*s2, s2), 1)
        logits = self.fc(cat)
        return logits

In [21]:
import torch.nn.functional as F
def test_GRU_model(data, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    total_loss = 0 
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    for (data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in data:
        data_1,data_2, label = \
            data_1.to(device),data_2.to(device), label.to(device)
        outputs = F.softmax(model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]
        total_loss += criterion(outputs, label).item()
        total += label.size(0)
        correct += predicted.eq(label.view_as(predicted)).sum().item()
    return (100 * correct / total), total_loss

In [15]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [17]:
train_acc_hidden_size = {}
train_loss_hidden_size = {}
val_acc_hidden_size = {}
val_loss_hidden_size = {}
num_pars = {}
for hid_size in [100, 200, 800, 1600]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_GRU(hidden_size=hid_size, num_layers=1, num_classes=3, Fc_dim = 300, dropout=False)
    num_pars[hid_size] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_GRU_model(val_loader , model)
        train_acc, train_loss =test_GRU_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_hidden_size[hid_size] = epoch_loss_train
    val_loss_hidden_size[hid_size] = epoch_loss_val
    train_acc_hidden_size[hid_size] = epoch_acc_train
    val_acc_hidden_size[hid_size] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [18]:
import pickle
pickle.dump( train_acc_hidden_size, open( "train_acc_GRU_hdn.p", "wb" ) )
pickle.dump( val_acc_hidden_size, open( "val_acc_GRU_hdn.p", "wb" ) )
pickle.dump( train_loss_hidden_size, open( "train_loss_GRU_hdn.p", "wb" ) )
pickle.dump( val_loss_hidden_size, open( "val_loss_GRU_hdn.p", "wb" ) )
pickle.dump( num_pars, open( "num_pars_GRU_hdn.p", "wb" ) )

In [None]:
train_acc_drop = {}
train_loss_drop = {}
val_acc_drop = {}
val_loss_drop = {}
num_pars_drop = {}
for drop in [True, False]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=drop)
    num_pars_drop[drop] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_GRU_model(val_loader , model)
        train_acc, train_loss =test_GRU_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_drop[drop] = epoch_loss_train
    val_loss_drop[drop] = epoch_loss_val
    train_acc_drop[drop] = epoch_acc_train
    val_acc_drop[drop] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!


In [None]:
pickle.dump( train_acc_drop, open( "train_acc_GRU_dp.p", "wb" ) )
pickle.dump( val_acc_drop, open( "val_acc_GRU_dp.p", "wb" ) )
pickle.dump( train_loss_drop, open( "train_loss_GRU_dp.p", "wb" ) )
pickle.dump( val_loss_drop, open( "val_loss_GRU_dp.p", "wb" ) )
pickle.dump( num_pars_drop, open( "num_pars_GRU_dp.p", "wb" ) )

In [33]:
train_acc_decay = {}
train_loss_decay = {}
val_acc_decay = {}
val_loss_decay = {}
num_pars_decay = {}
for decay in [1e-6, 2e-5, 2e-4]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True)
    num_pars_decay[decay] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = decay)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_GRU_model(val_loader , model)
        train_acc, train_loss =test_GRU_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_decay[decay] = epoch_loss_train
    val_loss_decay[decay] = epoch_loss_val
    train_acc_decay[decay] = epoch_acc_train
    val_acc_decay[decay] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [34]:
pickle.dump( train_acc_decay, open( "train_acc_GRU_decay.p", "wb" ) )
pickle.dump( val_acc_decay, open( "val_acc_GRU_decay.p", "wb" ) )
pickle.dump( train_loss_decay, open( "train_loss_GRU_decay.p", "wb" ) )
pickle.dump( val_loss_decay, open( "val_loss_GRU_decay.p", "wb" ) )
pickle.dump( num_pars_decay, open( "num_pars_GRU_decay.p", "wb" ) )

In [45]:
train_acc_int = {}
train_loss_int = {}
val_acc_int = {}
val_loss_int = {}
num_pars_int = {}
for interact in [True, False]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True, interact=interact)
    num_pars_int[interact] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 1e-6)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_GRU_model(val_loader , model)
        train_acc, train_loss =test_GRU_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_int[interact] = epoch_loss_train
    val_loss_int[interact] = epoch_loss_val
    train_acc_int[interact] = epoch_acc_train
    val_acc_int[interact] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [46]:
pickle.dump( train_acc_int, open( "train_acc_GRU_int.p", "wb" ) )
pickle.dump( val_acc_int, open( "val_acc_GRU_int.p", "wb" ) )
pickle.dump( train_loss_int, open( "train_loss_GRU_int.p", "wb" ) )
pickle.dump( val_loss_int, open( "val_loss_GRU_int.p", "wb" ) )
pickle.dump( num_pars_int, open( "num_pars_GRU_int.p", "wb" ) )

In [107]:
val_acc_drop

{True: [62.3,
  65.2,
  68.0,
  70.1,
  70.9,
  71.2,
  70.0,
  70.6,
  69.9,
  70.3,
  71.6,
  72.8,
  70.8,
  70.3,
  71.2],
 False: [64.3,
  68.3,
  69.5,
  69.4,
  71.3,
  72.4,
  70.8,
  71.7,
  71.6,
  69.6,
  70.5,
  70.3,
  70.1,
  70.1,
  68.8]}

In [17]:
class CNN(nn.Module):
    def __init__(self, hidden_size, num_layers = 2, k_size = 3, num_classes =3 , emb_size =300):

        super(CNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.conv1 = nn.Sequential(
            nn.Conv1d(emb_size, hidden_size, kernel_size=k_size, padding=1),
            nn.ReLU(inplace=True)
            )
        self.conv2 = nn.Sequential(
            nn.Conv1d(hidden_size, hidden_size, kernel_size=k_size, padding=1),
            nn.ReLU(inplace=True)
            )

    def forward(self, x, idx):
        batch_size, seq_len = x.size()
        reverse_idx = inverse(idx)
        x = x[reverse_idx]
        embed = prep_emb_layer(x, vecs)
        embed = embed.to(device)
        embed = embed.transpose(1,2).contiguous()
        hidden = self.conv1(embed)        
        hidden = self.conv2(hidden)        
        return torch.max(hidden, 2)[0]

class Fc_CNN(nn.Module):
    def __init__(self, hidden_size, num_layers, Fc_dim, k_size = 3, num_classes =3, dropout=False, interact = False):
        super(Fc_CNN, self).__init__()
        self.interact = interact
        self.encoder = CNN(hidden_size, num_layers, k_size = k_size)
        inputdim = 2*hidden_size
        if interact:
            inputdim = 3*hidden_size
        if not dropout:
            self.fc = nn.Sequential(
                    nn.Linear(inputdim, Fc_dim),
                    nn.ReLU(),
                    nn.Linear(Fc_dim, Fc_dim),
                    nn.ReLU(),
                    nn.Linear(Fc_dim, num_classes)
                    )
        else:
            self.fc = nn.Sequential(
                    nn.Dropout(),
                    nn.Linear(inputdim, Fc_dim),
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(Fc_dim, Fc_dim),
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(Fc_dim, num_classes)
                    )

    def forward(self, s1, idx1, s2, idx2):
        s1 = self.encoder(s1, idx1)
        s2 = self.encoder(s2, idx2)
        if self.interact:
            cat = torch.cat((s1,s1*s2, s2), 1)
        else:
            cat = torch.cat((s1, s2), 1)
        logits = self.fc(cat)
        return logits

In [20]:
def test_CNN_model(data, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    total_loss = 0 
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    for (data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in data:
        data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
        outputs = F.softmax(model(data_1, idx_1, data_2, idx_2), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]
        total_loss += criterion(outputs, label).item()
        total += label.size(0)
        correct += predicted.eq(label.view_as(predicted)).sum().item()
    return (100 * correct / total), total_loss 

In [40]:
train_acc_hidden_size_cnn = {}
train_loss_hidden_size_cnn = {}
val_acc_hidden_size_cnn = {}
val_loss_hidden_size_cnn = {}
num_pars_cnn = {}
for hid_size in [100, 300, 1200, 2400]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_CNN(hidden_size=hid_size, num_layers=2, num_classes=3, Fc_dim = 300, dropout=False)
    num_pars_cnn[hid_size] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, idx_1, data_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_CNN_model(val_loader , model)
        train_acc, train_loss =test_CNN_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_hidden_size_cnn[hid_size] = epoch_loss_train
    val_loss_hidden_size_cnn[hid_size] = epoch_loss_val
    train_acc_hidden_size_cnn[hid_size] = epoch_acc_train
    val_acc_hidden_size_cnn[hid_size] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [41]:
pickle.dump( train_acc_hidden_size_cnn, open( "train_acc_CNN_hdn.p", "wb" ) )
pickle.dump( val_acc_hidden_size_cnn, open( "val_acc_CNN_hdn.p", "wb" ) )
pickle.dump( train_loss_hidden_size_cnn, open( "train_loss_CNN_hdn.p", "wb" ) )
pickle.dump( val_loss_hidden_size_cnn, open( "val_loss_CNN_hdn.p", "wb" ) )
pickle.dump( num_pars_cnn, open( "num_pars_CNN_hdn.p", "wb" ) )

In [48]:
val_acc_hidden_size_cnn

{100: [62.6,
  65.9,
  66.3,
  68.1,
  69.6,
  67.7,
  69.0,
  67.9,
  67.7,
  69.1,
  69.5,
  68.3,
  67.8,
  68.2,
  68.6],
 300: [64.9,
  67.0,
  71.0,
  70.6,
  69.1,
  70.9,
  68.2,
  68.0,
  69.8,
  67.6,
  70.0,
  67.7,
  67.1,
  66.6,
  67.9],
 1200: [66.2,
  67.5,
  71.1,
  70.9,
  69.1,
  69.6,
  70.4,
  70.6,
  69.4,
  70.1,
  70.2,
  70.1,
  68.4,
  70.0,
  70.4],
 2400: [66.3,
  69.4,
  68.9,
  71.3,
  71.5,
  70.6,
  68.5,
  69.5,
  70.2,
  69.8,
  69.6,
  71.1,
  71.0,
  71.4,
  69.5]}

In [90]:
train_acc_k_size_cnn = {}
train_loss_k_size_cnn = {}
val_acc_k_size_cnn = {}
val_loss_k_size_cnn = {}
num_pars_k_size_cnn = {}
for k_size in [1, 2, 3]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_CNN(hidden_size=1200, num_layers=2, num_classes=3, Fc_dim = 300, k_size = k_size, dropout=False)
    num_pars_k_size_cnn[k_size] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, idx_1, data_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_CNN_model(val_loader , model)
        train_acc, train_loss =test_CNN_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_k_size_cnn[k_size] = epoch_loss_train
    val_loss_k_size_cnn[k_size] = epoch_loss_val
    train_acc_k_size_cnn[k_size] = epoch_acc_train
    val_acc_k_size_cnn[k_size] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [91]:
pickle.dump( train_acc_k_size_cnn, open( "train_acc_CNN_ker.p", "wb" ) )
pickle.dump( val_acc_k_size_cnn, open( "val_acc_CNN_ker.p", "wb" ) )
pickle.dump( train_loss_k_size_cnn, open( "train_loss_CNN_ker.p", "wb" ) )
pickle.dump( val_loss_k_size_cnn, open( "val_loss_CNN_ker.p", "wb" ) )
pickle.dump( num_pars_k_size_cnn, open( "num_pars_CNN_ker.p", "wb" ) )

In [92]:
val_acc_k_size_cnn

{1: [66.6,
  70.3,
  70.4,
  73.6,
  72.2,
  70.8,
  72.3,
  72.4,
  74.2,
  73.6,
  71.0,
  71.6,
  71.8,
  71.5,
  69.9],
 2: [67.4,
  69.2,
  70.4,
  70.7,
  71.0,
  70.7,
  71.0,
  69.9,
  68.8,
  69.0,
  70.6,
  68.9,
  68.8,
  69.4,
  69.4],
 3: [67.1,
  67.6,
  69.5,
  70.3,
  68.5,
  68.5,
  69.2,
  69.7,
  69.3,
  70.1,
  68.7,
  68.2,
  67.8,
  68.1,
  70.4]}

In [93]:
train_acc_int_cnn = {}
train_loss_int_cnn = {}
val_acc_int_cnn = {}
val_loss_int_cnn = {}
num_pars_int_cnn = {}
for interact in [True, False]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_CNN(hidden_size=1200, num_layers=2, num_classes=3, Fc_dim = 300, k_size = 1, dropout=False, interact = interact)
    num_pars_int_cnn[interact] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, idx_1, data_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_CNN_model(val_loader , model)
        train_acc, train_loss =test_CNN_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_int_cnn[interact] = epoch_loss_train
    val_loss_int_cnn[interact] = epoch_loss_val
    train_acc_int_cnn[interact] = epoch_acc_train
    val_acc_int_cnn[interact] = epoch_acc_val

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [94]:
pickle.dump( train_acc_int_cnn, open( "train_acc_CNN_int.p", "wb" ) )
pickle.dump( val_acc_int_cnn, open( "val_acc_CNN_int.p", "wb" ) )
pickle.dump( train_loss_int_cnn, open( "train_loss_CNN_int.p", "wb" ) )
pickle.dump( val_loss_int_cnn, open( "val_loss_CNN_int.p", "wb" ) )
pickle.dump( num_pars_int_cnn, open( "num_pars_CNN_int.p", "wb" ) )

In [108]:
train_acc_reg_cnn = {}
train_loss_reg_cnn = {}
val_acc_reg_cnn = {}
val_loss_reg_cnn = {}
num_pars_reg_cnn = {}
for reg in [True, False]:
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_CNN(hidden_size=1200, num_layers=2, num_classes=3, Fc_dim = 300, k_size = 1, dropout=reg, interact = True)
    num_pars_reg_cnn[reg] = count_parameters(model)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 20
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 1e-6)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, idx_1, data_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_CNN_model(val_loader , model)
        train_acc, train_loss =test_CNN_model(train_loader , model)
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    train_loss_reg_cnn[reg] = epoch_loss_train
    val_loss_reg_cnn[reg] = epoch_loss_val
    train_acc_reg_cnn[reg] = epoch_acc_train
    val_acc_reg_cnn[reg] = epoch_acc_val
    

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 15 done!
Epoch 16 done!
Epoch 17 done!
Epoch 18 done!
Epoch 19 done!
Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!
Epoch 15 done!
Epoch 16 done!
Epoch 17 done!
Epoch 18 done!
Epoch 19 done!


In [109]:
pickle.dump( train_acc_reg_cnn, open( "train_acc_CNN_reg.p", "wb" ) )
pickle.dump( val_acc_reg_cnn, open( "val_acc_CNN_reg.p", "wb" ) )
pickle.dump( train_loss_reg_cnn, open( "train_loss_CNN_reg.p", "wb" ) )
pickle.dump( val_loss_reg_cnn, open( "val_loss_CNN_reg.p", "wb" ) )
pickle.dump( num_pars_reg_cnn, open( "num_pars_CNN_reg.p", "wb" ) )

In [110]:
val_acc_reg_cnn

{True: [64.6,
  68.4,
  68.5,
  71.3,
  70.9,
  71.3,
  73.7,
  72.9,
  74.2,
  73.5,
  74.5,
  74.6,
  74.1,
  75.3,
  73.7,
  74.8,
  74.9,
  74.5,
  75.7,
  74.2],
 False: [68.8,
  73.0,
  74.0,
  76.6,
  77.1,
  78.7,
  78.0,
  77.9,
  76.0,
  75.6,
  76.8,
  76.0,
  75.7,
  75.7,
  76.3,
  75.0,
  74.9,
  75.9,
  74.3,
  75.0]}

In [99]:
def find_best_BiGru():
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True, interact=True)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 1e-6)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_GRU_model(val_loader , model)
        train_acc, train_loss =test_GRU_model(train_loader , model)
        try:
            if val_acc > max(epoch_acc_val):
                torch.save(model.state_dict(), 'my_BiGRU.pth')
        except ValueError:
            val_acc = val_acc
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    return epoch_acc_val, epoch_acc_train, epoch_loss_val, epoch_loss_train

In [100]:
my_Gru_acc_val, my_Gru_acc_train, my_Gru_los_val, my_Gru_los_train = find_best_BiGru()

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [101]:
my_best_gru = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True, interact=True)
my_best_gru = my_best_gru.to(device)
my_best_gru.load_state_dict(torch.load('my_BiGRU.pth'))
san_check, _ = test_GRU_model(val_loader , my_best_gru)

In [102]:
san_check

73.5

In [103]:
my_Gru_acc_val

[63.0,
 67.6,
 69.1,
 70.0,
 71.3,
 71.6,
 72.8,
 72.7,
 73.5,
 72.7,
 72.6,
 72.9,
 72.7,
 72.1,
 73.3]

In [133]:
def find_best_CNN():
    epoch_acc_train = []
    epoch_loss_train = []
    epoch_acc_val = []
    epoch_loss_val = []
    model = Fc_CNN(hidden_size=1200, num_layers=2, num_classes=3, Fc_dim = 300, k_size = 1, dropout=False, interact = True)
    
    model = model.to(device)
    learning_rate = 3e-4
    num_epochs = 15
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 1e-6)
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
            data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
            model.train()
            optimizer.zero_grad()
            # Forward pass
            outputs = model(data_1, idx_1, data_2, idx_2)
            loss = criterion(outputs, label)

            # Backward and optimize
            loss.backward()
            optimizer.step()
        val_acc, val_loss = test_CNN_model(val_loader , model)
        train_acc, train_loss =test_CNN_model(train_loader , model)
        try:
            if val_acc > max(epoch_acc_val):
                torch.save(model.state_dict(), 'my_CNN.pth')
        except ValueError:
            torch.save(model.state_dict(), 'my_CNN.pth')
        epoch_acc_val.append(val_acc)
        epoch_acc_train.append(train_acc)
        epoch_loss_val.append(val_loss)
        epoch_loss_train.append(train_loss)
        print("Epoch {} done!".format(epoch))
    epoch_acc_val.append(val_acc)
    epoch_acc_train.append(train_acc)
    epoch_loss_val.append(val_loss)
    epoch_loss_train.append(train_loss)
    return epoch_acc_val, epoch_acc_train, epoch_loss_val, epoch_loss_train

In [134]:
my_cnn_acc_val, my_cnn_acc_train, my_cnn_los_val, my_cnn_los_train = find_best_CNN()

Epoch 0 done!
Epoch 1 done!
Epoch 2 done!
Epoch 3 done!
Epoch 4 done!
Epoch 5 done!
Epoch 6 done!
Epoch 7 done!
Epoch 8 done!
Epoch 9 done!
Epoch 10 done!
Epoch 11 done!
Epoch 12 done!
Epoch 13 done!
Epoch 14 done!


In [24]:
my_best_cnn = Fc_CNN(hidden_size=1200, num_layers=2, num_classes=3, Fc_dim = 300, k_size = 1, dropout=False, interact = True)
my_best_cnn = my_best_cnn.to(device)
my_best_cnn.load_state_dict(torch.load('my_CNN.pth'))
san_check, _ = test_CNN_model(val_loader , my_best_cnn)

In [25]:
san_check

76.5

In [104]:
def print_GRU_predictions(loader, model, m_type= 'GRU'):
    # use this function to print prediction samples
    correct = {}
    wrong = {}
    model.eval()
    for data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label in loader:
        data_1,data_2, label = \
            data_1.to(device),data_2.to(device), label.to(device)
        if m_type == 'GRU':
            outputs = F.softmax(model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2), dim=1)
        elif m_type == 'CNN':
            outputs = F.softmax(model(data_1, idx_1, data_2, idx_2), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]
        for ind in range(32):
            # record correct indices data to the correct file
            if (predicted.tolist()[ind][0] == label.data.tolist()[ind]):
                correct[ind] = {}
                correct[ind]['s1'] = (data_1[ind].tolist())
                correct[ind]['s2'] = (data_2[ind].tolist())
                correct[ind]['lb'] = (label[ind].tolist())
            else:
                wrong[ind] = {}
                wrong[ind]['s1'] = (data_1[ind].tolist())
                wrong[ind]['s2'] = (data_2[ind].tolist())
                wrong[ind]['lb'] = (label[ind].tolist())
        # stop when we have enough data
        if (len(correct) > 3) and (len(wrong) > 3):
            break
    return correct, wrong

In [105]:
GRU_cor, GRU_wro = print_GRU_predictions(val_loader , my_best_gru, 'GRU')

In [106]:
CNN_cor, CNNwro = print_GRU_predictions(val_loader , my_best_cnn, 'CNN')

In [107]:
def id_2_sentence(meta):
    s1, s2 = [], []
# extract meaningful word indices
    for id in meta['s1']:
        # stop at padding indices
        if id == 0:
            break
        # transform indices to text
        s1.append(id2char[id])
    for id in meta['s2']:
        # stop at padding indices
        if id == 0:
            break
        # transform indices to text
        s2.append(id2char[id])
    return [' '.join(s1)], [' '.join(s2)], ('entitlement' if meta['lb']==0 else ( 'neutral' if meta['lb']==1 else 'contradiction'))

In [108]:
for (count,idx) in enumerate(GRU_cor):
    print(id_2_sentence(GRU_cor[idx]))
    if count == 2: break

(['A building that <unk> beautiful architecture stands in the sunlight as somebody on a bike passes by'], ['Three sisters , barefoot in pink dresses and who range in age from preschool to teenager'], 'contradiction')
(['A woman is looking at a pamphlet talking to someone , while a guy takes photos in'], ['The boy in the black and red swimsuit is about to go swimming'], 'entitlement')
(['A blond-haired lady with a gray tank top and jeans is holding her phone while she puts'], ['A woman waits for her husband to come home for dinner .'], 'entitlement')


In [109]:
for (count,idx) in enumerate(GRU_wro):
    print(id_2_sentence(GRU_wro[idx]))
    if count == 2: break

(['A gentleman in a striped shirt gesturing with a <unk> object in his hand while passersby stare'], ['A group of dancers is about to dance for a school competition'], 'entitlement')
(['Two boys are swimming underwater in a pool and a girl is swimming in the background .'], ['A man is talking to his wife on his cellphone .'], 'contradiction')
(['A barefooted adolescent boy in a yellow shirt reaching the top of a small skateboarding ramp .'], ['No one is sliding down a water slide .'], 'neutral')


In [110]:
for (count,idx) in enumerate(CNN_cor):
    print(id_2_sentence(CNN_cor[idx]))
    if count == 2: break

(['A group of dancers with green shirts are all holding hands in a circle , one lady'], ['A child is playing with a toy airplane that was made by his grandfather .'], 'neutral')
(['A man in a blue shirt and blue jeans rides a dark brown horse with white feet'], ['A live band on a lawn jamming out for the holiday crowd .'], 'neutral')
(['A man who has a gray beard and gray hair laughs while wearing a purple shirt .'], ['A fire engine shoots water at a young man with an umbrella .'], 'entitlement')


In [111]:
for (count,idx) in enumerate(CNNwro):
    print(id_2_sentence(CNNwro[idx]))
    if count == 2: break

(['A woman , wearing a white shirt and green shorts , sitting on a rock in a'], ['A man is laughing at a woman who has fallen over .'], 'neutral')
(['A young man shielding himself from a stream of water from a fire engine by using an'], ['A group of dancers is about to dance for a school competition'], 'contradiction')
(['A person with dark hair is standing on the sidewalk in front of an orange and white'], ['A boy is waiting in line for the Ferris Wheel .'], 'entitlement')


In [112]:
mul_val = pd.read_csv('mnli_val.tsv', delimiter='\t')

In [116]:
def get_acc_p_genre(model='GRU', genre_data = mul_val):
    gens = genre_data['genre'].unique()
    gen_acc = {}
    for gen in gens:
        gen_data = (genre_data[genre_data['genre'] == gen]).drop(['genre'], axis=1)
        premise = []
        hyp = []
        target = []
        for i in range(len(gen_data)):
            premise.append(gen_data.iloc[i]['sentence1'].split())
            hyp.append(gen_data.iloc[i]['sentence2'].split())
            target.append(gen_data.iloc[i]['label'])
        label_dcit = {'entailment': 0,  'neutral': 1, 'contradiction': 2}
        labels = []
        for label in target:
            labels.append(label_dcit[label])
        token = VocabDataset(premise, hyp, labels, char2id)
        gen_loader = torch.utils.data.DataLoader(dataset=token, batch_size=BATCH_SIZE,
                                                   collate_fn=collate_func, shuffle=True)
        if model == 'GRU':
            my_mod = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True, interact=True)
            my_mod = my_best_gru.to(device)
            my_mod.load_state_dict(torch.load('my_BiGRU.pth'))
            acc, _ = test_GRU_model(gen_loader , my_mod)            
        elif model == 'CNN':
            my_mod = Fc_CNN(hidden_size=1200, num_layers=2, num_classes=3, Fc_dim = 300, k_size = 1, dropout=False, interact = True)
            my_mod = my_best_cnn.to(device)
            my_mod.load_state_dict(torch.load('my_CNN.pth'))
            acc, _ = test_CNN_model(gen_loader , my_mod)
            
        gen_acc[gen] = acc
    return gen_acc  

In [120]:
GRU_genre = get_acc_p_genre('GRU')

In [121]:
GRU_genre

{'fiction': 49.447236180904525,
 'telephone': 44.37810945273632,
 'slate': 46.007984031936125,
 'government': 44.58661417322835,
 'travel': 45.010183299389}

In [122]:
CNN_genre = get_acc_p_genre('CNN')

In [123]:
CNN_genre

{'fiction': 41.50753768844221,
 'telephone': 42.18905472636816,
 'slate': 38.82235528942116,
 'government': 39.76377952755905,
 'travel': 37.78004073319756}

In [124]:
mul_train = pd.read_csv('mnli_train.tsv', delimiter='\t')

In [130]:
def fine_tune_GRU_p_genre(multi_train, multi_val):
    gens = multi_train['genre'].unique()
    gen_acc = {}
    for gen in gens:
        train_data = (multi_train[multi_train['genre'] == gen]).drop(['genre'], axis=1)
        premise = []
        hyp = []
        target = []
        for i in range(len(train_data)):
            premise.append(train_data.iloc[i]['sentence1'].split())
            hyp.append(train_data.iloc[i]['sentence2'].split())
            target.append(train_data.iloc[i]['label'])
        label_dcit = {'entailment': 0,  'neutral': 1, 'contradiction': 2}
        labels = []
        for label in target:
            labels.append(label_dcit[label])
        train_loader = torch.utils.data.DataLoader(dataset=VocabDataset(premise, hyp, labels, char2id), 
                                                   batch_size=BATCH_SIZE,
                                                   collate_fn=collate_func, shuffle=True)
        
        val_data = (multi_val[multi_val['genre'] == gen]).drop(['genre'], axis=1)
        premise = []
        hyp = []
        target = []
        for i in range(len(val_data)):
            premise.append(val_data.iloc[i]['sentence1'].split())
            hyp.append(val_data.iloc[i]['sentence2'].split())
            target.append(val_data.iloc[i]['label'])
        labels = []
        for label in target:
            labels.append(label_dcit[label])
        val_loader = torch.utils.data.DataLoader(dataset=VocabDataset(premise, hyp, labels, char2id), 
                                                 batch_size=BATCH_SIZE,
                                                   collate_fn=collate_func, shuffle=True)
        
        epoch_acc_val = []
        model = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True, interact=True)
        model = model.to(device)
        model.load_state_dict(torch.load('my_BiGRU.pth'))

        learning_rate = 5e-5
        num_epochs = 20
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 1e-6)
        total_step = len(train_loader)
        for epoch in range(num_epochs):
            for i,(data_1, lengths_1, idx_1,data_2, lengths_2, idx_2, label) in enumerate(train_loader):
                data_1,data_2, label = data_1.to(device),data_2.to(device), label.to(device)
                model.train()
                optimizer.zero_grad()
                # Forward pass
                outputs = model(data_1, lengths_1, idx_1, data_2, lengths_2, idx_2)
                loss = criterion(outputs, label)

                # Backward and optimize
                loss.backward()
                optimizer.step()
            val_acc, _ = test_GRU_model(val_loader , model)
            try:
                if val_acc > max(epoch_acc_val):
                    torch.save(model.state_dict(), 'best_GRU_4_' + gen + '.pth')
            except ValueError:
                torch.save(model.state_dict(), 'best_GRU_4_' + gen + '.pth')
            epoch_acc_val.append(val_acc)
        gen_acc[gen] = epoch_acc_val
    return gen_acc

In [131]:
multi_res= fine_tune_GRU_p_genre(mul_train, mul_val)

In [132]:
multi_res

{'telephone': [50.9452736318408,
  50.646766169154226,
  50.9452736318408,
  52.039800995024876,
  52.33830845771144,
  52.83582089552239,
  54.22885572139303,
  52.83582089552239,
  53.333333333333336,
  53.53233830845771,
  53.333333333333336,
  54.42786069651741,
  53.134328358208954,
  54.62686567164179,
  54.32835820895522,
  54.22885572139303,
  54.42786069651741,
  54.12935323383085,
  53.53233830845771,
  53.73134328358209],
 'fiction': [50.25125628140704,
  52.06030150753769,
  52.462311557788944,
  52.462311557788944,
  51.959798994974875,
  52.06030150753769,
  52.96482412060301,
  52.663316582914575,
  53.869346733668344,
  54.07035175879397,
  54.472361809045225,
  54.2713567839196,
  54.2713567839196,
  55.27638190954774,
  55.879396984924625,
  55.27638190954774,
  56.28140703517588,
  54.77386934673367,
  56.28140703517588,
  56.18090452261306],
 'slate': [46.007984031936125,
  47.80439121756487,
  47.10578842315369,
  47.0059880239521,
  48.30339321357285,
  48.2035928

In [133]:
def x_gen_eval(genre_data = mul_val):
    gens = genre_data['genre'].unique()
    gen_acc = {}
    for gen in gens:
        x_gen_acc = {}
        for x_gen in gens:
            if x_gen == gen:
                continue
            gen_data = (genre_data[genre_data['genre'] == x_gen]).drop(['genre'], axis=1)
            premise = []
            hyp = []
            target = []
            for i in range(len(gen_data)):
                premise.append(gen_data.iloc[i]['sentence1'].split())
                hyp.append(gen_data.iloc[i]['sentence2'].split())
                target.append(gen_data.iloc[i]['label'])
            labels = []
            for label in target:
                labels.append(label_dcit[label])
            token = VocabDataset(premise, hyp, labels, char2id)
            gen_loader = torch.utils.data.DataLoader(dataset=token, batch_size=BATCH_SIZE,
                                                       collate_fn=collate_func, shuffle=True)
            my_mod = Fc_GRU(hidden_size=800, num_layers=1, num_classes=3, Fc_dim = 300, dropout=True, interact=True)
            my_mod = my_best_gru.to(device)
            my_mod.load_state_dict(torch.load( 'best_GRU_4_' + gen + '.pth'))
            acc, _ = test_GRU_model(gen_loader , my_mod)            
            x_gen_acc[x_gen] = acc
        gen_acc[gen] = x_gen_acc
    return gen_acc  

In [135]:
x_gen_val = x_gen_eval()

In [136]:
x_gen_val

{'fiction': {'telephone': 49.25373134328358,
  'slate': 47.50499001996008,
  'government': 49.803149606299215,
  'travel': 49.287169042769854},
 'telephone': {'fiction': 52.06030150753769,
  'slate': 47.80439121756487,
  'government': 51.27952755905512,
  'travel': 50.91649694501018},
 'slate': {'fiction': 51.05527638190955,
  'telephone': 51.54228855721393,
  'government': 52.16535433070866,
  'travel': 51.12016293279022},
 'government': {'fiction': 50.95477386934673,
  'telephone': 50.44776119402985,
  'slate': 47.70459081836327,
  'travel': 51.62932790224033},
 'travel': {'fiction': 51.65829145728643,
  'telephone': 51.243781094527364,
  'slate': 47.405189620758485,
  'government': 54.52755905511811}}