In [1]:
# First lets improve libraries that we are going to be used in this lab session
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from collections import Counter
import pickle as pkl
import random
import pdb
random.seed(134)
import pandas as pd

PAD_IDX = 0
UNK_IDX = 1
BATCH_SIZE = 32

In [2]:
from collections import Counter

# max_vocab_size = 25000
PAD_IDX = 0
UNK_IDX = 1

def build_vocab(all_tokens):
    token_counter = Counter(all_tokens)
#     print(token_counter)
    vocab = token_counter.keys()
    id2token = list(vocab)
    token2id = dict(zip(vocab, range(2,2+len(vocab)))) 
    id2token = ['<pad>', '<unk>'] + id2token
    token2id['<pad>'] = PAD_IDX 
    token2id['<unk>'] = UNK_IDX
    return token2id, id2token

def read_data(fine_name):
    df = pd.read_csv(fine_name,sep='\t')
    return df

In [3]:
df_test = pd.read_csv("mnli_val.tsv",sep="\t")

In [4]:
df_test.head()

Unnamed: 0,sentence1,sentence2,label,genre
0,"'Not entirely , ' I snapped , harsher than int...",I spoke more harshly than I wanted to .,entailment,fiction
1,cook and then the next time it would be my tur...,I would cook and then the next turn would be h...,contradiction,telephone
2,The disorder hardly seemed to exist before the...,The disorder did n't seem to be as common when...,entailment,slate
3,"The Report and Order , in large part , adopts ...",The Report and Order ignores recommendations f...,contradiction,government
4,"IDPA 's OIG 's mission is to prevent , detect ...",IDPA 's OIG 's mission is clear and cares abou...,entailment,government


In [5]:
df_test.shape

(5000, 4)

In [6]:
df_test['genre'].describe()

count           5000
unique             5
top       government
freq            1016
Name: genre, dtype: object

In [7]:
def tokenize(df):
    df['sentence1'] = df['sentence1'].apply(lambda x: [a.lower() for a in x.split(' ')])
    df['sentence2'] = df['sentence2'].apply(lambda x : [a.lower() for a in x.split(' ')])
    
    return df

In [8]:
df_test = tokenize(df_test)

In [9]:
df_test.head()

Unnamed: 0,sentence1,sentence2,label,genre
0,"['not, entirely, ,, ', i, snapped, ,, harsher,...","[i, spoke, more, harshly, than, i, wanted, to, .]",entailment,fiction
1,"[cook, and, then, the, next, time, it, would, ...","[i, would, cook, and, then, the, next, turn, w...",contradiction,telephone
2,"[the, disorder, hardly, seemed, to, exist, bef...","[the, disorder, did, n't, seem, to, be, as, co...",entailment,slate
3,"[the, report, and, order, ,, in, large, part, ...","[the, report, and, order, ignores, recommendat...",contradiction,government
4,"[idpa, 's, oig, 's, mission, is, to, prevent, ...","[idpa, 's, oig, 's, mission, is, clear, and, c...",entailment,government


In [10]:
words_to_load = 100000

import numpy as np

with open('wiki-news-300d-1M.vec') as f:
    loaded_embeddings = np.zeros((words_to_load+2, 300))
    words = {}
    idx2words = {}
    ordered_words = ['<pad>','<unk>']
    i = 2
    for line in f:
        if i-2 >= words_to_load: 
            break
        s = line.split()
        loaded_embeddings[i, :] = np.asarray(s[1:])
        words[s[0]] = i
        idx2words[i] = s[0]
        ordered_words.append(s[0])
        i = i+1

In [11]:
words['<pad>'] = 0
words['<unk>'] = 1

In [12]:
idx2words[0] = "<pad>"
idx2words[1] = "<unk>"

In [13]:
# words
def apply_idz(x):
    temp = []
    for a in x:
        if a in ordered_words:
            temp.append(words[a])
        else:
            temp.append(1)
    return temp

In [14]:
def idize(df):
    df['sentence1_idz'] = df["sentence1"].apply(apply_idz)
    df['sentence2_idz'] = df["sentence2"].apply(apply_idz)
    return df

In [15]:
df_test = idize(df_test)

In [16]:
unk_vec = np.random.normal(size=300)
pad_vec = np.random.normal(size=300)

In [17]:
loaded_embeddings[0,:] = pad_vec
# np.insert(loaded_embeddings,0,pad_vec,axis=0)
loaded_embeddings[1,:] = unk_vec
# np.insert(loaded_embeddings,1,unk_vec,axis=0)

In [18]:
def encode_target(train_train):
    train_train['label'][train_train['label']=='neutral']=0
    train_train['label'][train_train['label']=='entailment']=1
    train_train['label'][train_train['label']=='contradiction']=2
    return train_train

In [19]:
df_test =encode_target(df_test)

In [20]:
df_test.head()

Unnamed: 0,sentence1,sentence2,label,genre,sentence1_idz,sentence2_idz
0,"['not, entirely, ,, ', i, snapped, ,, harsher,...","[i, spoke, more, harshly, than, i, wanted, to, .]",1,fiction,"[1, 1862, 3, 26, 673, 13671, 3, 32878, 103, 25...","[673, 3528, 54, 26749, 103, 673, 1198, 8, 5]"
1,"[cook, and, then, the, next, time, it, would, ...","[i, would, cook, and, then, the, next, turn, w...",2,telephone,"[7496, 6, 90, 4, 425, 66, 22, 84, 41, 96, 954,...","[673, 84, 7496, 6, 90, 4, 425, 954, 84, 41, 45..."
2,"[the, disorder, hardly, seemed, to, exist, bef...","[the, disorder, did, n't, seem, to, be, as, co...",1,slate,"[4, 4788, 3685, 2567, 8, 1296, 121, 4, 40018, ...","[4, 4788, 155, 1, 1354, 8, 41, 21, 640, 74, 1,..."
3,"[the, report, and, order, ,, in, large, part, ...","[the, report, and, order, ignores, recommendat...",2,government,"[4, 301, 6, 573, 3, 9, 543, 277, 3, 22355, 4, ...","[4, 301, 6, 573, 12354, 2866, 29, 5436, 1969, ..."
4,"[idpa, 's, oig, 's, mission, is, to, prevent, ...","[idpa, 's, oig, 's, mission, is, clear, and, c...",1,government,"[1, 27, 1, 27, 1301, 16, 8, 1794, 3, 7763, 3, ...","[1, 27, 1, 27, 1301, 16, 478, 6, 11894, 49, 27..."


In [40]:
class SnliDataset(Dataset):
    def __init__(self, df):
        self.df = df
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        s1_full = self.df.iloc[idx]['sentence1']
        s2_full = self.df.iloc[idx]['sentence2']
        s1 = self.df.iloc[idx]['sentence1_idz']
        s2 = self.df.iloc[idx]['sentence2_idz']
        tar = self.df.iloc[idx]['label']
        len1 = len(s1)
        len2 = len(s2)
#         print(idx)
        return [s1,len1,s2,len2,tar,idx]
#         return [s1,len1,s2,len2,tar,s1_full,s2_full]

In [41]:
MAX_LEN = 25

In [42]:
def vocab_collate_func(batch):
    data_list_s1 = []
    data_list_s2 = []
    label_list = []
    length_list_s1 = []
    length_list_s2 = []
    idx = []
    s1_full = []
    s2_full = []
    for datum in batch:
        label_list.append(datum[4])
        length_list_s1.append(datum[1])
        length_list_s2.append(datum[3])
        idx.append(datum[5])
#         s1_full.append()
    # padding
    for datum in batch:
        if datum[1]>MAX_LEN:
            padded_vec_s1 = np.array(datum[0])[:MAX_LEN]
        else:
            padded_vec_s1 = np.pad(np.array(datum[0]),
                                pad_width=((0,MAX_LEN - datum[1])),
                                mode="constant", constant_values=0)
        if datum[3]>MAX_LEN:
            padded_vec_s2 = np.array(datum[2])[:MAX_LEN]
        else:
            padded_vec_s2 = np.pad(np.array(datum[2]),
                                pad_width=((0,MAX_LEN - datum[3])),
                                mode="constant", constant_values=0)
        data_list_s1.append(padded_vec_s1)
        data_list_s2.append(padded_vec_s2)
        
    return [torch.from_numpy(np.array(data_list_s1)), torch.LongTensor(length_list_s1), torch.from_numpy(np.array(data_list_s2)), torch.LongTensor(length_list_s2), torch.LongTensor(label_list),torch.LongTensor(idx)]

In [43]:
test_dataset = SnliDataset(df_test)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=10,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)



In [45]:
# next(iter(test_loader))

In [46]:
def test_genre(dataset,model):
    train_fiction = SnliDataset(dataset)
    train_loader_fiction = torch.utils.data.DataLoader(dataset=train_fiction,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)
    total=0
    correct=0
    for (data1, length1, data2, length2, labels,idx) in train_loader_fiction:
#                 data_batch1, length_batch1, data_batch2, len_batch2, label_batch = data1, length1, data2, length2, labels
        data_batch1, len_batch1, data_batch2, len_batch2, label_batch,idx_batch = data1.cuda(), length1.cuda(), data2.cuda(), length2.cuda(), labels.cuda(),idx
        outputs = model(data_batch1, len_batch1, data_batch2, len_batch2)


        outputs = F.softmax(outputs, dim=1)
        predicted = outputs.max(1, keepdim=True)[1]
        total += labels.size(0)
        correct += predicted.eq(label_batch.view_as(predicted)).sum().item()

    print(100*correct/total)



In [47]:
def print_wront(dataset,model):
    train_fiction = SnliDataset(dataset)
    train_loader_fiction = torch.utils.data.DataLoader(dataset=train_fiction,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True)
    total=0
    correct=0
    
    correct_3 = 0
    incorrect_3 = 0
    for (data1, length1, data2, length2, labels,idx) in train_loader_fiction:
#                 data_batch1, length_batch1, data_batch2, len_batch2, label_batch = data1, length1, data2, length2, labels
        data_batch1, len_batch1, data_batch2, len_batch2, label_batch,idx_batch = data1.cuda(), length1.cuda(), data2.cuda(), length2.cuda(), labels.cuda(),idx
        outputs = model(data_batch1, len_batch1, data_batch2, len_batch2)


        outputs = F.softmax(outputs, dim=1)
        predicted = outputs.max(1, keepdim=True)[1]
        
        mask =(predicted.squeeze(1).eq(label_batch)).cpu().data.numpy()==0
#         print(idx.numpy()[mask])
        fns = idx.numpy()[mask]
        tns = idx.numpy()[np.array(mask)==False]
        actual_out = labels.data.numpy()[mask]
        actual_true_out = labels.data.numpy()[np.array(mask)==False]
        pred_false = predicted.cpu().data.numpy()[mask]
        pred_true = predicted.cpu().data.numpy()[np.array(mask)==False]
        total += labels.size(0)
        correct += predicted.eq(label_batch.view_as(predicted)).sum().item()

    print("total accuracy",100*correct/total)
    return fns,pred_false,tns,pred_true,actual_out,actual_true_out


# CNN Model

In [48]:
class CNN2(nn.Module):
    def __init__(self, emb_size, num_classes, vocab_size,n_feat,n_channel,dp):
        super(CNN2, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx = PAD_IDX)
        self.embedding.from_pretrained(torch.from_numpy(loaded_embeddings), freeze=False)
        self.n_channel =n_channel
        self.conv1 = nn.Conv2d(1,n_feat,kernel_size=(3,300),stride=(1,1))
        self.conv2 = nn.Conv2d(n_feat,self.n_channel,kernel_size=(5,1),stride=(1,1))
        
        self.max1 = nn.MaxPool2d(kernel_size=(10,1))
        self.linear1 = nn.Linear(2*self.n_channel, 500)
        self.linear2 = nn.Linear(500,num_classes)
        self.dp1 = nn.Dropout(p=dp)

    def forward(self, data_s1, length1, data_s2, length2):

        embed1 = torch.unsqueeze(self.embedding(data_s1),dim=1)
        embed2 = torch.unsqueeze(self.embedding(data_s2),dim=1)
        
        out1 = F.relu(self.conv1(embed1))
        out2 = F.relu(self.conv1(embed2))
        
        out1 = F.relu(self.conv2(out1))
        out2 = F.relu(self.conv2(out2))

#         print("out",out1.size())
        mout1 = self.max1(out1)
        mout2 = self.max1(out2)
#         print("mout",mout1.size())
        
        sum1 = torch.squeeze(torch.sum(mout1,dim=2))
        sum2 = torch.squeeze(torch.sum(mout2,dim=2))
#         print("sum",sum1.size())
#         combined_out = torch.squeeze(torch.cat([mout1, mout2], dim=1))
        combined_out = torch.cat([sum1,sum2],dim=1)
#         print("comb",combined_out.size())
        
        logits = F.relu(self.linear1(self.dp1(combined_out)))
        res = self.linear2(logits)
        
        return res


In [50]:
model = torch.load("model_cnn11")

In [51]:
for gen in ['fiction', 'telephone', 'slate', 'government', 'travel']:
    data_d = df_test[df_test['genre']==gen]
    print(gen)
    test_genre(data_d,model)
    print("-"*10)

fiction
43.015075376884425
----------
telephone
41.39303482587065
----------
slate
41.71656686626746
----------
government
40.15748031496063
----------
travel
42.66802443991853
----------


In [52]:
fns,pred_false,tns,pred_true,actual_out,actual_true_out = print_wront(df_test,model)

total accuracy 41.78


In [53]:
fns

array([3983, 1092, 3559, 3023])

In [54]:
tns

array([4889, 2382, 2992, 3453])

## Incorrect predictions by CNN

In [60]:
for i in range(len(fns)):
    print("predicted",pred_false[i][0])
    print("Actual",actual_out[i])
    print("sentence1: ",' '.join(df_test.iloc[fns[i]]["sentence1"]))    
    print("sentence2: ",' '.join(df_test.iloc[fns[i]]["sentence2"]))
    print()
    print("-"*100)

predicted 1
Actual 2
sentence1:  the rustic bras-david picnic area , for example , is set alongside a burbling stream .
sentence2:  the picnic area is not near a stream .

----------------------------------------------------------------------------------------------------
predicted 0
Actual 2
sentence1:  his grandson akbar chose agra for his capital over delhi .
sentence2:  his grandson chose washington dc as the capital , not new york city .

----------------------------------------------------------------------------------------------------
predicted 1
Actual 0
sentence1:  the purpose of the diwan-i-khas is hotly disputed ; it is not necessarily the hall of private audience that its name implies .
sentence2:  the hall is not know many people .

----------------------------------------------------------------------------------------------------
predicted 0
Actual 2
sentence1:  for more than 26 centuries it has witnessed countless declines , falls , and rebirths , and today continues t

## Correct Predictions by CNN

In [61]:
for i in range(len(tns)):
    print("predicted",pred_true[i][0])
    print("Actual",actual_true_out[i])
    print("sentence1 :",' '.join(df_test.iloc[tns[i]]["sentence1"]))
    print("sentence2 : ",' '.join(df_test.iloc[tns[i]]["sentence2"]))
    print()
    print("-"*100)

predicted 1
Actual 1
sentence1 : the office of information and regulatory affairs of omb approved the
sentence2 :  something was approved by the office of affairs .

----------------------------------------------------------------------------------------------------
predicted 2
Actual 2
sentence1 : programs in michigan and the district of columbia received one-year grant terms for 2002 .
sentence2 :  programs in michigan receive no grants at all .

----------------------------------------------------------------------------------------------------
predicted 1
Actual 1
sentence1 : alternatively , there are sousa and goncalves ( rua do castanheiro , 47 ) and unibasket ( rua do carmo , 42 ; tel . 291/226 925 ) , both in funchal .
sentence2 :  there are other places in funchal .

----------------------------------------------------------------------------------------------------
predicted 2
Actual 2
sentence1 : so you um-hum so you think it comes down to education or or something like that

# RNN Model

In [62]:
class RNN(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, num_classes, vocab_size):
        super(RNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX)
#         self.embedding.weight.data.copy_(torch.from_numpy(loaded_embeddings))
#         self.embedding.weight.requires_grad = False
        self.embedding.from_pretrained(torch.from_numpy(loaded_embeddings).cuda(), freeze = True)


        
        self.rnn = nn.GRU(emb_size, hidden_size, num_layers, batch_first = True)
        self.linear1 = nn.Linear(2*hidden_size, 500)
        self.linear2 = nn.Linear(500,num_classes)

    def init_hidden(self, batch_size):
        hidden = torch.randn(self.num_layers, batch_size, self.hidden_size)
        return hidden.cuda()

    def forward(self, data_s1, length1, data_s2, length2):  
        batch_size = data_s1.size(0)
        self.hidden1 = self.init_hidden(batch_size)
        self.hidden2 = self.init_hidden(batch_size)

        embed1 = self.embedding(data_s1)
        embed2 = self.embedding(data_s2)
        
#         print(embed1.size())
        rnn_out1_, hidden1 = self.rnn(embed1, self.hidden1)
        rnn_out2_, hidden2 = self.rnn(embed2, self.hidden2)
#         print(self.hidden1.size())

        rnn_out1 = torch.sum(hidden1, dim=0)
        rnn_out2 = torch.sum(hidden2, dim=0)
#         print(rnn_out1.size())
        combined_out = torch.cat([rnn_out1, rnn_out2], dim=1)
#         print(combined_out.size())
        logits = F.relu(self.linear1(combined_out))
        res = self.linear2(logits)
        
        return res





In [63]:
model =torch.load("model_rnn4")
model.train(False)



RNN(
  (embedding): Embedding(100002, 300, padding_idx=0)
  (rnn): GRU(300, 500, num_layers=2, batch_first=True)
  (linear1): Linear(in_features=1000, out_features=500, bias=True)
  (linear2): Linear(in_features=500, out_features=3, bias=True)
)

In [65]:
for gen in ['fiction', 'telephone', 'slate', 'government', 'travel']:
    data_d = df_test[df_test['genre']==gen]
    print(gen)
    test_genre(data_d,model)
    print("-"*10)

fiction




46.130653266331656
----------
telephone
44.776119402985074
----------
slate
41.01796407185629
----------
government
40.6496062992126
----------
travel
42.66802443991853
----------


In [69]:
fns,pred_false,tns,pred_true,actual_out,actual_true_out = print_wront(df_test,model)



total accuracy 43.22


## Incorrect Predictions by RNN model

In [67]:
for i in range(len(fns)):
    print("predicted",pred_false[i][0])
    print("Actual",actual_out[i])
    print("sentence1: ",' '.join(df_test.iloc[fns[i]]["sentence1"]))    
    print("sentence2 : ",' '.join(df_test.iloc[fns[i]]["sentence2"]))
    print()
    print("-"*100)

predicted 0
Actual 1
sentence1:  her voice was doubtful .
sentence2 :  she sounded doubtful about it .

----------------------------------------------------------------------------------------------------
predicted 0
Actual 1
sentence1:  they do n't call them immigrants anymore that was back during my granddaddy 's day
sentence2 :  they used to call them immigrants .

----------------------------------------------------------------------------------------------------
predicted 1
Actual 0
sentence1:  this provides insight into the important japanese concept of katachi ( form ) , the rough equivalent of it is n't what you do ; it 's the way that you do it .
sentence2 :  all japanese people abide by the concept of katachi .

----------------------------------------------------------------------------------------------------
predicted 2
Actual 1
sentence1:  most of france went enthusiastically into world war i , and came out of it victorious yet bled white .
sentence2 :  most of france sup

## Correct predictions by RNN model

In [70]:
for i in range(len(tns)):
    print("predicted",pred_true[i][0])
    print("Actual",actual_true_out[i])
    print("sentence1 : ",' '.join(df_test.iloc[tns[i]]["sentence1"]))
    print("sentence2 : ",' '.join(df_test.iloc[tns[i]]["sentence2"]))
    print()
    print("-"*100)

predicted 0
Actual 0
sentence1 :  pro-choicers point out that these close-up images literally cut the fetus 's context -- the woman -- out of the picture .
sentence2 :  pro-choices say the close-up images are unfair to women .

----------------------------------------------------------------------------------------------------
predicted 2
Actual 2
sentence1 :  however , assuming the procedural requirements of chapter 36 are met , changes negotiated by the postal service and a mail user for their mutual benefit may merit recommendation under the applicable statutory standards .
sentence2 :  changes negotiated by the postal service are too regular and prohibit my postal services .

----------------------------------------------------------------------------------------------------
predicted 1
Actual 1
sentence1 :  no . i guess i 'm going too .
sentence2 :  i 'll come along .

----------------------------------------------------------------------------------------------------
predicted 0
