The system has two LSTM layers with 50 units, <br>
one for each context side, which concatenates the <br>
outputs and passes that to a feedforward layer <br>
with 64 neurons, followed by a dropout layer at <br>
rate 0.5, and a final one-neuron output layer of <br>
sigmoid activation.

In [9]:
#importing the things
import json
import torch
import numpy as np
from torch import tensor
from torch import nn
from torch import optim
import random

<torch._C.Generator at 0x7f5fdc6ff130>

In [10]:
#load the data and vocab
def load_data(file_name):
    data = []
    vocab = {"<UNK>":0}
    with open(file_name,'r') as file:
            for line in file.readlines():
                    line = json.loads(line)
                    #create vocabulary from all unique words in all sentences
                    line["sentence1"] = line["sentence1"].strip('.').strip(',').strip("?").strip("!").strip(";").strip(":")
                    line["sentence2"] = line["sentence2"].strip('.').strip(',').strip("?").strip("!").strip(";").strip(":")
                    sentence = line['sentence1'] + " " + line['sentence2']
                    #strip all punctuation from vocab words
                    words = sentence.split()
                    #add if not already in vocab
                    for word in words:
                        if word not in vocab:
                            #add word to vocab dict
                            vocab[word] = len(vocab)
                    #add line to data
                    data.append(line)
    return vocab, data

def sen2vec(s):
    v = []
    for word in s.split():
        try:
            v.append(vocab[word])
        except:
            v.append(0)
    return tensor(v).unsqueeze(0)

In [11]:
class NeuralNet(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim): # output = number tags
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm = nn.LSTM(embedding_dim, hidden_dim, 1, bias=False)
        self.dropout = nn.Dropout(p=0.5)
        self.output_layer = nn.Linear(2*hidden_dim, output_dim)
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, s1, s2):

        embed1 = torch.sum(self.embedding(s1),dim=1)
        _,(hidden_rep1,_) = self.lstm(embed1.unsqueeze(0))

        embed2 = torch.sum(self.embedding(s2),dim=1)
        _,(hidden_rep2,_) = self.lstm(embed2.unsqueeze(0))

        # Option 1 Concat
        hidden_rep1 = hidden_rep1.squeeze(0).squeeze(0)
        hidden_rep2 = hidden_rep2.squeeze(0).squeeze(0)

        final_hidden_rep = torch.cat((hidden_rep1, hidden_rep2))

        drop = self.dropout(final_hidden_rep)

        output = self.sigmoid(self.output_layer(drop.squeeze(0)))
        
        return output

In [12]:
vocab, train_data = load_data('train.jsonl')
_, test_data = load_data('test.jsonl')
_, val_data = load_data('val.jsonl')



In [13]:
our_wic = NeuralNet(len(vocab),73,127,2)

In [None]:
# Model Train 


epochs = 1000
ce = nn.CrossEntropyLoss()
softmax = nn.Softmax(dim=0)
optimizer = optim.SGD(our_wic.parameters(), lr=0.02)

train_d = []
val_d = []
loss_d = []

for i in range(epochs):
    random.shuffle(train_data)
    print("Epoch:",i)
    total_loss = 0
    for point in train_data:
        our_wic.train()
        optimizer.zero_grad()
        
        # a) calculate probs / get an output
        s1 = sen2vec(point["sentence1"])
        s2 = sen2vec(point["sentence2"])
        y_raw = our_wic(s1,s2)
        #y_hat = softmax(y_raw)
        
        y = tensor(int(point["label"]))
        # b) compute loss
        loss = ce(y_raw.unsqueeze(0),y.unsqueeze(0))
        total_loss += loss
        # c) get the gradient
        loss.backward()

        # d) update the weights
        optimizer.step()
        
    print(total_loss/len(train_data))
    loss_d.append(total_loss/len(train_data))
    
    our_wic.eval()

    score = 0
    for point in train_data:
        s1 = sen2vec(point['sentence1'])
        s2 = sen2vec(point['sentence2'])
        output = our_wic(s1,s2)
        result = torch.argmax(softmax(output))
        if bool(result) == point["label"]:
            score += 1

    print(score/len(train_data))
    train_d.append(score/len(train_data))
    
    score = 0
    for point in val_data:
        s1 = sen2vec(point['sentence1'])
        s2 = sen2vec(point['sentence2'])
        output = our_wic(s1,s2)
        result = torch.argmax(softmax(output))
        if bool(result) == point["label"]:
            score += 1

    print(score/len(val_data))
    val_d.append(score/len(val_data))

Epoch: 0
tensor(0.6889, grad_fn=<DivBackward0>)
0.6175386882829772
0.5266457680250783
Epoch: 1
tensor(0.6695, grad_fn=<DivBackward0>)
0.6588061901252763
0.542319749216301
Epoch: 2
tensor(0.6509, grad_fn=<DivBackward0>)
0.7076271186440678
0.5188087774294671
Epoch: 3
tensor(0.6289, grad_fn=<DivBackward0>)
0.7485261606484893
0.512539184952978
Epoch: 4
tensor(0.6052, grad_fn=<DivBackward0>)
0.7855563743551953
0.5094043887147336
Epoch: 5
tensor(0.5840, grad_fn=<DivBackward0>)
0.817243920412675
0.5094043887147336
Epoch: 6
tensor(0.5635, grad_fn=<DivBackward0>)
0.8465364775239499
0.5235109717868338
Epoch: 7
tensor(0.5420, grad_fn=<DivBackward0>)
0.868828297715549
0.5203761755485894
Epoch: 8
tensor(0.5272, grad_fn=<DivBackward0>)
0.8761974944731025
0.5344827586206896
Epoch: 9
tensor(0.5147, grad_fn=<DivBackward0>)
0.8949889462048637
0.5015673981191222
Epoch: 10
tensor(0.4989, grad_fn=<DivBackward0>)
0.9045689019896831
0.5015673981191222
Epoch: 11
tensor(0.4868, grad_fn=<DivBackward0>)
0.913780