The system has two LSTM layers with 50 units, <br>
one for each context side, which concatenates the <br>
outputs and passes that to a feedforward layer <br>
with 64 neurons, followed by a dropout layer at <br>
rate 0.5, and a final one-neuron output layer of <br>
sigmoid activation.

In [1]:
#importing the things
import json
import torch
import numpy as np
from torch import tensor
from torch import nn
from torch import optim

import random

In [2]:
#load the data and vocab
def load_data(file_name):
    data = []
    vocab = {"<UNK>":0}
    with open("train.jsonl",'r') as file:
            for line in file.readlines():
                    line = json.loads(line)
                    #create vocabulary from all unique words in all sentences
                    line["sentence1"] = line["sentence1"].strip('.').strip(',').strip("?").strip("!").strip(";").strip(":")
                    line["sentence2"] = line["sentence2"].strip('.').strip(',').strip("?").strip("!").strip(";").strip(":")
                    sentence = line['sentence1'] + " " + line['sentence2']
                    #strip all punctuation from vocab words
                    words = sentence.split()
                    #add if not already in vocab
                    for word in words:
                        if word not in vocab:
                            #add word to vocab dict
                            vocab[word] = len(vocab)
                    #add line to data
                    data.append(line)
    return vocab, data

def sen2vec(s):
    v = [vocab[word] for word in s.split()]
    return tensor(v).unsqueeze(0)

In [3]:
class NeuralNet(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim): # output = number tags
        super().__init__()
        
        # if option 1 change embedding_dim in LSTM to 2*embedding_dim

        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, 1, bias=False)
        self.lstm2 = nn.LSTM(embedding_dim, hidden_dim, 1, bias=False)
        self.dropout = nn.Dropout(p=0.5)
        self.output_layer = nn.Linear(2*hidden_dim, output_dim)
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, s1, s2):

        embed1 = torch.sum(self.embedding(s1),dim=1)
        _,(hidden_rep1,_) = self.lstm1(embed1.unsqueeze(0))

        embed2 = torch.sum(self.embedding(s2),dim=1)
        _,(hidden_rep2,_) = self.lstm2(embed2.unsqueeze(0))

        # Option 1 Concat
        hidden_rep1 = hidden_rep1.squeeze(0).squeeze(0)
        hidden_rep2 = hidden_rep2.squeeze(0).squeeze(0)

        final_hidden_rep = torch.cat((hidden_rep1, hidden_rep2))

        drop = self.dropout(final_hidden_rep)

        output = self.sigmoid(self.output_layer(drop.squeeze(0)))
        
        return output

In [4]:
vocab, train_data = load_data('train.jsonl')
_, test_data = load_data('test.jsonl')
_, val_data = load_data('val.jsonl')

random.shuffle(train_data)

In [5]:
our_wic = NeuralNet(len(vocab),50,64,2)

In [11]:
# Model Train 

epochs = 10
ce = nn.CrossEntropyLoss()
softmax = nn.Softmax(dim=0)
optimizer = optim.SGD(our_wic.parameters(), lr=0.1)

our_wic.train()

for i in range(epochs):
    print("Epoch:",i)
    for point in train_data:
        optimizer.zero_grad()
        
        # a) calculate probs / get an output
        s1 = sen2vec(point["sentence1"])
        s2 = sen2vec(point["sentence2"])
        y_raw = our_wic(s1,s2)
        #y_hat = softmax(y_raw)
        
        y = tensor(int(point["label"]))
        # b) compute loss
        loss = ce(y_raw.unsqueeze(0),y.unsqueeze(0))
        
        # c) get the gradient
        loss.backward()

        # d) update the weights
        optimizer.step()

Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9


In [12]:
our_wic.eval()

score = 0
for point in val_data:
    s1 = sen2vec(point['sentence1'])
    s2 = sen2vec(point['sentence2'])
    output = our_wic(s1,s2)
    result = torch.argmax(softmax(output))
    if bool(result) == point["label"]:
        score += 1

print(score/len(val_data))

0.9174649963154016
