In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from Vocab import *
from model import AttentionModel

import pandas as pd
import os
import string
import random

In [2]:
df = pd.read_csv('stsds.csv')

In [3]:
df = df.sample(frac=1)
df.head()

Unnamed: 0,sentence_A,sentence_B,relatedness_score
481,"A large, gray ball is hitting a running dog, w...",A large dog is running on the grass with a ten...,0.49
4531,java.util.regex package is used for this purpose.,java.util.regex package is used for this purpose.,1.0
1462,A top violin player is standing on the roof ov...,A man is standing on a roof top and playing a ...,0.72
209,A little dog is sprinting and the person who o...,A little dog is sprinting and its owner is try...,0.96
4414,The woman is frying a breaded pork chop,The lady isn't breaking raw eggs into a bowl,0.48


In [12]:
#Hyperparams
lr = 0.2
gamma = 0.95
embed_size = 128
hidden_size = 256
max_vocab_size = 5000
num_epochs = 20
random_sentence_prob = 0.05
dne_prob = 0.0
max_len = 50

In [13]:
textcat = open('stsds-cat.txt').read()
textcat[:400]

'a group of kids is playing in a yard and an old man is standing in the background a group of boys in a yard is playing and a man is standing in the background a group of children is playing in the house and there is no man standing in the background a group of kids is playing in a yard and an old man is standing in the background the young boys are playing outdoors and the man is smiling nearby th'

In [14]:
vocab = Vocabulary(textcat)

In [15]:
vocab_size = vocab.size()
print(vocab_size)

2394


In [16]:
model = AttentionModel(embed_size, hidden_size, max_vocab_size)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

In [19]:
for e in range(num_epochs):
    total_loss = 0
    for i in range(len(df)):
        optimizer.zero_grad()
        
        t_a = torch.tensor(vocab.getSentenceArray(df["sentence_A"][i], dne_prob))
        t_b = torch.tensor(vocab.getSentenceArray(df["sentence_B"][i], dne_prob))
        ans = torch.tensor(df["relatedness_score"][i])
        
        if random.uniform(0, 1) < random_sentence_prob:
            ni = random.randint(0, len(df)-1)
            if ni != i:
                t_b = torch.tensor(vocab.getSentenceArray(df["sentence_B"][ni], dne_prob))
                ans = torch.tensor(0.0)
        
        out = model(t_a, t_b)
        loss = criterion(out, ans.unsqueeze(0).unsqueeze(0))
        loss.backward()
        total_loss += loss.item()
        
        optimizer.step()

    for param_group in optimizer.param_groups:
        param_group['lr'] *= gamma

    print("Epoch", e, "Loss", total_loss)

  corr_attn_params = F.softmax(self.lin_attn(corr).view(1, -1))
  test_attn_params = F.softmax(self.lin_attn(test).view(1, -1))


Epoch 0 Loss 32.03229959275986
Epoch 1 Loss 29.57621567603152
Epoch 2 Loss 28.634305129634928
Epoch 3 Loss 25.660814836275712
Epoch 4 Loss 25.63910378681798
Epoch 5 Loss 26.562867954047313
Epoch 6 Loss 24.34626568323128
Epoch 7 Loss 24.731648035957527
Epoch 8 Loss 23.286491906013417
Epoch 9 Loss 26.506826578132557
Epoch 10 Loss 21.557648854979966
Epoch 11 Loss 20.328091027777155
Epoch 12 Loss 18.9340570520831
Epoch 13 Loss 21.08520808291762
Epoch 14 Loss 20.242610940954712
Epoch 15 Loss 20.366675613100792
Epoch 16 Loss 17.7134703544549
Epoch 17 Loss 20.239813617619426
Epoch 18 Loss 17.939050643744125
Epoch 19 Loss 19.46838204993649


In [20]:
torch.save(model, "saved_models/stsds9.pt")

In [None]:
mini = 1
for i in range(len(df)):
    if df["relatedness_score"][i] < mini:
        mini = df["relatedness_score"][i]
print(mini)

In [None]:
vocab.vocab[:100]