In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from Vocab import *
from model import *

import pandas as pd
import os
import string
import random

In [11]:
df = pd.read_csv('stsds.csv')

In [12]:
#shuffle data
df = df.sample(frac=1) 
df.head()

Unnamed: 0,sentence_A,sentence_B,relatedness_score
2285,A man is riding a water toy in the water,Two sumo ringers are not fighting,0.2
2150,A man is playing a guitar,A guitar is being played by a man,1.0
153,A soccer ball is rolling into a goal net,A soccer player is kicking a ball into the goal,0.86
964,A woman is sewing with a machine,There is no woman using a sewing machine,0.64
2430,There is no woman peeling a potato,A woman is peeling a potato,0.9


In [13]:
#Hyperparams
lr = 0.2
gamma = 0.95
embed_size = 128
hidden_size = 256
max_vocab_size = 5000
num_epochs = 20
random_sentence_prob = 0.01
dne_prob = 0.0
max_len = 50

In [14]:
textcat = open('stsds-cat.txt').read()
textcat[:400]

'a group of kids is playing in a yard and an old man is standing in the background a group of boys in a yard is playing and a man is standing in the background a group of children is playing in the house and there is no man standing in the background a group of kids is playing in a yard and an old man is standing in the background the young boys are playing outdoors and the man is smiling nearby th'

In [15]:
vocab = Vocabulary(textcat)

In [16]:
vocab_size = vocab.size()
print(vocab_size)

2394


In [17]:
model = AttentionModel2(embed_size, hidden_size, max_vocab_size)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

In [19]:
for e in range(num_epochs):
    total_loss = 0
    for i in range(len(df)):
        optimizer.zero_grad()
        
        t_a = torch.tensor(vocab.getSentenceArray(df["sentence_A"][i], dne_prob))
        t_b = torch.tensor(vocab.getSentenceArray(df["sentence_B"][i], dne_prob))
        ans = torch.tensor(df["relatedness_score"][i])

        '''
        #use random sentences (to be discouraged) once in a while 
        if random.uniform(0, 1) < random_sentence_prob:
            ni = random.randint(0, len(df)-1)
            if ni != i:
                t_b = torch.tensor(vocab.getSentenceArray(df["sentence_B"][ni], dne_prob))
                ans = torch.tensor(0.0)
        '''
        
        out = model(t_a, t_b)
        loss = criterion(out, ans.unsqueeze(0).unsqueeze(0))
        loss.backward()
        total_loss += loss.item()
        
        optimizer.step()

    for param_group in optimizer.param_groups:
        param_group['lr'] *= gamma

    print("Epoch", e, "Loss", total_loss)

  test_attn_params = F.softmax(self.lin_attn(test).view(1, -1))
  corr_attn = torch.matmul(corr_attn_params, corr.squeeze(0))


Epoch 0 Loss 38.67854076689326
Epoch 1 Loss 36.440685936757575
Epoch 2 Loss 34.68412908258628
Epoch 3 Loss 33.207077803407145
Epoch 4 Loss 31.93082440982864
Epoch 5 Loss 30.81064334069167
Epoch 6 Loss 29.815665377776273
Epoch 7 Loss 28.923634146546704
Epoch 8 Loss 28.118473881020005
Epoch 9 Loss 27.388555628765506
Epoch 10 Loss 26.725373291592554
Epoch 11 Loss 26.122285062511523
Epoch 12 Loss 25.57336185710682
Epoch 13 Loss 25.07282170515907
Epoch 14 Loss 24.61514305185817
Epoch 15 Loss 24.19534948022263
Epoch 16 Loss 23.809124689308938
Epoch 17 Loss 23.452794042593098
Epoch 18 Loss 23.12323324042367
Epoch 19 Loss 22.81775961921476


In [20]:
torch.save(model, "saved_models/stsds10.pt")

In [None]:
mini = 1
for i in range(len(df)):
    if df["relatedness_score"][i] < mini:
        mini = df["relatedness_score"][i]
print(mini)

In [None]:
vocab.vocab[:100]