In [18]:
from datetime import datetime
import os
import pickle
import math
import time

import torch
from torch import nn, optim
from torch.utils import data
from tqdm import tqdm

import numpy as np

In [19]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load embeddings that were produced by a trusted pipeline.
embeddings_path = 'embeddings/bag_of_words.pickle'
with open(embeddings_path, mode='rb') as embeddings_file:
    transcript_embeddings = pickle.load(embeddings_file)

In [20]:
# Keep only element [1] of every transcript record.
# NOTE(review): presumably this is the ordered list of utterances for that
# transcript -- confirm against the code that wrote the pickle.
transcript_qna = [
    transcript_embeddings[record_idx][1]
    for record_idx in range(len(transcript_embeddings))
]

# Walk each transcript two entries at a time and pair entry k with entry k+1
# (k = 0, 2, 4, ...). A trailing unpaired entry is dropped.
# Each entry's [0] element exposes .toarray(); row 0 of that dense array is
# used as the vector (presumably a 1 x vocab sparse bag-of-words row -- TODO confirm).
qna_pairs = []
for utterances in transcript_qna:
    for first_idx in range(0, len(utterances) - 1, 2):
        question_vec = utterances[first_idx][0].toarray()[0]
        answer_vec = utterances[first_idx + 1][0].toarray()[0]
        qna_pairs.append((question_vec, answer_vec))
print(len(qna_pairs))

34586


In [30]:
class QADataset(data.Dataset):
    """Map-style dataset over pre-computed (question, answer) vector pairs.

    The base class is referenced as ``data.Dataset`` because the notebook only
    imports ``from torch.utils import data``; a bare ``Dataset`` name is not
    defined on a fresh kernel.
    """

    def __init__(self, qna_list):
        """
        Args:
            qna_list (sequence): Indexable collection whose items are
                2-element sequences ``(question, answer)``. The items are
                stored by reference; nothing is copied or converted here.
        """
        self.qna = qna_list

    def __len__(self):
        """Return the number of (question, answer) pairs."""
        return len(self.qna)

    def __getitem__(self, idx):
        """Return the pair at position ``idx`` as a ``(question, answer)`` tuple."""
        pair = self.qna[idx]
        return pair[0], pair[1]

In [31]:
class AverageMeter(object):
    """Tracks the latest value together with a running weighted mean.

    Attributes:
        val:   most recent value passed to ``update``.
        sum:   accumulated ``val * n`` over all updates since the last reset.
        count: accumulated ``n`` over all updates since the last reset.
        avg:   ``sum / count`` as of the last update (0 right after a reset).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [50]:
class SiameseNetwork(nn.Module):
    """Two-layer feed-forward encoder: Linear -> ReLU -> Linear.

    The training code in this notebook runs the same instance on both the
    question and the answer vector, which is what makes it "siamese".
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        Args:
            input_size (int): width of the input vectors.
            hidden_size (int): width of the hidden layer.
            output_size (int): width of the produced embedding.
        """
        super(SiameseNetwork, self).__init__()
        # Layers are created and initialised in this exact order so the random
        # number generator is consumed the same way on every construction.
        # Only the weights are re-initialised (Kaiming normal); biases keep
        # the nn.Linear defaults.
        self.L1 = nn.Linear(input_size, hidden_size)
        nn.init.kaiming_normal_(self.L1.weight)
        self.L2 = nn.Linear(hidden_size, output_size)
        nn.init.kaiming_normal_(self.L2.weight)

    def forward(self, x):
        """Encode ``x`` and return the output of the second linear layer."""
        hidden = torch.relu(self.L1(x))
        return self.L2(hidden)

In [71]:
def train(network, train_data, dev_data, output_path, batch_size=32, n_epochs=10, lr=0.001):
    """ Train the Siamese Network and checkpoint the best epoch.

    After every epoch the value returned by `train_for_epoch` is treated as a
    dev-set loss (it is computed with `nn.CosineEmbeddingLoss`, so lower means
    question/answer embeddings are more similar). The network weights are
    written to `output_path` each time that loss improves.

    @param network (nn.Module): SiameseNetwork to optimise in place
    @param train_data (Dataset): training (question, answer) pairs
    @param dev_data (Dataset): held-out (question, answer) pairs used for model selection
    @param output_path (str): Path to which model weights are written.
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate
    """
    # Start from +inf so the first epoch always produces a checkpoint and
    # every later checkpoint requires a strictly lower dev loss.
    best_dev_loss = float("inf")
    optimizer = optim.Adam(network.parameters(), lr=lr)
    loss_func = nn.CosineEmbeddingLoss()

    for epoch in range(n_epochs):
        print("Epoch {:} out of {:}".format(epoch + 1, n_epochs))
        # float() accepts both a Python number and a 0-dim tensor.
        dev_loss = float(
            train_for_epoch(network, train_data, dev_data, optimizer, loss_func, batch_size)
        )
        if dev_loss < best_dev_loss:
            best_dev_loss = dev_loss
            print("New best dev Cosine Similarity! Saving model.")
            # The network itself is the nn.Module; it has no `.model` attribute.
            torch.save(network.state_dict(), output_path)
        print("")


def train_for_epoch(network, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Train network for a single epoch, then score it on the dev set.

    The same `network` encodes both the question and the answer of each pair;
    `loss_func` is called with a target of +1 for every pair, i.e. every
    (question, answer) pair is treated as a positive match.

    @param network (nn.Module): SiameseNetwork
    @param train_data (Dataset): items are (question, answer) pairs
    @param dev_data (Dataset): items are (question, answer) pairs
    @param optimizer (optim.Optimizer): optimizer already bound to `network`'s parameters
    @param loss_func (callable): called as loss_func(output_q, output_a, target)
    @param batch_size (int): Number of examples in a single batch

    @return dev_loss (float): example-weighted mean of `loss_func` over the dev set
                              (0 if the dev set is empty). Lower is better.
    """
    n_minibatches = math.ceil(len(train_data) / batch_size)
    train_generator = data.DataLoader(train_data, batch_size=batch_size)
    val_generator = data.DataLoader(dev_data, batch_size=batch_size)

    def _pair_loss(questions, answers):
        """Encode one batch of pairs; return (loss, number of pairs in the batch)."""
        output_q = network(questions.float())
        output_a = network(answers.float())
        # One +1 label per pair: batched (N, D) inputs need an (N,) target.
        target = torch.ones(output_q.size(0), dtype=output_q.dtype, device=output_q.device)
        return loss_func(output_q, output_a, target), output_q.size(0)

    loss_meter = AverageMeter()
    with tqdm(total=(n_minibatches)) as prog:
        for questions, answers in train_generator:
            optimizer.zero_grad()   # drop gradients left over from the previous step
            loss, n_pairs = _pair_loss(questions, answers)
            loss.backward()
            optimizer.step()

            prog.update(1)
            # Weight by batch size so a short final batch does not skew the mean.
            loss_meter.update(loss.item(), n_pairs)

    print ("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on dev set",)
    dev_meter = AverageMeter()
    with torch.no_grad():
        for questions, answers in val_generator:
            loss, n_pairs = _pair_loss(questions, answers)
            dev_meter.update(loss.item(), n_pairs)

    dev_loss = dev_meter.avg
    print("- dev loss: {:.2f}".format(dev_loss))
    return dev_loss

In [73]:
# Network dimensions: the input width is taken from the first question vector.
input_size = len(qna_pairs[0][0])
hidden_dim = 100  # alternative noted by the author: 1000
output_dim = 10   # alternative noted by the author: 768

print("=" * 80, "INITIALIZING", "=" * 80, sep="\n")

# First 80% of the pairs (in their existing order) train the model,
# the remaining 20% form the dev set.
test_val_split = int(0.8 * len(qna_pairs))
train_data = QADataset(qna_pairs[:test_val_split])
dev_data = QADataset(qna_pairs[test_val_split:])

# Time only the construction of the network.
start = time.time()
network = SiameseNetwork(input_size, hidden_dim, output_dim)
print("took {:.2f} seconds\n".format(time.time() - start))

print("=" * 80, "TRAINING", "=" * 80, sep="\n")

# Every run writes into its own timestamped results directory.
output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
output_path = output_dir + "model.weights"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

train(network, train_data, dev_data, output_path)

  0%|          | 0/865 [00:00<?, ?it/s]

INITIALIZING
took 0.12 seconds

TRAINING
Epoch 1 out of 10


 32%|███▏      | 279/865 [00:38<01:24,  6.93it/s]


KeyboardInterrupt: 