<center>

# NNFL Research Paper Assignment
## Skip-Thought Vectors (Paper 62)

#### - Nikhil Khandelwal (2016A3PS0192P), Ajnkya Vyas (2016A3PS0246P), Anwesh Bhattacharya (2016B5A70590P)

In [None]:
import torch
from torch import nn
from torch.autograd import Variable
from datetime import datetime, timedelta
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

from config import *
from model import UniSkip, Encoder
from data_loader import DataLoader
from vocab import load_dictionary
import nearest_neighbors
import eval_mpqa
import eval_msrp

### Task 1: Training the model

In [None]:
# Data loader over the text corpus at ./data_corpus.txt; the training loop
# draws its batches from it and debug() uses it to decode indices to text.
d = DataLoader("./data_corpus.txt")

In [None]:
# Instantiate the UniSkip model and, when USE_CUDA is set, move it to the
# device selected by CUDA_DEVICE (both names presumably come from config).
mod = UniSkip()
if USE_CUDA:
    mod.cuda(CUDA_DEVICE)

In [None]:
# Adam optimizer over every parameter of the model, with a fixed learning rate.
lr = 3e-4
optimizer = torch.optim.Adam(mod.parameters(), lr=lr)

In [None]:
# Rolling window of the losses seen by debug(); trimmed to the last 20 entries.
loss_trail = []
# Lowest rolling-mean loss for which a checkpoint was written; None until then.
last_best_loss = None
# Time of the previous debug() call, used to report the time between reports.
current_time = datetime.utcnow()


def debug(i, loss, prev, nex, prev_pred, next_pred):
    """Print a progress report and checkpoint the model when the loss improves.

    The report contains the iteration number, the time elapsed since the
    previous report, the current loss, and the sentences obtained by passing
    ``prev``, ``nex``, ``prev_pred`` and ``next_pred`` through
    ``d.convert_indices_to_sentences``.

    The mean of the last (up to) 20 reported losses is then compared with the
    best mean seen so far; when it is lower (or when there is no best yet),
    ``mod.state_dict()`` is saved to ``./skip-best``. A failed save is
    reported and leaves ``last_best_loss`` untouched, so the next report
    tries again.

    Args:
        i: Iteration counter, used only in the progress message.
        loss: Loss tensor for the current batch; must hold a single value.
        prev: Passed to ``d.convert_indices_to_sentences`` and printed as
            ``prev`` (presumably the true previous sentences).
        nex: Same, printed as ``next``.
        prev_pred: Same, printed as ``pred_prev``.
        next_pred: Same, printed as ``pred_next``.
    """
    global last_best_loss
    global current_time

    this_loss = loss.item()
    loss_trail.append(this_loss)
    # Trim in place so the window never holds more than the last 20 losses.
    del loss_trail[:-20]

    new_current_time = datetime.utcnow()
    time_elapsed = str(new_current_time - current_time)
    current_time = new_current_time
    print("Iteration {}: time = {} , this_loss = {}".format(
              i, time_elapsed, this_loss))

    print("prev = {}\nnext = {}\npred_prev = {}\npred_next = {}".format(
        d.convert_indices_to_sentences(prev),
        d.convert_indices_to_sentences(nex),
        d.convert_indices_to_sentences(prev_pred),
        d.convert_indices_to_sentences(next_pred),
    ))

    # loss_trail is never empty here: one value was appended just above.
    trail_loss = sum(loss_trail) / len(loss_trail)
    if last_best_loss is not None and last_best_loss <= trail_loss:
        return

    print("Loss improved from {} to {}".format(last_best_loss, trail_loss))

    # Fixed path; it is also the default location UsableEncoder loads from.
    save_loc = "./skip-best"
    print("saving model at {}".format(save_loc))
    try:
        torch.save(mod.state_dict(), save_loc)
    except Exception as e:
        # Best effort: a failed checkpoint must not abort a long training run.
        print("Couldn't save model because {}".format(e))
        return

    last_best_loss = trail_loss

In [None]:
print("####################### Training model #######################\n\n")

total_steps = 1000000  # a million iterations
batch_size = 32 * 8
report_every = 200
separator = "\n----------------------------------------------------------------------------\n"

for step in range(total_steps):
    # Forward pass on a fresh batch; the model returns the loss together with
    # the values that debug() prints.
    sentences, lengths = d.fetch_batch(batch_size)
    loss, prev, nex, prev_pred, next_pred = mod(sentences, lengths)

    # Periodic progress report (and possible checkpoint) before the update.
    if step % report_every == 0:
        debug(step, loss, prev, nex, prev_pred, next_pred)
        print(separator)

    # Standard optimisation step: clear old gradients, backpropagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

### Task 2: Obtaining the nearest neighbors of different sentences scored by cosine similarity

In [None]:
class UsableEncoder:
    """Sentence encoder backed by the encoder of a saved UniSkip model.

    Construction loads the word dictionary and the saved weights; ``encode``
    then turns a list of sentences into a NumPy array of vectors.
    """

    def __init__(self, loc="./skip-best"):
        """Load the word dictionary and the encoder weights.

        Args:
            loc: Path of the saved ``state_dict`` to load into a fresh
                ``UniSkip`` model.
        """
        print("Preparing the DataLoader. Loading the word dictionary")
        self.d = DataLoader(sentences=[''], word_dict=load_dictionary('./data_corpus.txt.pkl'))

        print("Loading encoder from the saved model at {}".format(loc))
        model = UniSkip()
        # Returning the storage unchanged keeps the loaded tensors on the CPU,
        # so a checkpoint written on a GPU can be read on a machine without one.
        model.load_state_dict(torch.load(loc, map_location=lambda storage, _location: storage))
        self.encoder = model.encoder
        if USE_CUDA:
            self.encoder.cuda(CUDA_DEVICE)

    def encode(self, text, batch_size=100):
        """Encode sentences into vectors, processing them in chunks.

        Args:
            text: Sequence of sentences; each one is converted with
                ``self.d.convert_sentence_to_indices``.
            batch_size: Maximum number of sentences sent through the encoder
                at once. Defaults to 100.

        Returns:
            A NumPy array built from the encoder outputs of all chunks, each
            reshaped to rows of width ``self.encoder.thought_size``
            (presumably one row per input sentence). An empty ``text``
            yields an empty array.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # Imported here because the notebook does not visibly import numpy.
        import numpy as np

        if batch_size < 1:
            raise ValueError("batch_size must be at least 1, got {}".format(batch_size))

        vectors = []
        for start in range(0, len(text), batch_size):
            chunk = text[start:start + batch_size]
            print("encoding chunk of size {}".format(len(chunk)))
            indices = torch.stack(
                [self.d.convert_sentence_to_indices(sentence) for sentence in chunk]
            )
            # Inference only: no autograd graph is needed for the encodings.
            with torch.no_grad():
                thoughts, _ = self.encoder(indices)
            thoughts = thoughts.view(-1, self.encoder.thought_size)
            vectors.extend(thoughts.detach().cpu().numpy())

        return np.array(vectors)

# Shared encoder instance, loaded from the default checkpoint path "./skip-best";
# the evaluation cells below pass it to eval_msrp and eval_mpqa.
usable_encoder = UsableEncoder()

In [None]:
# Query sentences handed to nearest_neighbors.generate (per the task heading,
# their nearest neighbours are scored by cosine similarity).
input_text = [
    "After her outburst, she darted out of the restaurant .",
    "he ran his hand inside his coat , double-checking that the unopened letter was still there .",
    "if he had a weapon , he could maybe take out their last imp , and then beat up errol and vanessa .",
]
nearest_neighbors.generate(input_text)

### Task 3: Evaluating the trained model on paraphrase detection and opinion polarity datasets

In [None]:
# Run the eval_msrp evaluation with the shared encoder (the paraphrase
# detection task, judging by the heading); loc='./' is presumably the
# directory holding the dataset files -- TODO confirm against eval_msrp.
eval_msrp.evaluate(usable_encoder, loc='./')

In [None]:
# Run the eval_mpqa evaluation with the shared encoder (the opinion polarity
# task, judging by the heading); loc='./' is presumably the dataset directory
# and k=3 presumably a fold count -- TODO confirm both against eval_mpqa.
eval_mpqa.evaluate(usable_encoder, loc='./', k=3)