# **SETUP**

In [1]:
from os import path
from google.colab import files
import os
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel platform tag, assembled from the interpreter abbreviation, interpreter
# version and ABI tag; it is substituted into the wheel file name below.
# NOTE(review): `wheel.pep425tags` may not exist in newer `wheel` releases —
# confirm it is importable before re-running this on a fresh runtime.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Select the 'cu80' wheel directory when the nvidia-smi binary exists at this
# path, otherwise the 'cpu' one (presumably cu80 == CUDA 8.0 build — confirm).
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'
print(accelerator)

# Install the pinned torch 0.4.1 wheel matching accelerator/platform.
# Note that torchvision is not version-pinned and the wheel URL is plain http.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
# Import after the install; the prints report the installed version and
# whether CUDA is usable from this runtime.
import torch
print(torch.__version__)
print(torch.cuda.is_available())

cu80
tcmalloc: large alloc 1073750016 bytes == 0x5607c000 @  0x7f5987f6e2a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641
0.4.1
True


In [2]:
# Colab-only cell: install PyDrive, authenticate the runtime user, and fetch
# the pre-computed arrays from Google Drive by file id.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Interactive Colab authentication; the resulting application-default
# credentials are handed to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


# Each download reuses the name `f1`: create a handle for the Drive file id,
# then write its content to a local file in the working directory.
f1 = drive.CreateFile({'id': '1cFyVZjK-T9b02H2-vpX_GuhTklqojKsa'})
f1.GetContentFile('test_vectors.npy')
print("Loaded test vectors")

f1 = drive.CreateFile({'id': '1G6OhSk4aEHcFiQuobI9TIXfTNvJqiiMo'})
f1.GetContentFile('train_vectors.npy')
print("Loaded train vectors")

# The two sentence files are downloaded without a confirmation message.
f1 = drive.CreateFile({'id': '1KYx5JouFMbrIWbwyGaP1SHlGAT-uePOT'})
f1.GetContentFile('test_sentences.npy')

f1 = drive.CreateFile({'id': '134nqEfHiMDeUQquQCy5nokJmBFSwm-yc'})
f1.GetContentFile('train_sentences.npy')

# # f1 = drive.CreateFile({'id': '1OBfCX2yCqdOG8r7GDCVw5N-qkDRxdjlL'})
# # f1.GetContentFile('model1.tar')

# Sanity check: list every entry of the working directory with its
# modification time so the downloads can be verified by eye.
all_files = [(name, os.path.getmtime(name)) for name in os.listdir()]
print(all_files)
 

Loaded test vectors
Loaded train vectors
[('.config', 1544221350.396336), ('test_vectors.npy', 1544221354.0633378), ('train_vectors.npy', 1544221394.2653563), ('adc.json', 1544221350.432336), ('train_sentences.npy', 1544221396.9533575), ('test_sentences.npy', 1544221395.654357), ('sample_data', 1544031593.0)]


# IMPORTING AND LOADING

In [0]:
import torch
import torch.nn as nn
import time
from torch.autograd import Variable
from torch.utils.data import Dataset
import pandas as pd
import numpy as np

# Load the training vectors and the sentences they were computed from
# (both files are downloaded from Drive by an earlier cell).
train_vectors = np.load("train_vectors.npy")
train_sentences = np.load("train_sentences.npy")

# De-interleave with stride 5: slot i collects rows i, i+5, i+10, ...
# NOTE(review): presumably every five consecutive rows form one story, so
# slot i is "sentence i of each story" — confirm against the data preparation.
vec = tuple(train_vectors[slot::5] for slot in range(5))
sen = tuple(train_sentences[slot::5] for slot in range(5))

# datasetK stacks the first K+1 slots along a new leading axis; StoryVectors
# uses all but the last slot as input and the last slot as the target.
dataset1 = np.asarray(vec[0:2])
dataset2 = np.asarray(vec[0:3])
dataset3 = np.asarray(vec[0:4])
dataset4 = np.asarray(vec[0:5])

# Drop the references to the raw arrays so they can be garbage-collected.
vec = train_vectors = None

# Same stacking for the sentences so sentencesK lines up with datasetK.
sentences1 = np.asarray(sen[0:2])
sentences2 = np.asarray(sen[0:3])
sentences3 = np.asarray(sen[0:4])
sentences4 = np.asarray(sen[0:5])

sen = train_sentences = None

# Fall back to the CPU when no GPU is visible: the setup cell explicitly
# supports a CPU-only runtime, and a hard-coded "cuda:0" would make every
# later `.to(device)` call fail there.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [0]:
class StoryVectors(Dataset):
    """Dataset over stories stored slot-major, i.e. ``dataset[slot][story]``.

    For every story, all slots except the last one are the model input and
    the last slot is the prediction target. Any number of slots >= 2 is
    supported (the original implementation only handled 2 to 5 and left its
    locals unbound for anything else).

    Args:
        dataset: array-like exposing ``.shape`` and indexable as
            ``dataset[slot][story]``; ``shape[0]`` is the number of slots and
            ``shape[1]`` the number of stories.
        sentences: container indexable as ``sentences[slot][story]`` holding
            the text that corresponds to each entry of ``dataset``.

    Raises:
        ValueError: if ``dataset`` has fewer than two slots, since at least
            one input and one target are required.
    """

    def __init__(self, dataset, sentences):
        self.dataset = dataset
        # Number of slots per story (inputs + one target).
        self.type = self.dataset.shape[0]
        if self.type < 2:
            raise ValueError(
                "StoryVectors needs at least 2 slots per story "
                "(inputs + target), got {}".format(self.type))
        self.sen = sentences

    def __len__(self):
        """Return the number of stories."""
        return self.dataset.shape[1]

    def __getitem__(self, idx):
        """Return ``[X, len(X), y, sentences]`` for story ``idx``.

        ``X`` is the list of input entries (slots ``0 .. type-2``), ``y`` is a
        one-element list with the target entry (slot ``type-1``), and
        ``sentences`` lists the text of every slot, target included.
        """
        target_slot = self.type - 1
        X = [self.dataset[slot][idx] for slot in range(target_slot)]
        y = [self.dataset[target_slot][idx]]
        sentences = [self.sen[slot][idx] for slot in range(self.type)]
        return [X, len(X), y, sentences]
      
      
def vocab_collate_func(batch):
    """Collate dataset items into one batch.

    Every item of ``batch`` is expected to be laid out as
    ``[inputs, length, target, sentences]``. The four fields are gathered
    column-wise across the batch.

    Returns:
        A list ``[FloatTensor(inputs), LongTensor(lengths),
        FloatTensor(targets), sentences]`` where ``sentences`` stays a plain
        Python list with one entry per item.
    """
    # Materialise once so any iterable is traversed a single time.
    samples = list(batch)

    inputs = [sample[0] for sample in samples]
    seq_lengths = [sample[1] for sample in samples]
    targets = [sample[2] for sample in samples]
    texts = [sample[3] for sample in samples]

    return [
        torch.FloatTensor(inputs),
        torch.LongTensor(seq_lengths),
        torch.FloatTensor(targets),
        texts,
    ]

In [0]:
# Two-slot training set: one input vector and one target vector per story.
train_dataset1 = StoryVectors(dataset1, sentences1)
# Shuffled mini-batches of 32 stories, assembled by the custom collate
# function and loaded by 4 worker processes.
train_loader1 = torch.utils.data.DataLoader(dataset=train_dataset1,
                                           batch_size=32,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True,
                                           num_workers=4)

# x, l, y, s = iter(train_loader).next()
# print(x.shape)
# print(l)
# print(y.shape)
# print(s)

# MODELS

In [0]:
class BasicGRU(nn.Module):
    """Bidirectional GRU that maps a sequence of vectors to one vector.

    Input vectors and the GRU hidden state share the same width
    (``hidden_size``). The forward and backward halves of the GRU output are
    summed, the step at each sequence's last valid position is selected, and
    a linear layer produces the final prediction.

    Args:
        hidden_size: width of the input vectors, the GRU state and the output.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, hidden_size, n_layers=1):
        super(BasicGRU, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=0, bidirectional=True)
        self.lin = nn.Linear(hidden_size, hidden_size)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Predict one vector per sequence.

        Args:
            input_seq: float tensor of shape ``(batch, seq, hidden_size)``.
            input_lengths: valid length of every sequence (tensor or list), in
                the order required by ``pack_padded_sequence``.
            hidden: optional initial GRU state.

        Returns:
            Tensor of shape ``(batch, 1, hidden_size)``.
        """
        # Newer PyTorch releases require the lengths on the CPU; moving them
        # is harmless on older releases.
        if torch.is_tensor(input_lengths):
            input_lengths = input_lengths.cpu()

        packed = torch.nn.utils.rnn.pack_padded_sequence(input_seq, input_lengths, batch_first=True)

        outputs, hidden = self.gru(packed, hidden)

        outputs, valid_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)

        # Sum the forward-direction and backward-direction halves.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        # Select the last *valid* step of each sequence. Indexing with -1
        # would read zero padding for every sequence shorter than the longest
        # one in the batch.
        last_step = (valid_lengths - 1).to(outputs.device)
        gather_index = last_step.view(-1, 1, 1).expand(-1, 1, outputs.size(2))
        last_outputs = outputs.gather(1, gather_index)

        output = self.lin(last_outputs)
        return output

In [0]:
# Placeholder only; model2 is not used by any other visible cell.
model2 = None
# GRU model with 4800-wide vectors, moved to the selected device.
model1 = BasicGRU(hidden_size = 4800).to(device)
# Squared error summed (not averaged) over all elements of the batch.
criterion1 = torch.nn.MSELoss(reduction='sum')
optimizer1 = torch.optim.RMSprop(model1.parameters(), lr=1e-4)

# TRAINING

In [8]:
def training(model, criterion, optimizer, train_loader, num_epochs, log_interval=1):
    """Train ``model`` in place and print progress.

    Args:
        model: module called as ``model(data, lengths)``.
        criterion: loss called as ``criterion(prediction, labels)``; must
            return a scalar tensor.
        optimizer: optimiser already bound to ``model``'s parameters.
        train_loader: iterable yielding ``(data, lengths, labels, sentences)``
            batches; must support ``len()`` and expose ``.dataset``.
        num_epochs: number of passes over ``train_loader``.
        log_interval: print the loss every ``log_interval`` batches (positive
            int). The default of 1 logs every batch, as before.

    Returns:
        None. Progress lines and the wall-clock time per epoch are printed.
    """
    # Send batches to wherever the model lives instead of relying on a
    # notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_step = len(train_loader)
    for epoch in range(num_epochs):
        t0 = time.time()
        # Nothing inside the batch loop changes the mode, so set it once per epoch.
        model.train()
        for i, (data, lengths, labels, sentences) in enumerate(train_loader):
            optimizer.zero_grad()

            y_pred = model(data.to(target_device), lengths.to(target_device))
            loss = criterion(y_pred, labels.to(target_device))

            loss.backward()
            optimizer.step()

            if i % log_interval == 0:
                # .item() is the supported way to read a 0-dim tensor;
                # `loss.data[0]` is deprecated in 0.4 and an error afterwards.
                loss_data = loss.item()

                print(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                    format(epoch, i * len(data), len(train_loader.dataset),
                           100. * i / total_step, loss_data))

        # Disabled periodic checkpoint upload, kept for reference:
        # if (epoch+1) % 3 == 0:
        #     name = 'full' + str(epoch) + '.tar'
        #     torch.save(model, 'full.tar')
        #     uploaded = drive.CreateFile({'title': name})
        #     uploaded.SetContentFile('full.tar')
        #     uploaded.Upload()

        print('Time taken by the epoch: {} seconds'.format(time.time() - t0))
          
# Train model1 for 10 epochs on the two-slot loader (one input -> one target).
training(model1, criterion1, optimizer1, train_loader1, 10)

  app.launch_new_instance()


Time taken by the epoch: 173.43420886993408 seconds
Time taken by the epoch: 174.03831553459167 seconds
Time taken by the epoch: 174.08261489868164 seconds
Time taken by the epoch: 174.02995920181274 seconds
Time taken by the epoch: 173.91582250595093 seconds
Time taken by the epoch: 174.26982021331787 seconds
Time taken by the epoch: 174.19861268997192 seconds
Time taken by the epoch: 174.23547530174255 seconds
Time taken by the epoch: 174.20461106300354 seconds
Time taken by the epoch: 174.14960265159607 seconds


# SAVE MODEL

In [9]:
# Serialise the whole model object (not just its state_dict) to a local file,
# then upload that file to Google Drive under the same title.
# NOTE(review): loading a whole-model pickle needs the BasicGRU class to be
# available at load time; consider saving model1.state_dict() instead.
torch.save(model1, 'model1.tar')
uploaded = drive.CreateFile({'title': 'model1.tar'})
uploaded.SetContentFile('model1.tar')
uploaded.Upload()

# uploaded = drive.CreateFile({'title': 'pred.npy'})
# uploaded.SetContentFile('pred.npy')
# uploaded.Upload()


  "type " + obj.__name__ + ". It won't be checked "


# VALIDATION

In [0]:
def nn(qvec, vectors, array, k=5):
    """Print the ``k`` entries of ``array`` that score highest against ``qvec``.

    The score is the plain dot product between ``qvec`` and each row of
    ``vectors``; no normalisation is applied, so this is not cosine
    similarity. Each printed line is ``(entry, score) row_index``, best
    match first. Nothing is returned.

    NOTE(review): once this cell runs, the name ``nn`` no longer refers to the
    ``torch.nn`` alias imported earlier in the notebook; re-running a cell
    that uses ``nn.Module`` afterwards will fail.

    Args:
        qvec: query vector.
        vectors: 2-D array with one candidate per row.
        array: labels aligned with the rows of ``vectors``.
        k: number of matches to print.
    """
    print("computing scores")
    similarity = np.dot(qvec, vectors.T).flatten()

    print("sorting scores")
    # Ascending argsort reversed -> indices ordered from best to worst score.
    ranking = np.argsort(similarity)[::-1]

    # Build every (label, score) pair first, then print them with their index.
    best = [(array[row], similarity[row]) for row in ranking[:k]]
    for match, row in zip(best, ranking):
        print(match, row)

In [0]:
# One-story evaluation set: story index 6 of the five-slot data. Slicing with
# 6:7 (rather than 6) keeps the story axis, which StoryVectors relies on.
# Note: this story comes from the training arrays, not from test_vectors.npy.
test_dataset = StoryVectors(dataset4[:,6:7,:], sentences4[:, 6:7])
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=1,
                                           collate_fn=vocab_collate_func,
                                           shuffle=True,
                                           num_workers=4)

# NOTE(review): `model4` is not defined in any visible cell (only model1 and
# model2 are), so this raises NameError on a fresh kernel — confirm which
# model is meant to be evaluated here.
model4.eval()

# The loader holds a single batch, so after the loop `pred` and `sentences`
# refer to that one story.
# NOTE(review): the forward pass runs with autograd enabled; consider wrapping
# it in torch.no_grad() for inference.
for data, lengths, labels, sentences in test_loader:
        pred = model4(data.to(device), lengths.to(device))

In [25]:
# Detach from the graph, copy to host memory and drop singleton dimensions.
pred = pred.detach().cpu().numpy().squeeze()
# Show the story whose last sentence was predicted.
print(sentences)
# Flatten (slot, story, dim) into one candidate row per sentence, and flatten
# the sentences the same way so row i of `d` matches entry i of `s`.
no, sample, dim = dataset4.shape
d = dataset4.reshape(no*sample, dim)
s = sentences4.reshape(no*sample)
print(d.shape)
# Print the 5 candidates with the highest dot product against the prediction.
nn(pred, d, s, k=5)
# nn(pred, dataset2[2,:,:], sentences2[2,:], k=5)

[[b'Emily was excited for prom .', b"Emily did n't have a date though .", b'John asked Emily if Emily would like to go to prom with John .', b"Emily happily accepted John's invitation .", b'John and Emily went to the prom and had a wonderful time .']]
(2000, 4800)
computing scores
sorting scores
(b'John and Emily went to the prom and had a wonderful time .', 1.9001805) 1606
(b'Ryder and Emily went to a winery on a hot July day .', 1.7158275) 313
(b'John and Emily went to the park to have a picnic .', 1.6723434) 149
(b'John and Emily went out to dinner .', 1.6700802) 116
(b'Emily went with friends to a Haunted House .', 1.6624787) 278


In [0]:
# model1.eval()
# X = torch.FloatTensor(dataset2[:2,0:1,:].transpose((1,0,2)))
# length = torch.LongTensor(len(X))
# y = torch.FloatTensor(dataset2[2:3,0:1,:].transpose((1,0,2)))
# sentences = sentences2[:,0:1]

# pred = model1(X.to(device), length.to(device))

#dataset2[2:3,0:1,:].transpose((1,0,2)).shape
#torch.FloatTensor(X), torch.LongTensor(lengths), torch.FloatTensor(y), sentences

NameError: ignored