In [1]:
import numpy as np
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.autograd import Variable
from itertools import chain

from data_reader import amazon_dataset_iters
from tqdm import tqdm

import matplotlib.pyplot as plt
import gc
%matplotlib inline
%env CUDA_VISIBLE_DEVICES=2

env: CUDA_VISIBLE_DEVICES=2


In [2]:
# Bind `device` to the module whose tensor constructors should be used:
# torch.cuda when a GPU is visible to this process, plain torch otherwise.
device = torch.cuda if torch.cuda.is_available() else torch

In [3]:
#TODO: set constants to ones from constants.py
class Merger(nn.Module):
    """Fuse a user embedding and an item embedding into one hidden vector.

    Computes ``sigmoid(W_u * user + W_i * item + b)``. Both projections are
    bias-free linear maps from ``latent_factors_count`` to ``hidden_size``;
    ``b`` is a single learnable scalar broadcast over every hidden unit.
    """

    def __init__(self, latent_factors_count, hidden_size):
        super(Merger, self).__init__()
        self.user_model = nn.Linear(latent_factors_count, hidden_size, bias=False)
        self.item_model = nn.Linear(latent_factors_count, hidden_size, bias=False)
        # torch.Tensor(1) only allocates memory without initialising it, so the
        # bias could start at any value (including NaN/inf). Start from zero.
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, user_embed, item_embed):
        """Return the sigmoid-activated sum of both projections plus the bias."""
        projected = self.user_model(user_embed) + self.item_model(item_embed)
        return torch.sigmoid(projected + self.bias)

In [4]:
class ContextMerger(nn.Module):
    """Combine user, item, rating and review signals into a context vector.

    Computes ``tanh(W_u * user + W_i * item + w_r * rating + W_c * review + b)``
    where ``W_u``/``W_i``/``W_c`` are bias-free linear maps into
    ``context_size`` dimensions and ``w_r`` and ``b`` are learnable scalars
    that broadcast over the context dimensions.
    """

    def __init__(self, latent_factors_count, vocabulary_size, context_size):
        super(ContextMerger, self).__init__()
        self.user_model = nn.Linear(latent_factors_count, context_size, bias=False)
        self.item_model = nn.Linear(latent_factors_count, context_size, bias=False)
        # torch.Tensor(1) returns uninitialised memory; give both scalars a
        # defined starting point. Zero keeps the tanh out of saturation at the
        # start, and the rating weight still receives a non-zero gradient.
        self.rating_weight = nn.Parameter(torch.zeros(1))
        self.review_model = nn.Linear(vocabulary_size, context_size, bias=False)
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, user_embed, item_embed, rating, review):
        """Return the tanh-activated sum of all four contributions plus the bias."""
        pre_activation = (
            self.user_model(user_embed)
            + self.item_model(item_embed)
            + self.rating_weight * rating
            + self.review_model(review)
            + self.bias
        )
        return torch.tanh(pre_activation)

In [5]:
class EncoderModel(nn.Module):
    """Encode a (user, item) pair into a rating, a review-word distribution and a context.

    The user and item ids are embedded, merged by ``Merger`` into a hidden
    vector, and that vector feeds two heads:

    * ``regression_model`` -- produces one value per example (the rating);
    * ``review_model`` -- produces ``vocabulary_size`` scores per example.

    ``ContextMerger`` then combines the embeddings, the predicted rating and
    the raw review scores into the context vector returned to the caller.
    """

    def __init__(self, users_count, items_count, latent_factors_count, vocabulary_size=333,
                 context_size=50, hidden_size=400, n_regression_layers=3, n_review_layers=1):
        super(EncoderModel, self).__init__()
        self.latent_factors_count = latent_factors_count

        self.user_embedding = nn.Embedding(users_count, latent_factors_count)
        self.item_embedding = nn.Embedding(items_count, latent_factors_count)

        self.merger = Merger(latent_factors_count, hidden_size)

        # (n_regression_layers - 1) Linear+Sigmoid blocks followed by two
        # Linear layers. NOTE(review): there is no non-linearity between the
        # last two Linear layers -- kept as in the original architecture.
        regression_layers = self._sigmoid_blocks(hidden_size, n_regression_layers - 1)
        regression_layers.append(nn.Linear(hidden_size, hidden_size))
        regression_layers.append(nn.Linear(hidden_size, 1))
        self.regression_model = nn.Sequential(*regression_layers)

        # (n_review_layers - 1) Linear+Sigmoid blocks and a projection onto the vocabulary.
        review_layers = self._sigmoid_blocks(hidden_size, n_review_layers - 1)
        review_layers.append(nn.Linear(hidden_size, vocabulary_size))
        self.review_model = nn.Sequential(*review_layers)

        self.context_merger = ContextMerger(latent_factors_count, vocabulary_size, context_size)

    @staticmethod
    def _sigmoid_blocks(hidden_size, n_blocks):
        """Return a flat list of ``n_blocks`` (Linear, Sigmoid) pairs; empty if ``n_blocks <= 0``."""
        layers = []
        for _ in range(n_blocks):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(nn.Sigmoid())
        return layers

    def forward(self, input_user, input_item):
        """Return ``(regression_result, review_log_softmax, context)`` for the given ids."""
        embedded_user = self.user_embedding(input_user)
        embedded_item = self.item_embedding(input_item)

        merged = self.merger(embedded_user, embedded_item)
        regression_result = self.regression_model(merged)
        review_result = self.review_model(merged)
        # Normalise over the vocabulary (last) axis explicitly instead of
        # relying on the deprecated implicit-dimension choice of LogSoftmax().
        review_softmax = F.log_softmax(review_result, dim=-1)

        # The context merger is fed the raw review scores, not the log-softmax.
        context = self.context_merger(embedded_user, embedded_item, regression_result, review_result)
        return regression_result, review_softmax, context

In [6]:
class DecoderModel(nn.Module):
    """GRU decoder that turns token ids and a context vector into per-step log-probabilities.

    ``hidden_size`` is accepted for interface compatibility but is not used:
    the embedding width and the GRU state width are both ``context_size``.
    """

    def __init__(self, hidden_size=400, context_size=50, vocabulary_size=333):
        super(DecoderModel, self).__init__()

        self.embedding = nn.Embedding(vocabulary_size, context_size)
        self.gru = nn.GRU(context_size, context_size)
        self.out = nn.Linear(context_size, vocabulary_size)
        # Explicit axis: normalise over the vocabulary (last) dimension.
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, output, hidden):
        """Decode a batch of token-id sequences.

        ``output`` is indexed as (batch, step) and ``hidden`` is reshaped to
        (1, batch, -1) to serve as the initial GRU state. Returns
        ``(log_probs, hidden)`` where ``log_probs`` is (batch, step, vocabulary)
        and sums to one (after ``exp``) along the last axis.
        """
        # The GRU is not batch_first, so move the step axis to the front.
        # Positional dims are used because the keyword names `dim1`/`dim2` are
        # not accepted by current PyTorch (the documented names are dim0/dim1).
        embedded = torch.transpose(self.embedding(output), 0, 1)
        hidden = hidden.view(1, hidden.size(0), -1)
        gru_out, hidden = self.gru(embedded, hidden)
        # (step, batch, vocabulary) -> log-softmax over vocabulary -> (batch, step, vocabulary)
        log_probs = self.softmax(self.out(gru_out))
        return torch.transpose(log_probs, 0, 1), hidden

In [7]:
class Model(nn.Module):
    """Full network: an ``EncoderModel`` followed by a ``DecoderModel``.

    ``forward`` returns the encoder's rating prediction and review
    log-softmax together with the decoder's per-step log-probabilities. The
    decoder is always fed ``max_tip_len`` copies of ``SEQ_START_ID`` per
    example and is initialised with the encoder's context vector.
    """

    def __init__(self, users_count=192403, # defaults are for Electronics Dataset
                 items_count=63001,
                 latent_factors_count=300,
                 vocabulary_size=70294,
                 context_size=400,
                 hidden_size=400,
                 max_tip_len=22):
        super(Model, self).__init__()
        self.SEQ_START_ID = 2 # TODO accept vocabulary take ID from it
        self.encoder = EncoderModel(users_count=users_count,
                                    items_count=items_count,
                                    latent_factors_count=latent_factors_count,
                                    vocabulary_size=vocabulary_size,
                                    context_size=context_size,
                                    hidden_size=hidden_size)

        self.decoder = DecoderModel(hidden_size=hidden_size,
                                    context_size=context_size,
                                    vocabulary_size=vocabulary_size)

        self.max_tip_len = max_tip_len
        # Kept for backward compatibility; forward() no longer builds its
        # input from this Python list.
        self.empty_output = [[self.SEQ_START_ID] * self.max_tip_len]
        self.vocabulary_size = vocabulary_size

    def forward(self, input_user, input_item):
        """Return ``(regression_result, review_softmax, tips_log_probs)``."""
        # Call the sub-modules (not .forward) so registered hooks still run.
        regression_result, review_softmax, context = self.encoder(input_user, input_item)

        # Allocate the start-token matrix with the same tensor type and on the
        # same device as the index input, instead of going through the global
        # `device` alias (which picks CUDA whenever a GPU exists, regardless
        # of where the inputs live) and a batch-sized Python list.
        batch_size = input_user.size(0)
        start_tokens = input_user.data.new(batch_size, self.max_tip_len).fill_(self.SEQ_START_ID)

        output, _ = self.decoder(Variable(start_tokens), context)
        return regression_result, review_softmax, output

    def voc_size(self):
        """Return the vocabulary size the model was built with."""
        return self.vocabulary_size

In [8]:
def review_loss(c_hat, c):
    """Negative log-likelihood of the review words.

    ``c_hat`` holds predicted log-probabilities and ``c`` the per-word
    weights/counts of the same shape. Returns ``-sum(c * c_hat)``, a
    non-negative scalar for non-negative ``c``, summed (not averaged) over
    all elements.

    The leading minus sign matters: ``c_hat`` is <= 0, so without it an
    optimizer minimising the loss would push the probability of the observed
    words towards zero.

    Raises AssertionError when the two sizes differ.
    """
    assert c_hat.size() == c.size(), '{} != {}'.format(c_hat.size(), c.size())
    return -torch.mul(c_hat, c.float()).sum()

In [9]:
class Trainer(object):
    """Optimise a model with Adadelta on the sum of rating, review and tips losses."""

    def __init__(self, model):
        self.model = model

        # Build the loss modules once instead of on every batch.
        rating_criterion = nn.MSELoss()
        tips_criterion = nn.NLLLoss()
        # Arguments: rating target / prediction, review target / prediction,
        # tips target / prediction.
        self.loss_criterion = (
            lambda r, r_hat, c, c_hat, s, s_hat:
                rating_criterion(r_hat, r) + review_loss(c_hat, c) + tips_criterion(s_hat, s)
        )
        self.optimizer = optim.Adadelta(model.parameters(),
                                        weight_decay=0.0001) # L2 regularisation is included here

    def train(self, train_iter, n_epochs=10): # TODO change n_epochs to 1000
        """Run ``n_epochs`` passes over ``train_iter`` and return the per-batch losses.

        Each batch must expose ``user``, ``item``, ``rating``, ``text`` and
        ``tips`` attributes. The returned list has one numpy value per batch.
        """
        losses = []
        # Use the trainer's own model rather than whatever the notebook-level
        # name `model` happens to be bound to.
        vocabulary_size = self.model.voc_size()

        for epoch_i in range(n_epochs):
            for batch in tqdm(train_iter, desc="epoch %d / %d" % (epoch_i, n_epochs)):
                # TODO extract info from the batch
                users_batch = batch.user
                items_batch = batch.item
                ratings_batch = batch.rating
                reviews_batch = batch.text
                # Positional dims: the keyword names dim1/dim2 are rejected by
                # current PyTorch.
                tips_batch = torch.transpose(batch.tips, 0, 1)

                self.optimizer.zero_grad()
                regression_result, review_softmax, tips_output = self.model(users_batch, items_batch)

                # The encoder emits one rating per example as (batch, 1).
                # Reshape it to the target's shape so MSELoss compares
                # element-wise instead of broadcasting.
                # NOTE(review): assumes batch.rating holds one value per example.
                rating_prediction = regression_result.view_as(ratings_batch)

                loss = self.loss_criterion(ratings_batch, rating_prediction,
                                           reviews_batch, review_softmax,
                                           tips_batch.contiguous().view(-1),
                                           tips_output.contiguous().view(-1, vocabulary_size))
                losses.append(loss.data.cpu().numpy())

                loss.backward()
                self.optimizer.step()
                gc.collect()
                # TODO print statistics from training / validation
        return losses

In [None]:
# Load the full Movies & TV review dataset. amazon_dataset_iters (from the
# project's data_reader module) returns five values, unpacked here -- judging
# by the names -- as two vocabularies and the train/validation/test iterators.
text_vocab, tips_vocab, train_iter, val_iter, test_iter = (
    amazon_dataset_iters('./data/reviews_Movies_and_TV_5/')
)

Loading datasets...
datasets loaded
item vocab built
user vocab built
text vocab built
tips vocab built


In [13]:
# Number of entries in the text vocabulary; used below as the model's vocabulary_size.
len(text_vocab.itos)

117340

In [14]:
# Build the model and its trainer for the full dataset.
# NOTE(review): items_count / users_count are hard-coded; confirm they match
# the item and user vocabularies built by amazon_dataset_iters.
# NOTE(review): vocabulary_size comes from the *text* vocabulary while the
# decoder is trained against batch.tips -- confirm both share one id space.
model = Model(vocabulary_size=len(text_vocab.itos),
              items_count=50052,
              users_count=123960, context_size=50, hidden_size=50)
# Only move to the GPU when one is actually available, so the notebook also
# runs on CPU-only machines.
if torch.cuda.is_available():
    model = model.cuda()
trainer = Trainer(model)

In [None]:
# Train on the full dataset for 20 epochs and keep the per-batch losses.
# The recorded progress bar below shows roughly 22 s per batch.
# test_iter.train = True
history = trainer.train(train_iter, n_epochs=20)

epoch 0 / 20:   0%|          | 42/42439 [15:36<262:33:06, 22.29s/it]

In [None]:
# Plot the per-batch training loss returned by trainer.train().
# A symmetric-log axis shows the curve whatever its sign; a plain log axis on
# the negated values silently drops every batch whose loss is positive.
loss_curve = np.asarray(history, dtype=float).ravel()
plt.plot(loss_curve)
plt.yscale('symlog')
plt.xlabel('batch')
plt.ylabel('training loss');

## Smoke test on the sample dataset

In [13]:
# Load the small sample dataset with the same helper used for the full data;
# the five returned values are bound to `small_`-prefixed names.
small_text_vocab, small_tips_vocab, small_train_iter, small_val_iter, small_test_iter = (
    amazon_dataset_iters('./data/sample_dataset/')
)

Loading datasets...
datasets loaded
item vocab built
user vocab built
text vocab built
tips vocab built


In [15]:
# Build a fresh model and trainer sized for the sample vocabulary.
# This rebinds the notebook-level names `model` and `trainer`.
# NOTE(review): items_count / users_count are still the hard-coded full-size
# values; confirm they cover the ids produced for the sample dataset.
model = Model(vocabulary_size=len(small_text_vocab.itos),
              items_count=50052,
              users_count=123960, context_size=50, hidden_size=50)
# Only move to the GPU when one is actually available.
if torch.cuda.is_available():
    model = model.cuda()
trainer = Trainer(model)

In [16]:
# Train on the sample dataset. The recorded run below was interrupted
# (KeyboardInterrupt) during epoch 10, so train() never returned and
# `history` was not reassigned by this cell in that run.
history = trainer.train(small_train_iter, n_epochs=1000)

epoch 0 / 1000: 100%|██████████| 3/3 [00:00<00:00,  7.47it/s]
epoch 1 / 1000: 100%|██████████| 3/3 [00:00<00:00, 11.93it/s]
epoch 2 / 1000: 100%|██████████| 3/3 [00:00<00:00,  8.91it/s]
epoch 3 / 1000: 100%|██████████| 3/3 [00:00<00:00, 11.49it/s]
epoch 4 / 1000: 100%|██████████| 3/3 [00:00<00:00, 12.22it/s]
epoch 5 / 1000: 100%|██████████| 3/3 [00:00<00:00, 11.90it/s]
epoch 6 / 1000: 100%|██████████| 3/3 [00:00<00:00, 12.01it/s]
epoch 7 / 1000: 100%|██████████| 3/3 [00:00<00:00, 10.07it/s]
epoch 8 / 1000: 100%|██████████| 3/3 [00:00<00:00, 12.17it/s]
epoch 9 / 1000: 100%|██████████| 3/3 [00:00<00:00, 11.61it/s]
epoch 10 / 1000:   0%|          | 0/3 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [13]:
# Take the first batch from the sample *training* iterator.
# NOTE(review): the name says "val" but the source is small_train_iter.
first_val_batch = next(iter(small_train_iter))

In [14]:
# Display the user-id tensor of the sampled batch.
first_val_batch.user

Variable containing:
 64
 63
 67
 69
 28
 12
 81
  5
  4
 21
  2
 14
 57
 46
 47
 65
 77
 35
 43
 22
 71
 24
 55
 26
  6
 27
 34
  1
 25
 68
 48
  8
[torch.cuda.LongTensor of size 32 (GPU 0)]

In [20]:
# Run the model on the sampled batch. `out` is the 3-tuple returned by
# Model.forward: (regression_result, review_softmax, tips output).
# Calling the module itself (rather than .forward) keeps registered hooks active.
out = model(first_val_batch.user, first_val_batch.item)

In [26]:
# Display the batch_size attribute of the sampled batch.
# NOTE(review): the recorded value (81) disagrees with the 32-element user
# tensor shown earlier -- presumably the outputs come from different runs.
first_val_batch.batch_size

81

In [27]:
# Third element of the model output: the decoder's tips output.
batch_predict = out[2]

In [31]:
# Serialise the decoder output for the sampled batch to a file in the working directory.
torch.save(batch_predict, 'batch_predict_sample.trch')

In [32]:
import pickle

In [35]:
# Pickle both lookup tables of the sample text vocabulary (itos and stoi),
# one file each, in the working directory.
for target_path, lookup in (
    ('vocab_itos_sample.pkl', small_text_vocab.itos),
    ('vocab_stoi_sample.pkl', small_text_vocab.stoi),
):
    with open(target_path, 'wb') as f:
        pickle.dump(lookup, f)