In [1]:
import numpy as np

import torch
from torch import optim

import random
from copy import deepcopy

from utils import get_data, ndcg, recall
from model_try import VAE

In [2]:
# Seed every random number generator used by the notebook (Python, NumPy, PyTorch).
seed = 1337
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Model size and optimisation settings.
hidden_dim = 600
latent_dim = 200
batch_size = 500
beta = None
gamma = 0.005
lr = 5e-4
n_epochs = 10          # outer training epochs
enc_epochs = 3         # encoder passes per outer epoch when alternating
dec_epochs = 1         # decoder / tecoder passes per outer epoch when alternating
not_alternating = False

# Use the first GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA instead of failing at
# the first `.to(device)` call.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# `get_data` returns the five splits in this fixed order.
data = get_data('dataset/Ori')
train_data, valid_in_data, valid_out_data, test_in_data, test_out_data = data

(116677, 20108)


# Try: find the largest len() of element 0 across all rows of `train_data`.
# NOTE(review): `.iloc` is a pandas-style accessor, whereas the rest of this
# notebook uses `train_data` through `.shape`, array indexing and `.toarray()`.
# Confirm what `get_data` actually returns before running this cell.
# The result is stored in `max_len` so the builtin `max` is not shadowed.
max_len = max(
    (len(train_data.iloc[i][0]) for i in range(len(train_data))),
    default=0,
)
print(max_len)

# TODO: this cell originally ended with an unfinished `new_train = ` assignment
# (a syntax error) followed by a verbatim copy of the scan above, which could
# not change the result. Both were dropped; define `new_train` here once its
# intended contents are known.

In [3]:
def generate(batch_size, device, data_in, data_out=None, shuffle=False, samples_perc_per_epoch=1):
    """Yield `Batch` objects that together cover one epoch of `data_in`.

    Args:
        batch_size: maximum number of indices per batch; the last batch may be shorter.
        device: forwarded unchanged to every `Batch`.
        data_in: object exposing `.shape[0]`; forwarded to every `Batch`.
        data_out: optional second data object, forwarded to every `Batch`.
        shuffle: when true, indices are drawn from a random permutation of all
            rows (via the global NumPy RNG); otherwise the leading rows are used
            in order.
        samples_perc_per_epoch: fraction in (0, 1] of the rows to visit.

    Yields:
        Batch: one batch per consecutive slice of the chosen index array.
    """
    assert 0 < samples_perc_per_epoch <= 1

    n_rows = data_in.shape[0]
    n_used = int(n_rows * samples_perc_per_epoch)

    if not shuffle:
        order = np.arange(n_used)
    else:
        # Permute all rows first, then keep only as many as this epoch needs.
        order = np.arange(n_rows)
        np.random.shuffle(order)
        order = order[:n_used]

    for start in range(0, n_used, batch_size):
        # `order` has exactly `n_used` entries, so the slice is clipped at the end.
        yield Batch(device, order[start:start + batch_size], data_in, data_out)

In [4]:
class Batch:
    """Bundle of an index array with the data objects it selects from.

    Holds the target device, the index array, the input data and an optional
    second ("out") data object, and exposes the selection either as-is or as
    tensors moved to the device.
    """

    def __init__(self, device, idx, data_in, data_out=None):
        self._device, self._idx = device, idx
        self._data_in, self._data_out = data_in, data_out

    def get_idx(self):
        """Return the index array exactly as it was passed in."""
        return self._idx

    def get_idx_to_dev(self):
        """Return the indices as a `LongTensor` on the batch's device."""
        index_tensor = torch.LongTensor(self.get_idx())
        return index_tensor.to(self._device)

    def get_ratings(self, is_out=False):
        """Index the input data (or the "out" data when `is_out`) with the batch indices."""
        if is_out:
            source = self._data_out
        else:
            source = self._data_in
        return source[self._idx]

    def get_ratings_to_dev(self, is_out=False):
        """Return the selected ratings, densified via `.toarray()`, as a tensor on the device."""
        dense = self.get_ratings(is_out).toarray()
        return torch.Tensor(dense).to(self._device)

In [5]:
def evaluate(model, data_in, data_out, metrics, samples_perc_per_epoch=1, batch_size=500):
    """Score `model` on held-out ratings and return one mean value per metric.

    The model is switched to eval mode and left there. Predictions are made
    batch by batch from `data_in`; when `data_in` and `data_out` are different
    objects, items already present in the input are masked out with `-inf`
    before scoring so they cannot be recommended again.

    Args:
        model: callable as `model(ratings, calculate_loss=False)` returning a tensor.
        data_in: ratings fed to the model.
        data_out: ratings the predictions are scored against.
        metrics: list of dicts with keys `'metric'` (callable taking
            `(pred, true, k=...)`) and `'k'`. The list is deep-copied, so the
            caller's dicts are not modified.
        samples_perc_per_epoch: fraction in (0, 1] of the rows to evaluate.
        batch_size: rows per forward pass.

    Returns:
        list: the mean score for each entry of `metrics`, in the same order.
    """
    metrics = deepcopy(metrics)
    model.eval()

    for m in metrics:
        m['score'] = []

    # Inference only: without no_grad() each forward pass records an autograd
    # graph that is never used, which wastes (GPU) memory during evaluation.
    with torch.no_grad():
        for batch in generate(batch_size=batch_size,
                              device=device,
                              data_in=data_in,
                              data_out=data_out,
                              samples_perc_per_epoch=samples_perc_per_epoch
                             ):

            ratings_in = batch.get_ratings_to_dev()
            ratings_out = batch.get_ratings(is_out=True)

            ratings_pred = model(ratings_in, calculate_loss=False).cpu().detach().numpy()

            # Same object for input and target means "score on the inputs
            # themselves", so the seen items must stay visible in that case.
            if not (data_in is data_out):
                ratings_pred[batch.get_ratings().nonzero()] = -np.inf

            for m in metrics:
                m['score'].append(m['metric'](ratings_pred, ratings_out, k=m['k']))

    # Each metric call returned an array per batch; flatten them and average.
    for m in metrics:
        m['score'] = np.concatenate(m['score']).mean()

    return [x['score'] for x in metrics]

In [6]:
def run(model, opts, train_data, batch_size, n_epochs, beta, gamma, dropout_rate):
    """Train `model` for `n_epochs` passes over shuffled `train_data`.

    For every batch all optimizers in `opts` are zeroed, the model is called to
    obtain the loss, the loss is back-propagated, and every optimizer takes one
    step. `beta`, `gamma` and `dropout_rate` are forwarded to the model call
    unchanged. The model is put into train mode once, before the first epoch.
    """
    model.train()

    for _ in range(n_epochs):
        epoch_batches = generate(
            batch_size=batch_size,
            device=device,
            data_in=train_data,
            shuffle=True,
        )
        for batch in epoch_batches:
            ratings = batch.get_ratings_to_dev()

            # Clear stale gradients on every optimizer taking part in this phase.
            for opt in opts:
                opt.zero_grad()

            loss = model(ratings, beta=beta, gamma=gamma, dropout_rate=dropout_rate)
            loss.backward()

            for opt in opts:
                opt.step()

In [7]:
# Constructor arguments shared by the model being trained and the copy that
# stores the best weights seen so far.
model_kwargs = dict(
    hidden_dim=hidden_dim,
    latent_dim=latent_dim,
    input_dim=train_data.shape[1],
)
metrics = [dict(metric=ndcg, k=100)]

# Bookkeeping for the training loop: best validation score and per-epoch history.
best_ndcg = -np.inf
train_scores = []
valid_scores = []

model = VAE(**model_kwargs).to(device)
model_best = VAE(**model_kwargs).to(device)

# Arguments common to every `run(...)` call.
learning_kwargs = dict(
    model=model,
    train_data=train_data,
    batch_size=batch_size,
    gamma=gamma,
)

In [8]:
# Sanity check: show the type and the initial value of the best-score tracker.
print(type(best_ndcg), best_ndcg, sep='\n')

<class 'float'>
-inf


In [9]:
# One optimizer per sub-module so each part can be stepped on its own.
# The parameters are collected into lists rather than sets: torch.optim asks
# for collections whose order is the same on every run, which a set of tensors
# does not guarantee.
decoder_params = list(model.decoder.parameters())
tecoder_params = list(model.tecoder.parameters())
encoder_params = list(model.encoder.parameters())

optimizer_encoder = optim.Adam(encoder_params, lr=lr)
optimizer_decoder = optim.Adam(decoder_params, lr=lr)
optimizer_tecoder = optim.Adam(tecoder_params, lr=lr)

In [10]:
# Show the model's repr (its sub-modules and layers) as the cell output.
model

VAE(
  (encoder): Encoder(
    (fc1): Linear(in_features=20108, out_features=600, bias=True)
    (ln1): LayerNorm((600,), eps=0.1, elementwise_affine=True)
    (fc2): Linear(in_features=600, out_features=600, bias=True)
    (ln2): LayerNorm((600,), eps=0.1, elementwise_affine=True)
    (fc3): Linear(in_features=600, out_features=600, bias=True)
    (ln3): LayerNorm((600,), eps=0.1, elementwise_affine=True)
    (fc4): Linear(in_features=600, out_features=600, bias=True)
    (ln4): LayerNorm((600,), eps=0.1, elementwise_affine=True)
    (fc5): Linear(in_features=600, out_features=600, bias=True)
    (ln5): LayerNorm((600,), eps=0.1, elementwise_affine=True)
    (fc_mu): Linear(in_features=600, out_features=200, bias=True)
    (fc_logvar): Linear(in_features=600, out_features=200, bias=True)
  )
  (prior): CompositePrior(
    (encoder_old): Encoder(
      (fc1): Linear(in_features=20108, out_features=600, bias=True)
      (ln1): LayerNorm((600,), eps=0.1, elementwise_affine=True)
      (f

In [11]:
# Outer training loop: train, score on train/validation, keep the best weights.
for epoch in range(n_epochs):

    if not_alternating:
        # Joint update of encoder and decoder in a single pass.
        # `run` has no default for `beta` and `learning_kwargs` does not carry
        # one, so the configured value must be passed explicitly here;
        # omitting it raises a TypeError.
        run(opts=[optimizer_encoder, optimizer_decoder], n_epochs=1, dropout_rate=0.5, beta=beta, **learning_kwargs)
    else:
        # Alternating schedule: encoder first (with dropout), refresh the prior,
        # then the decoder and the tecoder, each without dropout.
        run(opts=[optimizer_encoder], n_epochs=enc_epochs, dropout_rate=0.5, beta=False, **learning_kwargs)
        model.update_prior()
        run(opts=[optimizer_decoder], n_epochs=dec_epochs, dropout_rate=0, beta=False, **learning_kwargs)
        run(opts=[optimizer_tecoder], n_epochs=dec_epochs, dropout_rate=0, beta=True, **learning_kwargs)

    # Training score is estimated on 1% of the training rows to keep it cheap.
    train_scores.append(
        evaluate(model, train_data, train_data, metrics, 0.01)[0]
    )
    valid_scores.append(
        evaluate(model, valid_in_data, valid_out_data, metrics, 1)[0]
    )

    # Snapshot the weights whenever the validation score improves.
    if valid_scores[-1] > best_ndcg:
        best_ndcg = valid_scores[-1]
        model_best.load_state_dict(deepcopy(model.state_dict()))


    print(f'epoch {epoch} | valid ndcg@100: {valid_scores[-1]:.4f} | ' +
          f'best valid: {best_ndcg:.4f} | train ndcg@100: {train_scores[-1]:.4f}')

RuntimeError: CUDA out of memory. Tried to allocate 1.52 GiB (GPU 0; 6.00 GiB total capacity; 3.88 GiB already allocated; 556.39 MiB free; 3.91 GiB reserved in total by PyTorch)

In [None]:
# Final report: score the best checkpoint on the test split with NDCG@100,
# Recall@20 and Recall@50.
test_metrics = [
    {'metric': fn, 'k': k}
    for fn, k in ((ndcg, 100), (recall, 20), (recall, 50))
]

final_scores = evaluate(
    model_best,
    test_in_data,
    test_out_data,
    test_metrics,
)

# One line per metric: "<name>@<k>:<TAB><score>".
for metric, score in zip(test_metrics, final_scores):
    print(f"{metric['metric'].__name__}@{metric['k']}:\t{score:.4f}")