In [1]:
import random
import time
from pathlib import Path

import numpy as np
import torch

from Setting import Setting
from Dataset import Dataset
from Environment import Environment

import utils.models as models
import utils.metrics as metrics

# Display whether PyTorch can see a CUDA device (the recorded run shows False).
torch.cuda.is_available()

False

# General settings

In [2]:
# Seed the PyTorch, Python and NumPy random generators with the same value
# so that repeated runs start from the same random state.
random_seed = 1
torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

# Build the run configuration; the windowed dataset option is switched off
# before the environment is constructed.
args = Setting()
args.windowed_dataset = False
device = args.device
dataset = Dataset(args.dataset_path, args.batch_size, args.fast_testing)
# The item count is handed to Environment as the padding value
# (presumably an index one past the last real item - TODO confirm).
padding_number = dataset.num_items
env = Environment(args, dataset, padding_number).to(device)

# Import dataset

In [3]:
# Training sequence inputs exposed by the environment. A later cell reads
# column 0 of each row as the user id. Recorded shape: (70829, 20).
train_seq_base_data = env.train_seq_base_data
train_seq_base_data.shape

torch.Size([70829, 20])

In [4]:
# Test sequence inputs exposed by the environment. evaluate() reads column 0
# of each row as the user id. Recorded shape: (14913, 20).
test_seq_base_data = env.test_seq_base_data
test_seq_base_data.shape

torch.Size([14913, 20])

In [5]:
# Training sequence targets exposed by the environment. Not referenced again
# in the cells shown in this notebook. Recorded shape: (70829, 1).
train_seq_target_data = env.train_seq_target_data
train_seq_target_data.shape

torch.Size([70829, 1])

In [6]:
# Per-row training profiles exposed by the environment. They are used to fill
# train_profiles and as the first input to env.mask_data.
# Recorded shape: (70829, 18).
train_base_data = env.train_base_data
train_base_data.shape

torch.Size([70829, 18])

In [7]:
# Per-row test profiles exposed by the environment. evaluate() feeds them to
# model.predict one row at a time. Recorded shape: (14913, 18).
test_base_data = env.test_base_data
test_base_data.shape

torch.Size([14913, 18])

In [8]:
# Alias only (no copy is made). NOTE(review): this name is rebound by the
# env.mask_data(...) call on the training data in a later cell.
true_train_data = train_base_data

In [9]:
# Alias only (no copy is made). NOTE(review): this name is rebound by the
# env.mask_data(...) call on the test data in a later cell.
true_test_data = test_base_data

In [10]:
# Training targets exposed by the environment; passed to env.mask_data together
# with train_base_data. Recorded shape: (70829, 18).
train_target_data = env.train_target_data
train_target_data.shape

torch.Size([70829, 18])

In [11]:
# Test targets exposed by the environment; passed to env.mask_data and to
# metrics.rmse_with_mask in evaluate(). Recorded shape: (14913, 18).
test_target_data = env.test_target_data
test_target_data.shape

torch.Size([14913, 18])

In [12]:
# Apply the environment's masking to the training base/target pair and keep
# both returned tensors; their shapes are printed one per line.
true_train_data, masked_train_ref = env.mask_data(train_base_data, train_target_data)
print(true_train_data.shape, masked_train_ref.shape, sep='\n')

torch.Size([70829, 18])
torch.Size([70829, 18])


In [13]:
# Apply the environment's masking to the test base/target pair and keep
# both returned tensors; their shapes are printed one per line.
true_test_data, masked_test_ref = env.mask_data(test_base_data, test_target_data)
print(true_test_data.shape, masked_test_ref.shape, sep='\n')

torch.Size([14913, 18])
torch.Size([14913, 18])


# EASE

In [14]:
# NOTE(review): learning_rate, weight_decay and epochs are not referenced by
# any of the cells shown in this notebook (the training loop runs range(1)).
learning_rate = 1e-3
weight_decay = 0.00
epochs = 20
# Dimensions of the per-user profile table built below.
n_users = dataset.n_users
n_categories = dataset.n_categories
# Passed to models.EASE as its only constructor argument
# (presumably the regularisation strength - TODO confirm).
lambda_p = 3000

## Functions

In [15]:
# We create a tensor to store the most recent profiles of each user.
# One row per user, one column per category, zero-initialised on the target
# device; rows are filled with each user's latest profile in the next cell.
train_profiles = torch.zeros((n_users, n_categories), device=device)
train_profiles.shape

torch.Size([943, 18])

In [16]:
# Column 0 of every training sequence row is read as the user id. Rows are
# visited in order, so a later row for the same user overwrites an earlier
# one and each user ends up with the profile from their last training row.
train_user_ids = train_seq_base_data[:, 0].tolist()
for row_idx, raw_user_id in enumerate(train_user_ids):
    train_profiles[int(raw_user_id)] = train_base_data[row_idx]

In [17]:
def train() -> None:
    """Fit the global ``model`` on the global ``train_profiles`` tensor.

    Takes no arguments and returns nothing; the only effect is the call to
    ``model.fit``.
    """
    # We use the previously computed profiles as train data.
    model.fit(train_profiles)

In [18]:
def evaluate():
    """Evaluate the global ``model`` on the test rows, refitting after each row.

    For every test row, in order: the row's profile is passed to
    ``model.predict`` together with the user id read from column 0 of
    ``test_seq_base_data``; the row's profile then replaces that user's entry
    in the profile table and the model is refitted on the updated table.

    The profile table is a private copy of ``train_profiles``. The global
    tensor is left untouched, so calling ``train()`` or ``evaluate()`` again
    starts from the training profiles only instead of from a table that
    already contains test rows.

    Returns:
        The three values returned by ``metrics.rmse_with_mask``
        (total, explicit and implicit RMSE).
    """
    # Private copy so the evaluation never writes test rows into the
    # module-level training profiles.
    profiles = train_profiles.clone()
    recon_data = []

    for i in range(len(test_seq_base_data)):
        # We find the user of the current item.
        user_id = int(test_seq_base_data[i][0].item())
        user_data = torch.unsqueeze(test_base_data[i], dim=0).to(device)

        # We reconstruct the user profile.
        recon_data.append(model.predict(user_data, user_id=user_id))

        # This row's profile becomes the user's latest known profile.
        profiles[user_id] = test_base_data[i]

        # We compute again the weight matrix B.
        model.fit(profiles)

    recon_data = torch.vstack(recon_data).to(device)

    # We compute the RMSE values.
    rmse_total, rmse_explicit, rmse_implicit = metrics.rmse_with_mask(
        test_base_data, true_test_data, recon_data, test_target_data, masked_test_ref)

    return rmse_total, rmse_explicit, rmse_implicit

In [19]:
model_name = 'ease'

#######################################################################
# Build the model
#######################################################################
model = models.EASE(lambda_p)

#######################################################################
# Training
#######################################################################
rmse_total_list, rmse_explicit_list, rmse_implicit_list = [], [], []
train_times, test_times = [], []
# A single pass: the model is fitted once and then evaluated once.
for epoch in range(1):
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; the fit step here takes well under a millisecond.
    start_train_time = time.perf_counter()
    train()
    end_train_time = time.perf_counter() - start_train_time
    train_times.append(end_train_time)

    start_test_time = time.perf_counter()
    rmse_total, rmse_explicit, rmse_implicit = evaluate()
    end_test_time = time.perf_counter() - start_test_time
    test_times.append(end_test_time)
    rmse_total_list.append(rmse_total)
    rmse_explicit_list.append(rmse_explicit)
    rmse_implicit_list.append(rmse_implicit)

    print("| Epoch {:d} | Total RMSE: {:5.4f} ± {:5.4f} | "
          "Explicit RMSE: {:5.4f} ± {:5.4f} | "
          "Implicit RMSE: {:5.4f} ± {:5.4f} | ".
          format(epoch, torch.mean(rmse_total), torch.std(rmse_total),
                 torch.mean(rmse_explicit), torch.std(rmse_explicit),
                 torch.mean(rmse_implicit), torch.std(rmse_implicit)))
print("| Total time: {:5.4f} s. | Train time: {:5.4f} s. | "
      "Test time: {:5.4f} s. | ".
      format(np.sum(train_times) + np.sum(test_times),
             np.sum(train_times),
             np.sum(test_times)))

# Save the fitted model's state dict. The file name is derived from
# model_name (same path as before: ./models/model_ease_predict.pt) and the
# target directory is created first so a fresh checkout does not fail here.
model_path = Path('./models') / 'model_{}_predict.pt'.format(model_name)
model_path.parent.mkdir(parents=True, exist_ok=True)
with open(model_path, 'wb') as file:
    torch.save(model.state_dict(), file)

| Epoch 0 | Total RMSE: 0.8515 ± 0.2387 | Explicit RMSE: 0.8493 ± 0.2355 | Implicit RMSE: 1.6425 ± 1.0592 | 
| Total time: 8.7714 s. | Train time: 0.0006 s. | Test time: 8.7707 s. | 
