In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
from utils.preprocess import load_dataset

# Load the dataset
dataset = 'movielens-1m'
users, items, train_ratings, test_ratings, items_features_tensor, user_features_tensor = load_dataset(dataset)

  from tqdm.autonotebook import tqdm, trange


In [4]:
num_users = users.nunique()
num_items = items.nunique()

In [7]:
from utils.graph_splitters import python_stratified_split

def mask_datasets(device, train_ratings, test_ratings, val_ratio=0.1):
    col_user = 'userid'
    col_item = 'itemid'
    col_timestamp = 'timestamp'
    train_ratings, val_ratings = python_stratified_split(train_ratings, ratio=1-val_ratio, col_user=col_user, col_item=col_item, col_timestamp=col_timestamp)
    
    num_users = train_ratings[col_user].max() + 1
    num_items = train_ratings[col_item].max() + 1
    
    # Create rating and mask matrices for train, val and test
    rating_matrix_train = np.zeros(shape=(num_users, num_items))
    mask_matrix_train = np.zeros(shape=(num_users, num_items))
    rating_matrix_val = np.zeros(shape=(num_users, num_items))
    mask_matrix_val = np.zeros(shape=(num_users, num_items))
    rating_matrix_test = np.zeros(shape=(num_users, num_items))
    mask_matrix_test = np.zeros(shape=(num_users, num_items))
    
    for _, r in train_ratings.iterrows():
        rating_matrix_train[int(int(r[0])), int(int(r[1]))] = int(r[2])
        mask_matrix_train[int(r[0]), int(r[1])] = 1
    
    for _, r in val_ratings.iterrows():
        rating_matrix_val[int(r[0]), int(r[1])] = int(r[2])
        mask_matrix_val[int(r[0]), int(r[1])] = 1
    
    for _, r in test_ratings.iterrows():
        rating_matrix_test[int(r[0]), int(r[1])] = int(r[2])
        mask_matrix_test[int(r[0]), int(r[1])] = 1
    
    rating_matrix_train = torch.tensor(rating_matrix_train).to(device)
    mask_matrix_train = torch.tensor(mask_matrix_train).to(device)
    rating_matrix_val = torch.tensor(rating_matrix_val).to(device)
    mask_matrix_val = torch.tensor(mask_matrix_val).to(device)
    rating_matrix_test = torch.tensor(rating_matrix_test).to(device)
    mask_matrix_test = torch.tensor(mask_matrix_test).to(device)
    
    return rating_matrix_train, mask_matrix_train, rating_matrix_val, mask_matrix_val, rating_matrix_test, mask_matrix_test

In [8]:
rating_matrix_train, mask_matrix_train, \
rating_matrix_val, mask_matrix_val, \
rating_matrix_test, mask_matrix_test = mask_datasets(device, train_ratings, test_ratings, val_ratio=0.1)

  rating_matrix_train[int(int(r[0])), int(int(r[1]))] = int(r[2])
  mask_matrix_train[int(r[0]), int(r[1])] = 1
  rating_matrix_val[int(r[0]), int(r[1])] = int(r[2])
  mask_matrix_val[int(r[0]), int(r[1])] = 1
  rating_matrix_test[int(r[0]), int(r[1])] = int(r[2])
  mask_matrix_test[int(r[0]), int(r[1])] = 1


In [13]:
import torch.nn as nn

class MatrixFactorization(nn.Module):
    def __init__(self, num_users, num_items, latent_dim, rating_matrix):
        super(MatrixFactorization, self).__init__()
        # User and item latent factors
        self.user_factors = nn.Parameter(torch.randn(num_users, latent_dim, device=device) * 0.01)
        self.item_factors = nn.Parameter(torch.randn(num_items, latent_dim, device=device) * 0.01)
        
        # User and item biases
        self.user_bias = nn.Parameter(torch.zeros(num_users, 1, device=device))
        self.item_bias = nn.Parameter(torch.zeros(1, num_items, device=device))
        
        # Global average rating
        self.global_bias = nn.Parameter(torch.tensor([rating_matrix[rating_matrix != 0].mean()], device=device))
    
    def forward(self):
        # Compute the predicted rating matrix
        interaction = torch.matmul(self.user_factors, self.item_factors.t())
        pred_ratings = interaction + self.user_bias + self.item_bias + self.global_bias
        return pred_ratings


In [14]:
def loss_function(pred_ratings, true_ratings, mask, model, alpha, beta):
    # Compute the squared error only on observed entries
    diff = mask * (true_ratings - pred_ratings)
    mse_loss = (diff ** 2).sum()
    
    # Regularization terms
    reg_loss = alpha * torch.norm(model.user_factors, p=2) ** 2 + \
               beta * torch.norm(model.item_factors, p=2) ** 2
    
    # Total loss
    total_loss = mse_loss + reg_loss
    return total_loss

In [15]:
# Hyperparameters
latent_dim = 64      # Number of latent factors
alpha = 0.01         # Regularization parameter for user factors
beta = 0.01          # Regularization parameter for item factors
num_epochs = 500     # Number of training epochs
learning_rate = 0.001

# Initialize the model
num_users, num_items = rating_matrix_train.shape
model = MatrixFactorization(num_users, num_items, latent_dim, rating_matrix_train).to(device)
early_stop = 0
best_val_rmse = float('inf')

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(1, num_epochs + 1):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    pred_ratings = model()
    
    # Compute loss
    loss = loss_function(pred_ratings, rating_matrix_train, mask_matrix_train, model, alpha, beta)
    
    # Backward pass and optimization
    loss.backward()
    optimizer.step()
    
    # Compute RMSE on observed ratings
    with torch.no_grad():
        train_mse = ((mask_matrix_train * (rating_matrix_train - pred_ratings)) ** 2).sum() / mask_matrix_train.sum()
        train_rmse = torch.sqrt(train_mse)

        val_mse = ((mask_matrix_val * (rating_matrix_val - pred_ratings)) ** 2).sum() / mask_matrix_val.sum()
        val_rmse = torch.sqrt(val_mse)

        test_mse = ((mask_matrix_test * (rating_matrix_test - pred_ratings)) ** 2).sum() / mask_matrix_test.sum()
        test_rmse = torch.sqrt(test_mse)

    # Early stopping
    if val_rmse < best_val_rmse:
        best_val_rmse = val_rmse
        early_stop = 0
    else:
        early_stop += 1
        if early_stop == 10:
            print(f'Converged at epoch {epoch}')
            break
    
    # Print progress
    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/{num_epochs}], Train Loss: {loss.item():.4f}, Train RMSE: {train_rmse.item():.4f}, Val RMSE: {val_rmse.item():.4f}, Test RMSE: {test_rmse.item():.4f}')

Epoch [10/500], Train Loss: 861276.9565, Train RMSE: 1.1091, Val RMSE: 1.1160, Test RMSE: 1.1110
Epoch [20/500], Train Loss: 845622.4658, Train RMSE: 1.0990, Val RMSE: 1.1071, Test RMSE: 1.1021
Epoch [30/500], Train Loss: 823167.8880, Train RMSE: 1.0843, Val RMSE: 1.0939, Test RMSE: 1.0889
Epoch [40/500], Train Loss: 791775.8227, Train RMSE: 1.0634, Val RMSE: 1.0746, Test RMSE: 1.0697
Epoch [50/500], Train Loss: 752025.1241, Train RMSE: 1.0364, Val RMSE: 1.0493, Test RMSE: 1.0448
Epoch [60/500], Train Loss: 707583.2912, Train RMSE: 1.0053, Val RMSE: 1.0204, Test RMSE: 1.0163
Epoch [70/500], Train Loss: 663756.4469, Train RMSE: 0.9736, Val RMSE: 0.9914, Test RMSE: 0.9878
Epoch [80/500], Train Loss: 624801.4590, Train RMSE: 0.9446, Val RMSE: 0.9657, Test RMSE: 0.9626
Epoch [90/500], Train Loss: 592326.9705, Train RMSE: 0.9197, Val RMSE: 0.9447, Test RMSE: 0.9421
Epoch [100/500], Train Loss: 565877.0459, Train RMSE: 0.8990, Val RMSE: 0.9286, Test RMSE: 0.9262
Epoch [110/500], Train Loss: 