In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.matrix_factor_utils import mask_datasets, loss_function, recall_at_k, precision_at_k
from models.matrix_factorization import MatrixFactorization
from utils.data_utils import build_user_item_interactions, get_positive_negative_ratings

In [6]:
# Fix the RNG seeds before any stochastic step (data split, parameter
# initialisation) so that Restart Kernel -> Run All reproduces the same numbers.
# NOTE(review): helpers that draw from Python's built-in `random` module are not
# covered by these two calls — confirm which RNGs the project utilities use.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
from utils.preprocess import load_dataset

# Name of the dataset handed to load_dataset
dataset = 'goodbooks-10k'

# load_dataset returns six objects, unpacked here in the order it yields them.
(
    users,
    items,
    train_ratings,
    test_ratings,
    items_features_tensor,
    user_features_tensor,
) = load_dataset(dataset)

In [8]:
# Build a (rating matrix, mask matrix) pair for each of the train / validation /
# test splits. `device` and val_ratio=0.1 are passed straight to mask_datasets.
# NOTE(review): presumably val_ratio is the share of training ratings held out
# for validation — confirm against utils.matrix_factor_utils.
(
    rating_matrix_train, mask_matrix_train,
    rating_matrix_val, mask_matrix_val,
    rating_matrix_test, mask_matrix_test,
) = mask_datasets(device, train_ratings, test_ratings, val_ratio=0.1)

In [9]:
# Rating cut-offs forwarded to get_positive_negative_ratings.
positive_threshold, negative_threshold = 5, 3

# Per-split interaction dictionaries (train first, then test).
train_user_item_dict, test_user_item_dict = (
    build_user_item_interactions(split_ratings)
    for split_ratings in (train_ratings, test_ratings)
)

# Positive / negative rating sets per split, derived from the dictionaries above.
train_user_ratings, test_user_ratings = (
    get_positive_negative_ratings(interactions, positive_threshold, negative_threshold)
    for interactions in (train_user_item_dict, test_user_item_dict)
)

In [10]:
# Model size: number of latent factors
latent_dim = 64

# Regularization parameters: alpha for user factors, beta for item factors
alpha = 1e-2
beta = 1e-2

# Optimisation: learning rate and number of training epochs
learning_rate = 1e-3
num_epochs = 500

# Cut-off used when reporting Recall@k / Precision@k
k = 10

In [11]:
# Initialize the model
num_users, num_items = rating_matrix_train.shape

model = MatrixFactorization(num_users, num_items, latent_dim, rating_matrix_train).to(device)

# Early-stopping state: stop once validation RMSE has failed to improve for
# `patience` consecutive epochs.
# NOTE(review): the weights from the best-validation epoch are not saved or
# restored, so after an early stop `model` holds the parameters of the last
# epoch run. Consider checkpointing model.state_dict() on improvement.
patience = 10
early_stop = 0
best_val_rmse = float('inf')

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop (full-batch: one forward/backward pass over the whole matrix per epoch)
for epoch in range(1, num_epochs + 1):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    pred_ratings = model()

    # Compute loss
    loss = loss_function(pred_ratings, rating_matrix_train, mask_matrix_train, model, alpha, beta)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    # Compute RMSE on observed ratings only (each mask selects the entries of
    # its split). `pred_ratings` was produced before optimizer.step(), so these
    # figures describe the parameters as they were at the start of this epoch.
    with torch.no_grad():
        train_mse = ((mask_matrix_train * (rating_matrix_train - pred_ratings)) ** 2).sum() / mask_matrix_train.sum()
        train_rmse = torch.sqrt(train_mse)

        val_mse = ((mask_matrix_val * (rating_matrix_val - pred_ratings)) ** 2).sum() / mask_matrix_val.sum()
        val_rmse = torch.sqrt(val_mse)

        # Test RMSE is only reported; it is never used for early stopping.
        test_mse = ((mask_matrix_test * (rating_matrix_test - pred_ratings)) ** 2).sum() / mask_matrix_test.sum()
        test_rmse = torch.sqrt(test_mse)

    # Early stopping on validation RMSE
    if val_rmse < best_val_rmse:
        best_val_rmse = val_rmse
        early_stop = 0
    else:
        early_stop += 1
        if early_stop >= patience:
            print(f'Converged at epoch {epoch}')
            break

    # Print progress every 10 epochs. The ranking metrics are computed on the
    # training interactions and use the configured cut-off `k`, so the values
    # always match the `@{k}` label in the log line.
    if epoch % 10 == 0:
        recall = recall_at_k(train_user_ratings, model.embeddings, k=k)
        precision = precision_at_k(train_user_ratings, model.embeddings, k=k)
        print(f'Epoch [{epoch}/{num_epochs}], Train Loss: {loss.item():.4f}, Train RMSE: {train_rmse.item():.4f}, Val RMSE: {val_rmse.item():.4f}, Test RMSE: {test_rmse.item():.4f}')
        print(f'Recall@{k}: {recall:.4f}, Precision@{k}: {precision:.4f}')

Epoch [10/500], Train Loss: 88318.1800, Train RMSE: 0.9767, Val RMSE: 0.9923, Test RMSE: 0.9867
Recall@10: 0.1951, Precision@10: 0.4376
Epoch [20/500], Train Loss: 86591.4186, Train RMSE: 0.9671, Val RMSE: 0.9871, Test RMSE: 0.9816
Recall@10: 0.1995, Precision@10: 0.4475
Epoch [30/500], Train Loss: 84321.4773, Train RMSE: 0.9543, Val RMSE: 0.9815, Test RMSE: 0.9760
Recall@10: 0.2012, Precision@10: 0.4512
Epoch [40/500], Train Loss: 81353.8064, Train RMSE: 0.9373, Val RMSE: 0.9750, Test RMSE: 0.9697
Recall@10: 0.2038, Precision@10: 0.4572
Epoch [50/500], Train Loss: 77606.1902, Train RMSE: 0.9155, Val RMSE: 0.9674, Test RMSE: 0.9623
Recall@10: 0.2081, Precision@10: 0.4668
Epoch [60/500], Train Loss: 73091.4309, Train RMSE: 0.8884, Val RMSE: 0.9587, Test RMSE: 0.9538
Recall@10: 0.2122, Precision@10: 0.4759
Epoch [70/500], Train Loss: 67922.2316, Train RMSE: 0.8564, Val RMSE: 0.9492, Test RMSE: 0.9446
Recall@10: 0.2147, Precision@10: 0.4816
Epoch [80/500], Train Loss: 62296.1685, Train RM

In [12]:
# Ranking metrics on the test interactions. The cut-off comes from the
# configured `k` rather than a literal 10, so the computed values always match
# the `@{k}` label printed below.
recall = recall_at_k(test_user_ratings, model.embeddings, k=k)
precision = precision_at_k(test_user_ratings, model.embeddings, k=k)

print(f'Test Recall@{k}: {recall:.4f}, Test Precision@{k}: {precision:.4f}')

Test Recall@10: 0.8497, Test Precision@10: 0.7057
