In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.matrix_factor_utils import mask_datasets, loss_function, recall_at_k, precision_at_k
from models.matrix_factorization import MatrixFactorization
from utils.data_utils import build_user_item_interactions, get_positive_negative_ratings

In [2]:
# Fix the RNG seeds before anything random happens so that
# Restart Kernel -> Run All gives the same NumPy / PyTorch random draws
# (for example parameter initialisation) on every run.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)  # seeds the default generator (CPU and, when present, CUDA)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
from utils.preprocess import load_dataset

# Pick the dataset by name and unpack everything load_dataset returns for it:
# the user and item tables, the train/test rating splits and the two
# feature tensors.
dataset = 'goodbooks-10k'
(
    users,
    items,
    train_ratings,
    test_ratings,
    items_features_tensor,
    user_features_tensor,
) = load_dataset(dataset)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# mask_datasets returns one (rating matrix, mask matrix) pair per split,
# in the order train, validation, test.
# NOTE(review): val_ratio=0.1 presumably holds out 10% of the training
# ratings for validation — confirm in utils.matrix_factor_utils.
(
    rating_matrix_train, mask_matrix_train,
    rating_matrix_val, mask_matrix_val,
    rating_matrix_test, mask_matrix_test,
) = mask_datasets(device, train_ratings, test_ratings, val_ratio=0.1)

In [5]:
# Rating cut-offs handed to get_positive_negative_ratings.
# NOTE(review): presumably ratings at/above the positive threshold count as
# positives and ratings at/below the negative threshold as negatives —
# confirm the exact comparison in utils.data_utils.
positive_threshold, negative_threshold = 5, 3

# Interaction lookups built from the train and test rating splits.
train_user_item_dict = build_user_item_interactions(train_ratings)
test_user_item_dict = build_user_item_interactions(test_ratings)

# Apply the thresholds above to each split's interactions.
train_user_ratings = get_positive_negative_ratings(
    train_user_item_dict,
    positive_threshold,
    negative_threshold,
)
test_user_ratings = get_positive_negative_ratings(
    test_user_item_dict,
    positive_threshold,
    negative_threshold,
)

In [6]:
# --- Model size ---
latent_dim = 64          # number of latent factors

# --- Regularization strengths (passed to loss_function) ---
alpha = 1e-2             # user factors
beta = 1e-2              # item factors

# --- Optimization ---
num_epochs = 500         # upper bound on the number of training epochs
learning_rate = 1e-3     # step size handed to the Adam optimizer

# --- Evaluation ---
k = 10                   # cutoff used in the Recall@k / Precision@k reports

In [7]:
# Initialize the model
num_users, num_items = rating_matrix_train.shape

model = MatrixFactorization(num_users, num_items, latent_dim, rating_matrix_train).to(device)

# Early-stopping state: training halts once the validation RMSE has gone
# `patience` consecutive epochs without improving on its best value so far.
patience = 10
early_stop = 0
best_val_rmse = float('inf')

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one forward/backward pass and one optimizer step per epoch.
for epoch in range(1, num_epochs + 1):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    pred_ratings = model()

    # Compute loss
    loss = loss_function(pred_ratings, rating_matrix_train, mask_matrix_train, model, alpha, beta)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    # Compute RMSE on observed ratings (entries with a non-zero mask).
    # `pred_ratings` was produced before optimizer.step(), so these values
    # describe the parameters as they were at the start of this epoch.
    with torch.no_grad():
        train_mse = ((mask_matrix_train * (rating_matrix_train - pred_ratings)) ** 2).sum() / mask_matrix_train.sum()
        train_rmse = torch.sqrt(train_mse)

        val_mse = ((mask_matrix_val * (rating_matrix_val - pred_ratings)) ** 2).sum() / mask_matrix_val.sum()
        val_rmse = torch.sqrt(val_mse)

        # Test RMSE is only logged; it plays no part in the stopping rule below.
        test_mse = ((mask_matrix_test * (rating_matrix_test - pred_ratings)) ** 2).sum() / mask_matrix_test.sum()
        test_rmse = torch.sqrt(test_mse)

    # Early stopping on validation RMSE.
    # NOTE(review): the best-scoring weights are not saved or restored, so
    # after an early stop `model` holds the parameters from `patience` epochs
    # past the best validation RMSE. Consider snapshotting model.state_dict().
    if val_rmse < best_val_rmse:
        best_val_rmse = val_rmse
        early_stop = 0
    else:
        early_stop += 1
        if early_stop >= patience:
            print(f'Converged at epoch {epoch}')
            break

    # Print progress
    if epoch % 10 == 0:
        # Use the configured cutoff `k` so the computed metric always matches
        # the Recall@{k} / Precision@{k} label printed below.
        recall = recall_at_k(train_user_ratings, model.embeddings, k=k)
        precision = precision_at_k(train_user_ratings, model.embeddings, k=k)
        print(f'Epoch [{epoch}/{num_epochs}], Train Loss: {loss.item():.4f}, Train RMSE: {train_rmse.item():.4f}, Val RMSE: {val_rmse.item():.4f}, Test RMSE: {test_rmse.item():.4f}')
        print(f'Recall@{k}: {recall:.4f}, Precision@{k}: {precision:.4f}')

tensor([ 7,  5, 18, 20, 28, 16,  4,  9, 19,  1], device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True],
       device='cuda:0')
no. correct: 5
total positive: 29
tensor([10,  3,  2, 12,  8, 14, 16,  1, 17,  6], device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True],
       device='cuda:0')
no. correct: 5
total positive: 20
tensor([ 0,  9,  3,  2, 23,  5,  8, 25, 19, 17], device='cuda:0')
tensor([ True, False, False, False, False, False, False, False, False, False],
       device='cuda:0')
no. correct: 6
total positive: 1
tensor([ 4, 10, 14, 21, 20,  8, 18,  1, 25, 19], device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True],
       device='cuda:0')
no. correct: 3
total positive: 27
tensor([32, 35,  9, 13, 24, 30,  1, 18, 25, 11], device='cuda:0')
tensor([True, True, True, True, True, True, True, True, True, True],
       device='cuda:0')
no. correct: 2
total positive: 36
tensor([10,  1,  8, 14, 11

In [8]:
# Ranking metrics on the held-out test interactions. The cutoff comes from
# the configured `k` so the computed metric always matches the @{k} label
# in the printed line.
recall = recall_at_k(test_user_ratings, model.embeddings, k=k)
precision = precision_at_k(test_user_ratings, model.embeddings, k=k)

print(f'Test Recall@{k}: {recall:.4f}, Test Precision@{k}: {precision:.4f}')

tensor([ 1, 11,  6,  5,  4,  8,  3,  7,  9, 10], device='cuda:0')
tensor([ True, False, False, False, False, False, False, False, False, False],
       device='cuda:0')
no. correct: 8
total positive: 3
tensor([13,  2,  5, 12,  1,  7,  4,  6, 11,  9], device='cuda:0')
tensor([False,  True,  True, False,  True, False,  True, False, False, False],
       device='cuda:0')
no. correct: 7
total positive: 6
tensor([1, 7, 2, 5, 4, 0, 3, 6], device='cuda:0')
tensor([ True, False, False, False, False,  True, False, False],
       device='cuda:0')
no. correct: 8
total positive: 2
tensor([ 8,  5, 10,  3,  0,  1,  7,  4,  2,  6], device='cuda:0')
tensor([ True,  True, False,  True,  True,  True,  True,  True,  True,  True],
       device='cuda:0')
no. correct: 9
total positive: 9
tensor([12,  8, 10, 16, 19, 11,  2, 18, 15,  6], device='cuda:0')
tensor([False,  True, False, False, False, False,  True, False, False,  True],
       device='cuda:0')
no. correct: 3
total positive: 9
tensor([ 8,  0, 10, 

tensor([False,  True, False, False, False,  True,  True,  True,  True,  True],
       device='cuda:0')
no. correct: 6
total positive: 10
tensor([10,  0,  9,  4,  7,  8,  5, 12,  1, 13], device='cuda:0')
tensor([False,  True, False,  True, False, False, False, False,  True, False],
       device='cuda:0')
no. correct: 7
total positive: 5
tensor([9, 2, 0, 5, 3, 6, 1, 7, 8, 4], device='cuda:0')
tensor([False,  True,  True,  True,  True, False,  True, False, False,  True],
       device='cuda:0')
no. correct: 10
total positive: 6
tensor([ 6,  3,  0, 13, 14, 12,  9,  7,  1,  8], device='cuda:0')
tensor([ True,  True,  True, False, False, False, False,  True,  True,  True],
       device='cuda:0')
no. correct: 7
total positive: 9
tensor([ 5, 17,  1, 12,  0,  7,  4,  6, 13, 19], device='cuda:0')
tensor([ True, False,  True, False,  True, False,  True,  True, False, False],
       device='cuda:0')
no. correct: 6
total positive: 7
tensor([12, 13,  6, 11, 10,  8,  1,  2,  3, 14], device='cuda:0'