This notebook allows testing initialization functions similar to the uniform init. However, they all performed worse than the plain uniform init.
The function can be selected in the last cell by setting the parameter init_star to one of the names defined in the second cell.

In [None]:
import torch
from torch import nn, optim
import numpy as np
import pandas as pd
from torch.nn.init import xavier_uniform_
from sklearn.model_selection import train_test_split

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Fix the NumPy and PyTorch seeds so repeated runs draw the same random numbers.
np.random.seed(42)
torch.manual_seed(42)

In [None]:
def prepare_data(data_path):
    """
    Load the training ratings and build the index tensors used by the model.

    Reads ``train.csv`` from ``data_path`` and maps every distinct ``user_id``
    and ``book_id`` to a contiguous integer index (in order of first
    appearance), so the ids can be used as embedding row numbers.

    Args:
        data_path (str): Directory containing ``train.csv``. A trailing
            separator is optional.

    Returns:
        tuple: ``(train_data, val_data, user_assigned_idx, book_assigned_idx,
        ratings, book_idx, user_idx, n_users, n_books)`` where

        - ``train_data`` / ``val_data`` are a 99% / 1% split of the DataFrame
          (with added ``user_idx`` and ``book_idx`` columns),
        - ``user_assigned_idx`` / ``book_assigned_idx`` / ``ratings`` are
          tensors on ``device`` covering the WHOLE file (not just the train
          split),
        - ``book_idx`` / ``user_idx`` are the id -> index dictionaries,
        - ``n_users`` / ``n_books`` are the numbers of distinct ids.
    """
    import os

    # os.path.join tolerates a data_path with or without a trailing separator.
    df_train = pd.read_csv(os.path.join(data_path, 'train.csv'))
    book_ids = df_train['book_id'].unique()
    user_ids = df_train['user_id'].unique()

    n_books = len(book_ids)
    n_users = len(user_ids)

    book_idx = {ids: i for i, ids in enumerate(book_ids)}
    user_idx = {ids: i for i, ids in enumerate(user_ids)}

    # Map ids to indices once and reuse the resulting columns for the tensors.
    df_train['user_idx'] = df_train['user_id'].map(user_idx)
    df_train['book_idx'] = df_train['book_id'].map(book_idx)

    user_assigned_idx = torch.tensor(df_train['user_idx'].to_numpy(), dtype=torch.long, device=device)
    book_assigned_idx = torch.tensor(df_train['book_idx'].to_numpy(), dtype=torch.long, device=device)
    # Explicit float32 conversion works whatever dtype pandas inferred for the column.
    ratings = torch.tensor(df_train['rating'].to_numpy(), dtype=torch.float32, device=device)

    train_data, val_data = train_test_split(df_train, test_size=0.01, random_state=42)

    return (train_data, val_data, user_assigned_idx, book_assigned_idx, ratings, book_idx, user_idx, n_users, n_books)

class MatrixFactorization(nn.Module):
    """
    Matrix-factorization rating model.

    Each user and each book gets an embedding vector; the predicted rating is
    the dot product of the two vectors.

    Args:
        n_users (int): Number of rows in the user embedding table ``P``.
        n_books (int): Number of rows in the book embedding table ``Q``.
        embedding_size (int): Length of every embedding vector.
        var (float): Scale of the initial values. It is used as the bound of
            the uniform range (not as a statistical variance) and is ignored
            by ``'lecun_uniform'``.
        init (str): Initialization scheme, one of:

            - ``'uniform'``: U(0, var)
            - ``'noisy_uniform'``: U(-var, var) plus Gaussian noise of std var / 10
            - ``'clipped_uniform'``: U(-var, var) clamped to [-var / 2, var / 2]
            - ``'lecun_uniform'``: U(-b, b) with b = 1 / sqrt(embedding_size)
            - ``'mean_shifted_uniform'``: U(-var, var) shifted by +0.1

    Raises:
        ValueError: If ``init`` is not one of the names above.
    """

    _SUPPORTED_INITS = (
        'uniform',
        'noisy_uniform',
        'clipped_uniform',
        'lecun_uniform',
        'mean_shifted_uniform',
    )

    def __init__(self, n_users, n_books, embedding_size, var, init):
        super().__init__()
        # Fail loudly on a typo instead of silently keeping nn.Embedding's
        # default initialization, which ignores `var` entirely.
        if init not in self._SUPPORTED_INITS:
            raise ValueError(
                f"Unknown init {init!r}; expected one of {', '.join(self._SUPPORTED_INITS)}"
            )

        self.P = nn.Embedding(n_users, embedding_size)
        self.Q = nn.Embedding(n_books, embedding_size)

        # Always handle P before Q so the random draws happen in a fixed order.
        factors = (self.P.weight, self.Q.weight)

        # In-place ops under no_grad are the supported way to overwrite
        # parameters without recording the change in autograd.
        with torch.no_grad():
            if init == 'uniform':
                for weight in factors:
                    weight.uniform_(0, var)
            elif init == 'noisy_uniform':
                for weight in factors:
                    weight.uniform_(-var, var)
                for weight in factors:
                    weight.add_(torch.randn_like(weight) * (var / 10))
            elif init == 'clipped_uniform':
                for weight in factors:
                    weight.uniform_(-var, var)
                    weight.clamp_(-var / 2, var / 2)
            elif init == 'lecun_uniform':
                fan_in = self.P.embedding_dim
                bound = 1 / fan_in**0.5
                for weight in factors:
                    weight.uniform_(-bound, bound)
            else:  # 'mean_shifted_uniform'
                for weight in factors:
                    weight.uniform_(-var, var)
                for weight in factors:
                    weight.add_(0.1)  # Shift the mean

    def forward(self, user_id, book_id):
        """
        Predict ratings for paired user / book indices.

        Args:
            user_id (torch.Tensor): Integer indices into ``P``.
            book_id (torch.Tensor): Integer indices into ``Q``, paired
                element-wise with ``user_id``.

        Returns:
            torch.Tensor: Dot product of each user vector with its paired
            book vector (sum over dimension 1).
        """
        user_vec = self.P(user_id)
        book_vec = self.Q(book_id)
        return (user_vec * book_vec).sum(1)

def train_model(user_assigned_idx, book_assigned_idx, ratings, n_users, n_books, embedding_size=250, var=0.01, init='uniform',
                decay=1e-6, lr=1e-4, lambda_=0, n_epochs=1500, verbose=False):
    """
    Train the Matrix Factorization model.

    Training is full-batch: every epoch is a single Adam step computed on all
    of the (user, book, rating) triples passed in.

    Args:
        user_assigned_idx (torch.Tensor): User indices.
        book_assigned_idx (torch.Tensor): Book indices.
        ratings (torch.Tensor): Ratings.
        n_users (int): Number of users.
        n_books (int): Number of books.
        embedding_size (int): Size of embedding.
        var (float): Scale of the initial embedding values (bound of the
            uniform range, not a statistical variance).
        init (str): Initialization method ('uniform', 'noisy_uniform',
            'clipped_uniform', 'lecun_uniform', 'mean_shifted_uniform').
        decay (float): Weight decay passed to Adam.
        lr (float): Learning rate.
        lambda_ (float): Weight of the penalty on the squared gap between the
            mean predicted rating and 2.5.
        n_epochs (int): Number of epochs.
        verbose (bool): If True, print RMSE and mean prediction every 100 epochs.

    Returns:
        MatrixFactorization: Trained model.
    """
    model = MatrixFactorization(n_users, n_books, embedding_size, var, init).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=decay)
    mse_metric = nn.MSELoss()

    model.train()
    for epoch in range(n_epochs):
        optimizer.zero_grad()
        r_hat = model(user_assigned_idx, book_assigned_idx)
        mse = mse_metric(r_hat, ratings)
        mean_pred = torch.mean(r_hat)
        loss = mse + lambda_ * (mean_pred - 2.5) ** 2
        loss.backward()
        optimizer.step()

        if verbose and (epoch + 1) % 100 == 0:
            # RMSE is only needed for logging, so compute it here and outside
            # the autograd graph; .item() prints plain numbers, not tensors.
            rmse = mse.detach().sqrt().item()
            print(f'Epoch {epoch + 1}/{n_epochs}, RMSE: {rmse}, Mean Rating: {mean_pred.item()}')

    return model

def validate_model(model, user_assigned_idx_val, book_assigned_idx_val, ratings_val):
    """
    Validate the model on unseen data.

    Predictions are clamped to the [1, 5] range before the error is computed.
    The model is switched to eval mode and left in that mode.

    Args:
        model (MatrixFactorization): Trained model.
        user_assigned_idx_val (torch.Tensor): Validation user indices.
        book_assigned_idx_val (torch.Tensor): Validation book indices.
        ratings_val (torch.Tensor): Validation ratings.

    Returns:
        tuple: RMSE and mean predicted rating (both computed on the clamped
        predictions, as Python floats).
    """
    model.eval()
    mse_metric = nn.MSELoss()
    # Evaluation needs no gradients; skipping the autograd graph saves memory.
    with torch.no_grad():
        r_hat = model(user_assigned_idx_val, book_assigned_idx_val)
        r_hat_clipped = torch.clamp(r_hat, 1, 5)
        err = mse_metric(r_hat_clipped, ratings_val)
        rmse = torch.sqrt(err).item()
        mean_rating = torch.mean(r_hat_clipped).item()
    return rmse, mean_rating


In [None]:
def write_submission(model, df_test, user_idx, book_idx, output_path='submission.csv'):
    """
    Generate predictions for the test set and save to CSV.

    Predictions are clamped to the [1, 5] range. The written file has an
    ``id`` column (0 .. len(df_test) - 1) and a ``rating`` column.

    Args:
        model (MatrixFactorization): Trained model.
        df_test (pd.DataFrame): Test data with ``user_id`` and ``book_id`` columns.
        user_idx (dict): User index mapping.
        book_idx (dict): Book index mapping.
        output_path (str): Path to save submission file.

    Returns:
        pd.DataFrame: Submission DataFrame.

    Raises:
        KeyError: If the test data contains a user or book id that is missing
            from the corresponding mapping.
    """
    model.eval()
    test_user_assigned_idx = torch.tensor(
        [user_idx[i] for i in df_test['user_id'].values], dtype=torch.long, device=device)
    test_book_assigned_idx = torch.tensor(
        [book_idx[i] for i in df_test['book_id'].values], dtype=torch.long, device=device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        predicted_ratings = model(test_user_assigned_idx, test_book_assigned_idx)
        predicted_ratings_clipped = torch.clamp(predicted_ratings, 1, 5)

    # One bulk transfer to the host instead of one .item() call per prediction.
    final = predicted_ratings_clipped.cpu().tolist()

    submission = pd.DataFrame({'id': range(len(df_test)), 'rating': final})
    print(f'Saving submission to {output_path}')
    submission.to_csv(output_path, index=False)
    return submission

In [None]:
# Example usage
data_path = "./data/"  # keep the trailing slash: file names are appended directly
(train_data, val_data, user_assigned_idx, book_assigned_idx, ratings,
 book_idx, user_idx, n_users, n_books) = prepare_data(data_path)

# Optimal hyperparameters
d_star = 250           # embedding size
lr_star = 1e-4         # Adam learning rate
var_star = 1e-4        # scale of the initial embedding values
decay_star = 1e-6      # Adam weight decay
N_EPOCH_STAR = 1500    # number of full-batch epochs
lambda_star = 0        # weight of the mean-rating penalty (0 disables it)
init_star = 'uniform'  # name of the init scheme handled by MatrixFactorization

# Train on whole dataset
model = train_model(user_assigned_idx, book_assigned_idx, ratings, n_users, n_books, embedding_size=d_star, var=var_star, init=init_star,
                decay=decay_star, lr=lr_star, lambda_=lambda_star, n_epochs=N_EPOCH_STAR, verbose=False)

# Generate submission
# data_path already ends with '/', so no extra separator is added here.
df_test = pd.read_csv(data_path + 'test.csv')
submission = write_submission(model, df_test, user_idx, book_idx)