In [1]:
import os
import pandas as pd
import numpy as np
import torch
from surprise import Dataset as SurpriseDataset, Reader, SVDpp
import pickle

In [2]:
# Run configuration.
# The two directories default to the original author's local checkout. Set the
# CIL_DATA_DIR / CIL_SVD_OUTPUT_DIR environment variables to run the notebook
# on another machine without editing this cell.
data_dir = os.environ.get(
    'CIL_DATA_DIR',
    '/home/Soufiane/Desktop/CIL/BayesWatch_CollaborativeFiltering/Data/',
)
output_dir = os.environ.get(
    'CIL_SVD_OUTPUT_DIR',
    '/home/Soufiane/Desktop/CIL/BayesWatch_CollaborativeFiltering/Baselines/Seeded LightGCN/svd_embedding/',
)
n_factors = 128    # forwarded to SVDpp(n_factors=...)
svd_epochs = 100   # forwarded to SVDpp(n_epochs=...)
seed = 0           # used for the numpy / torch seeds and SVDpp(random_state=...)


In [3]:

# Data Loading
def read_ratings(data_dir: str):
    """
    Read the explicit ratings stored in ``train_ratings.csv`` under ``data_dir``.

    Only the ``sid_pid`` and ``rating`` columns of the file are read. Each
    ``sid_pid`` value is split on '_' and the pieces are converted to
    integers, giving separate ``sid`` and ``pid`` columns.

    Args:
        data_dir: Directory that contains ``train_ratings.csv``.

    Returns:
        DataFrame with columns ['sid', 'pid', 'rating'] (``rating`` is float).

    Raises:
        FileNotFoundError: If ``train_ratings.csv`` does not exist in ``data_dir``.
    """
    csv_path = os.path.join(data_dir, 'train_ratings.csv')
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Ratings file not found: {csv_path}")

    frame = pd.read_csv(
        csv_path,
        usecols=['sid_pid', 'rating'],
        dtype={'sid_pid': str, 'rating': float},
    )

    # Break the combined key into its two underscore-separated integer ids.
    id_parts = frame['sid_pid'].str.split('_', expand=True)
    frame[['sid', 'pid']] = id_parts.astype(int)

    return frame[['sid', 'pid', 'rating']]


In [4]:
# SVD++ Training
def train_svdpp(ratings_df, n_factors: int, n_epochs: int, seed: int = 0):
    """
    Fit a Surprise SVD++ model on explicit ratings only.

    Args:
        ratings_df: DataFrame providing the columns 'sid', 'pid' and 'rating'.
        n_factors: Forwarded to ``SVDpp(n_factors=...)``.
        n_epochs: Forwarded to ``SVDpp(n_epochs=...)``.
        seed: Seeds the global numpy and torch RNGs and is also passed to
            ``SVDpp`` as ``random_state``.

    Returns:
        Tuple ``(algo, trainset)``: the fitted Surprise algorithm and the full
        trainset it was fitted on.
    """
    # Seed the global generators before building or fitting anything.
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Surprise is handed the (user, item, rating) columns in this order,
    # with ratings declared on a 1-5 scale.
    triples = ratings_df[['sid', 'pid', 'rating']]
    full_trainset = SurpriseDataset.load_from_df(
        triples, Reader(rating_scale=(1, 5))
    ).build_full_trainset()

    model = SVDpp(n_factors=n_factors, n_epochs=n_epochs, random_state=seed)
    model.fit(full_trainset)
    return model, full_trainset

In [5]:
# Ensure the destination folder exists before anything is written to it
os.makedirs(output_dir, exist_ok=True)

# Read the ratings table, then fit SVD++ on it
ratings = read_ratings(data_dir)
algo, trainset = train_svdpp(ratings, n_factors, svd_epochs, seed=seed)

# Pull the embeddings (pu, qi) and biases (bu, bi) off the fitted model
pu, qi = algo.pu, algo.qi
bu, bi = algo.bu, algo.bi

# Lookup tables: raw user id -> inner user id, and raw item id -> inner item id.
# NOTE: both are read from underscore-prefixed (private) Trainset attributes.
user_map, item_map = trainset._raw2inner_id_users, trainset._raw2inner_id_items


In [6]:
# Resolve both artifact paths once, so the files written and the message
# printed below always refer to exactly the same locations (building the
# message from f"{output_dir}/..." duplicated the separator when output_dir
# already ends with a slash).
embeddings_path = os.path.join(output_dir, 'svd_embeddings.pt')
model_path = os.path.join(output_dir, 'svd_model.pkl')

# Save embeddings, biases and the raw -> inner id mappings in a single file.
# NOTE(review): the values are stored exactly as taken from the Surprise model
# (presumably NumPy arrays and plain dicts - confirm); whoever loads this file
# may need torch.load(..., weights_only=False).
save_dict = {
    'pu': pu,
    'qi': qi,
    'bu': bu,
    'bi': bi,
    'user_map': user_map,
    'item_map': item_map
}
torch.save(save_dict, embeddings_path)

# Save the trained Surprise model
with open(model_path, 'wb') as f:
    pickle.dump(algo, f)

print(f"Saved embeddings to {embeddings_path} "
      f"and model to {model_path}")

Saved embeddings to /home/Soufiane/Desktop/CIL/BayesWatch_CollaborativeFiltering/Baselines/Seeded LightGCN/svd_embedding//svd_embeddings.pt and model to /home/Soufiane/Desktop/CIL/BayesWatch_CollaborativeFiltering/Baselines/Seeded LightGCN/svd_embedding//svd_model.pkl
