In [31]:
from utils.evaluator import Evaluator
from fastFM import als
import numpy as np
import tqdm

from scipy.sparse import csr_matrix

In [34]:
def build_interaction_matrix(evaluator: Evaluator) -> np.ndarray[float]:
    ratings = []
    users = []
    items = []
    n_users, n_items = len(evaluator.user_mapping), len(evaluator.anime_mapping)
    
    user_id_to_canonical_id = {k: i for i, (k, _) in enumerate(evaluator.user_mapping.items())}
    # Sparse matrix, anime x user
    # Intersection: anime @ anime.T
    for user_id in tqdm.tqdm(evaluator.train_indices):
        user = evaluator.user_mapping[user_id]
        canonical_user_id: int = user_id_to_canonical_id[user_id]
        interacted_ids: np.ndarray = user.masked_watch_history
        interacted_ratings: np.ndarray = user.masked_rating_history
        
        ratings.extend(interacted_ratings.tolist())
        users.extend([canonical_user_id] * len(interacted_ratings))
        items.extend(interacted_ids.tolist())
        
    interaction_matrix = csr_matrix((ratings, (users, items)), shape = (n_users, n_items))
    return interaction_matrix, np.array(ratings)
data_path = "../data/copperunion"
evaluator = Evaluator(data_path, normalize_unrated = True)
interaction_matrix, data = build_interaction_matrix(evaluator)

normalize_unrated=True


parsing animes...: 100%|██████████| 12294/12294 [00:00<00:00, 20229.96it/s]
parsing users...: 100%|██████████| 73515/73515 [00:17<00:00, 4217.25it/s]


Total Animes: 12294
Total Users: 54077


100%|██████████| 54077/54077 [00:00<00:00, 99640.60it/s] 


In [43]:
from fastFM import als

# Create the FastFM model
non_zero_indices = interaction_matrix.nonzero()
y_train = data[non_zero_indices[0]]

print("Shape of interaction_matrix:", interaction_matrix.shape)
print("Shape of y_train:", y_train.shape)
print(f"{interaction_matrix.nnz=}")
print("Type of interaction_matrix:", type(interaction_matrix))
print("Type of y_train:", type(y_train))
print("Any NaN values in y_train:", np.any(np.isnan(y_train)))

Shape of interaction_matrix: (54077, 12294)
Shape of y_train: (7012923,)
interaction_matrix.nnz=7012923
Type of interaction_matrix: <class 'scipy.sparse._csr.csr_matrix'>
Type of y_train: <class 'numpy.ndarray'>
Any NaN values in y_train: False


In [46]:
from fastFM import als

# Create the FastFM model
non_zero_indices = interaction_matrix.nonzero()
y_train = data[non_zero_indices[0]]

print("Shape of interaction_matrix:", interaction_matrix.shape)
print("Shape of y_train:", y_train.shape)
print(f"{interaction_matrix.nnz=}")
print("Type of interaction_matrix:", type(interaction_matrix))
print("Type of y_train:", type(y_train))
print("Any NaN values in y_train:", np.any(np.isnan(y_train)))
unique_indices = set(zip(non_zero_indices[0], non_zero_indices[1]))
print("Number of unique (user, item) pairs:", len(unique_indices))
print("Number of duplicate entries in interaction_matrix:", interaction_matrix.nnz - len(unique_indices))


Shape of interaction_matrix: (54077, 12294)
Shape of y_train: (7012923,)
interaction_matrix.nnz=7012923
Type of interaction_matrix: <class 'scipy.sparse._csr.csr_matrix'>
Type of y_train: <class 'numpy.ndarray'>
Any NaN values in y_train: False
Number of unique (user, item) pairs: 7012923
Number of duplicate entries in interaction_matrix: 0


In [49]:
fm = als.FMRegression(n_iter=10000, rank=24, l2_reg=0.1, verbose=True)
fm.fit(interaction_matrix, y_train)

TypeError: FMRegression.__init__() got an unexpected keyword argument 'verbose'