In [4]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix, save_npz
from lightfm import LightFM
import joblib
import os

# === Config ===
# Tunable settings for the training run, gathered in one place.
RATING_THRESHOLD: int = 6  # a row is kept only if its doubled rating is strictly above this
EPOCHS: int = 20           # passed as `epochs` when fitting the model
NUM_THREADS: int = 1       # passed as `num_threads` when fitting the model
# NOTE(review): the notebook keeps only above-threshold rows and stores each as 1.0,
# so the interaction matrix carries no explicit negatives. LightFM's documentation
# describes 'logistic' as suited to data with both positive and negative interactions
# and points to 'warp' / 'bpr' for positive-only data -- confirm this loss is intended.
LOSS: str = 'logistic'

# === Load your data ===
# The CSV is expected to provide the columns: userId, movieId, rating.
print("[INFO] Loading data...")
ratings_df = (
    pd.read_csv("../data/ratings.csv")
    # Double the rating before it is compared with RATING_THRESHOLD.
    .assign(rating=lambda frame: frame['rating'] * 2)
    # Keep only the rows whose doubled rating is strictly above the threshold.
    .loc[lambda frame: frame['rating'] > RATING_THRESHOLD]
)

# === Map raw IDs to indices ===
print("[INFO] Mapping IDs to indices...")
# Distinct raw IDs, in the order unique() reports them.
user_ids = ratings_df['userId'].unique()
item_ids = ratings_df['movieId'].unique()

# Number each distinct raw ID 0..n-1. These dicts are also what gets written to
# disk alongside the model, so the numbering can be looked up again later.
user_id_map = dict(zip(user_ids, range(len(user_ids))))
item_id_map = dict(zip(item_ids, range(len(item_ids))))

# Attach the dense indices to every remaining rating row.
ratings_df['user_idx'] = ratings_df['userId'].map(user_id_map)
ratings_df['item_idx'] = ratings_df['movieId'].map(item_id_map)

# === Create interaction matrix ===
print("[INFO] Building interaction matrix...")
# Rows are user indices, columns are item indices; every kept rating row
# contributes one entry with value 1.0 (the rating magnitude itself is not stored).
matrix_shape = (len(user_ids), len(item_ids))
entry_values = np.ones(len(ratings_df))
entry_coords = (ratings_df['user_idx'], ratings_df['item_idx'])
interactions = coo_matrix((entry_values, entry_coords), shape=matrix_shape)

# === Save interaction matrix ===
# The path is relative, so the file lands in the current working directory.
print("[INFO] Saving interaction matrix...")
save_npz(file="interactions.npz", matrix=interactions)

# === Train LightFM model ===
# Seed LightFM's random number generator so that re-running the notebook on the
# same data reproduces the same model. Without `random_state` the initial
# parameters and the shuffling order change from run to run, so the saved model
# and every metric computed from it would differ each time.
MODEL_SEED = 42
print(f"[INFO] Training LightFM model (loss={LOSS}, epochs={EPOCHS})...")
model = LightFM(loss=LOSS, random_state=MODEL_SEED)
model.fit(interactions, epochs=EPOCHS, num_threads=NUM_THREADS)

# === Save model and mappings ===
print("[INFO] Saving model and ID mappings...")
# Each object is dumped to its own file; the paths are relative, so they are
# written to the current working directory.
for artifact, filename in (
    (model, "lightfm_model.pkl"),
    (user_id_map, "user_id_map.pkl"),
    (item_id_map, "item_id_map.pkl"),
):
    joblib.dump(artifact, filename)

print("[DONE] Model training and saving complete.")


[INFO] Loading data...
[INFO] Mapping IDs to indices...
[INFO] Building interaction matrix...
[INFO] Saving interaction matrix...
[INFO] Training LightFM model (loss=logistic, epochs=20)...
[INFO] Saving model and ID mappings...
[DONE] Model training and saving complete.


In [5]:
from lightfm.evaluation import precision_at_k, auc_score

# In-sample check: the model is scored against the same interaction matrix it
# was fitted on, so these numbers describe fit to the training data only.
print("[INFO] Evaluating model...")

# Each helper returns a collection of scores; .mean() reduces it to one number.
precision_scores = precision_at_k(model, interactions, k=10)
auc_scores = auc_score(model, interactions)
precision = precision_scores.mean()
auc = auc_scores.mean()

print(f"[RESULT] Precision@10: {precision:.4f}")
print(f"[RESULT] AUC: {auc:.4f}")


[INFO] Evaluating model...
[RESULT] Precision@10: 0.3727
[RESULT] AUC: 0.9133


In [6]:
from lightfm.cross_validation import random_train_test_split

# Hold-out evaluation: fit a fresh model on a random subset of the interactions
# and score it on the part that was held back.
HOLDOUT_FRACTION = 0.2  # share of interactions reserved for testing
HOLDOUT_EPOCHS = 30     # epochs for the evaluation model (differs from EPOCHS)
HOLDOUT_SEED = 42       # used for both the split and the model so the metrics are repeatable

train, test = random_train_test_split(
    interactions, test_percentage=HOLDOUT_FRACTION, random_state=HOLDOUT_SEED
)

# NOTE: this rebinds `model`; from here on it is the evaluation model fitted on
# `train`, not the object that was written to lightfm_model.pkl.
# Seeding the model as well as the split is what makes the reported numbers
# reproducible: an unseeded model starts from different parameters on every run.
model = LightFM(loss=LOSS, random_state=HOLDOUT_SEED)
model.fit(train, epochs=HOLDOUT_EPOCHS, num_threads=NUM_THREADS)

# Passing train_interactions lets the evaluation helpers leave out pairs the
# model already saw during fitting when they score the test set.
precision = precision_at_k(model, test, train_interactions=train, k=10).mean()
auc = auc_score(model, test, train_interactions=train).mean()

print(f"[EVAL] Precision@10: {precision:.4f}")
print(f"[EVAL] AUC: {auc:.4f}")


[EVAL] Precision@10: 0.1246
[EVAL] AUC: 0.8848
