In [1]:
import os, pickle, time
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import implicit
import math

# Load the train / validation / test interaction frames from the pickled split file.
# NOTE(review): pickle.load runs arbitrary code embedded in the file; only open
# split files produced by this project.
with open("../models/train_valid_test.pkl", "rb") as f:
    (train_df, valid_df, test_df) = pickle.load(f)

# Make sure ratings are floating point before they are used as matrix values.
train_df["rating"] = train_df["rating"].astype("float64")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sorted vocabularies of the ids that occur in the training frame; sorting
# makes the id -> index assignment deterministic across runs.
users = sorted(train_df["user_id"].unique())
items = sorted(train_df["item_id"].unique())

# id -> contiguous 0-based index
u_to_idx = dict(zip(users, range(len(users))))
i_to_idx = dict(zip(items, range(len(items))))

# contiguous index -> id (inverse of the maps above)
idx_to_u = dict(enumerate(users))
idx_to_i = dict(enumerate(items))

# Matrix dimensions; ids are unique, so these equal the sizes of the maps.
num_users = len(users)
num_items = len(items)


In [3]:
# Sparse interaction matrix used to train ALS.
# Orientation: ITEMS on rows, USERS on columns.
rows = train_df["item_id"].map(i_to_idx)
cols = train_df["user_id"].map(u_to_idx)
data = train_df["rating"].to_numpy(dtype="float32")

# Built from (value, (row, col)) triplets; the matrix is stored as float32.
item_user_csr = csr_matrix(
    (data, (rows, cols)),
    shape=(num_items, num_users),
    dtype=np.float32,
)

In [4]:
# ALS hyper-parameters.
factors = 50
regularization = 0.01
iterations = 20
alpha = 40  # scale applied to the stored ratings when building confidences

model = implicit.als.AlternatingLeastSquares(
    factors=factors, regularization=regularization, iterations=iterations
)

# confidence = 1 + alpha * R, applied to the stored (non-zero) entries only.
# Work on a float32 copy so item_user_csr itself is left untouched.
confidence = item_user_csr.astype("float32")
confidence.data = 1 + alpha * confidence.data

# NOTE(review): an item x user matrix is passed to fit(); this assumes the
# pre-0.5 implicit API — confirm against the installed implicit version.
print("Training ALS ...")
start = time.time()
model.fit(confidence)
print("Done. Training time:", time.time() - start, "seconds")

  check_blas_config()


Training ALS ...


100%|██████████| 20/20 [00:00<00:00, 51.42it/s]

Done. Training time: 0.4146409034729004 seconds





In [5]:
def _user_items_csr():
    """Return the training interactions as a user x item CSR matrix.

    ``item_user_csr.T`` is a CSC matrix, so it is converted to CSR here. The
    converted matrix is cached on the function and rebuilt only when
    ``item_user_csr`` has been rebound to a different object, so repeated
    recommendation calls do not redo the conversion.
    """
    cached = getattr(_user_items_csr, "_cache", None)
    if cached is None or cached[0] is not item_user_csr:
        cached = (item_user_csr, item_user_csr.T.tocsr())
        _user_items_csr._cache = cached
    return cached[1]


def als_recommend_for_user(user_id, N=10, filter_seen=True):
    """Return top-N recommended item_ids for a user.

    NOTE: a later notebook cell defines a function with this same name; the
    definition that was executed last is the one in effect.

    Args:
        user_id: Raw user id as it appears in ``train_df``.
        N: Number of recommendations requested from the model.
        filter_seen: Forwarded as ``filter_already_liked_items``.

    Returns:
        A list of raw item ids, or ``[]`` when ``user_id`` is not in
        ``u_to_idx`` (i.e. was not seen in training).
    """
    if user_id not in u_to_idx:
        return []

    uid = u_to_idx[user_id]

    # implicit documents `user_items` as a CSR matrix (users x items). A bare
    # transpose of the item x user CSR matrix is CSC, whose row slices expose
    # row positions in `.indices`, so the matrix is converted explicitly.
    rec = model.recommend(
        userid=uid,
        user_items=_user_items_csr(),
        N=N,
        filter_already_liked_items=filter_seen,
    )

    # NOTE(review): unpacking (item_index, score) pairs assumes recommend()
    # returns a list of pairs (pre-0.5 implicit API) — confirm installed version.
    return [idx_to_i[i_idx] for i_idx, score in rec]

In [6]:
def _user_items_csr():
    """Return the training interactions as a user x item CSR matrix.

    ``item_user_csr.T`` is a CSC matrix, so it is converted to CSR here. The
    converted matrix is cached on the function and rebuilt only when
    ``item_user_csr`` has been rebound to a different object, so repeated
    recommendation calls do not redo the conversion.
    """
    cached = getattr(_user_items_csr, "_cache", None)
    if cached is None or cached[0] is not item_user_csr:
        cached = (item_user_csr, item_user_csr.T.tocsr())
        _user_items_csr._cache = cached
    return cached[1]


def als_recommend_for_user(user_id, N=10, filter_seen=True):
    """Return the top-N recommended item ids for a user from the ALS model.

    NOTE: an earlier notebook cell defines a function with this same name;
    this definition replaces it once the cell is executed.

    Args:
        user_id: Raw user id as it appears in ``train_df``.
        N: Number of recommendations requested from the model.
        filter_seen: Forwarded as ``filter_already_liked_items``.

    Returns:
        A list of raw item ids, or ``[]`` when ``user_id`` is not in
        ``u_to_idx`` (i.e. was not seen in training).
    """
    if user_id not in u_to_idx:
        return []

    uid = u_to_idx[user_id]

    # implicit documents `user_items` as a CSR matrix (users x items). A bare
    # transpose of the item x user CSR matrix is CSC, whose row slices expose
    # row positions in `.indices`, so the matrix is converted explicitly.
    rec = model.recommend(
        userid=uid,
        user_items=_user_items_csr(),
        N=N,
        filter_already_liked_items=filter_seen,
    )

    # NOTE(review): unpacking (item_index, score) pairs assumes recommend()
    # returns a list of pairs (pre-0.5 implicit API) — confirm installed version.
    return [idx_to_i[i_idx] for i_idx, score in rec]


In [7]:
def precision_at_k(recommended, true_items, k=10):
    """Fraction of the top-k slots filled by relevant items.

    Args:
        recommended: Ranked sequence of recommended item ids.
        true_items: Iterable of relevant (held-out) item ids.
        k: Cut-off; also the denominator, even if fewer than k items
            were recommended.

    Returns:
        Number of distinct relevant items among the first k recommendations,
        divided by k.
    """
    top_k = set(recommended[:k])
    hits = top_k.intersection(set(true_items))
    return len(hits) / k

def recall_at_k(recommended, true_items, k=10):
    """Fraction of the relevant items that appear in the top-k recommendations.

    Args:
        recommended: Ranked sequence of recommended item ids.
        true_items: Iterable of relevant (held-out) item ids.
        k: Cut-off applied to ``recommended``.

    Returns:
        Distinct relevant items found in the first k recommendations divided
        by the number of distinct relevant items; 0.0 when there are none.
    """
    relevant = set(true_items)
    if not relevant:
        return 0.0
    found = relevant.intersection(recommended[:k])
    return len(found) / len(relevant)

def ndcg_at_k(recommended, true_items, k=10):
    """Binary-relevance NDCG of the top-k recommendations.

    Relevant items are treated as a set, matching ``precision_at_k`` and
    ``recall_at_k``: duplicate ids in ``true_items`` do not inflate the ideal
    DCG, and an item repeated in ``recommended`` is credited only at its first
    position, so the result always lies in [0, 1].

    Args:
        recommended: Ranked sequence of recommended item ids.
        true_items: Iterable of relevant (held-out) item ids.
        k: Cut-off applied to ``recommended``.

    Returns:
        DCG@k divided by the ideal DCG@k; 0.0 when there are no relevant
        items or ``k`` is not positive.
    """
    relevant = set(true_items)  # set gives O(1) membership and de-duplicates
    ideal_hits = min(len(relevant), k)
    if ideal_hits <= 0:
        return 0.0

    dcg = 0.0
    credited = set()
    for rank, item in enumerate(recommended[:k]):
        if item in relevant and item not in credited:
            credited.add(item)
            # rank is 0-based, so position p = rank + 1 and the discount is log2(p + 1)
            dcg += 1.0 / math.log2(rank + 2)

    # Ideal ranking places all relevant items (up to k) in the first positions.
    idcg = sum(1.0 / math.log2(rank + 2) for rank in range(ideal_hits))
    return dcg / idcg

In [8]:
def evaluate_als_on_df(df, k=10):
    """Average precision@k, recall@k and NDCG@k of the ALS model over a frame.

    The frame's ``item_id`` values are grouped per ``user_id`` and used as
    that user's relevant items. Users for which ``als_recommend_for_user``
    returns an empty list still contribute to the averages.

    Args:
        df: DataFrame with ``user_id`` and ``item_id`` columns.
        k: Cut-off used for the recommendations and for every metric.

    Returns:
        Dict with keys ``precision@k``, ``recall@k`` and ``ndcg@k`` (with the
        numeric value of k substituted), each a Python float.
    """
    truth_by_user = df.groupby("user_id")["item_id"].apply(list).to_dict()

    # One (precision, recall, ndcg) triple per evaluated user.
    per_user = []
    for user_id, relevant in truth_by_user.items():
        top_k = als_recommend_for_user(user_id, N=k, filter_seen=True)
        per_user.append(
            (
                precision_at_k(top_k, relevant, k),
                recall_at_k(top_k, relevant, k),
                ndcg_at_k(top_k, relevant, k),
            )
        )

    precisions = [scores[0] for scores in per_user]
    recalls = [scores[1] for scores in per_user]
    ndcgs = [scores[2] for scores in per_user]

    summary = {}
    summary[f"precision@{k}"] = float(np.mean(precisions))
    summary[f"recall@{k}"] = float(np.mean(recalls))
    summary[f"ndcg@{k}"] = float(np.mean(ndcgs))
    return summary

In [9]:

# =============================
# SAVE MODEL + MAPS
# =============================
# Imported ahead of first use so this cell does not rely on an import that
# appears after the dump calls.
import pickle

# Trained ALS model.
with open("../models/als_model.pkl", "wb") as f:
    pickle.dump(model, f)

# Id -> index maps plus the sorted id lists; positions in the lists are the
# matrix indices, so they let a loader translate model indices back to raw ids.
with open("../models/als_maps.pkl", "wb") as f:
    pickle.dump((u_to_idx, i_to_idx, users, items), f)

print("Model + maps saved.")

Model + maps saved.


In [10]:

# TRANSPOSE to user x item.
# `.T` on a CSR matrix produces a CSC matrix, so convert back explicitly:
# both the variable name and the file name promise CSR, and row-wise consumers
# (e.g. reading `matrix[user].indices`) need CSR to get item indices.
user_item_csr = item_user_csr.T.tocsr()  # shape: num_users x num_items

# Save CSR
with open("../models/user_item_csr.pkl", "wb") as f:
    pickle.dump(user_item_csr, f)