## Step 5 — Model Evaluation

In [1]:
import os
import math
import pickle
import numpy as np
import pandas as pd
from collections import defaultdict
from scipy.sparse import coo_matrix
from sklearn.neighbors import NearestNeighbors

# === Paths (relative to notebooks/) ===
# Filtered interaction table plus the model artifacts evaluated below
# (presumably written by earlier notebook steps — confirm against those cells).
DATA_FILTERED_PATH = "../data/processed/df_filtered.pkl"
ARTIFACTS_DIR = "../artifacts"
MODELS_DIR = os.path.join(ARTIFACTS_DIR, "models")
EMBEDDINGS_DIR = os.path.join(ARTIFACTS_DIR, "embeddings")

POPULARITY_PATH = os.path.join(MODELS_DIR, "popularity_model.parquet")
KNN_MODEL_PATH = os.path.join(MODELS_DIR, "knn_model.pkl")
CB_SIM_PATH = os.path.join(EMBEDDINGS_DIR, "item_similarity_sub.npy")
CB_ITEMS_PATH = os.path.join(EMBEDDINGS_DIR, "item_similarity_sub_items.csv")

# === Load Data and Models ===
# NOTE(review): read_pickle / pickle.load can execute arbitrary code embedded
# in the file; only load artifacts that were produced by this project.
print("Loading filtered interaction data...")
df_filtered = pd.read_pickle(DATA_FILTERED_PATH)
print(f"Loaded filtered data with shape: {df_filtered.shape}")

print("Loading popularity model...")
# Sort by descending "count" so that popular_items is ordered most-popular first.
popularity_df = pd.read_parquet(POPULARITY_PATH).sort_values("count", ascending=False)
popular_items = popularity_df["itemid"].tolist()

print("Loading content-based similarity subset...")
# mmap_mode="r" memory-maps the array read-only instead of reading it all into RAM.
item_sim_matrix = np.load(CB_SIM_PATH, mmap_mode="r")
# First column of the header-less CSV holds the item ids; an item's position in
# this list is used as its row index into item_sim_matrix by recommend_content_based.
# NOTE(review): assumes the CSV row order matches the matrix row order — confirm.
subset_items = pd.read_csv(CB_ITEMS_PATH, header=None).iloc[:, 0].tolist()
subset_item_to_idx = {item: idx for idx, item in enumerate(subset_items)}
print(f"Loaded {len(subset_items)} items in content-based similarity subset.")

print("Loading KNN collaborative filtering model...")
with open(KNN_MODEL_PATH, "rb") as f:
    knn_model: NearestNeighbors = pickle.load(f)
print("KNN model loaded.")

# === Train/Test Split (user-wise) ===
def train_test_split_userwise(df, test_ratio=0.2, seed=42):
    """Split interactions into train/test parts separately for every user.

    For each ``visitorid`` group with at least two rows,
    ``max(1, int(len(group) * test_ratio))`` rows are sampled without
    replacement into the test part and the remaining rows go to train.
    Users with a single row are kept entirely in train.

    Rows are selected by position rather than by index label, so the split
    stays correct even when ``df`` has duplicate index labels.

    Args:
        df: DataFrame with a ``visitorid`` column.
        test_ratio: Fraction of each user's rows to hold out.
        seed: Seed for the NumPy random generator.

    Returns:
        ``(train_df, test_df)``. Either part is an empty frame with the
        columns of ``df`` when no rows fall into it.
    """
    rng = np.random.default_rng(seed)
    train_parts, test_parts = [], []
    for _user_id, group in df.groupby("visitorid"):
        n_rows = len(group)
        if n_rows < 2:
            train_parts.append(group)
            continue
        test_size = max(1, int(n_rows * test_ratio))
        # Sample row positions (not labels) so duplicate index labels cannot
        # pull extra rows into the test part or drop extra rows from train.
        test_pos = rng.choice(n_rows, size=test_size, replace=False)
        keep_mask = np.ones(n_rows, dtype=bool)
        keep_mask[test_pos] = False
        test_parts.append(group.iloc[test_pos])
        train_parts.append(group.iloc[keep_mask])

    # pd.concat raises on an empty list, so fall back to an empty slice of df.
    empty = df.iloc[0:0]
    train = pd.concat(train_parts) if train_parts else empty
    test = pd.concat(test_parts) if test_parts else empty
    return train, test

# Hold out 20% of every user's interactions (at least one row) for testing.
train_df, test_df = train_test_split_userwise(df_filtered, test_ratio=0.2, seed=42)
print(f"Train shape: {train_df.shape}, Test shape: {test_df.shape}")

# === Prepare Ground Truth for Evaluation ===
# visitorid -> set of item ids that appear in that visitor's test rows.
test_user_items = defaultdict(set)
for visitor_id, item_id in zip(test_df["visitorid"], test_df["itemid"]):
    test_user_items[visitor_id].add(item_id)

# === Build Sparse User-Item Interaction Matrix for TRAIN ===
def build_sparse_user_item(df):
    """Build a CSR user x item matrix of interaction counts.

    Users and items are encoded through pandas categoricals, so row/column
    positions follow the sorted unique ``visitorid`` / ``itemid`` values.
    Each cell holds how many rows of ``df`` contain that (user, item) pair.

    Returns:
        ``(matrix, user_idx_to_id, item_idx_to_id, user_id_to_idx,
        item_id_to_idx)`` where the ``*_idx_to_id`` values are the category
        indexes and the ``*_id_to_idx`` values are plain dicts.
    """
    user_cat = df["visitorid"].astype("category").cat
    item_cat = df["itemid"].astype("category").cat
    user_ids = user_cat.categories
    item_ids = item_cat.categories

    # One row per distinct (user code, item code) pair with its frequency "c".
    pair_counts = pd.DataFrame({"u": user_cat.codes, "i": item_cat.codes}).value_counts()
    pair_counts = pair_counts.reset_index(name="c")

    coords = (pair_counts["u"].values, pair_counts["i"].values)
    shape = (user_ids.size, item_ids.size)
    matrix = coo_matrix((pair_counts["c"].values, coords), shape=shape).tocsr()

    user_lookup = dict(zip(user_ids, range(user_ids.size)))
    item_lookup = dict(zip(item_ids, range(item_ids.size)))
    return matrix, user_ids, item_ids, user_lookup, item_lookup

print("Building sparse user-item matrix from TRAIN data...")
(user_item_csr, u_idx2id, i_idx2id, u_id2idx, i_id2idx) = build_sparse_user_item(train_df)
print(
    f"Sparse TRAIN matrix shape: users={user_item_csr.shape[0]}, "
    f"items={user_item_csr.shape[1]}, non-zeros={user_item_csr.nnz}"
)

# === Check KNN model compatibility ===
# Items are queried as vectors over users, so the model's feature count must
# equal the number of user rows in the TRAIN matrix; otherwise disable KNN.
expected_features = getattr(knn_model, "n_features_in_", None)
if not (expected_features is None or expected_features == user_item_csr.shape[0]):
    print(f"WARNING: KNN model expects {expected_features} features, but user-item matrix has {user_item_csr.shape[0]} users.")
    print("Skipping KNN evaluation due to incompatibility.")
    knn_model = None

# === Recommendation Functions ===
def recommend_popularity(_seed_item, top_n=10):
    """Return the first ``top_n`` entries of ``popular_items``.

    The seed item is accepted only so the signature matches the other
    recommenders; it does not influence the result.
    """
    head = popular_items[:top_n]
    return head

def recommend_content_based(seed_item, top_n=10):
    """Return up to ``top_n`` items most similar to ``seed_item``.

    Similarities come from the row of ``item_sim_matrix`` at the seed's
    position in ``subset_items``. The seed itself is excluded.

    Args:
        seed_item: Item id to find neighbours for.
        top_n: Maximum number of recommendations.

    Returns:
        Item ids ordered by descending similarity. An empty list when the
        seed is not in the similarity subset or ``top_n`` is not positive.
        Fewer than ``top_n`` items when the subset itself is smaller.
    """
    idx = subset_item_to_idx.get(seed_item)
    if idx is None or top_n <= 0:
        return []
    sims = np.asarray(item_sim_matrix[idx])
    # Ask for one extra candidate because the seed normally ranks itself
    # first. Clamp to the row length: argpartition raises if any requested
    # position is out of bounds, which happened for subsets with <= top_n items.
    n_candidates = min(top_n + 1, sims.shape[0])
    # Passing every position 0..n_candidates-1 as kth leaves that prefix fully
    # sorted by descending similarity, so no further sort is required.
    ranked = np.argpartition(-sims, range(n_candidates))[:n_candidates]
    return [subset_items[i] for i in ranked if i != idx][:top_n]

def recommend_knn(seed_item, top_n=10):
    """Return up to ``top_n`` neighbours of ``seed_item`` from ``knn_model``.

    The seed's column of ``user_item_csr`` (its interaction counts over all
    train users) is used as the query vector.

    Returns:
        Item ids for the nearest neighbours, excluding the seed. An empty list
        when the model is unavailable, the seed is unknown to the TRAIN matrix,
        or the model's feature count does not match the query vector.
    """
    if knn_model is None:
        return []
    item_idx = i_id2idx.get(seed_item)
    if item_idx is None:
        return []
    # Item column as a dense (1, n_users) row vector.
    vec = user_item_csr[:, item_idx].toarray().reshape(1, -1)
    expected_dim = getattr(knn_model, "n_features_in_", None)
    if expected_dim is not None and expected_dim != vec.shape[1]:
        print(f"WARNING: KNN query vector dimension {vec.shape[1]} does not match model expected {expected_dim}.")
        return []

    # kneighbors raises when asked for more neighbours than fitted samples.
    n_fitted = getattr(knn_model, "n_samples_fit_", None)
    n_query = top_n + 1 if n_fitted is None else min(top_n + 1, n_fitted)
    if n_query < 1:
        return []
    _distances, indices = knn_model.kneighbors(vec, n_neighbors=n_query)

    # Drop neighbour indices that fall outside the TRAIN item index instead of
    # clipping them: a clipped index would be reported as the first/last item,
    # which is a wrong (and possibly duplicated) recommendation.
    n_items = len(i_idx2id)
    rec_idxs = [j for j in indices.ravel() if j != item_idx and 0 <= j < n_items]
    return [i_idx2id[j] for j in rec_idxs[:top_n]]

# === Evaluation Metrics ===
def precision_at_k(recommended, relevant, k):
    """Distinct relevant items among the first ``k`` recommendations, divided by ``k``.

    Returns 0.0 when ``k`` is 0. ``relevant`` is expected to be a set.
    """
    if k != 0:
        top_k = set(recommended[:k])
        return len(relevant & top_k) / k
    return 0.0

def recall_at_k(recommended, relevant, k):
    """Share of ``relevant`` items found among the first ``k`` recommendations.

    Returns 0.0 when ``relevant`` is empty. ``relevant`` is expected to be a set.
    """
    if relevant:
        top_k = set(recommended[:k])
        return len(relevant & top_k) / len(relevant)
    return 0.0

def average_precision(recommended, relevant, k):
    """Average precision over the first ``k`` recommendations.

    Precision is accumulated at every rank where a relevant item appears for
    the first time, then normalised by ``min(len(relevant), k)``.

    Args:
        recommended: Ranked sequence of item ids.
        relevant: Collection of ground-truth item ids.
        k: Cut-off rank.

    Returns:
        A value in [0, 1]. 0.0 when ``relevant`` is empty or ``k`` is not
        positive.
    """
    if not relevant or k <= 0:
        return 0.0
    counted = set()
    hits = 0
    precision_sum = 0.0
    for rank, item in enumerate(recommended[:k], start=1):
        # A repeated recommendation still occupies a rank but must not score
        # again, otherwise the result can exceed 1.
        if item in relevant and item not in counted:
            counted.add(item)
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), k)

def ndcg_at_k(recommended, relevant, k):
    """Normalised discounted cumulative gain with binary relevance at cut-off ``k``.

    Each relevant item contributes ``1 / log2(rank + 1)`` at the rank where it
    first appears. The ideal DCG places ``min(len(relevant), k)`` hits at the
    top ranks.

    Args:
        recommended: Ranked sequence of item ids.
        relevant: Collection of ground-truth item ids.
        k: Cut-off rank.

    Returns:
        A value in [0, 1]. 0.0 when ``relevant`` is empty or ``k`` is not
        positive.
    """
    if not relevant or k <= 0:
        return 0.0
    counted = set()
    dcg = 0.0
    for rank, item in enumerate(recommended[:k], start=1):
        # Only the first occurrence of an item earns gain; counting repeats
        # would let DCG exceed the ideal DCG.
        if item in relevant and item not in counted:
            counted.add(item)
            dcg += 1.0 / math.log2(rank + 1)
    ideal_hits = min(len(relevant), k)
    idcg = sum(1.0 / math.log2(i + 1) for i in range(1, ideal_hits + 1))
    return dcg / idcg

# === Evaluation Loop ===
def evaluate(recommender_fn, users, k=10, seed=42, max_users=None, require_seed_in_domain=False):
    """Score a seed-item recommender against the held-out test interactions.

    For every evaluated user one seed item is drawn at random from the user's
    TRAIN history, ``recommender_fn(seed_item, top_n=k)`` is called, and the
    result is compared with the user's items in ``test_user_items``.

    Reads the module-level ``train_df``, ``test_user_items`` and (when
    ``require_seed_in_domain`` is set) ``subset_items``.

    Args:
        recommender_fn: Callable ``(seed_item, top_n=...) -> list`` of item ids.
        users: Sequence of user ids to evaluate.
        k: Cut-off used for the request size and for all metrics.
        seed: Seed for user sub-sampling and seed-item selection.
        max_users: If given and smaller than ``len(users)``, evaluate a random
            sample of this many users.
        require_seed_in_domain: Draw the seed only from items present in
            ``subset_items``; users without such an item are skipped.

    Returns:
        Dict with mean ``Precision@K``, ``Recall@K``, ``MAP``, ``NDCG`` and
        ``UsersEvaluated``. Users with no test items, no train items, or an
        empty recommendation list are skipped and not counted.
    """
    rng = np.random.default_rng(seed)
    if max_users is not None and len(users) > max_users:
        users = rng.choice(users, size=max_users, replace=False)

    train_user_items = train_df.groupby("visitorid")["itemid"].apply(set).to_dict()
    # Build the domain set once up front; constructing it per user made the
    # loop cost grow with len(users) * len(subset_items).
    domain_items = set(subset_items) if require_seed_in_domain else None

    precisions, recalls, maps, ndcgs = [], [], [], []
    for user in users:
        # .get avoids inserting empty entries into the defaultdict.
        relevant = test_user_items.get(user)
        if not relevant:
            continue
        seed_pool = train_user_items.get(user, set())
        if not seed_pool:
            continue

        # Pick a seed item from the user's training history.
        if require_seed_in_domain:
            candidates = list(seed_pool & domain_items)
            if not candidates:
                continue
        else:
            candidates = list(seed_pool)
        seed_item = rng.choice(candidates)

        recommended = recommender_fn(seed_item, top_n=k)
        if not recommended:
            continue

        precisions.append(precision_at_k(recommended, relevant, k))
        recalls.append(recall_at_k(recommended, relevant, k))
        maps.append(average_precision(recommended, relevant, k))
        ndcgs.append(ndcg_at_k(recommended, relevant, k))

    return {
        "Precision@K": float(np.mean(precisions)) if precisions else 0.0,
        "Recall@K": float(np.mean(recalls)) if recalls else 0.0,
        "MAP": float(np.mean(maps)) if maps else 0.0,
        "NDCG": float(np.mean(ndcgs)) if ndcgs else 0.0,
        "UsersEvaluated": len(precisions),
    }

# === Run Evaluation ===
all_train_users = train_df["visitorid"].unique()
K = 10
MAX_EVAL_USERS = 10000

print(f"\nEvaluating Popularity-based recommender on up to {MAX_EVAL_USERS} users...")
pop_metrics = evaluate(recommend_popularity, all_train_users, k=K, max_users=MAX_EVAL_USERS)

print(f"Evaluating Content-Based recommender on up to {MAX_EVAL_USERS} users...")
# Content-based seeds must come from the similarity subset, otherwise the
# recommender has no row to look up.
cb_metrics = evaluate(
    recommend_content_based,
    all_train_users,
    k=K,
    max_users=MAX_EVAL_USERS,
    require_seed_in_domain=True,
)

print(f"Evaluating KNN Collaborative Filtering recommender on up to {MAX_EVAL_USERS} users...")
if knn_model is None:
    knn_metrics = {}
    print("KNN model unavailable or incompatible, skipping evaluation.")
else:
    knn_metrics = evaluate(recommend_knn, all_train_users, k=K, max_users=MAX_EVAL_USERS)

# === Summarize Results ===
# One row per evaluated model; the KNN row is present only when it was scored.
result_rows = [
    {"Model": "Popularity", **pop_metrics},
    {"Model": "Content-Based", **cb_metrics},
]
if knn_metrics:
    result_rows.append({"Model": "KNN Collaborative Filtering", **knn_metrics})
results_df = pd.DataFrame(result_rows)

print(f"\nEvaluation Results (Top-K = {K}):")
print(results_df)

# === Select Best Model ===
best_model_row = results_df.loc[results_df["MAP"].idxmax()]
print(f"\nBest performing model based on MAP: {best_model_row['Model']}")
print(best_model_row)

# === Save evaluation results to CSV ===
eval_results_path = os.path.join(MODELS_DIR, "model_evaluation_results.csv")
results_df.to_csv(eval_results_path, index=False)
print(f"Saved evaluation results to {eval_results_path}")


Loading filtered interaction data...
Loaded filtered data with shape: (833463, 12)
Loading popularity model...
Loading content-based similarity subset...
Loaded 8967 items in content-based similarity subset.
Loading KNN collaborative filtering model...
KNN model loaded.
Train shape: (681915, 12), Test shape: (151548, 12)
Building sparse user-item matrix from TRAIN data...
Sparse TRAIN matrix shape: users=80112, items=38975, non-zeros=393640

Evaluating Popularity-based recommender on up to 10000 users...
Evaluating Content-Based recommender on up to 10000 users...
Evaluating KNN Collaborative Filtering recommender on up to 10000 users...

Evaluation Results (Top-K = 10):
                         Model  Precision@K  Recall@K       MAP      NDCG  \
0                   Popularity     0.001488  0.008161  0.003680  0.005270   
1                Content-Based     0.000175  0.000755  0.000282  0.000494   
2  KNN Collaborative Filtering     0.021086  0.143621  0.062179  0.086425   

   UsersEva

## Step 6 — Hyperparameter Tuning for KNN Recommender

In [3]:
import os
import pickle
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Paths
ARTIFACTS_DIR = "../artifacts"
MODELS_DIR = os.path.join(ARTIFACTS_DIR, "models")
os.makedirs(MODELS_DIR, exist_ok=True)
TUNED_KNN_MODEL_PATH = os.path.join(MODELS_DIR, "knn_model_tuned.pkl")

# Prepare user-item dictionaries for evaluation
from collections import defaultdict

# visitorid -> set of item ids seen in that visitor's TRAIN rows.
train_user_items = (
    train_df.groupby("visitorid")["itemid"]
    .apply(set)
    .to_dict()
)

# visitorid -> set of item ids seen in that visitor's TEST rows.
test_user_items = defaultdict(set)
for visitor_id, item_id in zip(test_df["visitorid"], test_df["itemid"]):
    test_user_items[visitor_id].add(item_id)

# NOTE: this cell relies on evaluate(), train_df, test_df, i_id2idx and i_idx2id from the Step 5 cell — run Step 5 first.

def tune_knn_manual(
    user_item_csr,
    train_user_items,
    test_user_items,
    k=10,
    max_users=5000,
    n_neighbors_options=(5, 10, 20),
    algorithm_options=("auto", "brute"),
    metric_options=("cosine", "euclidean"),
):
    """Grid-search NearestNeighbors settings for the item-item KNN recommender.

    Every combination of ``n_neighbors``, ``algorithm`` and ``metric`` is fit
    on the transposed user-item matrix (one sample per item) and scored with
    the module-level ``evaluate`` function; the combination with the highest
    MAP wins. A combination that raises is reported and skipped.

    Args:
        user_item_csr: Sparse users x items interaction matrix.
        train_user_items: Mapping whose keys are the user ids to evaluate.
        test_user_items: Accepted for interface compatibility; ``evaluate``
            reads the module-level ``test_user_items`` itself.
        k: Number of recommendations requested and metric cut-off.
        max_users: Upper bound on the number of users evaluated per combination.
        n_neighbors_options: Candidate ``n_neighbors`` values.
        algorithm_options: Candidate ``algorithm`` values.
        metric_options: Candidate ``metric`` values.

    Returns:
        ``(best_model, best_map, best_params)``. ``best_model`` and
        ``best_params`` are ``None`` (and ``best_map`` is -1) when every
        combination failed.
    """
    from itertools import product

    # Same iteration order as three nested loops: n_neighbors outermost.
    grid = list(product(n_neighbors_options, algorithm_options, metric_options))
    eval_users = list(train_user_items)
    item_user_matrix = user_item_csr.T  # one row per item

    best_map = -1
    best_params = None
    best_model = None

    print(f"Starting KNN hyperparameter tuning with {len(grid)} combinations...")

    for n_neighbors, algorithm, metric in grid:
        print(f"Training KNN with n_neighbors={n_neighbors}, algorithm='{algorithm}', metric='{metric}'...")
        try:
            knn = NearestNeighbors(
                n_neighbors=n_neighbors,
                algorithm=algorithm,
                metric=metric,
                n_jobs=-1,
            )
            knn.fit(item_user_matrix)

            # NOTE(review): the query below always asks for top_n + 1
            # neighbours, overriding the fitted n_neighbors, so that grid axis
            # does not change the score (every n_neighbors value reports the
            # same MAP). Decide whether n_neighbors should cap the list length
            # or be removed from the grid.
            def knn_recommender(seed_item, top_n=k, model=knn):
                # ``model`` is bound as a default so the closure keeps this
                # iteration's estimator even if called after the loop advances.
                item_idx = i_id2idx.get(seed_item)
                if item_idx is None:
                    return []
                vec = user_item_csr[:, item_idx].T
                _distances, indices = model.kneighbors(vec, n_neighbors=top_n + 1)
                rec_idxs = [j for j in indices.flatten() if j != item_idx][:top_n]
                return [i_idx2id[rec] for rec in rec_idxs]

            metrics = evaluate(knn_recommender, eval_users, k=k, max_users=max_users)

            current_map = metrics["MAP"]
            print(f"MAP: {current_map:.4f}")

            if current_map > best_map:
                best_map = current_map
                best_params = {
                    "n_neighbors": n_neighbors,
                    "algorithm": algorithm,
                    "metric": metric,
                }
                best_model = knn

        except Exception as e:
            # Deliberate best-effort: an unsupported combination must not
            # abort the rest of the grid.
            print(f"Error training with params n_neighbors={n_neighbors}, algorithm={algorithm}, metric={metric}: {e}")

    print(f"Best MAP: {best_map:.4f} with params: {best_params}")

    return best_model, best_map, best_params


# Run tuning
best_knn_model, best_map, best_params = tune_knn_manual(
    user_item_csr, train_user_items, test_user_items, k=10, max_users=5000
)

# Save tuned model to disk
if best_knn_model is None:
    # tune_knn_manual returns None when every combination raised. Pickling
    # None would let a later load succeed and then fail on the first
    # kneighbors call, so skip the write and say so.
    print(f"No KNN configuration trained successfully; nothing saved to {TUNED_KNN_MODEL_PATH}")
else:
    with open(TUNED_KNN_MODEL_PATH, "wb") as f:
        pickle.dump(best_knn_model, f)
    print(f"Saved tuned KNN model to {TUNED_KNN_MODEL_PATH}")


Starting KNN hyperparameter tuning with 12 combinations...
Training KNN with n_neighbors=5, algorithm='auto', metric='cosine'...
MAP: 0.0733
Training KNN with n_neighbors=5, algorithm='auto', metric='euclidean'...
MAP: 0.0242
Training KNN with n_neighbors=5, algorithm='brute', metric='cosine'...
MAP: 0.0733
Training KNN with n_neighbors=5, algorithm='brute', metric='euclidean'...
MAP: 0.0242
Training KNN with n_neighbors=10, algorithm='auto', metric='cosine'...
MAP: 0.0733
Training KNN with n_neighbors=10, algorithm='auto', metric='euclidean'...
MAP: 0.0242
Training KNN with n_neighbors=10, algorithm='brute', metric='cosine'...
MAP: 0.0733
Training KNN with n_neighbors=10, algorithm='brute', metric='euclidean'...
MAP: 0.0242
Training KNN with n_neighbors=20, algorithm='auto', metric='cosine'...
MAP: 0.0733
Training KNN with n_neighbors=20, algorithm='auto', metric='euclidean'...
MAP: 0.0242
Training KNN with n_neighbors=20, algorithm='brute', metric='cosine'...
MAP: 0.0733
Training KNN

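### Loading and Testing the Tuned KNN Model

Reload the saved model and re-evaluate it on the held-out test split. Note that this is the same split that was used to select the hyperparameters in Step 6, so the result is not an estimate on fully unseen data.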

In [4]:
import os
import pickle
import numpy as np
import pandas as pd

# Paths (same artifact locations as the tuning cell that writes knn_model_tuned.pkl)
ARTIFACTS_DIR = "../artifacts"
MODELS_DIR = os.path.join(ARTIFACTS_DIR, "models")
TUNED_KNN_MODEL_PATH = os.path.join(MODELS_DIR, "knn_model_tuned.pkl")

# Load tuned KNN model
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a model file that this notebook produced.
print("Loading tuned KNN model...")
with open(TUNED_KNN_MODEL_PATH, "rb") as f:
    tuned_knn_model = pickle.load(f)
print("Tuned KNN model loaded.")

# Define the recommend_knn function using the tuned model
def recommend_tuned_knn(seed_item, top_n=10):
    """Return up to ``top_n`` neighbours of ``seed_item`` from ``tuned_knn_model``.

    The seed's column of ``user_item_csr`` is transposed into a sparse row and
    used as the query; the seed itself is removed from the result. Returns an
    empty list when the seed is not in ``i_id2idx``.
    """
    seed_idx = i_id2idx.get(seed_item)
    if seed_idx is None:
        return []
    query = user_item_csr[:, seed_idx].T
    # One extra neighbour is requested because the seed is normally returned too.
    _, neighbor_idx = tuned_knn_model.kneighbors(query, n_neighbors=top_n + 1)
    kept = [j for j in neighbor_idx.flatten() if j != seed_idx]
    return [i_idx2id[j] for j in kept[:top_n]]

# Run evaluation on a larger set 
all_train_users = train_df["visitorid"].unique()
K = 10
MAX_EVAL_USERS = 10000

print(f"\nEvaluating Tuned KNN Collaborative Filtering on up to {MAX_EVAL_USERS} users...")
tuned_knn_metrics = evaluate(recommend_tuned_knn, all_train_users, k=K, max_users=MAX_EVAL_USERS)

print("\nTuned KNN Evaluation Results:")
for metric, value in tuned_knn_metrics.items():
    # evaluate() returns floats for the metrics and an int for UsersEvaluated;
    # only the floats get six decimals so the count is not shown as 9812.000000.
    if isinstance(value, float):
        print(f"{metric}: {value:.6f}")
    else:
        print(f"{metric}: {value}")


Loading tuned KNN model...
Tuned KNN model loaded.

Evaluating Tuned KNN Collaborative Filtering on up to 10000 users...

Tuned KNN Evaluation Results:
Precision@K: 0.021749
Recall@K: 0.145209
MAP: 0.071979
NDCG: 0.094960
UsersEvaluated: 9812.000000
