# Event Recommendation System - Collaborative Filtering Evaluation

In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

sys.path.append(str(Path.cwd().parent))

from models.collaborative import CollaborativeFilteringRecommender
from utils.metrics import evaluate_recommendations
from utils.temporal_split import temporal_split_per_user, print_split_stats
from utils.preprocessing import InteractionMatrix

  from .autonotebook import tqdm as notebook_tqdm


## 1. Load and Split Data

In [None]:
# Directory holding the raw CSV inputs, relative to the notebook's working directory.
raw_dir = Path("../data/raw")

# Load the three source tables: user/event interactions, event metadata, and event attendees.
train_raw = pd.read_csv(raw_dir.joinpath("train.csv"))
events_raw = pd.read_csv(raw_dir.joinpath("events.csv"))
event_attendees = pd.read_csv(raw_dir.joinpath("event_attendees.csv"))

# Row counts as a quick sanity check that every file loaded.
print(
    f"Raw train data: {len(train_raw)} interactions",
    f"Events: {len(events_raw)}",
    f"Event attendees: {len(event_attendees)}",
    sep="\n",
)

In [None]:
# Split the raw interactions into a train frame and a validation frame.
# NOTE(review): the precise meaning of train_ratio / min_interactions is defined in
# utils.temporal_split — confirm there before changing these values.
train_df, val_df = temporal_split_per_user(
    train_raw,
    train_ratio=0.5,
    min_interactions=3,
)

# Print a summary of both splits.
print_split_stats(train_df, val_df)

TEMPORAL SPLIT STATISTICS

TRAIN SET:
  Total interactions: 7393
  Unique users: 2034
  Unique events: 4733
  Interested=1: 1337

VALIDATION SET:
  Total interactions: 8005
  Unique users: 2034
  Unique events: 5127
  Interested=1: 2794

OVERLAP:
  Users in both: 2034
  Events in both: 1014


## 2. Preprocess Events Data

This cell processes the raw events (K-means clustering, feature extraction) only when no cached copy exists. The result is written to `../data/processed/events_processed.csv`, and subsequent runs load it from that cache.

In [None]:
from utils.preprocessing import EventFeatureExtractor

# Location of the cached, feature-engineered events table.
processed_events_path = Path("../data/processed/events_processed.csv")

if processed_events_path.exists():
    # Cache hit: skip the expensive feature extraction entirely.
    print("Loading cached processed events...")
    events = pd.read_csv(processed_events_path)
else:
    print("Processing events (this will take a few minutes)...")
    extractor = EventFeatureExtractor(n_clusters=30)
    events = extractor.fit_transform(events_raw)
    # Create the cache directory on first run: DataFrame.to_csv does not create
    # missing parent directories and would otherwise fail after the long
    # computation has already finished.
    processed_events_path.parent.mkdir(parents=True, exist_ok=True)
    events.to_csv(processed_events_path, index=False)
    print("Events processed and cached!")

# Summary of the table that the rest of the notebook works with.
print(f"Processed events shape: {events.shape}")
print(f"Event categories: {events['event_category'].nunique()}")

Loading cached processed events...
Processed events shape: (3137972, 113)
Event categories: 30


## 3. Build Interaction Matrices

Build R (rating) and W (weight) matrices for collaborative filtering.

In [None]:
# Keyword arguments forwarded verbatim to InteractionMatrix.
MATRIX_PARAMS = dict(
    weight_purchase=3.0,
    weight_interested=1.0,
    weight_not_interested=0.5,
    weight_unseen=0.1,
)

print("Building interaction matrices...")
matrix_builder = InteractionMatrix(**MATRIX_PARAMS)

# build_matrices returns the R and W matrices plus the user/event -> index lookups.
R, W, user_to_idx, event_to_idx = matrix_builder.build_matrices(
    train_df,
    event_attendees,
)

# Shapes and mapping sizes, to confirm the matrices and lookups agree.
print(
    f"R matrix shape: {R.shape}",
    f"W matrix shape: {W.shape}",
    f"Users: {len(user_to_idx)}",
    f"Events: {len(event_to_idx)}",
    sep="\n",
)

Building interaction matrices...
R matrix shape: (2034, 4733)
W matrix shape: (2034, 4733)
Users: 2034
Events: 4733


## 4. Hyperparameters

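The model uses geographic filtering to shrink the candidate set from the full catalogue of ~3M events to the `geo_top_k` events nearest to each user's median location. Note that `geo_top_k` is separate from the evaluation cutoff `K` defined below.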

In [None]:
# Baseline keyword arguments for CollaborativeFilteringRecommender.
COLLABORATIVE_PARAMS = dict(
    n_factors=10,
    regularization=0.01,
    iterations=15,
    random_state=42,
    geo_top_k=1000,
)

# K: number of recommendations requested per user and the cutoff passed to the metrics.
# N_TEST_USERS: cap on how many validation users are evaluated (a falsy value means all).
K, N_TEST_USERS = 200, 100

## 5. Train Collaborative Filtering Model

In [None]:
print("Training Collaborative Filtering model...")

# Baseline model built from the shared hyperparameter dict.
cf_model = CollaborativeFilteringRecommender(**COLLABORATIVE_PARAMS)

# Fit on the train-split matrices, their index lookups, the train interactions,
# and the processed events table.
cf_model.fit(
    R,
    W,
    user_to_idx,
    event_to_idx,
    train_df,
    events,
)

print("Training complete!")

Training Collaborative Filtering model...


  check_blas_config()
100%|██████████| 15/15 [00:00<00:00, 609.58it/s]

Training complete!





## 6. Evaluate on Validation Set

In [None]:
# Keep only validation rows that carry an explicit label (interested or not_interested).
has_label = (val_df["interested"] == 1) | (val_df["not_interested"] == 1)
val_with_labels = val_df[has_label]
users_with_labels = val_with_labels["user"].unique()

print(f"Users with labels in validation: {len(users_with_labels)}")

# A falsy N_TEST_USERS means "evaluate everyone"; otherwise take the first N users.
test_users = users_with_labels[:N_TEST_USERS] if N_TEST_USERS else users_with_labels

print(f"Evaluating on {len(test_users)} users...")

cf_predictions, actuals, not_interested = {}, {}, {}

for user in test_users:
    cf_predictions[user] = cf_model.recommend(user, n=K, exclude_seen=True)

    # Ground truth for this user, taken from the full validation frame.
    user_rows = val_df[val_df["user"] == user]
    actuals[user] = user_rows.loc[user_rows["interested"] == 1, "event"].tolist()
    not_interested[user] = user_rows.loc[user_rows["not_interested"] == 1, "event"].tolist()

metrics = evaluate_recommendations(actuals, cf_predictions, not_interested, k=K)

# Report every metric returned by evaluate_recommendations.
print(f"\n{'='*50}")
print(f"COLLABORATIVE FILTERING RESULTS @ K={K}")
print(f"{'='*50}")
for metric, value in metrics.items():
    print(f"{metric:20s}: {value:.5f}")
print(f"{'='*50}")

Users with labels in validation: 1501
Evaluating on 100 users...

COLLABORATIVE FILTERING RESULTS @ K=200
Recall@K            : 0.22255
Hit_Rate@K          : 0.29000
Contamination@K     : 0.00000


## 7. Hyperparameter Tuning: n_factors

Test different values of n_factors to find the optimal embedding dimensionality.

In [None]:
# Sweep over n_factors: retrain the recommender for each candidate value and
# score it on the same test users / ground truth built in the evaluation cell.
#
# NOTE(review): in the saved run every n_factors value produced the same
# Recall@200 (0.22255) and Hit_Rate@200 (0.29000). That is suspicious — it may
# mean the ranking returned by recommend() is not influenced by the latent
# factors. Verify in models.collaborative before drawing conclusions from this sweep.
n_factors_to_test = [1, 5, 10, 20, 50, 100]

results_factors = []

for n_factors in n_factors_to_test:
    print(f"\n{'='*60}")
    print(f"Testing n_factors = {n_factors}")
    print(f"{'='*60}")

    # Start from the shared baseline and override only the swept value, so the
    # sweep cannot drift from COLLABORATIVE_PARAMS when the baseline is edited.
    params = {**COLLABORATIVE_PARAMS, "n_factors": n_factors}

    print(f"Training model with n_factors={n_factors}...")
    model = CollaborativeFilteringRecommender(**params)
    model.fit(R, W, user_to_idx, event_to_idx, train_df, events)

    print(f"Evaluating on {len(test_users)} users...")
    predictions = {
        user: model.recommend(user, n=K, exclude_seen=True)
        for user in test_users
    }

    metrics = evaluate_recommendations(actuals, predictions, not_interested, k=K)

    # One record per configuration; contamination is stored but not printed below.
    results_factors.append({
        "n_factors": n_factors,
        "recall": metrics["Recall@K"],
        "hit_rate": metrics["Hit_Rate@K"],
        "contamination": metrics["Contamination@K"]
    })

    print(f"Recall@{K}: {metrics['Recall@K']:.5f}")
    print(f"Hit_Rate@{K}: {metrics['Hit_Rate@K']:.5f}")

# Fixed-width summary table of the whole sweep.
print(f"\n{'='*60}")
print("SUMMARY OF RESULTS - n_factors")
print(f"{'='*60}")
print(f"{'n_factors':<15} {'Recall@K':<15} {'Hit_Rate@K':<15}")
print(f"{'-'*45}")
for r in results_factors:
    print(f"{r['n_factors']:<15} {r['recall']:<15.5f} {r['hit_rate']:<15.5f}")


Testing n_factors = 1
Training model with n_factors=1...


100%|██████████| 15/15 [00:00<00:00, 3565.37it/s]


Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

Testing n_factors = 5
Training model with n_factors=5...


100%|██████████| 15/15 [00:00<00:00, 1293.90it/s]


Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

Testing n_factors = 10
Training model with n_factors=10...


100%|██████████| 15/15 [00:00<00:00, 675.40it/s]


Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

Testing n_factors = 20
Training model with n_factors=20...


100%|██████████| 15/15 [00:00<00:00, 396.92it/s]


Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

Testing n_factors = 50
Training model with n_factors=50...


100%|██████████| 15/15 [00:00<00:00, 95.59it/s]


Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

Testing n_factors = 100
Training model with n_factors=100...


100%|██████████| 15/15 [00:00<00:00, 57.85it/s]


Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

SUMMARY OF RESULTS - n_factors
n_factors       Recall@K        Hit_Rate@K     
---------------------------------------------
1               0.22255         0.29000        
5               0.22255         0.29000        
10              0.22255         0.29000        
20              0.22255         0.29000        
50              0.22255         0.29000        
100             0.22255         0.29000        


## 8. Hyperparameter Tuning: geo_top_k

Test different values of geo_top_k to understand the impact of geographic filtering on performance.

In [None]:
# Sweep over geo_top_k: retrain the recommender for each candidate value and
# score it on the same test users / ground truth built in the evaluation cell.
#
# NOTE(review): the saved run was interrupted (KeyboardInterrupt) while
# evaluating geo_top_k=5000, so the summary table at the bottom of this cell
# was never printed in the recorded output. Re-run to completion before
# quoting results.
geo_top_k_to_test = [500, 1000, 2000, 3000, 5000]

results_geo = []

for geo_k in geo_top_k_to_test:
    print(f"\n{'='*60}")
    print(f"Testing geo_top_k = {geo_k}")
    print(f"{'='*60}")

    # Start from the shared baseline and override only the swept value, so the
    # sweep cannot drift from COLLABORATIVE_PARAMS when the baseline is edited.
    params = {**COLLABORATIVE_PARAMS, "geo_top_k": geo_k}

    print(f"Training model with geo_top_k={geo_k}...")
    model = CollaborativeFilteringRecommender(**params)
    model.fit(R, W, user_to_idx, event_to_idx, train_df, events)

    print(f"Evaluating on {len(test_users)} users...")
    predictions = {
        user: model.recommend(user, n=K, exclude_seen=True)
        for user in test_users
    }

    metrics = evaluate_recommendations(actuals, predictions, not_interested, k=K)

    # One record per configuration; contamination is stored but not printed below.
    results_geo.append({
        "geo_top_k": geo_k,
        "recall": metrics["Recall@K"],
        "hit_rate": metrics["Hit_Rate@K"],
        "contamination": metrics["Contamination@K"]
    })

    print(f"Recall@{K}: {metrics['Recall@K']:.5f}")
    print(f"Hit_Rate@{K}: {metrics['Hit_Rate@K']:.5f}")

# Fixed-width summary table of the whole sweep.
print(f"\n{'='*60}")
print("SUMMARY OF RESULTS - geo_top_k")
print(f"{'='*60}")
print(f"{'geo_top_k':<15} {'Recall@K':<15} {'Hit_Rate@K':<15}")
print(f"{'-'*45}")
for r in results_geo:
    print(f"{r['geo_top_k']:<15} {r['recall']:<15.5f} {r['hit_rate']:<15.5f}")


Testing geo_top_k = 500
Training model with geo_top_k=500...


100%|██████████| 15/15 [00:00<00:00, 586.86it/s]


DEBUG: interaction_matrix shape: (2034, 4733)
DEBUG: item_factors shape: (4733, 10)
DEBUG: user_factors shape: (2034, 10)
DEBUG: len(event_to_idx): 4733
DEBUG: len(user_to_idx): 2034
Evaluating on 100 users...
Recall@200: 0.08445
Hit_Rate@200: 0.13000

Testing geo_top_k = 1000
Training model with geo_top_k=1000...


100%|██████████| 15/15 [00:00<00:00, 578.19it/s]


DEBUG: interaction_matrix shape: (2034, 4733)
DEBUG: item_factors shape: (4733, 10)
DEBUG: user_factors shape: (2034, 10)
DEBUG: len(event_to_idx): 4733
DEBUG: len(user_to_idx): 2034
Evaluating on 100 users...
Recall@200: 0.22255
Hit_Rate@200: 0.29000

Testing geo_top_k = 2000
Training model with geo_top_k=2000...


100%|██████████| 15/15 [00:00<00:00, 649.49it/s]


DEBUG: interaction_matrix shape: (2034, 4733)
DEBUG: item_factors shape: (4733, 10)
DEBUG: user_factors shape: (2034, 10)
DEBUG: len(event_to_idx): 4733
DEBUG: len(user_to_idx): 2034
Evaluating on 100 users...
Recall@200: 0.26231
Hit_Rate@200: 0.33000

Testing geo_top_k = 3000
Training model with geo_top_k=3000...


100%|██████████| 15/15 [00:00<00:00, 599.95it/s]


DEBUG: interaction_matrix shape: (2034, 4733)
DEBUG: item_factors shape: (4733, 10)
DEBUG: user_factors shape: (2034, 10)
DEBUG: len(event_to_idx): 4733
DEBUG: len(user_to_idx): 2034
Evaluating on 100 users...
Recall@200: 0.27231
Hit_Rate@200: 0.34000

Testing geo_top_k = 5000
Training model with geo_top_k=5000...


100%|██████████| 15/15 [00:00<00:00, 596.50it/s]


DEBUG: interaction_matrix shape: (2034, 4733)
DEBUG: item_factors shape: (4733, 10)
DEBUG: user_factors shape: (2034, 10)
DEBUG: len(event_to_idx): 4733
DEBUG: len(user_to_idx): 2034
Evaluating on 100 users...


KeyboardInterrupt: 