# Event Recommendation System - Temporal Evaluation

In [2]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

sys.path.append(str(Path.cwd().parent))

from models.content_based import ContentBasedRecommender
from utils.metrics import evaluate_recommendations
from utils.temporal_split import temporal_split_per_user, print_split_stats

## 1. Load and Split Data

In [3]:
# Directory holding the raw CSV exports, relative to the notebook's folder.
raw_dir = Path("../data/raw")

# Read the four raw tables in a fixed order; the generator is consumed
# left-to-right by the tuple unpacking, so each file is read exactly once.
train_raw, events_raw, event_attendees, user_friends = (
    pd.read_csv(raw_dir / filename)
    for filename in (
        "train.csv",
        "events.csv",
        "event_attendees.csv",
        "user_friends.csv",
    )
)

# Quick row-count check of the tables used below.
print(f"Raw train data: {len(train_raw)} interactions")
print(f"Events: {len(events_raw)}")
print(f"Event attendees: {len(event_attendees)}")

Raw train data: 15398 interactions
Events: 3137972
Event attendees: 24144


In [4]:
# Split the raw interactions into a training and a validation frame using the
# project's per-user temporal splitter. train_ratio and min_interactions are
# forwarded unchanged; their exact semantics live in utils.temporal_split.
train_df, val_df = temporal_split_per_user(
    train_raw,
    min_interactions=3,
    train_ratio=0.5,
)

# Print size / overlap statistics for the two resulting frames.
print_split_stats(train_df, val_df)

TEMPORAL SPLIT STATISTICS

TRAIN SET:
  Total interactions: 7393
  Unique users: 2034
  Unique events: 4733
  Interested=1: 1337

VALIDATION SET:
  Total interactions: 8005
  Unique users: 2034
  Unique events: 5127
  Interested=1: 2794

OVERLAP:
  Users in both: 2034
  Events in both: 1014


## 2. Preprocess Events Data

This cell processes events (K-means clustering, feature extraction) only if not already cached. Subsequent runs will load from cache.

In [5]:
from utils.preprocessing import EventFeatureExtractor

# On-disk cache of the processed event features. When the file exists it is
# loaded as-is; otherwise the features are computed from events_raw and saved.
# NOTE(review): the cache is keyed only by file name, so it is not invalidated
# when n_clusters or the raw events change -- delete the file to force a rebuild.
processed_events_path = Path("../data/processed/events_processed.csv")

if processed_events_path.exists():
    print("Loading cached processed events...")
    events = pd.read_csv(processed_events_path)
else:
    # DataFrame.to_csv does not create missing parent directories. Create the
    # cache folder up front so a fresh checkout fails fast (e.g. on a
    # permission problem) instead of after the multi-minute processing step.
    processed_events_path.parent.mkdir(parents=True, exist_ok=True)

    print("Processing events (this will take a few minutes)...")
    extractor = EventFeatureExtractor(n_clusters=30)
    events = extractor.fit_transform(events_raw)
    events.to_csv(processed_events_path, index=False)
    print("Events processed and cached!")

# Sanity check of the result; 'event_category' is expected to be one of the
# columns produced by the extractor.
print(f"Processed events shape: {events.shape}")
print(f"Event categories: {events['event_category'].nunique()}")

Loading cached processed events...
Processed events shape: (3137972, 113)
Event categories: 30


## 3. Hyperparameters

The model uses geographic filtering to reduce the search space from ~3M events to the `geo_top_k` nearest events, based on each user's median location.

In [6]:
# Baseline keyword arguments for ContentBasedRecommender; unpacked with ** when
# the model is constructed.
CONTENT_BASED_PARAMS = dict(
    weight_purchase=3.0,
    weight_interested=1.0,
    temporal_decay=0.01,
    geo_top_k=3000,
)

# K: number of recommendations requested per user and the cutoff passed to the
#    metrics.
# N_TEST_USERS: cap on how many validation users are evaluated; a falsy value
#    (None / 0) means "evaluate every labelled user".
K, N_TEST_USERS = 200, 100

## 4. Train Content-Based Model

In [7]:
# Fit the baseline content-based recommender using the hyperparameters from
# CONTENT_BASED_PARAMS. Only the training half of the temporal split (train_df)
# is passed as interaction data.
print("Training Content-Based model...")
cb_model = ContentBasedRecommender(**CONTENT_BASED_PARAMS)
# fit() receives the processed event features, the training interactions and
# the raw attendee table; how each is used is internal to the model.
# NOTE(review): event_attendees is not split temporally -- confirm it cannot
# leak validation-period attendance into the model.
cb_model.fit(events, train_df, event_attendees)
print("Training complete!")

Training Content-Based model...
Training complete!


## 5. Evaluate on Validation Set

In [8]:
# Keep only validation rows that carry an explicit label (interested or
# not_interested) and collect the users that own at least one such row.
val_with_labels = val_df[(val_df["interested"] == 1) | (val_df["not_interested"] == 1)]
users_with_labels = val_with_labels["user"].unique()

print(f"Users with labels in validation: {len(users_with_labels)}")

# N_TEST_USERS caps the evaluation to the first N labelled users (in order of
# first appearance in val_df); a falsy value evaluates all of them.
if N_TEST_USERS:
    test_users = users_with_labels[:N_TEST_USERS]
else:
    test_users = users_with_labels

print(f"Evaluating on {len(test_users)} users...")

# Build the per-user ground-truth lists with one groupby pass per label instead
# of re-filtering the whole validation frame twice for every user. Row order
# inside each group is preserved, so the lists match the per-user masks.
interested_by_user = {
    user: user_events.tolist()
    for user, user_events in val_df.loc[val_df["interested"] == 1].groupby("user")["event"]
}
not_interested_by_user = {
    user: user_events.tolist()
    for user, user_events in val_df.loc[val_df["not_interested"] == 1].groupby("user")["event"]
}

cb_predictions = {}
actuals = {}
not_interested = {}

for user in test_users:
    cb_predictions[user] = cb_model.recommend(user, n=K, exclude_seen=True)
    # Users without rows for a label get an empty list, as before.
    actuals[user] = interested_by_user.get(user, [])
    not_interested[user] = not_interested_by_user.get(user, [])

metrics = evaluate_recommendations(actuals, cb_predictions, not_interested, k=K)

print(f"\n{'='*50}")
print(f"CONTENT-BASED RESULTS @ K={K}")
print(f"{'='*50}")
for metric, value in metrics.items():
    print(f"{metric:20s}: {value:.5f}")
print(f"{'='*50}")

Users with labels in validation: 1501
Evaluating on 100 users...

CONTENT-BASED RESULTS @ K=200
Recall@K            : 0.06343
Hit_Rate@K          : 0.11000
Contamination@K     : 0.00000


## 6. Hyperparameter Tuning: geo_top_k

Test different values of geo_top_k to understand the impact of geographic filtering on performance.

In [12]:
import time

# Candidate sizes for the geographic pre-filter.
geo_top_k_to_test = [500, 1000, 2000, 3000, 5000]

# One dict per tested value: geo_top_k plus the metrics read from
# evaluate_recommendations.
results_geo = []

for geo_k in geo_top_k_to_test:
    print(f"\n{'='*60}")
    print(f"Testing geo_top_k = {geo_k}")
    print(f"{'='*60}")

    # Start from the shared baseline and override only the swept parameter, so
    # this sweep cannot drift from CONTENT_BASED_PARAMS when the baseline changes.
    params = {**CONTENT_BASED_PARAMS, "geo_top_k": geo_k}

    # perf_counter is monotonic, so the elapsed time is immune to wall-clock
    # adjustments; it covers both training and recommendation.
    start_time = time.perf_counter()

    print(f"Training model with geo_top_k={geo_k}...")
    model = ContentBasedRecommender(**params)
    model.fit(events, train_df, event_attendees)

    # Reuse test_users / actuals / not_interested from the baseline evaluation
    # cell so every configuration is scored on the same users and labels.
    print(f"Evaluating on {len(test_users)} users...")
    predictions = {}
    for user in test_users:
        predictions[user] = model.recommend(user, n=K, exclude_seen=True)

    metrics = evaluate_recommendations(actuals, predictions, not_interested, k=K)

    elapsed = time.perf_counter() - start_time

    results_geo.append({
        "geo_top_k": geo_k,
        "recall": metrics["Recall@K"],
        "hit_rate": metrics["Hit_Rate@K"],
        "contamination": metrics["Contamination@K"]
    })

    print(f"Recall@{K}: {metrics['Recall@K']:.5f}")
    print(f"Hit_Rate@{K}: {metrics['Hit_Rate@K']:.5f}")
    print(f"Time: {elapsed:.1f}s (~{elapsed/60:.1f} min)")

# Compact summary table of the sweep.
print(f"\n{'='*60}")
print("SUMMARY OF RESULTS - geo_top_k")
print(f"{'='*60}")
print(f"{'geo_top_k':<15} {'Recall@K':<15} {'Hit_Rate@K':<15}")
print(f"{'-'*45}")
for r in results_geo:
    print(f"{r['geo_top_k']:<15} {r['recall']:<15.5f} {r['hit_rate']:<15.5f}")


Testing geo_top_k = 500
Training model with geo_top_k=500...
Evaluating on 100 users...
Recall@200: 0.04743
Hit_Rate@200: 0.09000
Time: 250.4s (~4.2 min)

Testing geo_top_k = 1000
Training model with geo_top_k=1000...
Evaluating on 100 users...
Recall@200: 0.10376
Hit_Rate@200: 0.16000
Time: 319.2s (~5.3 min)

Testing geo_top_k = 2000
Training model with geo_top_k=2000...
Evaluating on 100 users...
Recall@200: 0.10376
Hit_Rate@200: 0.15000
Time: 327.8s (~5.5 min)

Testing geo_top_k = 3000
Training model with geo_top_k=3000...
Evaluating on 100 users...
Recall@200: 0.06343
Hit_Rate@200: 0.11000
Time: 334.8s (~5.6 min)

Testing geo_top_k = 5000
Training model with geo_top_k=5000...
Evaluating on 100 users...
Recall@200: 0.04876
Hit_Rate@200: 0.07000
Time: 342.0s (~5.7 min)

SUMMARY OF RESULTS - geo_top_k
geo_top_k       Recall@K        Hit_Rate@K     
---------------------------------------------
500             0.04743         0.09000        
1000            0.10376         0.16000    

## 7. Hyperparameter Tuning: temporal_decay

Test different values of temporal_decay to understand how much older interactions should be discounted.

In [13]:
# Candidate temporal_decay values; 0.0 is included as a reference point.
temporal_decay_to_test = [0.0, 0.001, 0.01, 0.05, 0.1]

# One dict per tested value: temporal_decay plus the metrics read from
# evaluate_recommendations.
results_decay = []

for decay in temporal_decay_to_test:
    print(f"\n{'='*60}")
    print(f"Testing temporal_decay = {decay}")
    print(f"{'='*60}")

    # Start from the shared baseline and override only the swept parameter, so
    # the other hyperparameters (including geo_top_k) always match
    # CONTENT_BASED_PARAMS instead of a second hard-coded copy.
    params = {**CONTENT_BASED_PARAMS, "temporal_decay": decay}

    print(f"Training model with temporal_decay={decay}...")
    model = ContentBasedRecommender(**params)
    model.fit(events, train_df, event_attendees)

    # Reuse test_users / actuals / not_interested from the baseline evaluation
    # cell so every configuration is scored on the same users and labels.
    print(f"Evaluating on {len(test_users)} users...")
    predictions = {}
    for user in test_users:
        predictions[user] = model.recommend(user, n=K, exclude_seen=True)

    metrics = evaluate_recommendations(actuals, predictions, not_interested, k=K)

    results_decay.append({
        "temporal_decay": decay,
        "recall": metrics["Recall@K"],
        "hit_rate": metrics["Hit_Rate@K"],
        "contamination": metrics["Contamination@K"]
    })

    print(f"Recall@{K}: {metrics['Recall@K']:.5f}")
    print(f"Hit_Rate@{K}: {metrics['Hit_Rate@K']:.5f}")

# Compact summary table of the sweep.
print(f"\n{'='*60}")
print("SUMMARY OF RESULTS - temporal_decay")
print(f"{'='*60}")
print(f"{'temporal_decay':<15} {'Recall@K':<15} {'Hit_Rate@K':<15}")
print(f"{'-'*45}")
for r in results_decay:
    print(f"{r['temporal_decay']:<15.3f} {r['recall']:<15.5f} {r['hit_rate']:<15.5f}")


Testing temporal_decay = 0.0
Training model with temporal_decay=0.0...
Evaluating on 100 users...
Recall@200: 0.06010
Hit_Rate@200: 0.10000

Testing temporal_decay = 0.001
Training model with temporal_decay=0.001...
Evaluating on 100 users...
Recall@200: 0.06343
Hit_Rate@200: 0.11000

Testing temporal_decay = 0.01
Training model with temporal_decay=0.01...
Evaluating on 100 users...
Recall@200: 0.06343
Hit_Rate@200: 0.11000

Testing temporal_decay = 0.05
Training model with temporal_decay=0.05...
Evaluating on 100 users...
Recall@200: 0.06343
Hit_Rate@200: 0.11000

Testing temporal_decay = 0.1
Training model with temporal_decay=0.1...
Evaluating on 100 users...
Recall@200: 0.06343
Hit_Rate@200: 0.11000

SUMMARY OF RESULTS - temporal_decay
temporal_decay  Recall@K        Hit_Rate@K     
---------------------------------------------
0.000           0.06010         0.10000        
0.001           0.06343         0.11000        
0.010           0.06343         0.11000        
0.050       