# Event Recommendation System - Temporal Evaluation

In [15]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

sys.path.append(str(Path.cwd().parent))

from models.content_based import ContentBasedRecommender
from models.collaborative import CollaborativeFilteringRecommender
from models.social import SocialRecommender
from models.hybrid import HybridRecommender
from utils.metrics import mean_average_precision_at_k
from utils.temporal_split import temporal_split_per_user, print_split_stats

## 1. Load and Split Data

In [16]:
raw_dir = Path("../data/raw")

train_raw = pd.read_csv(raw_dir / "train.csv")
events_raw = pd.read_csv(raw_dir / "events.csv")
event_attendees = pd.read_csv(raw_dir / "event_attendees.csv")
user_friends = pd.read_csv(raw_dir / "user_friends.csv")

print(f"Raw train data: {len(train_raw)} interactions")
print(f"Events: {len(events_raw)}")
print(f"Event attendees: {len(event_attendees)}")

Raw train data: 15398 interactions
Events: 3137972
Event attendees: 24144


In [17]:
train_df, val_df = temporal_split_per_user(train_raw, train_ratio=0.7, min_interactions=3)

print_split_stats(train_df, val_df)

TEMPORAL SPLIT STATISTICS

TRAIN SET:
  Total interactions: 9955
  Unique users: 2034
  Unique events: 6136
  Interested=1: 1954

VALIDATION SET:
  Total interactions: 5443
  Unique users: 2034
  Unique events: 3660
  Interested=1: 2177

OVERLAP:
  Users in both: 2034
  Events in both: 950


## 2. Preprocess Events Data

This cell processes events (K-means clustering, feature extraction) only if not already cached. Subsequent runs will load from cache.

In [18]:
from utils.preprocessing import EventFeatureExtractor

processed_events_path = Path("../data/processed/events_processed.csv")

if processed_events_path.exists():
    print("Loading cached processed events...")
    events = pd.read_csv(processed_events_path)
else:
    print("Processing events (this will take a few minutes)...")
    extractor = EventFeatureExtractor(n_clusters=30)
    events = extractor.fit_transform(events_raw)
    events.to_csv(processed_events_path, index=False)
    print("Events processed and cached!")

print(f"Processed events shape: {events.shape}")
print(f"Event categories: {events['event_category'].nunique()}")

Loading cached processed events...
Processed events shape: (3137972, 113)
Event categories: 30


## 3. Hyperparameters

In [22]:
CONTENT_BASED_PARAMS = {
    "weight_purchase": 3.0,
    "weight_interested": 1.0,
    "temporal_decay": 0.01
}

K = 200
N_TEST_USERS = 100

## 4. Train Content-Based Model

In [23]:
print("Training Content-Based model...")
cb_model = ContentBasedRecommender(**CONTENT_BASED_PARAMS)
cb_model.fit(events, train_df, event_attendees)
print("Training complete!")

Training Content-Based model...
Training complete!


## 5. Evaluate on Validation Set

In [24]:
test_users = val_df["user"].unique()
if N_TEST_USERS:
    test_users = test_users[:N_TEST_USERS]

print(f"Evaluating on {len(test_users)} users...")

cb_predictions = {}
actuals = {}

for user in test_users:
    cb_predictions[user] = cb_model.recommend(user, n=K, exclude_seen=True)
    actuals[user] = val_df[
        (val_df["user"] == user) & (val_df["interested"] == 1)
    ]["event"].tolist()

cb_map = mean_average_precision_at_k(actuals, cb_predictions, k=K)
print(f"\nContent-Based MAP@{K}: {cb_map:.5f}")

Evaluating on 100 users...

Content-Based MAP@200: 0.00032


## 6. Debug: Check Sample Predictions

In [25]:
sample_users = test_users[:5]

for user in sample_users:
    preds = cb_predictions[user]
    actual = actuals[user]
    overlap = set(preds) & set(actual)
    
    print(f"\nUser {user}:")
    print(f"  Predicted: {len(preds)} events")
    print(f"  Actual interested: {len(actual)} events")
    print(f"  Overlap: {len(overlap)} events")
    if len(actual) > 0:
        print(f"  Recall@{K}: {len(overlap) / len(actual):.3f}")


User 3044012:
  Predicted: 200 events
  Actual interested: 0 events
  Overlap: 0 events

User 4236494:
  Predicted: 200 events
  Actual interested: 2 events
  Overlap: 0 events
  Recall@200: 0.000

User 5574997:
  Predicted: 200 events
  Actual interested: 0 events
  Overlap: 0 events

User 7547671:
  Predicted: 200 events
  Actual interested: 0 events
  Overlap: 0 events

User 10329108:
  Predicted: 200 events
  Actual interested: 1 events
  Overlap: 0 events
  Recall@200: 0.000
