# Event Recommendation System - Hybrid Ensemble Evaluation

In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from collections import defaultdict

sys.path.append(str(Path.cwd().parent))

from models.content_based import ContentBasedRecommender
from models.collaborative import CollaborativeFilteringRecommender
from models.social import SocialRecommender
from utils.metrics import evaluate_recommendations
from utils.temporal_split import temporal_split_per_user, print_split_stats
from utils.preprocessing import InteractionMatrix

  from .autonotebook import tqdm as notebook_tqdm


## 1. Load and Split Data

In [2]:
# Directory holding the raw CSV inputs, relative to the notebook's working directory.
raw_dir = Path("../data/raw")

# Read the four raw tables in one pass; the target names on the left line up
# one-to-one with the file names on the right.
train_raw, events_raw, event_attendees, user_friends = (
    pd.read_csv(raw_dir / csv_name)
    for csv_name in ("train.csv", "events.csv", "event_attendees.csv", "user_friends.csv")
)

# Row counts act as a quick sanity check on what was loaded.
print(f"Raw train data: {len(train_raw)} interactions")
print(f"Events: {len(events_raw)}")
print(f"Event attendees: {len(event_attendees)}")
print(f"User friends: {len(user_friends)}")

Raw train data: 15398 interactions
Events: 3137972
Event attendees: 24144
User friends: 38202


In [3]:
# Split settings, named so they are easy to find and tune.
TRAIN_RATIO = 0.5
MIN_INTERACTIONS = 3

train_df, val_df = temporal_split_per_user(
    train_raw,
    train_ratio=TRAIN_RATIO,
    min_interactions=MIN_INTERACTIONS,
)

print_split_stats(train_df, val_df)

TEMPORAL SPLIT STATISTICS

TRAIN SET:
  Total interactions: 7393
  Unique users: 2034
  Unique events: 4733
  Interested=1: 1337

VALIDATION SET:
  Total interactions: 8005
  Unique users: 2034
  Unique events: 5127
  Interested=1: 2794

OVERLAP:
  Users in both: 2034
  Events in both: 1014


## 2. Preprocess Events Data

In [4]:
from utils.preprocessing import EventFeatureExtractor

# Cache file for the processed event features; reused on later runs so the
# expensive extraction below only happens once.
processed_events_path = Path("../data/processed/events_processed.csv")

if processed_events_path.exists():
    print("Loading cached processed events...")
    events = pd.read_csv(processed_events_path)
else:
    print("Processing events (this will take a few minutes)...")
    extractor = EventFeatureExtractor(n_clusters=30)
    events = extractor.fit_transform(events_raw)
    # On a fresh checkout the cache directory may not exist yet; create it so
    # the write below cannot fail after the long extraction has already run.
    processed_events_path.parent.mkdir(parents=True, exist_ok=True)
    events.to_csv(processed_events_path, index=False)
    print("Events processed and cached!")

print(f"Processed events shape: {events.shape}")
print(f"Event categories: {events['event_category'].nunique()}")

Loading cached processed events...
Processed events shape: (3137972, 113)
Event categories: 30


## 3. Build Interaction Matrices (for Collaborative)

In [5]:
# Keyword arguments forwarded unchanged to InteractionMatrix.
MATRIX_PARAMS = dict(
    weight_purchase=3.0,
    weight_interested=1.0,
    weight_not_interested=0.5,
    weight_unseen=0.1,
)

print("Building interaction matrices...")
matrix_builder = InteractionMatrix(**MATRIX_PARAMS)
(
    R,
    W,
    user_to_idx,
    event_to_idx,
) = matrix_builder.build_matrices(train_df, event_attendees)

# Both matrices are printed so a shape mismatch is visible immediately.
print(f"R matrix shape: {R.shape}")
print(f"W matrix shape: {W.shape}")

Building interaction matrices...
R matrix shape: (2034, 4733)
W matrix shape: (2034, 4733)


## 4. Best Hyperparameters from Individual Models

Based on previous experiments, we use the best hyperparameters found for each approach.

In [6]:
# Keyword arguments for ContentBasedRecommender.
CONTENT_PARAMS = dict(
    weight_purchase=3.0,
    weight_interested=1.0,
    temporal_decay=0.01,
    geo_top_k=1000,
)

# Keyword arguments for CollaborativeFilteringRecommender.
COLLABORATIVE_PARAMS = dict(
    n_factors=10,
    regularization=0.01,
    iterations=15,
    random_state=42,
    geo_top_k=3000,
)

# Keyword arguments for SocialRecommender.
SOCIAL_PARAMS = dict(
    weight_attending=2.0,
    weight_interested=1.0,
)

# Recommendation list length used for every model and for evaluation.
K = 200
# Number of validation users to evaluate; a falsy value means "use all".
N_TEST_USERS = 100

## 5. Train Individual Models

In [7]:
# --- Content-based -----------------------------------------------------------
print("Training Content-Based model...")
cb_model = ContentBasedRecommender(**CONTENT_PARAMS)
cb_model.fit(
    events,
    train_df,
    event_attendees,
)
print("✓ Content-Based trained")

# --- Collaborative filtering (uses the matrices built earlier) ----------------
print("\nTraining Collaborative Filtering model...")
cf_model = CollaborativeFilteringRecommender(**COLLABORATIVE_PARAMS)
cf_model.fit(
    R,
    W,
    user_to_idx,
    event_to_idx,
    train_df,
    events,
)
print("✓ Collaborative trained")

# --- Social -------------------------------------------------------------------
print("\nTraining Social model...")
social_model = SocialRecommender(**SOCIAL_PARAMS)
social_model.fit(
    user_friends,
    train_df,
    event_attendees,
)
print("✓ Social trained")

print("\n" + "="*50)
print("All models trained successfully!")
print("="*50)

Training Content-Based model...
✓ Content-Based trained

Training Collaborative Filtering model...


  check_blas_config()
100%|██████████| 15/15 [00:00<00:00, 572.15it/s]


✓ Collaborative trained

Training Social model...
✓ Social trained

All models trained successfully!


## 6. Prepare Test Set

In [8]:
# Keep only validation rows that carry an explicit label (either flag set).
val_with_labels = val_df[(val_df["interested"] == 1) | (val_df["not_interested"] == 1)]
users_with_labels = val_with_labels["user"].unique()

print(f"Users with labels in validation: {len(users_with_labels)}")

# A falsy N_TEST_USERS means "evaluate on every labelled user".
if N_TEST_USERS:
    test_users = users_with_labels[:N_TEST_USERS]
else:
    test_users = users_with_labels

print(f"Evaluating on {len(test_users)} users...")

# Group the validation rows once per label instead of re-scanning the whole
# frame twice for every test user. Row order inside each group follows val_df,
# so each event list matches what a per-user boolean mask would produce.
interested_by_user = {
    uid: grp.tolist()
    for uid, grp in val_df.loc[val_df["interested"] == 1].groupby("user", sort=False)["event"]
}
not_interested_by_user = {
    uid: grp.tolist()
    for uid, grp in val_df.loc[val_df["not_interested"] == 1].groupby("user", sort=False)["event"]
}

# Users lacking a given label get an empty list, as a mask-based filter would give.
actuals = {user: interested_by_user.get(user, []) for user in test_users}
not_interested = {user: not_interested_by_user.get(user, []) for user in test_users}

Users with labels in validation: 1501
Evaluating on 100 users...


## 7. Get Scores from Individual Models

Get each model's top-K recommendations for the test users and convert them into rank-based scores (K for the first item, decreasing by 1 per position).

In [9]:
print("Getting scores from individual models...")

cb_scores, cf_scores, social_scores = {}, {}, {}

for user in test_users:
    # Query the three models in a fixed order: content-based, collaborative, social.
    cb_recs, cf_recs, social_recs = (
        model.recommend(user, n=K, exclude_seen=True)
        for model in (cb_model, cf_model, social_model)
    )

    # Turn each ranked list into rank-based scores: position 0 gets K,
    # position 1 gets K - 1, and so on.
    for store, ranked in (
        (cb_scores, cb_recs),
        (cf_scores, cf_recs),
        (social_scores, social_recs),
    ):
        store[user] = {event: K - position for position, event in enumerate(ranked)}

print("✓ Scores collected from all models")

Getting scores from individual models...
✓ Scores collected from all models


## 8. Hybrid Ensemble with Min-Max Normalization

Combine scores from all three models using weighted ensemble with min-max normalization.

In [10]:
def normalize_scores(scores_dict):
    """Rescale every user's event scores to the [0, 1] range (min-max).

    Args:
        scores_dict: Mapping ``{user: {event: score}}``.

    Returns:
        A new mapping with the same users and events. A user with no events
        maps to an empty dict; a user whose scores are all equal gets ``1.0``
        for every event.
    """

    def _rescale(event_scores):
        # Nothing to scale for a user without any scored events.
        if not event_scores:
            return {}

        values = np.array(list(event_scores.values()))
        lo, hi = values.min(), values.max()

        # A zero range would divide by zero; give every event the top score.
        if hi == lo:
            return dict.fromkeys(event_scores, 1.0)

        spread = hi - lo
        return {event: (raw - lo) / spread for event, raw in event_scores.items()}

    return {user: _rescale(per_event) for user, per_event in scores_dict.items()}

print("Normalizing scores...")
# Same normalization applied to each model's scores, in a fixed order.
cb_scores_norm, cf_scores_norm, social_scores_norm = (
    normalize_scores(model_scores)
    for model_scores in (cb_scores, cf_scores, social_scores)
)
print("✓ Scores normalized")

Normalizing scores...
✓ Scores normalized


In [11]:
def hybrid_recommend(user, w_cb, w_cf, w_social, n=200):
    """Rank events for ``user`` by a weighted sum of the normalized model scores.

    Reads the notebook-level dictionaries ``cb_scores_norm``, ``cf_scores_norm``
    and ``social_scores_norm`` (each ``{user: {event: score}}``).

    Args:
        user: User identifier to look up in the three score dictionaries.
        w_cb: Weight applied to the content-based scores.
        w_cf: Weight applied to the collaborative-filtering scores.
        w_social: Weight applied to the social scores.
        n: Maximum number of events to return.

    Returns:
        Up to ``n`` events ordered by descending combined score. Events with
        equal scores keep the order in which they were first encountered
        (``sorted`` is stable). A user with no scores in any model yields ``[]``.
    """
    combined_scores = defaultdict(float)

    weighted_sources = (
        (w_cb, cb_scores_norm),
        (w_cf, cf_scores_norm),
        (w_social, social_scores_norm),
    )
    for weight, scores_by_user in weighted_sources:
        # A user missing from one model contributes nothing from that model
        # rather than aborting the whole ensemble with a KeyError.
        for event, score in scores_by_user.get(user, {}).items():
            combined_scores[event] += weight * score

    sorted_events = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
    return [event for event, _ in sorted_events[:n]]

print("Hybrid recommendation function ready")

Hybrid recommendation function ready


## 9. Evaluate with Equal Weights (Baseline)

In [12]:
print("Testing hybrid with equal weights (1/3, 1/3, 1/3)...")

# Baseline ensemble: every model contributes the same share.
equal_share = 1/3
hybrid_predictions = {
    user: hybrid_recommend(user, w_cb=equal_share, w_cf=equal_share, w_social=equal_share, n=K)
    for user in test_users
}

metrics = evaluate_recommendations(actuals, hybrid_predictions, not_interested, k=K)

print(f"\n{'='*50}")
print(f"HYBRID (EQUAL WEIGHTS) RESULTS @ K={K}")
print(f"{'='*50}")
for metric_name in metrics:
    print(f"{metric_name:20s}: {metrics[metric_name]:.5f}")
print(f"{'='*50}")

Testing hybrid with equal weights (1/3, 1/3, 1/3)...

HYBRID (EQUAL WEIGHTS) RESULTS @ K=200
Recall@K            : 0.46626
Hit_Rate@K          : 0.61000
Contamination@K     : 0.00010


## 10. Hyperparameter Tuning: Weight Combinations

Test different weight combinations to find the optimal ensemble.

In [13]:
import time

# Candidate ensembles as (w_cb, w_cf, w_social, label); each weight triple sums to 1.
weight_combinations = [
    (1/3, 1/3, 1/3, "Equal"),
    (0.5, 0.25, 0.25, "CB Heavy"),
    (0.25, 0.5, 0.25, "CF Heavy"),
    (0.25, 0.25, 0.5, "Social Heavy"),
    (0.4, 0.3, 0.3, "CB Focus"),
    (0.3, 0.4, 0.3, "CF Focus"),
    (0.3, 0.3, 0.4, "Social Focus"),
    (0.2, 0.3, 0.5, "Social Strong"),
    (0.1, 0.4, 0.5, "CF+Social"),
    (0.6, 0.2, 0.2, "CB Dominant"),
]

# One summary record per weight combination, in evaluation order.
results_weights = []

for idx, (w_cb, w_cf, w_social, label) in enumerate(weight_combinations, 1):
    print(f"\n{'='*60}")
    print(f"Testing {label}: CB={w_cb:.2f}, CF={w_cf:.2f}, Social={w_social:.2f} ({idx}/{len(weight_combinations)})")
    print(f"{'='*60}")

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; wall-clock time can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    predictions = {
        user: hybrid_recommend(user, w_cb=w_cb, w_cf=w_cf, w_social=w_social, n=K)
        for user in test_users
    }

    metrics = evaluate_recommendations(actuals, predictions, not_interested, k=K)

    elapsed = time.perf_counter() - start_time

    results_weights.append({
        "label": label,
        "w_cb": w_cb,
        "w_cf": w_cf,
        "w_social": w_social,
        "recall": metrics["Recall@K"],
        "hit_rate": metrics["Hit_Rate@K"],
        "contamination": metrics["Contamination@K"]
    })

    print(f"Recall@{K}: {metrics['Recall@K']:.5f}")
    print(f"Hit_Rate@{K}: {metrics['Hit_Rate@K']:.5f}")
    print(f"Time: {elapsed:.1f}s")

print(f"\n{'='*80}")
print("SUMMARY OF RESULTS - Weight Combinations")
print(f"{'='*80}")
print(f"{'Configuration':<20} {'CB':>6} {'CF':>6} {'Social':>6} {'Recall':>10} {'Hit_Rate':>10}")
print(f"{'-'*70}")
for r in results_weights:
    print(f"{r['label']:<20} {r['w_cb']:>6.2f} {r['w_cf']:>6.2f} {r['w_social']:>6.2f} {r['recall']:>10.5f} {r['hit_rate']:>10.5f}")

# The winner is chosen by recall alone; on a tie, max() keeps the earliest entry.
best = max(results_weights, key=lambda x: x['recall'])
print(f"\n{'='*80}")
print(f"BEST CONFIGURATION: {best['label']}")
print(f"Weights: CB={best['w_cb']:.2f}, CF={best['w_cf']:.2f}, Social={best['w_social']:.2f}")
print(f"Recall@{K}: {best['recall']:.5f}")
print(f"Hit_Rate@{K}: {best['hit_rate']:.5f}")
print(f"{'='*80}")


Testing Equal: CB=0.33, CF=0.33, Social=0.33 (1/10)
Recall@200: 0.46626
Hit_Rate@200: 0.61000
Time: 0.0s

Testing CB Heavy: CB=0.50, CF=0.25, Social=0.25 (2/10)
Recall@200: 0.45007
Hit_Rate@200: 0.59000
Time: 0.0s

Testing CF Heavy: CB=0.25, CF=0.50, Social=0.25 (3/10)
Recall@200: 0.45983
Hit_Rate@200: 0.60000
Time: 0.0s

Testing Social Heavy: CB=0.25, CF=0.25, Social=0.50 (4/10)
Recall@200: 0.49567
Hit_Rate@200: 0.63000
Time: 0.0s

Testing CB Focus: CB=0.40, CF=0.30, Social=0.30 (5/10)
Recall@200: 0.45483
Hit_Rate@200: 0.59000
Time: 0.0s

Testing CF Focus: CB=0.30, CF=0.40, Social=0.30 (6/10)
Recall@200: 0.46626
Hit_Rate@200: 0.61000
Time: 0.0s

Testing Social Focus: CB=0.30, CF=0.30, Social=0.40 (7/10)
Recall@200: 0.46733
Hit_Rate@200: 0.61000
Time: 0.0s

Testing Social Strong: CB=0.20, CF=0.30, Social=0.50 (8/10)
Recall@200: 0.50710
Hit_Rate@200: 0.64000
Time: 0.0s

Testing CF+Social: CB=0.10, CF=0.40, Social=0.50 (9/10)
Recall@200: 0.52995
Hit_Rate@200: 0.65000
Time: 0.0s

Testing