# Event Recommendation System - Social Recommendation Evaluation

In [14]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

sys.path.append(str(Path.cwd().parent))

from models.social import SocialRecommender
from utils.metrics import evaluate_recommendations
from utils.temporal_split import temporal_split_per_user, print_split_stats

## 1. Load and Split Data

In [15]:
# Directory holding the raw CSV files, relative to the notebook's working directory.
raw_dir = Path("../data/raw")

# Read each raw table into its own DataFrame.
train_raw = pd.read_csv(raw_dir.joinpath("train.csv"))
events_raw = pd.read_csv(raw_dir.joinpath("events.csv"))
event_attendees = pd.read_csv(raw_dir.joinpath("event_attendees.csv"))
user_friends = pd.read_csv(raw_dir.joinpath("user_friends.csv"))

# Report the row count of every table as a quick sanity check on the load.
row_count_lines = [
    f"Raw train data: {len(train_raw)} interactions",
    f"Events: {len(events_raw)}",
    f"Event attendees: {len(event_attendees)}",
    f"User friends: {len(user_friends)}",
]
print(*row_count_lines, sep="\n")

Raw train data: 15398 interactions
Events: 3137972
Event attendees: 24144
User friends: 38202


In [16]:
# Split settings, forwarded as `train_ratio` / `min_interactions`.
# See utils.temporal_split for their exact meaning.
TRAIN_RATIO = 0.5
MIN_INTERACTIONS = 3

# Split the raw interactions into a training part and a validation part.
train_df, val_df = temporal_split_per_user(
    train_raw,
    train_ratio=TRAIN_RATIO,
    min_interactions=MIN_INTERACTIONS,
)

print_split_stats(train_df, val_df)

TEMPORAL SPLIT STATISTICS

TRAIN SET:
  Total interactions: 7393
  Unique users: 2034
  Unique events: 4733
  Interested=1: 1337

VALIDATION SET:
  Total interactions: 8005
  Unique users: 2034
  Unique events: 5127
  Interested=1: 2794

OVERLAP:
  Users in both: 2034
  Events in both: 1014


## 2. Hyperparameters

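Social recommendation is based on friends' interactions. The model aggregates:
- Events that friends purchased/attended (weighted by `weight_attending`)
- Events that friends marked as interested (weighted by `weight_interested`)

`K` is the number of recommendations requested per user and the cutoff passed to the metrics; `N_TEST_USERS` caps how many validation users are evaluated (a falsy value evaluates all of them).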

In [17]:
# Keyword arguments unpacked into SocialRecommender(...) when the model is built.
SOCIAL_PARAMS = dict(
    weight_attending=2.0,
    weight_interested=1.0,
)

# Number of recommendations requested per user; also the cutoff handed to the metrics.
K = 200
# Upper bound on the number of validation users evaluated; a falsy value evaluates all.
N_TEST_USERS = 100

## 3. Train Social Recommendation Model

In [18]:
# Build the recommender from the weights in SOCIAL_PARAMS and fit it on the
# friend lists, the training split and the attendee table. Only train_df is
# passed here; val_df is held back for the evaluation below.
print("Training Social Recommendation model...")
social_model = SocialRecommender(**SOCIAL_PARAMS)
# NOTE(review): event_attendees is the full raw table and is not split by time —
# confirm it cannot leak validation-period attendance into the model.
social_model.fit(user_friends, train_df, event_attendees)
print("Training complete!")

Training Social Recommendation model...
Training complete!


## 4. Evaluate on Validation Set

In [19]:
# Keep only validation rows carrying an explicit label (interested or not
# interested) and collect the users that own at least one such row.
val_with_labels = val_df[(val_df["interested"] == 1) | (val_df["not_interested"] == 1)]
users_with_labels = val_with_labels["user"].unique()

print(f"Users with labels in validation: {len(users_with_labels)}")

# A falsy N_TEST_USERS evaluates every labelled user. Otherwise the first
# N_TEST_USERS users (in order of first appearance) are taken: a deterministic
# prefix, not a random sample.
if N_TEST_USERS:
    test_users = users_with_labels[:N_TEST_USERS]
else:
    test_users = users_with_labels

print(f"Evaluating on {len(test_users)} users...")

# Group the labelled events by user once, instead of re-scanning the whole
# validation frame with a boolean mask twice per user inside the loop.
# Row order within each user is preserved, so the lists match the per-user
# filter they replace.
interested_events = {
    user: events.tolist()
    for user, events in val_df.loc[val_df["interested"] == 1].groupby("user", sort=False)["event"]
}
not_interested_events = {
    user: events.tolist()
    for user, events in val_df.loc[val_df["not_interested"] == 1].groupby("user", sort=False)["event"]
}

social_predictions = {}
actuals = {}
not_interested = {}

for user in test_users:
    social_predictions[user] = social_model.recommend(user, n=K, exclude_seen=True)
    # Users with no event of a given label get an empty list.
    actuals[user] = interested_events.get(user, [])
    not_interested[user] = not_interested_events.get(user, [])

metrics = evaluate_recommendations(actuals, social_predictions, not_interested, k=K)

print(f"\n{'='*50}")
print(f"SOCIAL RECOMMENDATION RESULTS @ K={K}")
print(f"{'='*50}")
for metric, value in metrics.items():
    print(f"{metric:20s}: {value:.5f}")
print(f"{'='*50}")

Users with labels in validation: 1501
Evaluating on 100 users...

SOCIAL RECOMMENDATION RESULTS @ K=200
Recall@K            : 0.46057
Hit_Rate@K          : 0.58000
Contamination@K     : 0.00055


## 5. Analysis: Friend Graph Statistics

In [20]:
# Count friends per user from the whitespace-separated `friends` string.
# Rows whose `friends` value is missing produce NaN in `friend_count`.
user_friends_copy = user_friends.copy()
user_friends_copy['friend_count'] = user_friends_copy['friends'].str.split().str.len()

# A user "has friends" only when at least one friend is listed. Merely
# appearing in the table is not enough, because the list can be missing or blank.
users_with_friends = set(
    user_friends_copy.loc[user_friends_copy['friend_count'] > 0, 'user']
)
test_users_with_friends = [u for u in test_users if u in users_with_friends]

# Guard the percentage against an empty evaluation set.
n_test_users = len(test_users)
coverage_pct = len(test_users_with_friends) / n_test_users * 100 if n_test_users else 0.0

print(f"Users in test set: {n_test_users}")
print(f"Users with friends: {len(test_users_with_friends)}")
print(f"Coverage: {coverage_pct:.1f}%")

# Statistics cover users that have a friend list. Dropping the NaN rows
# explicitly lets the counts be integers, so max/min print without ".0".
friend_counts = user_friends_copy['friend_count'].dropna().astype(int)
print(f"\nFriend count statistics:")
print(f"Mean friends per user: {friend_counts.mean():.1f}")
print(f"Median friends per user: {friend_counts.median():.1f}")
print(f"Max friends: {friend_counts.max()}")
print(f"Min friends: {friend_counts.min()}")

Users in test set: 100
Users with friends: 100
Coverage: 100.0%

Friend count statistics:
Mean friends per user: 798.3
Median friends per user: 484.0
Max friends: 4964.0
Min friends: 1.0


## 6. Analysis: Recommendation Coverage

In [21]:
# Size of every evaluated user's recommendation list (0 when the user has no entry).
rec_sizes = [(u, len(social_predictions.get(u, []))) for u in test_users]

users_without_recs = [u for u, size in rec_sizes if size == 0]
users_with_recs = [u for u, size in rec_sizes if size > 0]

print(f"Users without recommendations: {len(users_without_recs)}")
print(f"Users with recommendations: {len(users_with_recs)}")

if users_without_recs:
    # Users absent from the friend table are reported as having no friends;
    # every other empty list is attributed to friends without activity.
    no_friends = [u for u in users_without_recs if u not in users_with_friends]
    inactive_friends_count = len(users_without_recs) - len(no_friends)
    print(f"\nReasons for no recommendations:")
    print(f"  - No friends in network: {len(no_friends)}")
    print(f"  - Friends have no activity: {inactive_friends_count}")

if users_with_recs:
    # Distribution of list lengths among users that received recommendations.
    rec_counts = [size for _, size in rec_sizes if size > 0]
    print(f"\nRecommendation count statistics:")
    print(f"Mean: {np.mean(rec_counts):.1f}")
    print(f"Median: {np.median(rec_counts):.1f}")
    print(f"Max: {max(rec_counts)}")
    print(f"Min: {min(rec_counts)}")

    # Re-score using only the users that received at least one recommendation.
    actuals_filtered = {}
    predictions_filtered = {}
    not_interested_filtered = {}
    for u in users_with_recs:
        actuals_filtered[u] = actuals[u]
        predictions_filtered[u] = social_predictions[u]
        not_interested_filtered[u] = not_interested[u]

    metrics_filtered = evaluate_recommendations(
        actuals_filtered, predictions_filtered, not_interested_filtered, k=K
    )

    banner = f"{'='*50}"
    print(f"\n{'='*50}")
    print(f"RESULTS FOR USERS WITH RECOMMENDATIONS @ K={K}")
    print(banner)
    for metric, value in metrics_filtered.items():
        print(f"{metric:20s}: {value:.5f}")
    print(banner)

Users without recommendations: 2
Users with recommendations: 98

Reasons for no recommendations:
  - No friends in network: 0
  - Friends have no activity: 2

Recommendation count statistics:
Mean: 37.0
Median: 28.0
Max: 200
Min: 1

RESULTS FOR USERS WITH RECOMMENDATIONS @ K=200
Recall@K            : 0.46997
Hit_Rate@K          : 0.59184
Contamination@K     : 0.00055
