# Event Recommendation System - Experiments

Notebook para treinar e avaliar os modelos de recomendação.

In [1]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Add the parent of the current working directory to sys.path; this has to run
# before the `models` / `utils` imports below (presumably those packages live
# in that parent directory — confirm against the repository layout).
sys.path.append(str(Path.cwd().parent))

from models.content_based import ContentBasedRecommender
from models.collaborative import CollaborativeFilteringRecommender
from models.social import SocialRecommender
from models.hybrid import HybridRecommender
from utils.metrics import mean_average_precision_at_k
from utils.preprocessing import DataPreprocessor

  from tqdm.autonotebook import tqdm, trange


## 0. Data Preprocessing

In [2]:
# Run the project's preprocessing step. `DataPreprocessor` is imported in the
# notebook's top import cell together with the other project modules.
# NOTE(review): presumably this reads from `raw_dir` and writes the artifacts
# loaded in the next section into `processed_dir` — confirm in
# utils/preprocessing.py. The saved output of this cell is a KeyboardInterrupt,
# so it did not run to completion in the saved session.
preprocessor = DataPreprocessor(
    raw_dir="../data/raw",
    processed_dir="../data/processed"
)

# Keyword arguments forwarded unchanged to `DataPreprocessor.preprocess`,
# kept in a named dict like the per-model hyperparameters further down.
PREPROCESSING_PARAMS = {
    "n_clusters": 30,
    "weight_purchase": 100.0,
    "weight_interested": 10.0,
    "weight_not_interested": 1.0,
    "weight_unseen": 0.1
}

preprocessor.preprocess(**PREPROCESSING_PARAMS)

KeyboardInterrupt: 

## 1. Load Data

In [3]:
# Directories holding the preprocessed artifacts and the raw input files.
processed_dir = Path("../data/processed")
raw_dir = Path("../data/raw")

# Tabular inputs used by the recommenders below.
events = pd.read_csv(processed_dir / "events_processed.csv")
train = pd.read_csv(processed_dir / "train.csv")
event_attendees = pd.read_csv(raw_dir / "event_attendees.csv")
user_friends = pd.read_csv(processed_dir / "user_friends.csv")

# Dense matrices passed to the collaborative-filtering model's `fit`.
R = np.load(processed_dir / "R_matrix.npy")
W = np.load(processed_dir / "W_matrix.npy")


def _load_index_map(csv_path, key_column):
    """Read an id -> index mapping CSV once and return it as a dict.

    Parameters
    ----------
    csv_path : path-like
        CSV file containing `key_column` and an "idx" column.
    key_column : str
        Name of the column whose values become the dict keys.

    Returns
    -------
    dict
        Maps each value of `key_column` to the "idx" value on the same row.
    """
    mapping = pd.read_csv(csv_path)
    return dict(zip(mapping[key_column], mapping["idx"]))


# Each mapping file is parsed a single time (previously it was read twice,
# once per column).
user_to_idx = _load_index_map(processed_dir / "user_to_idx.csv", "user")
event_to_idx = _load_index_map(processed_dir / "event_to_idx.csv", "event")

print(f"Events: {len(events)}")
print(f"Users: {len(user_to_idx)}")
print(f"Interactions: {len(train)}")

Events: 3137972
Users: 2034
Interactions: 15398


## 2. Hyperparameters

In [None]:
# Keyword arguments unpacked into ContentBasedRecommender(...).
CONTENT_BASED_PARAMS = dict(
    weight_purchase=3.0,
    weight_interested=1.0,
    temporal_decay=0.01,
)

# Keyword arguments unpacked into CollaborativeFilteringRecommender(...).
COLLABORATIVE_PARAMS = dict(
    n_factors=20,
    regularization=0.01,
    iterations=15,
    random_state=42,
)

# Keyword arguments unpacked into SocialRecommender(...).
SOCIAL_PARAMS = dict(
    weight_attending=2.0,
    weight_interested=1.0,
)

# Keyword arguments unpacked into HybridRecommender(...) alongside the three
# fitted sub-models.
HYBRID_PARAMS = dict(
    weight_content=0.3,
    weight_collaborative=0.3,
    weight_social=0.4,
)

# Evaluation settings: how many distinct users are scored, and the cutoff used
# both as `n` for recommend() and as `k` for the MAP metric.
N_TEST_USERS = 100
K = 200

## 3. Content-Based Filtering

In [5]:
# Instantiate the content-based recommender with CONTENT_BASED_PARAMS and fit
# it on the events table, the training interactions and the attendee lists.
# The fitted `cb_model` is reused by the evaluation cell and the hybrid model.
cb_model = ContentBasedRecommender(**CONTENT_BASED_PARAMS)
cb_model.fit(events, train, event_attendees)

In [6]:
# Evaluation users: the first N_TEST_USERS distinct values of train["user"]
# (pandas `unique` keeps order of first appearance). `test_users` and
# `actuals` are reused by every later evaluation cell.
test_users = train["user"].unique()[:N_TEST_USERS]

# Ground truth per user: events on rows with interested == 1.
# Built with a single groupby pass instead of re-scanning the whole `train`
# frame with a boolean mask once per user; per-user row order is preserved.
# NOTE(review): these labels come from the same `train` frame that was passed
# to `fit`, so the score is measured on training data — confirm this is
# intended or switch to a held-out split.
interested_rows = train[train["interested"] == 1]
events_by_user = {
    user: user_events.tolist()
    for user, user_events in interested_rows.groupby("user")["event"]
}
# Users without any interested == 1 row get an empty list, as before.
actuals = {user: events_by_user.get(user, []) for user in test_users}

# Top-K recommendations from the content-based model for each evaluation user.
cb_predictions = {user: cb_model.recommend(user, n=K) for user in test_users}

cb_map = mean_average_precision_at_k(actuals, cb_predictions, k=K)
print(f"Content-Based MAP@{K}: {cb_map:.5f}")

KeyboardInterrupt: 

## 4. Collaborative Filtering

In [None]:
# Instantiate the collaborative-filtering recommender with COLLABORATIVE_PARAMS
# and fit it on the R / W matrices, the user and event index mappings, and the
# training interactions loaded in the data-loading section.
cf_model = CollaborativeFilteringRecommender(**COLLABORATIVE_PARAMS)
cf_model.fit(R, W, user_to_idx, event_to_idx, train)

In [None]:
# Top-K recommendations from the collaborative model for each evaluation user
# (`test_users` and `actuals` come from the content-based evaluation cell).
cf_predictions = {
    test_user: cf_model.recommend(test_user, n=K)
    for test_user in test_users
}

cf_map = mean_average_precision_at_k(actuals, cf_predictions, k=K)
print(f"Collaborative Filtering MAP@{K}: {cf_map:.5f}")

## 5. Social Recommendation

In [None]:
# Instantiate the social recommender with SOCIAL_PARAMS and fit it on the
# friendship table, the training interactions and the attendee lists.
social_model = SocialRecommender(**SOCIAL_PARAMS)
social_model.fit(user_friends, train, event_attendees)

In [None]:
# Top-K recommendations from the social model for each evaluation user
# (`test_users` and `actuals` come from the content-based evaluation cell).
social_predictions = {
    test_user: social_model.recommend(test_user, n=K)
    for test_user in test_users
}

social_map = mean_average_precision_at_k(actuals, social_predictions, k=K)
print(f"Social Recommendation MAP@{K}: {social_map:.5f}")

## 6. Hybrid Model

In [None]:
# Build the hybrid recommender from the three model objects created in the
# previous sections, plus the blending weights in HYBRID_PARAMS.
# No `fit` is called on the hybrid model in this notebook; it is constructed
# from `cb_model`, `cf_model` and `social_model`, so those cells must have run.
hybrid_model = HybridRecommender(
    content_based_model=cb_model,
    collaborative_model=cf_model,
    social_model=social_model,
    **HYBRID_PARAMS
)

In [None]:
# Top-K recommendations from the hybrid model for each evaluation user
# (`test_users` and `actuals` come from the content-based evaluation cell).
hybrid_predictions = {
    test_user: hybrid_model.recommend(test_user, n=K)
    for test_user in test_users
}

hybrid_map = mean_average_precision_at_k(actuals, hybrid_predictions, k=K)
print(f"Hybrid Model MAP@{K}: {hybrid_map:.5f}")

## 7. Results Summary

In [None]:
# Score column is labelled from the configured cutoff K, matching the
# per-model "MAP@{K}" prints, instead of a hard-coded "MAP@200".
metric_label = f"MAP@{K}"

# One row per method, ranked from best to worst score.
results_df = pd.DataFrame({
    "Method": ["Content-Based", "Collaborative Filtering", "Social", "Hybrid"],
    metric_label: [cb_map, cf_map, social_map, hybrid_map]
})
results_df = results_df.sort_values(metric_label, ascending=False).reset_index(drop=True)

separator = "=" * 50
print("\n" + separator)
print("FINAL RESULTS")
print(separator)
print(results_df.to_string(index=False))
print(separator)
# Fixed reference scores for comparison (plain strings; nothing to interpolate).
# NOTE(review): presumably these were measured at a cutoff of 200 — they do
# not change if K is changed; confirm before comparing at another K.
print("Baseline: 0.51382")
print("Competition Winner: 0.72809")