# Phase 5 — Predictive Models & Recommendations

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier

# Load the attendance and sessions tables from CSV.
attendance = pd.read_csv('../data/attendance.csv')
# NOTE(review): `sessions` is loaded but not referenced in this cell; presumably
# reserved for the engineered features mentioned below — confirm before removing.
sessions = pd.read_csv('../data/sessions.csv')

# Binary label: 1 where status is exactly 'attended', 0 for every other value
# (a missing status compares unequal and therefore becomes 0).
y = (attendance['status'] == 'attended').astype(int)

# Placeholder features (replace with engineered ones): the two ID columns,
# each mapped to integer codes so the model receives numeric input.
X = attendance[['member_id', 'session_id']].copy()
X = X.apply(lambda c: pd.factorize(c)[0])

# Stratify the hold-out split on the label so train and test keep the same
# class balance; an unstratified 20% sample of an imbalanced label can leave
# the test fold with too few (or no) rows of one class, which makes the AUC
# unstable or undefined. Stratification needs at least two rows per class, so
# fall back to a plain random split when that does not hold.
class_counts = y.value_counts()
stratify_on = y if len(class_counts) > 1 and class_counts.min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_on
)

model = XGBClassifier(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
)
model.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the positive class (label 1).
pred = model.predict_proba(X_test)[:, 1]
print("AUC:", roc_auc_score(y_test, pred))


In [None]:

# Recommendation stub: the five sessions with the most 'attended' rows.
# NOTE: the ranking is computed over all members; no member segmentation is
# applied here yet.
import pandas as pd

attendance = pd.read_csv('../data/attendance.csv')

# Keep only rows whose status is exactly 'attended', then count rows per session.
is_attended = attendance['status'] == 'attended'
popular = (
    attendance.loc[is_attended]
    .groupby('session_id')
    .size()
    .sort_values(ascending=False)
    .head(5)
)
popular
