# Ottimizzazione totale per spingere l’accuracy al massimo:
Cosa fa il notebook:
Applica rapporto delle classi nei fold di CV
Usa Optuna per ottimizzare XGBoost
Valuta 5-fold stratificata
Salva i risultati, grafici e iperparametri migliori

In [6]:
import pandas as pd
import numpy as np
import optuna
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings("ignore")

# === 1. Caricamento dati ===
df = pd.read_csv("../data/df_fallrisk_clean.csv")

# === 2. Definizione target e rimozione variabili da escludere ===
target_col = "fallers_1_nonfaller_0"

drop_cols = [
    target_col,
    'total_falls', 'falls_month_rate', 'falls_year_rate',
    'recurrent_fallers1_nonfallers_occasional0',
    'falls_last_year_1si_2no', 'falls_last_year_1si_2no_code',
    'cadute_con_frattura', 'mesi_osservati', 'anni_osservati'
]

X = df.drop(columns=drop_cols, errors='ignore')
y = df[target_col]

# === 3. Impostazione CV e bilanciamento ===
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
pos_weight = y.value_counts()[0] / y.value_counts()[1]  # classe 0 / classe 1

# === 4. Definizione funzione per Optuna ===
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int("n_estimators", 50, 400),
        'max_depth': trial.suggest_int("max_depth", 2, 8),
        'learning_rate': trial.suggest_float("learning_rate", 0.01, 0.3),
        'gamma': trial.suggest_float("gamma", 0, 5),
        'subsample': trial.suggest_float("subsample", 0.5, 1.0),
        'colsample_bytree': trial.suggest_float("colsample_bytree", 0.5, 1.0),
        'scale_pos_weight': pos_weight,
        'use_label_encoder': False,
        'eval_metric': 'logloss',
        'random_state': 42,
        'n_jobs': -1
    }

    model = XGBClassifier(**params)

    pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("feature_selection", SelectFromModel(model, threshold="median")),
        ("classifier", model)
    ])

    scores = cross_val_score(pipeline, X, y, cv=cv, scoring="accuracy", n_jobs=-1)
    return scores.mean()

# === 5. Ottimizzazione con Optuna ===
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("✅ Best Trial:")
print(study.best_trial)

# === 6. Addestramento finale con i migliori parametri ===
best_params = study.best_trial.params

final_model = XGBClassifier(
    **best_params,
    objective="binary:logistic",
    eval_metric="logloss",
    use_label_encoder=False,
    random_state=42,
    n_jobs=-1
)

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("feature_selection", SelectFromModel(final_model, threshold="median")),
    ("classifier", final_model)
])

# === 7. Valutazione finale 5-FOLD ===
accs, precs, recs, f1s, rocs = [], [], [], [], []

for train_idx, test_idx in cv.split(X, y):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    y_prob = pipeline.predict_proba(X_test)[:, 1]

    accs.append(accuracy_score(y_test, y_pred))
    precs.append(precision_score(y_test, y_pred))
    recs.append(recall_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred))
    rocs.append(roc_auc_score(y_test, y_prob))

print("\n🎯 RISULTATI FINALI 5-FOLD:")
print(f"ACCURACY  : {np.mean(accs):.3f} ± {np.std(accs):.3f}")
print(f"PRECISION : {np.mean(precs):.3f} ± {np.std(precs):.3f}")
print(f"RECALL    : {np.mean(recs):.3f} ± {np.std(recs):.3f}")
print(f"F1        : {np.mean(f1s):.3f} ± {np.std(f1s):.3f}")
print(f"ROC_AUC   : {np.mean(rocs):.3f} ± {np.std(rocs):.3f}")

[I 2025-06-11 18:43:08,602] A new study created in memory with name: no-name-7665a38a-9249-4402-a624-f55253781dee
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encode

✅ Best Trial:
FrozenTrial(number=20, state=1, values=[0.7057142857142857], datetime_start=datetime.datetime(2025, 6, 11, 18, 43, 11, 705467), datetime_complete=datetime.datetime(2025, 6, 11, 18, 43, 11, 752443), params={'n_estimators': 213, 'max_depth': 5, 'learning_rate': 0.25759474505343827, 'gamma': 4.403461598190023, 'subsample': 0.5351227087923255, 'colsample_bytree': 0.5413414272748218}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=400, log=False, low=50, step=1), 'max_depth': IntDistribution(high=8, log=False, low=2, step=1), 'learning_rate': FloatDistribution(high=0.3, log=False, low=0.01, step=None), 'gamma': FloatDistribution(high=5.0, log=False, low=0.0, step=None), 'subsample': FloatDistribution(high=1.0, log=False, low=0.5, step=None), 'colsample_bytree': FloatDistribution(high=1.0, log=False, low=0.5, step=None)}, trial_id=20, value=None)

🎯 RISULTATI FINALI 5-FOLD:
ACCURACY  : 0.578 ± 0.133
PRECISION : 0.653 

🧠 Interpretazione
Il recall è buono, quindi il modello intercetta gran parte dei fallers.
L’accuracy resta bassa: significa che o i non-fallers sono predetti male, o c’è rumore strutturale nel dataset.
L’AUC bassa con alta varianza suggerisce che:
il modello ha difficoltà a distinguere chiaramente le classi 
in ogni fold ci sono probabilmente feature poco informative o instabili

🩺 Clinicamente parlando
Se lo scopo è non perdere fallers, il modello è più utile di quanto dica l’accuracy.
Ma se vogliamo anche affidabilità clinica per lo screening automatico, dobbiamo migliorare ulteriormente.