In [None]:
# --- Installazioni necessarie (per Kaggle) ---
# !pip install -q xgboost catboost lightgbm

# --- Import di base ---
import numpy as np
import pandas as pd
import os
import sys
import json
import warnings
from collections import Counter
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# --- Import Modelli e Utility ---
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier





# --- Settings ---
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this point in the kernel session is suppressed.
# NOTE(review): this also hides deprecation/convergence warnings from the ML
# libraries — confirm that is intended before relying on a silent run.
warnings.filterwarnings('ignore')

In [None]:
# --- Clona il repository ---
GIT_REPO_URL = "https://github.com/Gabriele-mp/FDS-DITTO-DATI.git"
REPO_NAME = GIT_REPO_URL.split('/')[-1].replace('.git', '')

if not os.path.exists(REPO_NAME):
    print(f"Clonazione repository: {GIT_REPO_URL}...")
    !git clone -q {GIT_REPO_URL}
else:
    print(f"Repository {REPO_NAME} giÃ  presente.")

# --- Aggiungi 'src' al path di sistema ---
SRC_PATH = os.path.join(os.getcwd(), REPO_NAME, 'src')
if SRC_PATH not in sys.path:
    sys.path.append(SRC_PATH)
    print(f"Aggiunto '{SRC_PATH}' a sys.path")

print("Repository e path pronti.")

In [None]:
# Import the project-specific helpers used by the three submission pipelines.
# The modules live in the repository's 'src' folder, which must already be on
# sys.path when this cell runs.
try:
    # --- Imports for Model 1 ---
    print("Importazione funzioni per Modello 1 (Stacking)...")
    from data_processing import load_and_clean_data
    from feature_builder import (
        extract_features_v8,
        extract_features_v21,
        extract_moveset_features,
        extract_features_CRITICAL_MISSING
    )
    from train_utils import build_feature_dataframe

    # --- Imports for Model 2 ---
    # Aliased with an _M2 suffix so they do not shadow the Model 1 functions
    # that share the same names.
    print("Importazione funzioni per Modello 2...")
    from feature_builder_Model2 import (
        extract_features_v8 as extract_v8_M2,
        extract_features_v20 as extract_v20_M2,
        build_feature_dataframe as build_df_M2
    )

    # --- Imports for Model 3 ---
    print("Importazione funzioni per Modello 3...")
    # NOTE(review): the star import pulls every public name of config_Model3
    # into the notebook namespace and may overwrite existing globals — confirm
    # which names are actually needed and import them explicitly.
    from config_Model3 import *
    from feature_builder_Model3 import (
        extract_features_v8 as extract_v8_M3,
        extract_features_v19 as extract_v19_M3,
        extract_features_v20 as extract_v20_M3,
        build_feature_dataframe as build_df_M3
    )

    print("✅ Funzioni importate con successo per tutti i modelli!")

except ImportError as e:
    print(f"Errore nell'import: {e}")
    print("Verifica che il path nella Cella 2 sia corretto e che i file .py in 'src/' siano aggiornati.")
    # Re-raise: every later cell depends on these names, so continuing would
    # only surface the problem later as an unrelated NameError.
    raise

In [None]:
# ===================================================================
# Global configuration and data loading
# ===================================================================

# --- Global configuration ---
SEED = 123
N_SPLITS = 5
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.path.join('../input', COMPETITION_NAME)

# Input files, both located directly under DATA_PATH.
train_file_path = os.path.join(DATA_PATH, 'train.jsonl')
test_file_path = os.path.join(DATA_PATH, 'test.jsonl')

# One shared, seeded, shuffled stratified splitter, reused by the pipelines
# below so they all use the same fold configuration.
kfold = StratifiedKFold(
    n_splits=N_SPLITS,
    shuffle=True,
    random_state=SEED,
)

# --- Training data ---
print("Caricamento e pulizia dati di TRAINING...")
# Load through the project's own cleaning helper.
df_train_shuffled = load_and_clean_data(train_file_path, seed=SEED, is_train=True)
# Integer target taken from the 'player_won' column.
y_train = df_train_shuffled['player_won'].astype(int)
print(f"Dati di training pronti: {df_train_shuffled.shape}")

# --- Test data ---
print("\nCaricamento dati di TEST...")
df_test_raw = load_and_clean_data(test_file_path, is_train=False)
# Battle identifiers, needed later to build the submission files.
battle_ids = df_test_raw['battle_id']
print(f"Dati di test pronti: {df_test_raw.shape}")

## 🤖 Submission 1: 3-Model Stacking Ensemble

This submission is generated by a **Stacking Ensemble** designed to maximize accuracy by combining the predictions of 3 heterogeneous base models.

**Architecture:**
* **Level 0 (Feature Sets):**
    * **Set 1 (v8):** A compact set of ~30 features (used by LR).
    * **Set 2 (v21):** A broad set of ~100+ features (used by LGBM).
    * **Set 3 (Mega-Set):** A "supreme" set of 145 features (v21 + Moveset + Critical), reduced to 120 via feature selection (used by the second LGBM).
* **Level 1 (Base Models):**
    1.  `lr_v8`: **Optimized LogisticRegression** (on Set v8).
    2.  `lgbm_v21`: **LightGBM** (on Set v21).
    3.  `lgbm_mega`: **LightGBM** (on the selected Mega-Set).
* **Level 2 (Meta-Model):**
    * The Out-of-Fold (OOF) predictions from the 3 base models become the *meta-features* used to train a final **LogisticRegression**, which acts as a "judge" to weigh and combine the results.

**Expected CV (Cross-Validation) Score:** 0.8528

In [None]:
print("--- Inizio Pipeline di Training ---")

# ===================================================================
# 1. TRAINING FEATURE SETS
# ===================================================================
print("\n[Fase 1/6] Generazione Feature Set di Training...")


def _build_train_features(extractor):
    """Run one feature extractor over the training frame.

    Returns whatever build_feature_dataframe returns for a training set;
    the callers below unpack it as a (features, target) pair.
    """
    return build_feature_dataframe(df_train_shuffled, extractor, is_test_set=False)


# Set 1 (for LR) — the target from this call replaces the earlier y_train.
X_train_v8, y_train = _build_train_features(extract_features_v8)
# Set 2 (for LGBM)
X_train_v21, _ = _build_train_features(extract_features_v21)
# Extra sets that only feed the Mega-Set
X_train_moveset, _ = _build_train_features(extract_moveset_features)
X_train_ultimate, _ = _build_train_features(extract_features_CRITICAL_MISSING)

# ===================================================================
# 2. BUILD AND PRUNE THE "MEGA-SET"
# ===================================================================
print("\n[Fase 2/6] Creazione e Pulizia 'Mega-Set'...")

# Drop the existing indexes so the column-wise concat lines rows up by position.
X_train_v21_safe = X_train_v21.reset_index(drop=True)
X_train_moveset_safe = X_train_moveset.reset_index(drop=True)
X_train_ultimate_safe = X_train_ultimate.reset_index(drop=True)

_mega_parts = [X_train_v21_safe, X_train_moveset_safe, X_train_ultimate_safe]
X_train_MEGASUPERSET = pd.concat(_mega_parts, axis=1)
# When several sets carry the same column name, keep only the first occurrence.
_first_occurrence = ~X_train_MEGASUPERSET.columns.duplicated()
X_train_MEGASUPERSET = X_train_MEGASUPERSET.loc[:, _first_occurrence]
print(f"Shape Mega-Set (grezzo): {X_train_MEGASUPERSET.shape}")

# Hyperparameters shared by every LightGBM model created in this cell.
_LGBM_PARAMS = dict(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    num_leaves=31,
    random_state=SEED,
    verbose=-1,
)

# Feature selection: rank columns by LightGBM importance and keep the top 120.
# NOTE(review): the selector is fitted on the full training set before any CV
# fold is drawn, so later cross-validated scores may be optimistic — confirm.
lgbm_selector = LGBMClassifier(**_LGBM_PARAMS)
lgbm_selector.fit(X_train_MEGASUPERSET, y_train)
importances = pd.Series(
    lgbm_selector.feature_importances_,
    index=X_train_MEGASUPERSET.columns,
)

top_120_features = importances.nlargest(120).index
X_train_MEGA_SELECTED = X_train_MEGASUPERSET[top_120_features]
print(f"Shape Mega-Set (pulito): {X_train_MEGA_SELECTED.shape}")

# ===================================================================
# 3. LOGISTIC REGRESSION TUNING
# ===================================================================
print("\n[Fase 3/6] Ottimizzazione Iperparametri (LR)...")

# Only the regularisation strength C is searched.
param_grid_lr = {'model__C': [1.0, 5.0, 10.0, 15.0, 20.0, 25.0]}
_lr_steps = [
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(penalty='l2', solver='saga', max_iter=5000, random_state=SEED)),
]
model_lr_v8_pipeline = Pipeline(_lr_steps)
grid_lr = GridSearchCV(
    estimator=model_lr_v8_pipeline,
    param_grid=param_grid_lr,
    scoring='accuracy',
    cv=kfold,
    n_jobs=-1,
    verbose=0,
)
grid_lr.fit(X_train_v8, y_train)
lr_v8_OPTIMIZED = grid_lr.best_estimator_

print(f"LR Ottimizzato. Miglior Score: {grid_lr.best_score_:.4f}, Migliori Parametri: {grid_lr.best_params_}")

# ===================================================================
# 4. BASE MODEL DEFINITIONS
# ===================================================================
print("\n[Fase 4/6] Definizione Modelli Base L1...")

lgbm_v21 = LGBMClassifier(**_LGBM_PARAMS)
lgbm_mega = LGBMClassifier(**_LGBM_PARAMS)

# name -> (estimator, training features that estimator is fitted on)
base_models = {
    'lr_v8': (lr_v8_OPTIMIZED, X_train_v8),            # tuned LR on set v8
    'lgbm_v21': (lgbm_v21, X_train_v21),               # LGBM on set v21
    'lgbm_mega': (lgbm_mega, X_train_MEGA_SELECTED),   # LGBM on the pruned Mega-Set
}

print(f"Definiti {len(base_models)} modelli base: {list(base_models.keys())}")

# ===================================================================
# 5. STACKING: OUT-OF-FOLD (OOF) PREDICTIONS
# ===================================================================
print("\n[Fase 5/6] Esecuzione Stacking (OOF)...")

# Positional (NumPy) view of the target. The fold indices produced by the
# splitter are positions; indexing a pandas Series with `y_train[idx]` would
# look them up as *labels* and could pick different rows than
# `X_data.iloc[idx]` when the index is not 0..n-1.
y_train_values = np.asarray(y_train)

# One column of OOF probabilities per base model, one row per training sample.
X_meta_df = pd.DataFrame(
    np.zeros((len(y_train_values), len(base_models))),
    columns=list(base_models.keys()),
)
final_base_models = {}  # base models refitted on the whole training set

# The splitter only needs the number of samples from its first argument.
fold_splits = kfold.split(np.zeros(len(y_train_values)), y_train_values)
for fold, (train_idx, val_idx) in enumerate(tqdm(fold_splits, total=N_SPLITS, desc="Folds")):
    y_train_fold = y_train_values[train_idx]

    for name, (model, X_data) in base_models.items():
        X_train_fold = X_data.iloc[train_idx]
        X_val_fold = X_data.iloc[val_idx]

        model.fit(X_train_fold, y_train_fold)
        # X_meta_df was created above with a default 0..n-1 index, so the
        # positional fold indices are valid labels for .loc here.
        X_meta_df.loc[val_idx, name] = model.predict_proba(X_val_fold)[:, 1]

print("Addestramento modelli base finali su tutti i dati...")
for name, (model, X_data) in tqdm(base_models.items(), desc="Modelli Finali L1"):
    # Refit the same estimator objects on all rows; these are the models used
    # to score the test set.
    final_base_models[name] = model.fit(X_data, y_train)

print("✅ Meta-Features (X_meta_df) create.")

# ===================================================================
# 6. LEVEL-2 META-MODEL
# ===================================================================
print("\n[Fase 6/6] Addestramento Meta-Modello L2...")

# All three base models are kept as meta-features.
optimal_models = ['lr_v8', 'lgbm_v21', 'lgbm_mega']
X_meta_df_optimal = X_meta_df[optimal_models].copy()

# Final meta-model, fitted on the OOF predictions.
meta_model = LogisticRegression(random_state=SEED, max_iter=1000)
final_ensemble_model = meta_model.fit(X_meta_df_optimal, y_train)

# Cross-validated accuracy of the meta-model on the OOF meta-features.
final_cv_score = cross_val_score(meta_model, X_meta_df_optimal, y_train, cv=kfold, scoring='accuracy', n_jobs=-1).mean()

print("✅ Pipeline di Training completata.")
print(f"🎯 CV Score Finale (stimato): {final_cv_score:.4f}")

In [None]:
print("--- Inizio Pipeline di Predizione (Test Set) ---")

# ===================================================================
# 1. LOAD TEST DATA
# ===================================================================
print("\n[Fase 1/3] Caricamento Dati di Test...")
test_file_path = os.path.join(DATA_PATH, 'test.jsonl')
df_test_raw = load_and_clean_data(test_file_path, is_train=False)
battle_ids = df_test_raw['battle_id']

# ===================================================================
# 2. TEST FEATURE SETS
# ===================================================================
print("\n[Fase 2/3] Generazione Feature Set di Test...")

# Each test set is re-indexed to the column order of its training counterpart,
# so every model sees the features in the order it was fitted on.

# Set 1 (LR)
X_test_v8, _ = build_feature_dataframe(df_test_raw, extract_features_v8, is_test_set=True)
X_test_v8 = X_test_v8[X_train_v8.columns]

# Set 2 (LGBM)
X_test_v21, _ = build_feature_dataframe(df_test_raw, extract_features_v21, is_test_set=True)
X_test_v21 = X_test_v21[X_train_v21.columns]

# Extra sets that only feed the Mega-Set
X_test_moveset, _ = build_feature_dataframe(df_test_raw, extract_moveset_features, is_test_set=True)
X_test_moveset = X_test_moveset[X_train_moveset.columns]
X_test_ultimate, _ = build_feature_dataframe(df_test_raw, extract_features_CRITICAL_MISSING, is_test_set=True)
X_test_ultimate = X_test_ultimate[X_train_ultimate.columns]

# Rebuild the Mega-Set exactly as for training: positional concat, then drop
# repeated column names keeping the first occurrence.
X_test_v21_safe = X_test_v21.reset_index(drop=True)
X_test_moveset_safe = X_test_moveset.reset_index(drop=True)
X_test_ultimate_safe = X_test_ultimate.reset_index(drop=True)

X_test_MEGASUPERSET = pd.concat([X_test_v21_safe, X_test_moveset_safe, X_test_ultimate_safe], axis=1)
X_test_MEGASUPERSET = X_test_MEGASUPERSET.loc[:, ~X_test_MEGASUPERSET.columns.duplicated()]

# Keep the same columns that were selected during training.
X_test_MEGA_SELECTED = X_test_MEGASUPERSET[top_120_features]
print(f"Shape X_test_MEGA_SELECTED: {X_test_MEGA_SELECTED.shape}")

# ===================================================================
# 3. BUILD THE SUBMISSION
# ===================================================================
print("\n[Fase 3/3] Generazione Submission Finale...")

# Base-model name -> test features that model must be scored on.
test_data_map = {
    'lr_v8': X_test_v8,
    'lgbm_v21': X_test_v21,
    'lgbm_mega': X_test_MEGA_SELECTED
}

# Level-2 test meta-features, built in one go and in the column order the
# meta-model was trained on. Direct dict indexing raises a KeyError naming the
# missing model instead of silently leaving a column empty.
X_meta_test_df = pd.DataFrame({
    name: final_base_models[name].predict_proba(test_data_map[name])[:, 1]
    for name in tqdm(optimal_models, desc="Predizioni Test L1")
})

# Final level-2 prediction (class labels, not probabilities).
final_predictions = final_ensemble_model.predict(X_meta_test_df[optimal_models])

# --- Write the submission file ---
submission_df = pd.DataFrame({
    'battle_id': battle_ids,
    'player_won': final_predictions.astype(int)
})

submission_filename = 'submission.csv'
submission_df.to_csv(submission_filename, index=False)

print(f"\n✅ File '{submission_filename}' creato con successo!")
print(f"📊 Predizioni: {len(final_predictions)}")
# minlength=2 keeps both counts visible even when one class is never predicted.
print(f"📈 Distribuzione (0 vs 1): {np.bincount(final_predictions, minlength=2)}")
print(f"🎯 CV Score Atteso: {final_cv_score:.4f}")

display(submission_df.head(10))

## 🤖 Submission 2: 2-Model Stacking Ensemble (LR + XGB)

This submission is generated by a 2-model stacking ensemble, based on the `Model2` development pipeline.

**Architecture:**
* **Level 0 (Feature Sets):**
    * **Set 1 (v8_M2):** The compact `v8` feature set (from `feature_builder_Model2.py`) used by the Logistic Regression.
    * **Set 2 (v20_M2):** The larger `v20` feature set (from `feature_builder_Model2.py`) used by the XGBoost model.
* **Level 1 (Base Models):**
    1.  `lr_v8`: **LogisticRegression** (on Set v8_M2).
    2.  `xgb_v20`: **XGBoost** (on Set v20_M2).
* **Level 2 (Meta-Model):**
    * The OOF predictions from these two models are used as meta-features to train a final **LogisticRegression** meta-model.

In [None]:
# ===================================================================
# MODEL 2 PIPELINE: 2-model stacking (LR + XGB)
# ===================================================================
import traceback  # used only to report a failure at the end of this cell

print("\n--- Inizio Pipeline Modello 2: Stacking (LR+XGB) ---")

try:
    # --- 1. Feature generation ---
    print("[M2 - Fase 1/5] Generazione Feature Set (v8, v20)...")

    # Model 2 uses its own builder/extractors (imported with the _M2 aliases).
    # For the training set the builder result is unpacked as (X, y); the target
    # it returns is discarded because y_train is already defined.
    X_train_v8_M2, _ = build_df_M2(df_train_shuffled, extract_v8_M2, is_test_set=False)
    X_train_v20_M2, _ = build_df_M2(df_train_shuffled, extract_v20_M2, is_test_set=False)

    # For the test set the builder result is used directly as the feature
    # table (no target to unpack).
    print("\nGenerazione feature di test per M2...")
    X_test_v8_M2 = build_df_M2(df_test_raw, extract_v8_M2, is_test_set=True)
    X_test_v20_M2 = build_df_M2(df_test_raw, extract_v20_M2, is_test_set=True)

    # Give the test features the same column order as the training features.
    X_test_v8_M2 = X_test_v8_M2[X_train_v8_M2.columns]
    X_test_v20_M2 = X_test_v20_M2[X_train_v20_M2.columns]

    print(f"Feature M2 (Train): v8({X_train_v8_M2.shape}), v20({X_train_v20_M2.shape})")
    print(f"Feature M2 (Test): v8({X_test_v8_M2.shape}), v20({X_test_v20_M2.shape})")

    # --- 2. Base model definitions (L0) ---
    print("\n[M2 - Fase 2/5] Definizione Modelli Base (L0)...")
    base_models_M2 = {}

    # Logistic Regression on v8 (scaled inside the pipeline)
    model_lr_v8_M2 = Pipeline([
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(C=10.0, penalty='l2', solver='saga', max_iter=5000, random_state=SEED))
    ])
    base_models_M2['lr_v8'] = (model_lr_v8_M2, X_train_v8_M2)

    # XGBoost on v20. `use_label_encoder` is no longer passed: it is
    # deprecated in XGBoost, and the Model 3 XGBClassifier does not set it.
    model_xgb_v20_M2 = XGBClassifier(
        colsample_bytree=0.7, learning_rate=0.05, max_depth=3, n_estimators=200,
        reg_lambda=5, subsample=0.7, objective='binary:logistic',
        eval_metric='logloss', random_state=SEED
    )
    base_models_M2['xgb_v20'] = (model_xgb_v20_M2, X_train_v20_M2)

    optimal_models_M2 = ['lr_v8', 'xgb_v20']

    # --- 3. Stacking (OOF) ---
    print("\n[M2 - Fase 3/5] Esecuzione Stacking (OOF)...")

    # Positional view of the target, so fold indices select the same rows as
    # X_data.iloc[...] whatever container y_train currently is.
    y_train_values = np.asarray(y_train)
    X_meta_train_M2 = pd.DataFrame(
        np.zeros((len(y_train_values), len(optimal_models_M2))),
        columns=optimal_models_M2,
    )
    final_base_models_M2 = {}

    # The splitter only needs the number of samples from its first argument.
    fold_splits = kfold.split(np.zeros(len(y_train_values)), y_train_values)
    for fold, (train_idx, val_idx) in enumerate(tqdm(fold_splits, total=N_SPLITS, desc="[M2] Folds")):
        y_train_fold = y_train_values[train_idx]

        for name, (model, X_data) in base_models_M2.items():
            if name in optimal_models_M2:
                X_train_fold = X_data.iloc[train_idx]
                X_val_fold = X_data.iloc[val_idx]
                model.fit(X_train_fold, y_train_fold)
                X_meta_train_M2.loc[val_idx, name] = model.predict_proba(X_val_fold)[:, 1]

    print("Addestramento modelli base L0 finali (su tutti i dati)...")
    for name, (model, X_data) in tqdm(base_models_M2.items(), desc="[M2] Modelli Finali L0"):
        if name in optimal_models_M2:
            final_base_models_M2[name] = model.fit(X_data, y_train)

    # --- 4. Meta-model (L1) ---
    print("\n[M2 - Fase 4/5] Addestramento Meta-Modello (L1)...")

    final_ensemble_model_M2 = LogisticRegression(random_state=SEED, max_iter=1000)
    final_ensemble_model_M2.fit(X_meta_train_M2, y_train)
    print("Meta-Modello (L1) addestrato.")

    # --- 5. Prediction and submission file ---
    print("\n[M2 - Fase 5/5] Generazione Submission Modello 2...")

    # Test meta-features in the same column order used to fit the meta-model.
    X_meta_test_M2 = pd.DataFrame()
    X_meta_test_M2['lr_v8'] = final_base_models_M2['lr_v8'].predict_proba(X_test_v8_M2)[:, 1]
    X_meta_test_M2['xgb_v20'] = final_base_models_M2['xgb_v20'].predict_proba(X_test_v20_M2)[:, 1]

    final_predictions_M2 = final_ensemble_model_M2.predict(X_meta_test_M2)

    submission_df_2 = pd.DataFrame({
        'battle_id': battle_ids,
        'player_won': final_predictions_M2.astype(int)
    })

    submission_filename_2 = 'submission_model_2.csv'
    submission_df_2.to_csv(submission_filename_2, index=False)

    print(f"✅ File '{submission_filename_2}' creato con successo!")
    display(submission_df_2.head())

except Exception as e:
    # Best effort: a Model 2 failure must not stop the rest of the notebook,
    # but the full traceback is shown so the failing line can be located.
    print(f"!!! ERRORE DURANTE LA PIPELINE DEL MODELLO 2 !!!")
    print(e)
    traceback.print_exc()


## 🤖 Submission 3: Multi-Model Ensemble with Advanced Feature Engineering

This submission uses a **3-model ensemble** combining Logistic Regression, XGBoost, and Random Forest, each trained on specialized feature sets.

**Architecture:**
* **Feature Sets:**
    * **v8_M3:** ~200 optimized features for Logistic Regression
    * **v19_M3:** ~250 advanced features for Random Forest
    * **v20_M3:** ~230 tree-optimized features for XGBoost
* **Models:**
    1. Logistic Regression (C=10, L2 regularization, StandardScaler)
    2. XGBoost (300 trees, depth=6, lr=0.05)
    3. Random Forest (200 trees, depth=15)
* **Ensemble:** Weighted averaging (30% LR + 50% XGB + 20% RF)

**Implementation:** Feature extraction via `feature_builder_Model3.py`, hyperparameters in `config_Model3.py`.




In [None]:


# ==============================================================================
# SUBMISSION 3: Model 3 — weighted average of LR, XGBoost and Random Forest
# ==============================================================================
print("="*60)
print("SUBMISSION 3: MODEL 3 - ADVANCED FEATURE ENGINEERING")
print("="*60)

# Reuse the test data that is already loaded.
df_test = df_test_raw


def _features_only(built):
    """Return just the feature table from a build_df_M3 result.

    The builder result is unpacked as (X, y) for the training set elsewhere in
    this cell, while for the test set it is used directly; accept both shapes
    so a tuple is never handed to an estimator by mistake.
    """
    return built[0] if isinstance(built, tuple) else built


# --- Test features ---
X_test_v8_M3 = _features_only(build_df_M3(df_test, extract_v8_M3, is_test_set=True))
X_test_v19_M3 = _features_only(build_df_M3(df_test, extract_v19_M3, is_test_set=True))
X_test_v20_M3 = _features_only(build_df_M3(df_test, extract_v20_M3, is_test_set=True))

# --- Training features ---
# The target comes from the first call; the other two only need the features.
X_train_v8_M3, y_train = build_df_M3(df_train_shuffled, extract_v8_M3, is_test_set=False)
X_train_v19_M3 = _features_only(build_df_M3(df_train_shuffled, extract_v19_M3, is_test_set=False))
X_train_v20_M3 = _features_only(build_df_M3(df_train_shuffled, extract_v20_M3, is_test_set=False))

# Give every test set the column order of its training counterpart, as the
# Model 1 and Model 2 pipelines do, so features cannot be silently permuted.
X_test_v8_M3 = X_test_v8_M3[X_train_v8_M3.columns]
X_test_v19_M3 = X_test_v19_M3[X_train_v19_M3.columns]
X_test_v20_M3 = X_test_v20_M3[X_train_v20_M3.columns]

print(f"Feature M3 (Train): v8({X_train_v8_M3.shape}), v19({X_train_v19_M3.shape}), v20({X_train_v20_M3.shape})")

# The tree models are fitted on plain NumPy arrays; convert train and test the
# same way so fit and predict receive the same kind of input.
X_train_v20_M3 = X_train_v20_M3.values
X_train_v19_M3 = X_train_v19_M3.values
X_test_v20_M3 = X_test_v20_M3.values
X_test_v19_M3 = X_test_v19_M3.values

# --- Logistic Regression (v8) ---
model_lr_m3 = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(C=10.0, penalty='l2', solver='lbfgs',
                                  max_iter=1000, random_state=SEED))
])
model_lr_m3.fit(X_train_v8_M3, y_train)

# --- XGBoost (v20) ---
model_xgb_m3 = XGBClassifier(
    n_estimators=300, max_depth=6, learning_rate=0.05,
    subsample=0.8, colsample_bytree=0.8,
    random_state=SEED, eval_metric='logloss'
)
model_xgb_m3.fit(X_train_v20_M3, y_train)

# --- Random Forest (v19) ---
model_rf_m3 = RandomForestClassifier(
    n_estimators=200, max_depth=15,
    min_samples_split=10, random_state=SEED
)
model_rf_m3.fit(X_train_v19_M3, y_train)

# --- Positive-class probabilities on the test set ---
pred_lr_m3 = model_lr_m3.predict_proba(X_test_v8_M3)[:, 1]
pred_xgb_m3 = model_xgb_m3.predict_proba(X_test_v20_M3)[:, 1]
pred_rf_m3 = model_rf_m3.predict_proba(X_test_v19_M3)[:, 1]

# --- Ensemble: fixed-weight average, thresholded at 0.5 ---
pred_ensemble_m3 = 0.3 * pred_lr_m3 + 0.5 * pred_xgb_m3 + 0.2 * pred_rf_m3
pred_final_m3 = (pred_ensemble_m3 >= 0.5).astype(int)

# --- Save, using the same column schema as the other two submissions ---
submission_m3 = pd.DataFrame({'battle_id': df_test['battle_id'], 'player_won': pred_final_m3})
submission_m3.to_csv('submission_3.csv', index=False)

print(f"✅ Submission salvata | Distribuzione: {Counter(pred_final_m3)}")
print("="*60)