In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import pickle
import os
from datetime import datetime

def train_obesity_model(
    data_path: str = "data/ObesityDataSet_raw_and_data_sinthetic.csv",
    output_dir: str = "models",
):
    """Train the obesity-level classifier and persist it with its preprocessors.

    The CSV at ``data_path`` is loaded, the categorical columns and the
    ``NObeyesdad`` target are label-encoded, and a ``RandomForestClassifier``
    is fitted on a stratified 80/20 train/test split of the standardized
    features. The model, scaler, label encoders and a metadata dict are then
    pickled into ``output_dir`` as ``model.pkl``, ``scaler.pkl``,
    ``label_encoders.pkl`` and ``metadata.pkl``.

    Args:
        data_path: Path to the CSV dataset.
        output_dir: Directory that receives the pickled artifacts; it is
            created if it does not exist.

    Returns:
        ``(model, scaler, label_encoders, metadata)`` on success, or ``None``
        when ``data_path`` does not exist (a message is printed instead of
        raising).
    """
    print("🚀 Début de l'entraînement du modèle...")

    # Load the data. Only the read itself can raise FileNotFoundError, so the
    # try body is kept to that single call.
    try:
        df = pd.read_csv(data_path)
    except FileNotFoundError:
        print(f"❌ Fichier non trouvé: {data_path}")
        return None
    print(f"✅ Données chargées: {df.shape}")

    print("🔄 Préparation des données...")

    # Categorical columns that are label-encoded in place.
    categorical_columns = [
        'Gender', 'family_history_with_overweight', 'FAVC', 'CAEC',
        'SMOKE', 'SCC', 'CALC', 'MTRANS',
    ]

    # One fitted encoder per column, kept so it can be saved with the model.
    label_encoders = {}
    for col in categorical_columns:
        encoder = LabelEncoder()
        df[col] = encoder.fit_transform(df[col])
        label_encoders[col] = encoder

    # Separate the features from the target and encode the target classes.
    X = df.drop('NObeyesdad', axis=1)
    target_encoder = LabelEncoder()
    y_encoded = target_encoder.fit_transform(df['NObeyesdad'])
    label_encoders['target'] = target_encoder

    # Split BEFORE scaling: the scaler must only see training rows, otherwise
    # test-set statistics leak into the preprocessing.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    print("🤖 Entraînement du modèle...")

    model = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
    )
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"✅ Précision du modèle: {accuracy:.4f}")
    print("\n📊 Rapport de classification:")
    print(classification_report(y_test, y_pred,
                                target_names=target_encoder.classes_))

    # Create the output directory if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    print("💾 Sauvegarde du modèle et des préprocesseurs...")

    # Descriptive information stored next to the model.
    metadata = {
        'model_name': 'RandomForestClassifier',
        'features': list(X.columns),
        'classes': list(target_encoder.classes_),
        'accuracy': accuracy,
        'training_date': datetime.now().isoformat(),
        'n_samples': len(df),
    }

    # File name -> object to pickle.
    artifacts = {
        "model.pkl": model,
        "scaler.pkl": scaler,
        "label_encoders.pkl": label_encoders,
        "metadata.pkl": metadata,
    }
    for filename, artifact in artifacts.items():
        with open(os.path.join(output_dir, filename), "wb") as f:
            pickle.dump(artifact, f)

    print("✅ Modèle sauvegardé avec succès!")
    return model, scaler, label_encoders, metadata

# Run the training with the default dataset path when executed as a script.
if __name__ == "__main__":
    train_obesity_model()

🚀 Début de l'entraînement du modèle...
✅ Données chargées: (2111, 17)
🔄 Préparation des données...
🤖 Entraînement du modèle...
✅ Précision du modèle: 0.9504

📊 Rapport de classification:
                     precision    recall  f1-score   support

Insufficient_Weight       1.00      0.93      0.96        54
      Normal_Weight       0.82      0.97      0.89        58
     Obesity_Type_I       0.96      0.97      0.96        70
    Obesity_Type_II       0.98      0.98      0.98        60
   Obesity_Type_III       1.00      0.98      0.99        65
 Overweight_Level_I       0.94      0.88      0.91        58
Overweight_Level_II       0.96      0.93      0.95        58

           accuracy                           0.95       423
          macro avg       0.95      0.95      0.95       423
       weighted avg       0.95      0.95      0.95       423

💾 Sauvegarde du modèle et des préprocesseurs...
✅ Modèle sauvegardé avec succès!
