In [10]:
import itertools
import os
from contextlib import asynccontextmanager

import mlflow
import mlflow.xgboost
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [11]:
# Load the cleaned dataset (tab-separated file)
data = pd.read_csv('/home/onyxia/formation-mlops/DSA-2025_clean_data.csv', sep='\t')

# Preview the first rows of the DataFrame
print(data.head())

# Summarise columns, dtypes and non-null counts.
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the cell output.
data.info()

   readmission       chol        crp       phos
0            1   8.880291  15.001677   6.862829
1            1  10.455361  10.946142   8.598409
2            1   8.240882  10.249403  15.327369
3            1   9.294992  14.690765   8.428716
4            0   9.720638  14.872264   8.485291
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3450 entries, 0 to 3449
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   readmission  3450 non-null   int64  
 1   chol         3450 non-null   float64
 2   crp          3450 non-null   float64
 3   phos         3450 non-null   float64
dtypes: float64(3), int64(1)
memory usage: 107.9 KB
None


In [12]:
# Target column on one side, every remaining column as features on the other
y = data['readmission']
X = data.drop(labels=['readmission'], axis='columns')

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Sanity check: show which distinct labels occur in the training target
train_classes = set(y_train)
print(f"Classes uniques dans y_train: {train_classes}")

# Hyper-parameter grid: each key maps to the list of candidate values to try
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1],
)

def train_and_evaluate(params):
    """Train one XGBoost classifier and record the run in MLflow.

    A new MLflow run is opened for each call. The hyper-parameters are logged
    first, then a classifier is fitted on the notebook-level ``X_train`` /
    ``y_train`` and scored on ``X_test`` / ``y_test``. The classification
    report is printed, and the accuracy and the fitted model are logged to
    the run.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded unchanged to ``XGBClassifier``.

    Returns
    -------
    float
        Accuracy of the fitted model on ``X_test`` / ``y_test``, so callers
        can compare configurations without querying MLflow.

    Notes
    -----
    NOTE(review): the score is computed on the held-out test data, so picking
    the best configuration from it tunes hyper-parameters against the test
    set. Consider a separate validation split or cross-validation.
    """
    with mlflow.start_run():
        # Log the configuration before training so that a run whose fit
        # fails still records which hyper-parameters were attempted.
        mlflow.log_params(params)

        model = XGBClassifier(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(classification_report(y_test, y_pred))  # per-class precision / recall / F1

        # Record the metric and the fitted model on the run
        mlflow.log_metric("accuracy", accuracy)
        mlflow.xgboost.log_model(model, "model")

    return accuracy

# Run one tracked training per combination in the grid.
# The combinations are derived from param_grid itself, so a hyper-parameter
# added to the grid is searched without touching this loop. itertools.product
# varies the last key fastest, which matches the order of the nested loops
# it replaces (n_estimators outer, learning_rate inner).
param_names = list(param_grid)
for values in itertools.product(*param_grid.values()):
    params = dict(zip(param_names, values))
    train_and_evaluate(params)

Classes uniques dans y_train: {0, 1}
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       488
           1       0.97      0.89      0.93       202

    accuracy                           0.96       690
   macro avg       0.96      0.94      0.95       690
weighted avg       0.96      0.96      0.96       690





🏃 View run nebulous-newt-912 at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0/runs/b0f207f41a8041d289c661e01ce5fa31
🧪 View experiment at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       488
           1       0.95      0.93      0.94       202

    accuracy                           0.97       690
   macro avg       0.96      0.96      0.96       690
weighted avg       0.97      0.97      0.97       690





🏃 View run exultant-mule-824 at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0/runs/b931e0db60274708b7b56db65c1dd213
🧪 View experiment at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       488
           1       0.97      0.92      0.94       202

    accuracy                           0.97       690
   macro avg       0.97      0.95      0.96       690
weighted avg       0.97      0.97      0.97       690





🏃 View run amusing-duck-345 at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0/runs/301ec57b7e224a3f88af933d1b8732cb
🧪 View experiment at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       488
           1       0.95      0.94      0.94       202

    accuracy                           0.97       690
   macro avg       0.96      0.96      0.96       690
weighted avg       0.97      0.97      0.97       690





🏃 View run treasured-doe-714 at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0/runs/a4e6f438f3a448acaf785cee4ae4f06f
🧪 View experiment at: https://user-nbacke-mlflow.user.lab.sspcloud.fr/#/experiments/0
