# Exploration et pipeline
Ce notebook charge les jeux de données X/y fournis (entraînement et test), en affiche un aperçu, lance un court entraînement local, puis montre comment utiliser MLflow en local.

In [None]:
import pandas as pd, joblib, os
# Show which files are present in the data directory.
print('list data files:')
print(os.listdir('data'))

# Read the four CSV splits, in the order train features, train labels,
# test features, test labels.
X_train, y_train, X_test, y_test = map(
    pd.read_csv,
    ('data/X_train.csv', 'data/y_train.csv', 'data/X_test.csv', 'data/y_test.csv'),
)

# Preview the training features, then the training labels: shape followed by
# the first rows rendered with the notebook's rich display.
for caption, frame in (('X_train shape:', X_train), ('y_train shape:', y_train)):
    print(caption, frame.shape)
    display(frame.head())

In [None]:
# Quick local training run: fit a random forest on the training split,
# report its accuracy on the test split, and persist the fitted model.
import os

import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Flatten the labels of both splits to 1-D arrays so that fitting and scoring
# receive them in the same shape (previously only y_train was flattened).
y_train_1d = y_train.values.ravel()
y_test_1d = y_test.values.ravel()

# A fixed random_state keeps the forest reproducible from run to run.
clf = RandomForestClassifier(n_estimators=50, random_state=42)
clf.fit(X_train, y_train_1d)
preds = clf.predict(X_test)
print('accuracy:', accuracy_score(y_test_1d, preds))

# Create the output directory if it is missing, then save the fitted model.
os.makedirs('outputs', exist_ok=True)
joblib.dump(clf, 'outputs/model.pkl')
print('saved model to outputs/model.pkl')

## Lancer MLflow localement

Dans un terminal:
```
pip install mlflow
mlflow ui --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlflow/artifacts -p 5000
```
Ensuite, en laissant le serveur MLflow tourner, lancer le script `src/training/train.py` depuis un terminal ou exécuter la cellule suivante (qui appelle ce même script) pour démarrer un run MLflow.

In [None]:
# Run the project's training script through the shell, passing the four CSV
# splits under data/, the outputs directory, and an MLflow URI.
# The URI points at port 5000 on localhost, matching the `mlflow ui ... -p 5000`
# command shown earlier in this notebook; presumably that server must already
# be running when this cell executes.
# NOTE(review): what the script trains and logs is not visible from this
# notebook — see src/training/train.py to confirm.
!python src/training/train.py --x-train data/X_train.csv --y-train data/y_train.csv --x-test data/X_test.csv --y-test data/y_test.csv --output-dir outputs --mlflow-uri http://127.0.0.1:5000