## Importation et entraînement du modèle

In [1]:
# notebooks/train_model.py
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import os

# --- Configuration ---------------------------------------------------------
# Tunable values gathered in one place. The same seed drives both the
# train/test split and the forest, so it is named once instead of being
# repeated as a bare literal.
SEED = 42
TEST_SIZE = 0.2
N_ESTIMATORS = 100

# --- Load data -------------------------------------------------------------
print("Chargement du dataset Iris...")
iris = load_iris()
X, y = iris.data, iris.target
feature_names = iris.feature_names
target_names = iris.target_names

print(f"Features: {feature_names}")
print(f"Classes: {target_names}")
print(f"Shape des donnees: {X.shape}")

# --- Train / test split ----------------------------------------------------
# stratify=y asks scikit-learn to keep the class proportions of y in both
# the training and the test subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)

# Sanity check: the split must partition the dataset without losing rows.
assert X_train.shape[0] + X_test.shape[0] == X.shape[0]

print(f"Train: {X_train.shape[0]} echantillons")
print(f"Test: {X_test.shape[0]} echantillons")

# --- Fit the model ---------------------------------------------------------
print("Entrainement du Random Forest...")
model = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=SEED)
model.fit(X_train, y_train)

# --- Evaluate on the held-out test set -------------------------------------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
print(classification_report(y_test, y_pred, target_names=target_names))

Chargement du dataset Iris...
Features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Classes: ['setosa' 'versicolor' 'virginica']
Shape des donnees: (150, 4)
Train: 120 echantillons
Test: 30 echantillons
Entrainement du Random Forest...
Accuracy: 0.9000 (90.00%)
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.82      0.90      0.86        10
   virginica       0.89      0.80      0.84        10

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30



## Sauvegarde du modèle

In [2]:
# Directory the trained model is persisted to. The path is relative, so it
# is resolved against the current working directory of the kernel.
MODEL_DIR = '../model'

os.makedirs(MODEL_DIR, exist_ok=True)
model_path = os.path.join(MODEL_DIR, 'model.pkl')
joblib.dump(model, model_path)
print(f"Modele sauvegarde dans: {os.path.abspath(model_path)}")

# Verification: the artifact must exist on disk and, once reloaded, must
# behave exactly like the in-memory model.
if not os.path.exists(model_path):
    # Fail loudly so a "Run All" does not carry on with a missing artifact.
    raise FileNotFoundError("ERREUR: Le fichier n'a pas ete cree!")

print(f"Taille du fichier: {os.path.getsize(model_path)} octets")

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts produced by a trusted source (here: this notebook).
loaded_model = joblib.load(model_path)

# A real round-trip check: the reloaded model must reproduce the predictions
# of the model that was just saved, not merely produce some score.
assert np.array_equal(loaded_model.predict(X_test), model.predict(X_test)), (
    "Le modele recharge ne reproduit pas les predictions du modele en memoire"
)

test_acc = loaded_model.score(X_test, y_test)
print(f"Modele recharge - Accuracy verifiee: {test_acc:.2%}")

Modele sauvegarde dans: /home/vizz/Projects/mlops/model/model.pkl
Taille du fichier: 167745 octets
Modele recharge - Accuracy verifiee: 90.00%
