In [1]:
import joblib
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [3]:
# Single seed shared by both splits and the classifier so that a
# Restart & Run All reproduces the same partitions and the same model.
RANDOM_STATE = 42

data = load_iris()

# Feature matrix with identifier-friendly column names (no spaces or
# parentheses); these are the feature names the model is fitted on.
X = pd.DataFrame(data.data, columns=data.feature_names).rename(columns={
    'sepal length (cm)': 'sepal_length_cm',
    'sepal width (cm)': 'sepal_width_cm',
    'petal length (cm)': 'petal_length_cm',
    'petal width (cm)': 'petal_width_cm',
})
y = pd.Series(data.target, name='target')

# NOTE: reference_data.csv is deliberately NOT written here. Dumping the full
# dataset would include the training rows and use the original "... (cm)"
# column names, which do not match the model's feature names; the file is
# written from the held-out reference split in a later cell.

# Stratified 70 / 30 split, then the 30% is halved (again stratified) into a
# test set and a reference set of roughly 15% each.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE, stratify=y
)
X_test, X_reference, y_test, y_reference = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=RANDOM_STATE, stratify=y_temp
)

# Sanity check: the three partitions together cover every row exactly once.
assert len(X_train) + len(X_test) + len(X_reference) == len(X)

model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Evaluate on the test split only; the reference split is left untouched.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy on Test Set: {accuracy:.2f}")


Model Accuracy on Test Set: 0.86


In [4]:
# Persist the fitted classifier to disk. `joblib` is imported together with
# the other dependencies in the notebook's first cell.
# joblib files are pickle-based: only load them back from trusted sources.
model_path = 'iris_model.pkl'
joblib.dump(model, model_path)
print(f"Model saved to {model_path}")

Model saved to iris_model.pkl


In [5]:
# Write the held-out reference split (features plus the label column) to CSV.
# `assign` returns a new frame, so X_reference itself is left unmodified.
reference_data = X_reference.assign(target=y_reference)
reference_data.to_csv('reference_data.csv', index=False)

# Build a small sample from the test split as a quick sanity check.
sample_data = X_test.head()
sample_predictions = model.predict(sample_data)
print("\nÉchantillon de données de test avec prédictions :", sample_data, sep="\n")
print("\nPrédictions pour l'échantillon :", sample_predictions, sep="\n")


Échantillon de données de test avec prédictions :
     sepal_length_cm  sepal_width_cm  petal_length_cm  petal_width_cm
77               6.7             3.0              5.0             1.7
111              6.4             2.7              5.3             1.9
69               5.6             2.5              3.9             1.1
133              6.3             2.8              5.1             1.5
141              6.9             3.1              5.1             2.3

Prédictions pour l'échantillon :
[2 2 1 1 2]
