## Chargement des librairies

In [94]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score  



## Fonctions pour optimiser

In [102]:
def prepare_data(df, target_column, test_size=0.2, random_state=2, stratify=False):
    """Split a DataFrame into train/test features and targets.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns together with the target column.
    target_column : str
        Name of the column to predict; all remaining columns are used as
        features.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 2
        Seed forwarded to ``train_test_split`` so the split is reproducible.
    stratify : bool, default False
        When True, the split is stratified on the target column. The default
        keeps the original, non-stratified behaviour.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split``.

    Raises
    ------
    KeyError
        If ``target_column`` is not a column of ``df``.
    """
    # Fail early with an explicit message instead of the generic one from drop().
    if target_column not in df.columns:
        raise KeyError(f"target column {target_column!r} not found in DataFrame")

    X = df.drop(target_column, axis=1)
    y = df[target_column]
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

def create_train_save_pipeline_with_grid_search(X_train, y_train, filename,
                                                param_grid=None, cv=5,
                                                scoring='accuracy'):
    """Tune a scaler + random-forest pipeline by grid search and persist it.

    Builds a ``Pipeline`` made of a ``StandardScaler`` followed by a
    ``RandomForestClassifier(random_state=2)``, runs ``GridSearchCV`` over
    ``param_grid`` on the training data, prints the best parameters and the
    best cross-validation score, and dumps the fitted ``GridSearchCV`` object
    to ``filename`` with joblib.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``GridSearchCV.fit``.
    filename : str
        Path where the fitted grid-search object is written.
    param_grid : dict or None, default None
        Parameter grid for the search. When None, the original grid over
        ``rf__n_estimators`` and ``rf__max_depth`` is used.
    cv : int, default 5
        Cross-validation setting forwarded to ``GridSearchCV``.
    scoring : str, default 'accuracy'
        Scoring metric forwarded to ``GridSearchCV``.

    Returns
    -------
    The ``best_estimator_`` of the fitted search. Note that the file on disk
    holds the whole ``GridSearchCV`` object, not only this estimator.
    """
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier(random_state=2))
    ])

    if param_grid is None:
        param_grid = {
            'rf__n_estimators': [100, 200, 300],
            'rf__max_depth': [None, 10, 20, 30]
        }

    grid_search = GridSearchCV(pipeline, param_grid, cv=cv, scoring=scoring)
    grid_search.fit(X_train, y_train)

    # Report the best parameters and the best cross-validation score.
    print("Meilleurs paramètres:", grid_search.best_params_)
    print("Meilleur score de cross-validation:", grid_search.best_score_)

    joblib.dump(grid_search, filename)
    return grid_search.best_estimator_

def load_model_and_predict(filename, X_test):
    """Load a joblib-persisted model from ``filename`` and predict on ``X_test``.

    Returns whatever the loaded object's ``predict`` method returns.
    """
    # NOTE(review): joblib files are unpickled on load; only load trusted files.
    fitted_search = joblib.load(filename)
    predicted = fitted_search.predict(X_test)
    return predicted

## Application sur le jeu de données IRIS

In [103]:
# Gather the Iris measurements and their class labels into a single DataFrame.
iris = load_iris()
target_column = 'species'
df = pd.DataFrame(
    data=np.c_[iris.data, iris.target],
    columns=iris.feature_names + [target_column],
)
# The combined array has a single dtype, so cast the label column back to integers.
df[target_column] = df[target_column].astype('int')

# Hold out a test set.
X_train, X_test, y_train, y_test = prepare_data(df, target_column)

# Fit and save the grid search, then predict with the model reloaded from disk.
pipeline = create_train_save_pipeline_with_grid_search(X_train, y_train, 'grid_search.joblib')
predictions = load_model_and_predict('grid_search.joblib', X_test)

Meilleurs paramètres: {'rf__max_depth': None, 'rf__n_estimators': 100}
Meilleur score de cross-validation: 0.9583333333333333


In [104]:
# Compute both evaluation summaries on the held-out test set first...
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

# ...then display them: overall accuracy, followed by the per-class report.
print(f"Précision: {accuracy}")
print(report)

Précision: 0.9666666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      0.88      0.93         8
           2       0.89      1.00      0.94         8

    accuracy                           0.97        30
   macro avg       0.96      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30

