Importation

In [20]:
import numpy as np
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score,f1_score
import matplotlib.pyplot as plt

Préparation des données

In [18]:
# Récupération des données
arbre = pd.read_csv("Data_Arbre.csv")

# Encodage
LE = LabelEncoder()
arbre['fk_arb_etat'] = LE.fit_transform(arbre['fk_arb_etat'])

# Sélection des données
x = arbre[['longitude', 'latitude', 'haut_tot', 'tronc_diam', 'haut_tronc', 'age_estim']].copy()
y = arbre[['fk_arb_etat']].copy()

# Normalisation
SC = StandardScaler()
x_norm = SC.fit_transform(x)

# Division de la base de données
x_train, x_test, y_train, y_test = train_test_split(x_norm, y, train_size=0.8, random_state=42)

# Reshape y_train and y_test
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

Enregistrement des modèles

In [24]:
with open('standard_scaler3.pkl', 'wb') as f:
    pickle.dump(SC, f)

Déterminer les modèles et hyperparamètres

In [16]:
# Définir les modèles et les hyperparamètres
models = {
    'RandomForest': {
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [10, 50, 100],
            'max_depth': [None, 10, 20, 30]
        }
    },
    'GradientBoostingClassifier': {
        'model': GradientBoostingClassifier(random_state=42),
        'params' : {
            'n_estimators': [50, 100, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
    }
}

Calcul GridSearch

In [21]:
best_models = {}
best_scores = {}
for name, model_info in models.items():
    grid_search = GridSearchCV(model_info['model'], model_info['params'], cv=3, n_jobs=-1, scoring='accuracy')
    grid_search.fit(x_train, y_train)
    best_models[name] = grid_search.best_estimator_
    best_scores[name] = grid_search.best_score_
    print(f"Meilleur paramètre {name}: {grid_search.best_params_}")
    print(f"Meilleur model {name} : {grid_search.best_score_}")


# Sélection des meilleurs modèles
RF_best_model = best_models['RandomForest']
GBC_best_model = best_models['GradientBoostingClassifier']

# Sauvegarder le meilleur modèle
# joblib.dump(best_model, 'best_model.pkl')

# Prédiction sur le jeu de test
RF_y_pred = RF_best_model.predict(x_test)
GBC_y_pred = GBC_best_model.predict(x_test)

# Calcul des métriques
RF_precision = precision_score(y_test, RF_y_pred, average='weighted')
RF_rappel = recall_score(y_test, RF_y_pred, average='weighted')
RF_f1 = f1_score(y_test, RF_y_pred, average='weighted')

print(f"Précision: {RF_precision}")
print(f"Rappel: {RF_rappel}")
print(f"F1 Score: {RF_f1}")

GBC_precision = precision_score(y_test, GBC_y_pred, average='weighted')
GBC_rappel = recall_score(y_test, GBC_y_pred, average='weighted')
GBC_f1 = f1_score(y_test, GBC_y_pred, average='weighted')

print(f"Précision: {GBC_precision}")
print(f"Rappel: {GBC_rappel}")
print(f"F1 Score: {GBC_f1}")

Meilleur paramètre RandomForest: {'max_depth': 30, 'n_estimators': 100}
Meilleur model RandomForest : 0.9254264678248689
Meilleur paramètre GradientBoostingClassifier: {'learning_rate': 0.01, 'max_depth': 7, 'n_estimators': 200}
Meilleur model GradientBoostingClassifier : 0.9212078272604588
Précision: 0.9210374761003376
Rappel: 0.9345479082321188
F1 Score: 0.9220333572744505
Précision: 0.9124720683056177
Rappel: 0.9271255060728745
F1 Score: 0.9117208061512742


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [25]:
print(RF_y_pred)

[1 1 1 ... 1 1 1]


Enregistrement des modèles

In [22]:
with open('RF_best.pkl', 'wb') as f:
    pickle.dump(RF_best_model, f)

with open('GBC_best.pkl', 'wb') as f:
    pickle.dump(GBC_best_model, f)

In [None]:
dict_pickle = {
    'label' : LE,
    'Randdomforest': RF_best_model,
    'GradientBoostingClassifier': GBC_best_model
}

with open('dict_pickle', 'wb') as f:
    pickle.dump(dict_pickle, f)

Préparation du script

In [None]:
def open_pickle(fileName):
    with open(fileName, 'rb') as f:
        return pickle.load(f)

def prediction3(dataFrame):

    # Importation des données
    SC = open_pickle('standard_scaler3.pkl')
    RF = open_pickle('RF_best.pkl')
    GBC = open_pickle('GBC_best.pkl')

    # Lecture des données
    data = pd.read_json(dataFrame)

    # Vérifier qu'il y ait bien les données
    # Vérifier que ce soit encodé

    # Sélection des données
    x = data[['longitude', 'latitude', 'haut_tot', 'tronc_diam', 'haut_tronc', 'age_estim']].copy()

    # Normalisation
    x_norm = SC.fit_transform(x)

    # Prédiction
    RF_pred = RF.predict(x_norm)
    GBR_pred = GBC.predict(x_norm)
    print(RF_pred)

    # Affichage sur carte
    # utilisation inverse transforme