# Construction du classifieur


In [1]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import random
import pandas as pd
import cv2
import os
from skimage import io, util
from skimage.transform import resize


# Ah stylé d'avoir trouvé ça !
from sklearn.metrics import (
    accuracy_score,
    recall_score,
    precision_score,
    average_precision_score,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

from skimage.feature import hog
from skimage.feature import ORB
from skimage.feature import SIFT
from skimage.feature import daisy
from skimage.feature import local_binary_pattern

## Récupération des jeux


## Découper les écocups et les négatifs


On découpe les positifs en fonction de bbox


On va découper des bouts d'images négatives


## Transformation


Supprimer les images vides


In [2]:
# Chargement des données
pos_patch_fs = [f for f in  os.listdir(os.path.join("local_data", "4_normalized_patches", "pos")) if f.endswith(".jpg")]
neg_patch_fs = [f for f in  os.listdir(os.path.join("local_data", "4_normalized_patches", "neg")) if f.endswith(".jpg")]

pos_patchs = []
neg_patchs = []

for f in pos_patch_fs:
    try:
        patch = plt.imread(
            os.path.join("local_data", "4_normalized_patches", "pos", f)
        )
        pos_patchs.append(patch[:,:,0]) # le channel grayscale est dupliqué sur les 3 chanaux RGB (on en isole 1)

    except FileNotFoundError:
        continue

for f in neg_patch_fs:
    try:
        patch = plt.imread(
            os.path.join("local_data", "4_normalized_patches", "neg", f)
        )
        neg_patchs.append(patch[:,:,0]) # le channel grayscale est dupliqué sur les 3 chanaux RGB (on en isole 1)

    except FileNotFoundError:
        continue

In [3]:
print(len(pos_patchs))
print(len(neg_patchs))

print(pos_patchs[0].shape) # on est bien en grayscale

nb_patchs_tot = len(pos_patchs) + len(neg_patchs)

1204
6020
(128, 72)


In [4]:
train_part = 80 / 100
train_size = int(nb_patchs_tot * train_part)

# Mélange des indices, pour que les données soient mélangées "en même temps" que les classes vérités
indices = np.arange(nb_patchs_tot)
np.random.shuffle(indices)

# Fabrication des classes : 1 : positif, 0 : négatif
y = []
for _ in range(len(pos_patchs)):
    y.append(1)

for _ in range(len(neg_patchs)):
    y.append(0)
y_shuffled = np.array(y)[indices]
y_train = y_shuffled[0:train_size]
y_test = y_shuffled[train_size:]

print(len(y_shuffled))
print(len(y_train))
print(len(y_test))

7224
5779
1445


In [5]:
patchs = np.array(pos_patchs + neg_patchs)
print(patchs.shape)

def get_X_train_and_test_for_extractor(extractor):
        
    X = extractor(patchs)
    # print(X.shape)
    
    X_shuffled = X[indices]
    X_train = X_shuffled[0:train_size]
    X_test = X_shuffled[train_size:]

    return X_train, X_test

(7224, 128, 72)


In [6]:
def HOG_extractor(patchs):
    first_features = hog(patchs[0]) # valeurs par défaut
    features = np.zeros(shape=(len(patchs),first_features.shape[0]), dtype=first_features.dtype)
    for i, patch in enumerate(patchs):
        features[i] = hog(patch)

    return features

X_train_HOG, X_test_HOG = get_X_train_and_test_for_extractor(HOG_extractor)

print(X_train_HOG.shape)
print(X_test_HOG.shape)
print(X_train_HOG.dtype)

(5779, 7938)
(1445, 7938)
float64


In [9]:
mandatory_n_keypoints = 25 # le nombre de keypoints détecté par image n'est pas garanti, il peut être moins -> padding de zéros
orb = ORB(n_keypoints=mandatory_n_keypoints) # valeurs par défaut

def ORB_extractor(patchs):
    orb.detect_and_extract(patchs[0])

    features = np.zeros(shape=(len(patchs),mandatory_n_keypoints*orb.descriptors.shape[1]), dtype=orb.descriptors.dtype)
    
    for i, patch in enumerate(patchs):
        try:
            orb.detect_and_extract(patch)
        except RuntimeError:
            pass

        if orb.descriptors.shape[0] == 0:
            print(f"Warning, aucun descripteur trouvé pour i = {i}")
        if orb.descriptors.shape[0] == mandatory_n_keypoints:
            features[i] = orb.descriptors.flatten()
        else:
        # obligatoirement plus bas (déjà cappé par le paramètre n_keypoints)
            pad = np.zeros((1,(mandatory_n_keypoints - orb.descriptors.shape[0])*orb.descriptors.shape[1]),dtype=orb.descriptors.dtype)
            features[i] = np.hstack([orb.descriptors.reshape((1,-1)), pad])

    return features

X_train_ORB, X_test_ORB = get_X_train_and_test_for_extractor(ORB_extractor)

print(X_train_ORB.shape)
print(X_test_ORB.shape)
print(X_train_ORB.dtype)

(5779, 6400)
(1445, 6400)
bool


In [10]:
mandatory_n_keypoints = 50
# le nombre de keypoints détecté par image n'est pas garanti, il peut être moins -> padding de zéros*
# (par contre, il a l'air de détecter plus de keypoints qu'orb ?) (1 key point a 2 fois moins de données, donc je double la valeur finalement)

sift = SIFT() # valeurs par défaut

def SIFT_extractor(patchs):
    sift.detect_and_extract(patchs[0])

    features = np.zeros(shape=(len(patchs),mandatory_n_keypoints*sift.descriptors.shape[1]), dtype=sift.descriptors.dtype)
    
    for i, patch in enumerate(patchs):
        try:
            sift.detect_and_extract(patch)
        except RuntimeError:
            pass

        if sift.descriptors.shape[0] == 0:
            print(f"Warning, aucun descripteur trouvé pour i = {i}")
        if sift.descriptors.shape[0] >= mandatory_n_keypoints:
            features[i] = sift.descriptors[0:mandatory_n_keypoints, :].flatten()
        else:
            pad = np.zeros((1,(mandatory_n_keypoints - sift.descriptors.shape[0])*sift.descriptors.shape[1]),dtype=sift.descriptors.dtype)
            features[i] = np.hstack([sift.descriptors.reshape((1,-1)), pad])

    return features

X_train_SIFT, X_test_SIFT = get_X_train_and_test_for_extractor(SIFT_extractor)

print(X_train_SIFT.shape)
print(X_test_SIFT.shape)
print(X_train_SIFT.dtype)

(5779, 6400)
(1445, 6400)
uint8


In [11]:
# daisy renvoie des vecteurs de description de taille fixe pour des images de tailles fixes

def DAISY_extractor(patchs):
    first_features = daisy(patchs[0]) # valeurs par défaut

    features = np.zeros(shape=(len(patchs),first_features.size), dtype=first_features.dtype)
    
    for i, patch in enumerate(patchs):
        features[i] = daisy(patch).flatten()

    return features

X_train_DAISY, X_test_DAISY = get_X_train_and_test_for_extractor(DAISY_extractor)

print(X_train_DAISY.shape)
print(X_test_DAISY.shape) # 55000 valeurs pour une image !
print(X_train_DAISY.dtype)

(5779, 55000)
(1445, 55000)
float64


In [12]:
# lpb renvoie des vecteurs de description de taille fixe pour des images de tailles fixes

# la fonction n'a pas de paramètres par défaut pour les 2 premiers params, j'ai copié ceux de l'exemple officiel : 
# https://scikit-image.org/docs/stable/auto_examples/features_detection/plot_local_binary_pattern.html#sphx-glr-auto-examples-features-detection-plot-local-binary-pattern-py

# settings for LBP
radius = 3
n_points = 8 * radius

def LPB_extractor(patchs):
    first_features = local_binary_pattern(patchs[0],n_points,radius) # les autres valeurs sont par défaut

    features = np.zeros(shape=(len(patchs),first_features.size), dtype=first_features.dtype)
    
    for i, patch in enumerate(patchs):
        features[i] = local_binary_pattern(patch,n_points,radius).flatten()

    return features

X_train_LPB, X_test_LPB = get_X_train_and_test_for_extractor(LPB_extractor)

print(X_train_LPB.shape)
print(X_test_LPB.shape)
print(X_train_LPB.dtype)

(5779, 9216)
(1445, 9216)
float64


In [13]:
def PIXEL_extractor(patchs):
    # vecteur de description = les pixels de l'image
    features = np.zeros(shape=(len(patchs),patchs[0].size), dtype=patchs.dtype)
    
    for i, patch in enumerate(patchs):
        features[i] = patch.flatten()

    return features

X_train_PIXEL, X_test_PIXEL = get_X_train_and_test_for_extractor(PIXEL_extractor)

print(X_train_PIXEL.shape)
print(X_test_PIXEL.shape)
print(X_train_PIXEL.dtype)

(5779, 9216)
(1445, 9216)
uint8


## Construire les jeux


In [14]:
X_trains = [X_train_HOG, X_train_PIXEL, X_train_ORB, X_train_SIFT, X_train_DAISY, X_train_LPB]
X_tests = [X_test_HOG, X_test_PIXEL, X_test_ORB, X_test_SIFT, X_test_DAISY, X_test_LPB]
X_names = ["HOG", "PIXEL","ORB","SIFT","DAISY","LPB"]

#### Diviser le jeu

TODO : peut-être une validation croisée (déjà implémentée par des bibliothèques, cf. TD6)


## Choix du classifieur


Nous allons choisir le classifieur le plus efficace parmis un certain nombre de classifieurs + combinaison avec les extracteurs de features


In [22]:
import time
def test_model(
    name, model, X_train, y_train, X_test, y_test, X_name, df_resultat_clf
):

    print(f"Testing {name} (x) {X_name} ...")

    start = time.time()
    model.fit(X_train, y_train)
    time_train = time.time() - start

    start = time.time()
    y_pred = model.predict(X_test)
    time_pred = time.time() - start

    accuracy = accuracy_score(y_test, y_pred)

    error = (1 - accuracy) * 100
    # Calculer le rappel et la précision
    rappel = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1_score = 2 * (precision * rappel) / (precision + rappel)
    avg_precision_score = average_precision_score(y_test, y_pred)

    df_resultat_clf = pd.concat(
        [
            df_resultat_clf,
            pd.DataFrame(
                [
                    {
                        "model": name,
                        "features": X_name,
                        "accuracy": accuracy,
                        "error%": error,
                        "rappel": rappel,
                        "precision": precision,
                        "f1_score": f1_score,
                        "average_precision_score": avg_precision_score,
                        "time_train":time_train,
                        "time_pred":time_pred,
                    }
                ]
            ),
        ],
        ignore_index=True,
    )
    return df_resultat_clf

In [23]:
def get_models_dict():

    models_dict = {}
    # KNN
    neighbors_list = [3, 25, 100, 250]
    for neighbors in neighbors_list:
        knn = KNeighborsClassifier(n_neighbors=neighbors)
        models_dict[f"KNN (k={neighbors})"] = knn

    # Decision Tree
    decision_tree = DecisionTreeClassifier()
    models_dict["Decision Tree"] = decision_tree

    # Random Forest
    estimators_list = [3, 25, 100, 250]
    for estimators in estimators_list:
        random_forest = RandomForestClassifier(n_estimators=estimators)
        models_dict[f"Random Forest (n_estimators={estimators})"] = random_forest

    # SVC
    svc = SVC()
    models_dict["SVC"] = svc

    # LinearSVC
    linear_svc = LinearSVC()
    models_dict["Linear SVC"] = linear_svc

    # SVC kernel
    svc_kernel_list = ["poly", "rbf", "sigmoid"]
    for kernel in svc_kernel_list:
        svc_kernel = SVC(kernel=kernel)
        models_dict[f"SVC (kernel={kernel})"] = svc_kernel

    # Logistic Regression
    iter_list = [25, 200, 400]
    for n_iter in iter_list:
        logistic_regression = LogisticRegression(max_iter=n_iter)
        models_dict[f"Logistic Regression (max_iter={n_iter})"] = logistic_regression

    # AdaBoost
    estimators_list = [10, 25, 100]
    for estimators in estimators_list:
        ada_boost = AdaBoostClassifier(n_estimators=estimators)
        models_dict[f"AdaBoost (n_estimators={estimators})"] = ada_boost

    return models_dict

    # Gradient Boosting
    learning_rate_list = [0.01, 0.2, 0.5]
    for learning_rate in learning_rate_list:
        gradient_boosting = GradientBoostingClassifier(learning_rate=learning_rate)
        models_dict[f"Gradient Boosting (learning_rate={learning_rate})"] = (
            gradient_boosting
        )
    return models_dict

In [31]:
df_resultat_clf = pd.DataFrame(
    columns=[
        "model",
        "features",
        "accuracy",
        "error%",
        "rappel",
        "precision",
        "f1_score",
        "average_precision_score",
        "time_train",
        "time_pred"
    ]
)

models_dict = get_models_dict()

for name, model in models_dict.items():
    for X_train, X_test, X_name in zip(X_trains, X_tests, X_names):
        df_resultat_clf = test_model(name, model, X_train, y_train, X_test, y_test, X_name, df_resultat_clf)

df_resultat_clf = df_resultat_clf.sort_values(by="error%")
print(df_resultat_clf)

Testing KNN (k=3) (x) HOG ...


  df_resultat_clf = pd.concat(


Testing KNN (k=3) (x) PIXEL ...
Testing KNN (k=3) (x) ORB ...
Testing KNN (k=3) (x) SIFT ...
Testing KNN (k=3) (x) DAISY ...
Testing KNN (k=3) (x) LPB ...
Testing KNN (k=25) (x) HOG ...
Testing KNN (k=25) (x) PIXEL ...
Testing KNN (k=25) (x) ORB ...
Testing KNN (k=25) (x) SIFT ...
Testing KNN (k=25) (x) DAISY ...
Testing KNN (k=25) (x) LPB ...
Testing KNN (k=100) (x) HOG ...
Testing KNN (k=100) (x) PIXEL ...
Testing KNN (k=100) (x) ORB ...
Testing KNN (k=100) (x) SIFT ...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  f1_score = 2 * (precision * rappel) / (precision + rappel)


Testing KNN (k=100) (x) DAISY ...
Testing KNN (k=100) (x) LPB ...
Testing KNN (k=250) (x) HOG ...
Testing KNN (k=250) (x) PIXEL ...
Testing KNN (k=250) (x) ORB ...
Testing KNN (k=250) (x) SIFT ...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  f1_score = 2 * (precision * rappel) / (precision + rappel)


Testing KNN (k=250) (x) DAISY ...
Testing KNN (k=250) (x) LPB ...
Testing Decision Tree (x) HOG ...
Testing Decision Tree (x) PIXEL ...
Testing Decision Tree (x) ORB ...
Testing Decision Tree (x) SIFT ...
Testing Decision Tree (x) DAISY ...
Testing Decision Tree (x) LPB ...
Testing Random Forest (n_estimators=3) (x) HOG ...
Testing Random Forest (n_estimators=3) (x) PIXEL ...
Testing Random Forest (n_estimators=3) (x) ORB ...
Testing Random Forest (n_estimators=3) (x) SIFT ...
Testing Random Forest (n_estimators=3) (x) DAISY ...
Testing Random Forest (n_estimators=3) (x) LPB ...
Testing Random Forest (n_estimators=25) (x) HOG ...
Testing Random Forest (n_estimators=25) (x) PIXEL ...
Testing Random Forest (n_estimators=25) (x) ORB ...
Testing Random Forest (n_estimators=25) (x) SIFT ...
Testing Random Forest (n_estimators=25) (x) DAISY ...
Testing Random Forest (n_estimators=25) (x) LPB ...
Testing Random Forest (n_estimators=100) (x) HOG ...
Testing Random Forest (n_estimators=100) (x)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  f1_score = 2 * (precision * rappel) / (precision + rappel)


Testing Random Forest (n_estimators=250) (x) DAISY ...
Testing Random Forest (n_estimators=250) (x) LPB ...
Testing SVC (x) HOG ...
Testing SVC (x) PIXEL ...
Testing SVC (x) ORB ...
Testing SVC (x) SIFT ...
Testing SVC (x) DAISY ...
Testing SVC (x) LPB ...
Testing Linear SVC (x) HOG ...




Testing Linear SVC (x) PIXEL ...




Testing Linear SVC (x) ORB ...




Testing Linear SVC (x) SIFT ...
Testing Linear SVC (x) DAISY ...




Testing Linear SVC (x) LPB ...




Testing SVC (kernel=poly) (x) HOG ...
Testing SVC (kernel=poly) (x) PIXEL ...
Testing SVC (kernel=poly) (x) ORB ...
Testing SVC (kernel=poly) (x) SIFT ...
Testing SVC (kernel=poly) (x) DAISY ...
Testing SVC (kernel=poly) (x) LPB ...
Testing SVC (kernel=rbf) (x) HOG ...
Testing SVC (kernel=rbf) (x) PIXEL ...
Testing SVC (kernel=rbf) (x) ORB ...
Testing SVC (kernel=rbf) (x) SIFT ...
Testing SVC (kernel=rbf) (x) DAISY ...
Testing SVC (kernel=rbf) (x) LPB ...
Testing SVC (kernel=sigmoid) (x) HOG ...
Testing SVC (kernel=sigmoid) (x) PIXEL ...
Testing SVC (kernel=sigmoid) (x) ORB ...
Testing SVC (kernel=sigmoid) (x) SIFT ...
Testing SVC (kernel=sigmoid) (x) DAISY ...
Testing SVC (kernel=sigmoid) (x) LPB ...
Testing Logistic Regression (max_iter=25) (x) HOG ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=25) (x) PIXEL ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=25) (x) ORB ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=25) (x) SIFT ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=25) (x) DAISY ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=25) (x) LPB ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=200) (x) HOG ...
Testing Logistic Regression (max_iter=200) (x) PIXEL ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=200) (x) ORB ...
Testing Logistic Regression (max_iter=200) (x) SIFT ...
Testing Logistic Regression (max_iter=200) (x) DAISY ...
Testing Logistic Regression (max_iter=200) (x) LPB ...
Testing Logistic Regression (max_iter=400) (x) HOG ...
Testing Logistic Regression (max_iter=400) (x) PIXEL ...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Testing Logistic Regression (max_iter=400) (x) ORB ...
Testing Logistic Regression (max_iter=400) (x) SIFT ...
Testing Logistic Regression (max_iter=400) (x) DAISY ...
Testing Logistic Regression (max_iter=400) (x) LPB ...
Testing AdaBoost (n_estimators=10) (x) HOG ...




Testing AdaBoost (n_estimators=10) (x) PIXEL ...




Testing AdaBoost (n_estimators=10) (x) ORB ...




Testing AdaBoost (n_estimators=10) (x) SIFT ...




Testing AdaBoost (n_estimators=10) (x) DAISY ...




Testing AdaBoost (n_estimators=10) (x) LPB ...




Testing AdaBoost (n_estimators=25) (x) HOG ...




Testing AdaBoost (n_estimators=25) (x) PIXEL ...




Testing AdaBoost (n_estimators=25) (x) ORB ...




Testing AdaBoost (n_estimators=25) (x) SIFT ...




Testing AdaBoost (n_estimators=25) (x) DAISY ...




Testing AdaBoost (n_estimators=25) (x) LPB ...




Testing AdaBoost (n_estimators=100) (x) HOG ...




Testing AdaBoost (n_estimators=100) (x) PIXEL ...




Testing AdaBoost (n_estimators=100) (x) ORB ...




Testing AdaBoost (n_estimators=100) (x) SIFT ...




Testing AdaBoost (n_estimators=100) (x) DAISY ...




Testing AdaBoost (n_estimators=100) (x) LPB ...




                                 model features  accuracy     error%  \
66                   SVC (kernel=poly)      HOG  0.986851   1.314879   
70                   SVC (kernel=poly)    DAISY  0.981315   1.868512   
96  Logistic Regression (max_iter=400)      HOG  0.980623   1.937716   
90  Logistic Regression (max_iter=200)      HOG  0.980623   1.937716   
72                    SVC (kernel=rbf)      HOG  0.979931   2.006920   
..                                 ...      ...       ...        ...   
62                          Linear SVC      ORB  0.787543  21.245675   
27                       Decision Tree     SIFT  0.773702  22.629758   
26                       Decision Tree      ORB  0.759862  24.013841   
83                SVC (kernel=sigmoid)      LPB  0.745329  25.467128   
79                SVC (kernel=sigmoid)    PIXEL  0.669204  33.079585   

      rappel  precision  f1_score  average_precision_score  time_train  \
66  0.936508   0.987448  0.961303                 0.935825   

In [32]:
df_resultat_clf.head() # attention, il y a des warnings dans l'output, ça change peut-être des valeurs à pas mal interpréter ici

Unnamed: 0,model,features,accuracy,error%,rappel,precision,f1_score,average_precision_score,time_train,time_pred
66,SVC (kernel=poly),HOG,0.986851,1.314879,0.936508,0.987448,0.961303,0.935825,22.418278,5.19568
70,SVC (kernel=poly),DAISY,0.981315,1.868512,0.924603,0.966805,0.945233,0.90706,108.014973,24.313498
96,Logistic Regression (max_iter=400),HOG,0.980623,1.937716,0.920635,0.966667,0.943089,0.903788,0.687378,0.003687
90,Logistic Regression (max_iter=200),HOG,0.980623,1.937716,0.920635,0.966667,0.943089,0.903788,0.700054,0.012753
72,SVC (kernel=rbf),HOG,0.979931,2.00692,0.896825,0.9869,0.939709,0.90307,24.489752,9.351647


In [None]:
## Affichons les 3 meilleurs résultats pour chaque score
# accuracy
top_accuracy = df_resultat_clf.nlargest(3, "accuracy")
print("Top 3 Accuracy:")
print(top_accuracy[["model","features", "accuracy"]])

# rappel
top_recall = df_resultat_clf.nlargest(3, "rappel")
print("\nTop 3 Rappel:")
print(top_recall[["model","features", "rappel"]])

# précision
top_precision = df_resultat_clf.nlargest(3, "precision")
print("\nTop 3 Précision:")
print(top_precision[["model","features", "precision"]])

# f1_score
top_f1_score = df_resultat_clf.nlargest(3, "f1_score")
print("\nTop 3 F1 Score:")
print(top_f1_score[["model","features", "f1_score"]])

# average_precision_score
top_avg_precision = df_resultat_clf.nlargest(3, "average_precision_score")
print("\nTop 3 Average Precision Score:")
print(top_avg_precision[["model","features", "average_precision_score"]])

Top 3 Accuracy:
                                 model  accuracy
66                   SVC (kernel=poly)  0.986851
70                   SVC (kernel=poly)  0.981315
96  Logistic Regression (max_iter=400)  0.980623

Top 3 Rappel:
                                 model    rappel
66                   SVC (kernel=poly)  0.936508
70                   SVC (kernel=poly)  0.924603
96  Logistic Regression (max_iter=400)  0.920635

Top 3 Précision:
                               model  precision
48  Random Forest (n_estimators=250)        1.0
52  Random Forest (n_estimators=250)        1.0
46  Random Forest (n_estimators=100)        1.0

Top 3 F1 Score:
                                 model  f1_score
66                   SVC (kernel=poly)  0.961303
70                   SVC (kernel=poly)  0.945233
96  Logistic Regression (max_iter=400)  0.943089

Top 3 Average Precision Score:
                                 model  average_precision_score
66                   SVC (kernel=poly)                 0.9

In [None]:
# enregistrer le dataframe
df_resultat_clf.to_csv("resultats_classification.csv", index=False)