In [1]:
from Classification import classification_class as classification
from Features_extraction.feature_extraction_class import FeatureExtraction
from Data_processing.images_processing_class import ImagesProcessing
from utils import perform_classification

import pandas as pd
import numpy as np
import cv2
import json

In [2]:
# One empty bucket per metric. The buckets are handed to
# perform_classification, which returns the updated structure after each run.
metrics_results_best_methods = {
    metric: {}
    for metric in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Also handed to perform_classification; later used as the 'Best models'
# column of the summary table.
name_best_models = {}

In [3]:
# Input folders, one per class.
data_normal_path = "Data/normal/"
data_pothole_path = "Data/potholes/"

# Image container shared by every feature-extraction step below.
# NOTE(review): img_size is presumably the size images are resized to — confirm
# in ImagesProcessing.
env_images = ImagesProcessing(
    folder_normal=data_normal_path,
    folder_potholes=data_pothole_path,
    img_size=(256, 256),
)

# Ground-truth labels, kept both raw (Y) and as a DataFrame (df_Y) for
# perform_classification.
Y = env_images.labels
df_Y = pd.DataFrame(Y)

In [4]:
# Load the tuned configurations produced beforehand.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale encoding.

# Preprocessing configuration per feature-extraction method.
with open("best_configs_processing.json", "r", encoding="utf-8") as f:
    best_configs_processing = json.load(f)

# Feature-extraction parameters per method (a method may be absent).
with open("best_configs_methods.json", "r", encoding="utf-8") as f:
    best_configs_methods = json.load(f)

# I. Unprocessed images

In [5]:
# Feature extractor bound to the image set as loaded, before any preprocessing
# configuration is applied; used by every cell of section I.
env_feature_extraction_unprocessed_images = FeatureExtraction(env_images)

## 1. Point of interest detection

In [6]:
# SIFT features of the unprocessed images, one DataFrame for the classifiers.
df_features_SIFT = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_SIFT()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'SIFT',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_SIFT,
)

[1mExtracting SIFT Features[0;0m
Performing Classification for SIFT
Best method name for SIFT: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.76      0.79        86
           1       0.64      0.74      0.69        50

    accuracy                           0.75       136
   macro avg       0.74      0.75      0.74       136
weighted avg       0.76      0.75      0.75       136



In [7]:
# ORB features of the unprocessed images, one DataFrame for the classifiers.
df_features_ORB = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_ORB()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'ORB',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_ORB,
)

[1mExtracting ORB Features[0;0m
Performing Classification for ORB
Best method name for ORB: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.63      0.67        86
           1       0.48      0.58      0.52        50

    accuracy                           0.61       136
   macro avg       0.60      0.60      0.60       136
weighted avg       0.63      0.61      0.62       136



In [8]:
# Harris corner features of the unprocessed images.
df_features_harris = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Harris()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'Harris',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_harris,
)

[1mExtracting Harris Corner Features[0;0m
Performing Classification for Harris
Best method name for Harris: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.70      0.79        86
           1       0.63      0.88      0.73        50

    accuracy                           0.76       136
   macro avg       0.77      0.79      0.76       136
weighted avg       0.81      0.76      0.77       136



## 2. Edge detection

In [9]:
# Edge features of the unprocessed images.
# NOTE(review): unlike the sibling cells, the result is not wrapped in
# pd.DataFrame — presumably method_EDGE already returns a DataFrame; confirm.
df_features_edge = env_feature_extraction_unprocessed_images.method_EDGE()

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'EDGE',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_edge,
)

[1mExtracting Edge features[0;0m
Performing Classification for EDGE
Best method name for EDGE: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.81      0.86        86
           1       0.73      0.86      0.79        50

    accuracy                           0.83       136
   macro avg       0.82      0.84      0.82       136
weighted avg       0.84      0.83      0.83       136



## 3. Thresholding Method

### a. Otsu's Thresholding

In [10]:
# Otsu-thresholding features of the unprocessed images.
df_features_otsu = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Otsu()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "Otsu",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_otsu,
)

[1mData Segmentation using Otsu's Thresholding[0;0m
Performing Classification for Otsu
Best method name for Otsu: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.76      0.73        86
           1       0.51      0.44      0.47        50

    accuracy                           0.64       136
   macro avg       0.61      0.60      0.60       136
weighted avg       0.63      0.64      0.63       136



### b. Adaptive Thresholding

In [11]:
# Adaptive-thresholding features of the unprocessed images.
df_features_adaptive = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Adaptive()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'Adaptive',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_adaptive,
)

[1mData Segmentation using Adaptive's Thresholding[0;0m
Performing Classification for Adaptive
Best method name for Adaptive: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.34      0.50        86
           1       0.46      0.96      0.62        50

    accuracy                           0.57       136
   macro avg       0.70      0.65      0.56       136
weighted avg       0.76      0.57      0.54       136



## 4. Texture Features

### a. Frequency-Based Methods

In [12]:
# Gabor-filter texture features of the unprocessed images.
df_features_gabor = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Gabor()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'Gabor',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_gabor,
)

[1mExtracting Surface Textures Features using Gabor filters[0;0m
Performing Classification for Gabor
Best method name for Gabor: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.62      0.63        86
           1       0.38      0.40      0.39        50

    accuracy                           0.54       136
   macro avg       0.51      0.51      0.51       136
weighted avg       0.54      0.54      0.54       136



### b. Spatial Texture Descriptors

In [13]:
# LBP texture features of the unprocessed images.
df_features_LBP = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_LBP()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'LBP',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_LBP,
)

[1mExtracting Spatial Texture Features using LBP[0;0m
Performing Classification for LBP
Best method name for LBP: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.79      0.83        86
           1       0.69      0.82      0.75        50

    accuracy                           0.80       136
   macro avg       0.79      0.81      0.79       136
weighted avg       0.81      0.80      0.80       136



## 5. Structural Features

### Histogram of Oriented Gradients (HOG)

In [14]:
# HOG structural features of the unprocessed images.
df_features_HOG = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_HOG()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    'HOG',
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_HOG,
)

[1mExtracting Structural Features using HOG[0;0m
Performing Classification for HOG
Best method name for HOG: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.80      0.86        86
           1       0.73      0.90      0.80        50

    accuracy                           0.84       136
   macro avg       0.83      0.85      0.83       136
weighted avg       0.86      0.84      0.84       136



# II. Processed images: using best configurations for processing and feature detection methods

In [15]:
# Filled by the loop below, both keyed by method name:
#   dict_features     -> feature DataFrame returned by perform_classification
#   dict_num_features -> number of columns of that DataFrame
dict_features = {}
dict_num_features = {}

for method_name, best_config in best_configs_processing.items():

    # None when no tuned parameters are stored for this method.
    best_params_method = best_configs_methods.get(method_name)

    print("=====================================================================")
    print(f"\033[1mComputing results for {method_name}\033[0;0m")
    print(f"Applying best preprocessing :\n {best_config} \n")
    print(f"Applying best method parameters:\n {best_params_method} \n")

    # NOTE(review): this acts on the shared env_images object — confirm that
    # each call starts from the raw images rather than stacking on top of the
    # previous iteration's preprocessing, and that re-running the cell is safe.
    env_images.apply_preprocessing(best_config)

    # Resolve the extraction method by name, e.g. "SIFT" -> method_SIFT.
    env_feature_extraction = FeatureExtraction(env_images)
    feature_extraction_method = getattr(env_feature_extraction, f"method_{method_name}")

    # Row label used for this run in the results tables.
    method_name_preprocessed = f"{method_name} + {best_config['filter']}"

    metrics_results_best_methods, df_features = perform_classification(
        method_name_preprocessed,
        df_Y,
        name_best_models,
        metrics_results_best_methods,
        best_params_method,
        feature_extraction_method=feature_extraction_method,
    )

    # Keep the features so that later cells can reuse them.
    dict_features[method_name] = df_features

    # Record the number of features in the final DataFrame under both labels.
    # NOTE(review): the bare method_name entry is the one shown on the
    # un-preprocessed row of the summary table, yet it is counted on features
    # extracted with the tuned parameters — confirm both counts always match.
    dict_num_features[method_name] = dict_num_features[method_name_preprocessed] = df_features.shape[1]

[1mComputing results for SIFT[0;0m
Applying best preprocessing :
 {'filter': 'bilateral', 'filter_params': [12, 100, 100], 'histogram': 'clahe', 'normalize': True, 'f1-score': 0.7940731126973826} 

Applying best method parameters:
 {'nOctaveLayers': 3, 'nfeatures': 500, 'num_clusters': 5, 'sigma': 1.2} 

[1mExtracting SIFT Features[0;0m
Performing Classification for SIFT + bilateral
Best method name for SIFT + bilateral: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.66      0.75        86
           1       0.58      0.80      0.67        50

    accuracy                           0.71       136
   macro avg       0.72      0.73      0.71       136
weighted avg       0.75      0.71      0.72       136

[1mComputing results for ORB[0;0m
Applying best preprocessing :
 {'filter': 'bilateral', 'filter_params': [12, 100, 100], 'histogram': 'none', 'normalize': False, 'f1-score': 0.6660664593223113} 

Applying 

# III. Overall results

In [16]:
# Summary table: one row per evaluated method, one column per metric.
df_best_models_f1_score = pd.DataFrame(metrics_results_best_methods)

# Captured before the extra columns are added, so styling targets exactly the
# metric columns whatever their number.
metric_columns_best_methods = list(df_best_models_f1_score.columns)

df_best_models_f1_score.insert(0, 'Best models', name_best_models)
df_best_models_f1_score.insert(
    len(df_best_models_f1_score.columns), 'Number of features', dict_num_features
)

# The values stay numeric and the 4-decimal rendering is delegated to the
# Styler. Turning the metrics into strings first would make highlight_max
# compare text (a 'nan' string would rank above every score).
(
    df_best_models_f1_score.style
    .format('{:.4f}', subset=metric_columns_best_methods)
    .highlight_max(axis=0, subset=metric_columns_best_methods)
)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc,Number of features
SIFT,RandomForest,0.75,0.7389,0.7479,0.7356,0.7479,5
ORB,LogReg,0.6103,0.5967,0.604,0.5977,0.604,5
Harris,LogReg,0.7647,0.7614,0.7888,0.7688,0.7888,7
EDGE,LogReg,0.8309,0.8239,0.837,0.819,0.837,20
Otsu,RandomForest,0.6397,0.5997,0.5979,0.6053,0.5979,256
Adaptive,RandomForest,0.5662,0.5575,0.6486,0.6963,0.6486,256
Gabor,RandomForest,0.5368,0.5078,0.5081,0.508,0.5081,12
LBP,LogReg,0.8015,0.7933,0.8053,0.789,0.8053,26
HOG,RandomForest,0.8382,0.833,0.8512,0.8291,0.8512,980
SIFT + bilateral,RandomForest,0.7132,0.7087,0.7314,0.7152,0.7314,5


# IV. Combining some features

In [17]:
# Same layout as the single-method result store, kept separate for the
# experiments on combined feature sets.
metrics_results_concatenated_features = {
    metric: {}
    for metric in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Passed to perform_classification; later used as the 'Best models' column.
name_best_models_concatenated_features = {}

# Experiment label -> number of features used.
dict_num_concatenated_features = {}


## 1. SIFT + Edge

In [18]:
# Combine the SIFT and EDGE feature sets stored in dict_features.
# The extractor object is only used here for its concatenate_features helper.
df_features_SIFT_Edge = env_feature_extraction_unprocessed_images.concatenate_features(
    [dict_features["SIFT"], dict_features["EDGE"]]
)

metrics_results_concatenated_features, _ = perform_classification(
    'SIFT + Edge',
    df_Y,
    name_best_models_concatenated_features,
    metrics_results_concatenated_features,
    df_features=df_features_SIFT_Edge,
)

# Width of the combined feature table, reported in the summary.
dict_num_concatenated_features['SIFT + Edge'] = df_features_SIFT_Edge.shape[1]

Performing Classification for SIFT + Edge
Best method name for SIFT + Edge: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.74      0.81        86
           1       0.66      0.84      0.74        50

    accuracy                           0.78       136
   macro avg       0.77      0.79      0.77       136
weighted avg       0.80      0.78      0.78       136



## 2. SIFT + HOG

In [19]:
# Combine the SIFT and HOG feature sets stored in dict_features.
# The extractor object is only used here for its concatenate_features helper.
df_features_SIFT_HOG = env_feature_extraction_unprocessed_images.concatenate_features(
    [dict_features["SIFT"], dict_features["HOG"]]
)

# No dimensionality reduction here; the PCA variant is run in a later cell.
metrics_results_concatenated_features, _ = perform_classification(
    'SIFT + HOG',
    df_Y,
    name_best_models_concatenated_features,
    metrics_results_concatenated_features,
    df_features=df_features_SIFT_HOG,
)

# Width of the combined feature table, reported in the summary.
dict_num_concatenated_features['SIFT + HOG'] = df_features_SIFT_HOG.shape[1]

Performing Classification for SIFT + HOG
Best method name for SIFT + HOG: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.86      0.90        86
           1       0.79      0.92      0.85        50

    accuracy                           0.88       136
   macro avg       0.87      0.89      0.88       136
weighted avg       0.89      0.88      0.88       136



## 3. HOG with PCA, and SIFT + HOG with PCA

In [20]:
# HOG features alone, classified after PCA.
# With dim_reduction="PCA" the call is unpacked into three values here:
# the metrics, a features object, and n_pca (presumably the number of
# retained components — confirm in perform_classification).
metrics_results_concatenated_features, df_features_HOG_PCA, n_pca = perform_classification(
    'HOG with PCA',
    df_Y,
    name_best_models_concatenated_features,
    metrics_results_concatenated_features,
    df_features=dict_features["HOG"],
    dim_reduction="PCA",
)

# The reported feature count is the value returned as n_pca, not the raw HOG width.
dict_num_concatenated_features['HOG with PCA'] = n_pca

Performing Classification for HOG with PCA
Number of components for the chosen cumulative variance: 209
Best method name for HOG with PCA: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.81      0.88        86
           1       0.75      0.94      0.83        50

    accuracy                           0.86       136
   macro avg       0.85      0.88      0.86       136
weighted avg       0.88      0.86      0.86       136



In [21]:
# Raw SIFT + HOG concatenation; PCA is applied inside perform_classification,
# so despite its name this DataFrame holds the features *before* reduction.
# NOTE(review): this is the same input as the plain "SIFT + HOG" experiment,
# and the PCA-reduced HOG features computed for 'HOG with PCA' are not used
# here — confirm that PCA over the full concatenation is the intended setup.
df_features_SIFT_HOG_PCA = env_feature_extraction_unprocessed_images.concatenate_features(
    [dict_features["SIFT"], dict_features["HOG"]]
)

metrics_results_concatenated_features, _ , n_pca = perform_classification(
    'SIFT + HOG with PCA',
    df_Y,
    name_best_models_concatenated_features,
    metrics_results_concatenated_features,
    df_features=df_features_SIFT_HOG_PCA,
    dim_reduction="PCA",
)

# The reported feature count is the value returned as n_pca.
dict_num_concatenated_features['SIFT + HOG with PCA'] = n_pca

Performing Classification for SIFT + HOG with PCA
Number of components for the chosen cumulative variance: 211
Best method name for SIFT + HOG with PCA: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.80      0.87        86
           1       0.73      0.94      0.82        50

    accuracy                           0.85       136
   macro avg       0.85      0.87      0.85       136
weighted avg       0.88      0.85      0.86       136



In [22]:
# Summary table for the combined-feature experiments: one row per experiment,
# one column per metric.
df_best_models_mixed_features_best_f1 = pd.DataFrame(metrics_results_concatenated_features)

# Captured before the extra columns are added, so styling targets exactly the
# metric columns whatever their number.
metric_columns_concatenated = list(df_best_models_mixed_features_best_f1.columns)

df_best_models_mixed_features_best_f1.insert(
    0, 'Best models', name_best_models_concatenated_features
)
df_best_models_mixed_features_best_f1.insert(
    len(df_best_models_mixed_features_best_f1.columns),
    'Number of features',
    dict_num_concatenated_features,
)

# The values stay numeric and the 4-decimal rendering is delegated to the
# Styler. Turning the metrics into strings first would make highlight_max
# compare text (a 'nan' string would rank above every score).
(
    df_best_models_mixed_features_best_f1.style
    .format('{:.4f}', subset=metric_columns_concatenated)
    .highlight_max(axis=0, subset=metric_columns_concatenated)
)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc,Number of features
SIFT + Edge,LogReg,0.7794,0.7735,0.7921,0.7726,0.7921,25
SIFT + HOG,RandomForest,0.8824,0.8771,0.8902,0.8709,0.8902,985
HOG with PCA,LogReg,0.8603,0.8562,0.877,0.8525,0.877,209
SIFT + HOG with PCA,LogReg,0.8529,0.849,0.8712,0.8464,0.8712,211
