In [1]:
from Classification import classification_class as classification
from Features_extraction.feature_extraction_class import FeatureExtraction
from Data_processing.images_processing_class import ImagesProcessing
from utils import perform_classification

import pandas as pd
import numpy as np
import cv2
import json

In [2]:
# One (initially empty) result table per metric; each table is filled in
# later with one entry per evaluated method name.
metrics_results_best_methods = {
    metric_name: {}
    for metric_name in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Filled in later with the best classifier name found for each method.
name_best_models = dict()

In [3]:
# Folders holding the two image classes.
data_normal_path = "Data/normal/"
data_pothole_path = "Data/potholes/"

# Image environment shared by every feature-extraction cell below.
env_images = ImagesProcessing(
    folder_normal=data_normal_path,
    folder_potholes=data_pothole_path,
    img_size=(256, 256),
)

# Labels are kept both as exposed by the environment and as a DataFrame;
# the DataFrame form is what the classification helper receives.
Y = env_images.labels
df_Y = pd.DataFrame(Y)

In [4]:
# Load the best preprocessing configuration and the best method parameters
# from their JSON files. The encoding is pinned to UTF-8 so that decoding does
# not depend on the platform's default locale encoding.
with open("best_configs_processing.json", "r", encoding="utf-8") as f:
    best_configs_processing = json.load(f)

with open("best_configs_methods.json", "r", encoding="utf-8") as f:
    best_configs_methods = json.load(f)

# I. Unprocessed images

In [5]:
# Feature extractor built on the image environment before this notebook
# applies any preprocessing to it; used by all cells of section I.
env_feature_extraction_unprocessed_images = FeatureExtraction(env_images)

## 1. Point of interest detection

In [6]:
# SIFT features of the unprocessed images, wrapped in a DataFrame.
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_SIFT()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "SIFT",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features,
)

[1mExtracting SIFT Features[0;0m
Performing Classification for SIFT
Best method name for SIFT: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.76      0.79        86
           1       0.64      0.74      0.69        50

    accuracy                           0.75       136
   macro avg       0.74      0.75      0.74       136
weighted avg       0.76      0.75      0.75       136



In [7]:
# ORB features of the unprocessed images, wrapped in a DataFrame.
# NOTE: this rebinds `df_features`, so the value from the previous cell is replaced.
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_ORB()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "ORB",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features,
)

[1mExtracting ORB Features[0;0m
Performing Classification for ORB
Best method name for ORB: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.60      0.66        86
           1       0.48      0.62      0.54        50

    accuracy                           0.61       136
   macro avg       0.60      0.61      0.60       136
weighted avg       0.64      0.61      0.62       136



In [8]:
# Harris corner features of the unprocessed images, wrapped in a DataFrame.
df_features_harris = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Harris()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "Harris",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_harris,
)

[1mExtracting Harris Corner Features[0;0m
Performing Classification for Harris
Best method name for Harris: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.73      0.79        86
           1       0.63      0.78      0.70        50

    accuracy                           0.75       136
   macro avg       0.74      0.76      0.74       136
weighted avg       0.77      0.75      0.75       136



## 2. Edge detection

In [9]:
# Edge features of the unprocessed images.
# NOTE(review): unlike the other extraction cells, the result is passed on
# without a pd.DataFrame wrapper — presumably method_EDGE already returns a
# DataFrame; confirm against FeatureExtraction.
df_features_edge = env_feature_extraction_unprocessed_images.method_EDGE()

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "EDGE",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_edge,
)

[1mExtracting Edge features[0;0m
Performing Classification for EDGE
Best method name for EDGE: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.81      0.86        86
           1       0.73      0.86      0.79        50

    accuracy                           0.83       136
   macro avg       0.82      0.84      0.82       136
weighted avg       0.84      0.83      0.83       136



## 3. Thresholding Methods

### a. Otsu's Thresholding

In [10]:
# Otsu-thresholding features of the unprocessed images, wrapped in a DataFrame.
df_features_otsu = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_otsu()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "Otsu's thresholding",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_otsu,
)

[1mData Segmentation using Otsu's Thresholding[0;0m
Performing Classification for Otsu's thresholding
Best method name for Otsu's thresholding: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.73      0.76        86
           1       0.59      0.66      0.62        50

    accuracy                           0.71       136
   macro avg       0.69      0.70      0.69       136
weighted avg       0.71      0.71      0.71       136



### b. Adaptive Thresholding

In [11]:
# Adaptive-thresholding features of the unprocessed images, wrapped in a DataFrame.
df_features_adaptive = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_adaptive()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "Adaptive",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_adaptive,
)

[1mData Segmentation using Adaptive's Thresholding[0;0m
Performing Classification for Adaptive
Best method name for Adaptive: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.34      0.50        86
           1       0.46      0.96      0.62        50

    accuracy                           0.57       136
   macro avg       0.70      0.65      0.56       136
weighted avg       0.76      0.57      0.54       136



## 4. Texture Features

### a. Frequency-Based Methods

In [12]:
# Gabor-filter texture features of the unprocessed images, wrapped in a DataFrame.
df_features_gabor = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Gabor()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "Gabor",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_gabor,
)

[1mExtracting Surface Textures Features using Gabor filters[0;0m
Performing Classification for Gabor
Best method name for Gabor: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.62      0.63        86
           1       0.38      0.40      0.39        50

    accuracy                           0.54       136
   macro avg       0.51      0.51      0.51       136
weighted avg       0.54      0.54      0.54       136



### b. Spatial Texture Descriptors

In [13]:
# LBP texture features of the unprocessed images, wrapped in a DataFrame.
df_features_LBP = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_LBP()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "LBP",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_LBP,
)

[1mExtracting Spatial Texture Features using LBP[0;0m
Performing Classification for LBP
Best method name for LBP: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.79      0.83        86
           1       0.69      0.82      0.75        50

    accuracy                           0.80       136
   macro avg       0.79      0.81      0.79       136
weighted avg       0.81      0.80      0.80       136



## 5. Structural Features

### Histogram of Oriented Gradients (HOG)

In [14]:
# HOG structural features of the unprocessed images, wrapped in a DataFrame.
df_features_HOG = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_HOG()
)

# Only the updated metrics are kept; the second return value is discarded.
metrics_results_best_methods, _ = perform_classification(
    "HOG",
    df_Y,
    name_best_models,
    metrics_results_best_methods,
    df_features=df_features_HOG,
)

[1mExtracting Structural Features using HOG[0;0m
Performing Classification for HOG
Best method name for HOG: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.80      0.86        86
           1       0.73      0.90      0.80        50

    accuracy                           0.84       136
   macro avg       0.83      0.85      0.83       136
weighted avg       0.86      0.84      0.84       136



# II. Processed images: using best configurations for processing and feature detection methods

In [15]:
# Display the per-method parameters loaded from best_configs_methods.json.
best_configs_methods

{'SIFT': {'nOctaveLayers': 4,
  'nfeatures': 500,
  'num_clusters': 10,
  'sigma': 1.2},
 'ORB': {'nfeatures': 300, 'num_clusters': 5, 'scaleFactor': 1.5},
 'Harris': {'blockSize': 2, 'k': 0.04, 'ksize': 3, 'threshold': 0.01},
 'EDGE': {'canny_threshold1': 100,
  'canny_threshold2': 200,
  'laplacian_ksize': 3,
  'sobel_ksize': 5},
 'Adaptive': {'C': 5, 'block_size': 9},
 'Gabor': {'gamma': 1.0, 'ksize': 7, 'lambd': 5, 'sigma': 6},
 'LBP': {'num_points': 24, 'radius': 2},
 'HOG': {'orientations': 6, 'pixels_per_cell': [32, 32]}}

In [16]:
# Feature tables returned by the classification helper, keyed by method name.
dict_features = dict()

for method_name, best_config in best_configs_processing.items():

    # Methods without an entry in best_configs_methods run with None.
    best_params_method = best_configs_methods.get(method_name)

    print("=====================================================================")
    print(f"\033[1mComputing results for {method_name}\033[0;0m")
    print(f"Applying best preprocessing :\n {best_config} \n")
    print(f"Applying best method parameters:\n {best_params_method} \n")

    # NOTE(review): the same env_images object is preprocessed on every
    # iteration — presumably apply_preprocessing restarts from the raw images
    # rather than stacking on the previous configuration; confirm.
    env_images.apply_preprocessing(best_config)

    # Resolve the extractor by name on a freshly built FeatureExtraction.
    # NOTE(review): this requires a `method_<method_name>` attribute with the
    # exact spelling of the JSON key — verify for every key.
    env_feature_extraction = FeatureExtraction(env_images)
    feature_extraction_method = getattr(
        env_feature_extraction, f"method_{method_name}"
    )

    # Label under which the results of this run are reported.
    method_name_preprocessed = f"{method_name} + {best_config['filter']}"

    metrics_results_best_methods, df_features = perform_classification(
        method_name_preprocessed,
        df_Y,
        name_best_models,
        metrics_results_best_methods,
        best_params_method,
        feature_extraction_method=feature_extraction_method,
    )

    dict_features[method_name] = df_features

[1mComputing results for SIFT[0;0m
Applying best preprocessing :
 {'filter': 'bilateral', 'filter_params': [9, 75, 75], 'histogram': 'clahe', 'normalize': False, 'f1-score': 0.804676279657609} 

Applying best method parameters:
 {'nOctaveLayers': 4, 'nfeatures': 500, 'num_clusters': 10, 'sigma': 1.2} 

[1mExtracting SIFT Features[0;0m
Performing Classification for SIFT + bilateral
Best method name for SIFT + bilateral: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.87      0.85        86
           1       0.76      0.70      0.73        50

    accuracy                           0.81       136
   macro avg       0.80      0.79      0.79       136
weighted avg       0.81      0.81      0.81       136

[1mComputing results for ORB[0;0m
Applying best preprocessing :
 {'filter': 'bilateral', 'filter_params': [5, 50, 50], 'histogram': 'standard', 'normalize': False, 'f1-score': 0.5966649879693358} 

Applying best me

# III. Overall results

In [17]:
# Summary table: one row per evaluated method, one column per metric.
df_best_models_all_datasets_macro_f1 = pd.DataFrame(metrics_results_best_methods)

# Add the winning classifier names as the first column. The dict is wrapped in
# a Series because DataFrame.insert documents scalar, Series, or array-like
# values; a Series also makes the alignment on the method-name index explicit.
df_best_models_all_datasets_macro_f1.insert(
    0, 'Best models', pd.Series(name_best_models)
)

# Highlight the maximum of every metric column; the model-name column is excluded.
df_best_models_all_datasets_macro_f1.style.highlight_max(
    axis=0, subset=df_best_models_all_datasets_macro_f1.columns[1:]
)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc
SIFT,RandomForest,0.75,0.738934,0.747907,0.735632,0.747907
ORB,RandomForest,0.610294,0.600775,0.612326,0.604659,0.612326
Harris,RandomForest,0.75,0.741964,0.756279,0.740192,0.756279
EDGE,LogReg,0.830882,0.823943,0.836977,0.818952,0.836977
Otsu's thresholding,LogReg,0.705882,0.690839,0.696279,0.688393,0.696279
Adaptive,RandomForest,0.566176,0.557541,0.648605,0.696313,0.648605
Gabor,RandomForest,0.536765,0.507784,0.50814,0.507956,0.50814
LBP,LogReg,0.801471,0.793325,0.805349,0.789016,0.805349
HOG,RandomForest,0.838235,0.833036,0.851163,0.829119,0.851163
SIFT + bilateral,LogReg,0.808824,0.79072,0.786047,0.797101,0.786047


# IV. Combining some features

In [None]:
# TODO use dict_features