In [1]:
from Classification import classification_class as classification
from Features_extraction import feature_extraction_class as feature_extraction
from Data_processing.images_processing_class import ImagesProcessing
from utils import perform_classification, best_preprocessing

import pandas as pd
import numpy as np
import cv2
import json

In [2]:
# Notebook-wide accumulator: one empty sub-dict per metric, filled in by the
# classification cells below as each feature set is evaluated.
metrics_results_best_methods = {
    metric_name: {}
    for metric_name in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Feature-set label -> name of the classifier selected for it.
name_best_models = dict()

In [3]:
data_normal_path = "Data/normal/"
data_pothole_path = "Data/potholes/"

# `i` is the image set handed to the section I feature extractor; no cell in
# this notebook calls apply_preprocessing on it.
i = ImagesProcessing(
    folder_normal=data_normal_path,
    folder_potholes=data_pothole_path,
    img_size=(256, 256),
)

# Second loader built with the same arguments; this is the instance that
# section II passes through apply_preprocessing.
image_process = ImagesProcessing(
    folder_normal=data_normal_path,
    folder_potholes=data_pothole_path,
    img_size=(256, 256),
)

# Labels are taken from `i` and reused as the target for every experiment.
Y = i.labels
df_Y = pd.DataFrame(Y)

In [4]:
# Load the saved best settings used in section II: one JSON file for the
# preprocessing configuration and one for the feature-method parameters
# (both are indexed by method name in the section II loop).
# The encoding is given explicitly so the files are decoded the same way on
# every platform instead of relying on the locale default.
with open("best_configs_processing.json", "r", encoding="utf-8") as f:
    best_configs_processing = json.load(f)

with open("best_configs_methods.json", "r", encoding="utf-8") as f:
    best_configs_methods = json.load(f)

# I. Unprocessed images

In [19]:
# Feature extractor bound to `i`, the image set that is never passed to
# apply_preprocessing in the visible cells; every section I experiment uses it.
env_feature_extraction_unprocessed_images = feature_extraction.FeatureExtraction(i)

## 1. Point of interest detection

In [8]:
# Feature table built from method_SIFT() on the unprocessed image set.
df_features = pd.DataFrame(env_feature_extraction_unprocessed_images.method_SIFT())

print("Performing Classification")
# Wrap features and labels for the classifiers (stratified=False is passed explicitly).
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")
# Cross-validation results, indexed by metric name; the keys found under
# 'f1-score' are reused as labels (presumably the classifier names — confirm).
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
# NOTE(review): this Styler is not the last expression of the cell, so it is
# built and discarded without being rendered. Move it to its own cell (or
# display it explicitly) if the highlighted training table is wanted.
results_train_KFold.style.highlight_max(axis=0)

# Select the classifier by "F1-score" on the "Test" results and remember its name.
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["SIFT + BoW"] = best_method_name
# TrainTest() rebinds metrics_results; only `models` is used afterwards.
metrics_results, predictions, models = env_SIFT.TrainTest()
env_SIFT.evaluate_model(models[best_method_name])
# Store the selected model's metrics under "SIFT + BoW" in the shared accumulator.
metrics_results_best_methods = env_SIFT.get_metrics(models[best_method_name], "SIFT + BoW", metrics_results_best_methods)

[1mExtracting SIFT Features[0;0m
Performing Classification
Best method name: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.78      0.82        86
           1       0.67      0.78      0.72        50

    accuracy                           0.78       136
   macro avg       0.77      0.78      0.77       136
weighted avg       0.79      0.78      0.78       136



In [9]:
# Feature table built from method_ORB() on the unprocessed image set.
df_features = pd.DataFrame(env_feature_extraction_unprocessed_images.method_ORB())

print("Performing Classification")
# Wrap features and labels for the classifiers (stratified=False is passed explicitly).
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")
# Cross-validation results, indexed by metric name; the keys found under
# 'f1-score' are reused as labels (presumably the classifier names — confirm).
metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
# NOTE(review): this Styler is not the last expression of the cell, so it is
# built and discarded without being rendered.
results_train_KFold.style.highlight_max(axis=0)

# Select the classifier by "F1-score" on the "Test" results and remember its name.
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["ORB + BoW"] = best_method_name
# TrainTest() rebinds metrics_results; only `models` is used afterwards.
metrics_results, predictions, models = env_ORB.TrainTest()
env_ORB.evaluate_model(models[best_method_name])
# Store the selected model's metrics under "ORB + BoW" in the shared accumulator.
metrics_results_best_methods = env_ORB.get_metrics(models[best_method_name], "ORB + BoW", metrics_results_best_methods)

[1mExtracting ORB Features[0;0m
Performing Classification
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.63      0.68        86
           1       0.48      0.60      0.54        50

    accuracy                           0.62       136
   macro avg       0.61      0.61      0.61       136
weighted avg       0.64      0.62      0.62       136



## 2. Edge detection

In [None]:
# Edge features from the unprocessed image set. Unlike the SIFT/ORB cells, the
# result of method_EDGE() is used directly, without a pd.DataFrame(...) wrapper.
df_features_edge = env_feature_extraction_unprocessed_images.method_EDGE()

print("Performing Classification")
# Wrap features and labels for the classifiers (stratified=False is passed explicitly).
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")
# Cross-validation results, indexed by metric name; the keys found under
# 'f1-score' are reused as labels (presumably the classifier names — confirm).
metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# NOTE(review): this Styler is not the last expression of the cell, so it is
# built and discarded without being rendered.
results_train_KFold.style.highlight_max(axis=0)

# Select the classifier by "F1-score" on the "Test" results and remember its name.
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["EDGE"] = best_method_name
# TrainTest() rebinds metrics_results; only `models` is used afterwards.
metrics_results, predictions, models = env_edge.TrainTest()
env_edge.evaluate_model(models[best_method_name])
# Store the selected model's metrics under "EDGE" in the shared accumulator.
metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE", metrics_results_best_methods)

[1mExtracting Edge features[0;0m
Performing Classification
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.86      0.88        86
           1       0.77      0.82      0.80        50

    accuracy                           0.85       136
   macro avg       0.83      0.84      0.84       136
weighted avg       0.85      0.85      0.85       136



## 3. Thresholding Method

In [None]:
def perform_classification2(df_features, df_Y, feature_name):
    """Select, train and evaluate a classifier for one feature set, recording the results.

    Runs the same sequence as the SIFT / ORB / EDGE cells: cross-validate,
    pick the best classifier by test "F1-score", train/test, evaluate, and
    collect the metrics of the selected model.

    Results are written to the notebook-level accumulators so they show up in
    the overall summary:
      * ``name_best_models[feature_name]`` receives the selected classifier name;
      * ``metrics_results_best_methods`` is rebound to the value returned by
        ``get_metrics`` (called with the current accumulator).

    Args:
        df_features: Feature table for all images.
        df_Y: Labels, passed to ``classification.DataProcessing`` with the features.
        feature_name: Label under which the results are stored.

    Returns:
        None. Both accumulators must already exist at notebook level.
    """
    # Previously a fresh local dict shadowed the notebook-level accumulator, so
    # the metrics computed here were thrown away when the function returned.
    global metrics_results_best_methods

    print("Performing Classification")
    data = classification.DataProcessing(df_features, df_Y, stratified=False)
    env = classification.BinaryClassification(data, average="macro")

    metrics_results = env.CrossValidationKFold()
    labels = list(metrics_results['f1-score'].keys())
    # Only the test-side means are needed for model selection. A Styler built
    # inside a function is never rendered, so the training table is not styled here.
    _results_train_KFold, results_test_KFold = env.createMeansDataframe(metrics_results, labels)

    best_method_name = env.get_best_method(results_test_KFold, "F1-score", ens="Test")
    print(f"Best method name: {best_method_name}")
    name_best_models[feature_name] = best_method_name

    _metrics, _predictions, models = env.TrainTest()
    env.evaluate_model(models[best_method_name])
    metrics_results_best_methods = env.get_metrics(
        models[best_method_name], feature_name, metrics_results_best_methods
    )

### a. Otsu's Thresholding

In [None]:
# Features from method_otsu() on the unprocessed images, then the shared
# classification routine under the "Otsu's Thresholding" label.
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_otsu()
)

perform_classification2(
    df_features=df_features,
    df_Y=df_Y,
    feature_name="Otsu's Thresholding",
)

[1mData Segmentation using Otsu's Thresholding[0;0m
Performing Classification
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.73      0.76        86
           1       0.59      0.66      0.62        50

    accuracy                           0.71       136
   macro avg       0.69      0.70      0.69       136
weighted avg       0.71      0.71      0.71       136



### b. Adaptive Thresholding

In [None]:
# Features from method_adaptive() on the unprocessed images, then the shared
# classification routine under the "Adaptive's Thresholding" label.
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_adaptive()
)

perform_classification2(
    df_features=df_features,
    df_Y=df_Y,
    feature_name="Adaptive's Thresholding",
)

[1mData Segmentation using Adaptive's Thresholding[0;0m
Performing Classification
Best method name: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.34      0.50        86
           1       0.46      0.96      0.62        50

    accuracy                           0.57       136
   macro avg       0.70      0.65      0.56       136
weighted avg       0.76      0.57      0.54       136



## 4. Texture Feature

### a. Frequency-Based Methods

In [None]:
# Features from method_Gabor() on the unprocessed images, then the shared
# classification routine under the "Gabor filters" label.
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_Gabor()
)

perform_classification2(
    df_features=df_features,
    df_Y=df_Y,
    feature_name="Gabor filters",
)

[1mExtracting Surface Textures Features using Gabor filters[0;0m
Performing Classification
Best method name: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.62      0.63        86
           1       0.38      0.40      0.39        50

    accuracy                           0.54       136
   macro avg       0.51      0.51      0.51       136
weighted avg       0.54      0.54      0.54       136



### b. Spatial Texture Descriptors

In [None]:
# Features from method_LBP() on the unprocessed images, then the shared
# classification routine under the "LBP" label.
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_LBP()
)

perform_classification2(
    df_features=df_features,
    df_Y=df_Y,
    feature_name="LBP",
)

[1mExtracting Spatial Texture Features using LBP[0;0m
Performing Classification
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.79      0.83        86
           1       0.69      0.82      0.75        50

    accuracy                           0.80       136
   macro avg       0.79      0.81      0.79       136
weighted avg       0.81      0.80      0.80       136



# II. Processed images: using best configurations for processing and feature detection methods

In [5]:
# Filter name appended to each method's label in the processed-images results.
filters = dict(
    SIFT="gaussian",
    ORB="median",
    EDGE="bilateral",
)

In [None]:
# For every method with a saved preprocessing configuration: preprocess the
# images, extract that method's features with its saved parameters, and record
# the classification results in the shared accumulators.
for method_name, best_config in best_configs_processing.items():

    best_params_method = best_configs_methods[method_name]

    print("=====================================================================")
    print(f"\033[1mComputing results for {method_name}\033[0;0m")
    print(f"Applying best preprocessing :\n {best_config} \n")
    print(f"Applying best method parameters:\n {best_params_method} \n")

    # Reload the images for each method so that one method's preprocessing can
    # never be applied on top of the previous one's.
    # NOTE(review): apply_preprocessing is not visible here; if it already
    # restarts from the raw images, this reload is redundant but harmless.
    image_process = ImagesProcessing(
        folder_normal=data_normal_path,
        folder_potholes=data_pothole_path,
        img_size=(256, 256),
    )
    image_process.apply_preprocessing(best_config)

    env_feature_extraction = feature_extraction.FeatureExtraction(image_process)
    feature_extraction_method = getattr(env_feature_extraction, f"method_{method_name}")

    # Label suffix: use the `filters` entry when there is one, otherwise fall
    # back to the filter named in the configuration instead of raising KeyError.
    filter_name = filters.get(method_name, best_config.get("filter"))
    method_name_preprocessed = f"{method_name} + {filter_name}"

    metrics_results_best_methods = perform_classification(
        feature_extraction_method, method_name_preprocessed, df_Y, name_best_models, metrics_results_best_methods, best_params_method
    )

[1mComputing results for SIFT[0;0m
Applying best preprocessing :
 {'filter': 'gaussian', 'filter_params': [[3, 3], 0], 'histogram': 'standard', 'gamma': 0.8, 'normalize': False, 'f1-score': 0.7926342950973493} 

Applying best method parameters:
 {'nOctaveLayers': 4, 'nfeatures': 500, 'num_clusters': 5, 'sigma': 1.6} 

[1mExtracting SIFT Features[0;0m
Performing Classification for SIFT + gaussian
Best method name for SIFT + gaussian: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.72      0.77        86
           1       0.61      0.76      0.68        50

    accuracy                           0.74       136
   macro avg       0.73      0.74      0.73       136
weighted avg       0.76      0.74      0.74       136

[1mComputing results for ORB[0;0m
Applying best preprocessing :
 {'filter': 'median', 'filter_params': [3], 'histogram': 'standard', 'gamma': 0.8, 'normalize': False, 'f1-score': 0.5966649879693

# III. Overall results

In [12]:
# Summary table: one row per feature-set label, one column per metric, with the
# selected classifier name inserted as the first column.
df_best_models_all_datasets_macro_f1 = pd.DataFrame(metrics_results_best_methods)
df_best_models_all_datasets_macro_f1.insert(
    loc=0,
    column='Best models',
    value=name_best_models,
)
# Highlight the per-column maximum of the metric columns only (everything after
# 'Best models'); the Styler is the cell's displayed result.
df_best_models_all_datasets_macro_f1.style.highlight_max(
    subset=df_best_models_all_datasets_macro_f1.columns[1:],
    axis=0,
)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc
SIFT + gaussian,LogReg,0.838235,0.833889,0.855349,0.831597,0.855349
ORB + median,LogReg,0.588235,0.583825,0.603256,0.596354,0.603256
EDGE + bilateral,LogReg,0.852941,0.845419,0.854419,0.840179,0.854419
