In [4]:
from Data_processing import import_images as i
from Data_processing import data_preprocessing as dp
from Classification import classification_class as classification
from Features_extraction import feature_extraction_class as feature_extraction
from Data_processing.images_processing_class import ImagesProcessing

import pandas as pd
import numpy as np
import cv2

In [5]:
# Accumulators shared by every experiment cell below.
# metrics_results_best_methods maps each metric name to its own (initially empty) dict;
# the experiment cells hand it to get_metrics() and rebind it to the returned value.
metrics_results_best_methods = {
    metric_name: {}
    for metric_name in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Experiment label -> name of the classifier selected for that experiment.
name_best_models = dict()

In [6]:
# Folders holding the two image classes.
data_normal_path = "Data/normal/"
data_pothole_path = "Data/potholes/"

# Build the image loader with img_size=(256, 256).
# It is deliberately NOT called `i`: `i` is already the alias of
# Data_processing.import_images in the imports cell, and rebinding it here would
# silently hide that module for the rest of the notebook.
images_processing = ImagesProcessing(
    folder_normal=data_normal_path,
    folder_potholes=data_pothole_path,
    img_size=(256, 256),
)
images = images_processing.images
Y = images_processing.labels

# Labels as a DataFrame, the form handed to classification.DataProcessing in later cells.
df_Y = pd.DataFrame(Y)

# I. Unprocessed images

In [7]:
# Feature extractor built on the raw images loaded above; the following cells call its
# method_SIFT(), method_ORB() and method_EDGE() to obtain features.
env_feature_extraction_unprocessed_images = feature_extraction.FeatureExtraction(images)

## 1. Interest point detection (SIFT and ORB)

In [8]:
# Experiment "SIFT + BoW": SIFT features extracted from the raw images.
df_features = pd.DataFrame(env_feature_extraction_unprocessed_images.method_SIFT())

print("Performing Classification")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validation; the entries under 'f1-score' provide the labels for the mean tables.
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
# Jupyter renders only the LAST expression of a cell, so a bare `.style...` expression
# here would be built and thrown away. display() (provided by the IPython kernel)
# makes the highlighted table actually appear.
display(results_train_KFold.style.highlight_max(axis=0))

# Choose the classifier from the test-fold means using F1-score and remember its name.
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["SIFT + BoW"] = best_method_name

# Evaluate the chosen model and record its metrics under this experiment's label.
metrics_results, predictions, models = env_SIFT.TrainTest()
env_SIFT.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_SIFT.get_metrics(models[best_method_name], "SIFT + BoW", metrics_results_best_methods)

[1mExtracting SIFT Features[0;0m
Performing Classification
Best method name: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.78      0.82        86
           1       0.67      0.78      0.72        50

    accuracy                           0.78       136
   macro avg       0.77      0.78      0.77       136
weighted avg       0.79      0.78      0.78       136



In [9]:
# Experiment "ORB + BoW": ORB features extracted from the raw images.
df_features = pd.DataFrame(env_feature_extraction_unprocessed_images.method_ORB())

print("Performing Classification")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

# Cross-validation; the entries under 'f1-score' provide the labels for the mean tables.
metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
# Jupyter renders only the LAST expression of a cell, so a bare `.style...` expression
# here would be built and thrown away. display() (provided by the IPython kernel)
# makes the highlighted table actually appear.
display(results_train_KFold.style.highlight_max(axis=0))

# Choose the classifier from the test-fold means using F1-score and remember its name.
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["ORB + BoW"] = best_method_name

# Evaluate the chosen model and record its metrics under this experiment's label.
metrics_results, predictions, models = env_ORB.TrainTest()
env_ORB.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_ORB.get_metrics(models[best_method_name], "ORB + BoW", metrics_results_best_methods)

[1mExtracting ORB Features[0;0m
Performing Classification
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.63      0.68        86
           1       0.48      0.60      0.54        50

    accuracy                           0.62       136
   macro avg       0.61      0.61      0.61       136
weighted avg       0.64      0.62      0.62       136



## 2. Edge detection

In [None]:
# Experiment "EDGE": edge features extracted from the raw images.
# method_EDGE()'s result is passed on as-is (no pd.DataFrame wrapping, unlike SIFT/ORB).
df_features_edge = env_feature_extraction_unprocessed_images.method_EDGE()

print("Performing Classification")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

# Cross-validation; the entries under 'f1-score' provide the labels for the mean tables.
metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# Jupyter renders only the LAST expression of a cell, so a bare `.style...` expression
# here would be built and thrown away. display() (provided by the IPython kernel)
# makes the highlighted table actually appear.
display(results_train_KFold.style.highlight_max(axis=0))

# Choose the classifier from the test-fold means using F1-score and remember its name.
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["EDGE"] = best_method_name

# Evaluate the chosen model and record its metrics under this experiment's label.
metrics_results, predictions, models = env_edge.TrainTest()
env_edge.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE", metrics_results_best_methods)

[1mExtracting Edge features[0;0m
Performing Classification
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.86      0.88        86
           1       0.77      0.82      0.80        50

    accuracy                           0.85       136
   macro avg       0.83      0.84      0.84       136
weighted avg       0.85      0.85      0.85       136



# II. Processed images

In [10]:
# Independent copies of the loaded images; every preprocessing configuration below
# starts from this list.
original_images = list(map(np.copy, images))

## 1. First configuration

In [11]:
# Preprocessing configuration 1: "gaussian" filter, "hist" equalization, no gamma.
print("=========  Applying Preprocessing to Images =========")

# Define target image size for resizing
target_size = (128, 128)

# Preprocessing parameters
filter_method = "gaussian"
equalization_method = "hist"
apply_gamma = None
normalize_output = True

# Per-image copies of the originals (name kept as-is in case other cells refer to it).
images2 = [np.copy(img) for img in original_images]

# Resize each image, run it through the preprocessing pipeline, and stack the results.
preprocessed_images = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        method=filter_method,
        equalization=equalization_method,
        gamma=apply_gamma,
        normalize=normalize_output
    )
    for img in images2
])

# Ensure images are in grayscale (avoid issues with SIFT)
preprocessed_images = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img for img in preprocessed_images])

# Ensure images are in uint8 format (SIFT requires this).
# The * 255 scaling assumes preprocess_image(normalize=True) yields values in [0, 1]
# (TODO confirm). Under that assumption the clip changes nothing; it only prevents
# out-of-range floats from wrapping around when cast straight to uint8.
preprocessed_images = np.clip(preprocessed_images * 255, 0, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images1 = feature_extraction.FeatureExtraction(preprocessed_images)



In [None]:
# ---------------------------------------------------------------- SIFT
sift_run_label = "SIFT + BoW (Preprocessed - 1)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images1.method_SIFT())

print("Performing Classification")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validate, then select a classifier from the test-fold means using F1-score.
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_run_label] = best_method_name

# Evaluate the selected model and record its metrics under this run's label.
metrics_results, predictions, models = env_SIFT.TrainTest()
selected_model = models[best_method_name]
env_SIFT.evaluate_model(selected_model)
metrics_results_best_methods = env_SIFT.get_metrics(selected_model, sift_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- ORB
orb_run_label = "ORB + BoW (Preprocessed - 1)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images1.method_ORB())

print("Performing Classification")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for ORB: {best_method_name}")
name_best_models[orb_run_label] = best_method_name

metrics_results, predictions, models = env_ORB.TrainTest()
selected_model = models[best_method_name]
env_ORB.evaluate_model(selected_model)
metrics_results_best_methods = env_ORB.get_metrics(selected_model, orb_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- Edge detection
# method_EDGE()'s result is used directly, without the pd.DataFrame wrapping applied above.
edge_run_label = "EDGE (Preprocessed - 1)"
df_features_edge = env_feature_extraction_preprocessed_images1.method_EDGE()

print("Performing Classification")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models[edge_run_label] = best_method_name

metrics_results, predictions, models = env_edge.TrainTest()
selected_model = models[best_method_name]
env_edge.evaluate_model(selected_model)
metrics_results_best_methods = env_edge.get_metrics(selected_model, edge_run_label, metrics_results_best_methods)

[1mExtracting SIFT Features[0;0m
Performing Classification
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.73      0.77        86
           1       0.61      0.72      0.66        50

    accuracy                           0.73       136
   macro avg       0.71      0.73      0.72       136
weighted avg       0.74      0.73      0.73       136

[1mExtracting ORB Features[0;0m
Performing Classification
Best method name for ORB: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.70      0.71        86
           1       0.50      0.52      0.51        50

    accuracy                           0.63       136
   macro avg       0.61      0.61      0.61       136
weighted avg       0.64      0.63      0.63       136

[1mExtracting Edge features[0;0m
Performing Classification
Best method name for Edge Detection: LogReg
Classif

In [None]:
# Template (Manon's code) for comparing image preprocessing configurations
# using Logistic Regression only.

# Step 1 - start from preprocessed images (here: the `preprocessed_images` array)
env_feature_extraction_preprocessed_images = feature_extraction.FeatureExtraction(preprocessed_images)

# Step 2 - build a classification environment from one feature extraction method
#          (edge features in this example)
df_features_edge = env_feature_extraction_preprocessed_images.method_EDGE()
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

# Step 3 - run Logistic Regression on the train/test split and print all metrics
metrics_results, predictions, models = env_edge.TrainTestLogisticRegression()
print(metrics_results)

# Step 4 - repeat with the other preprocessing configurations, storing each f1-score
# Step 5 - compare the stored f1-scores: the highest one indicates the best preprocessing

# Test f1-score of Logistic Regression -- the value to store; shown as the cell output
metrics_results["f1-score"]["LogReg Test"][0]

[1mExtracting Edge features[0;0m
{'accuracy': {'LogReg Train': [0.8308823529411765], 'LogReg Test': [0.7941176470588235]}, 'f1-score': {'LogReg Train': [0.8305882352941176], 'LogReg Test': [0.7885853875194315]}, 'recall': {'LogReg Train': [0.8308823529411764], 'LogReg Test': [0.807906976744186]}, 'precision': {'LogReg Train': [0.8331962155491568], 'LogReg Test': [0.7873263888888888]}, 'roc_auc': {'LogReg Train': [0.8308823529411764], 'LogReg Test': [0.807906976744186]}}


0.7885853875194315

## 2. Second configuration

In [11]:
# Preprocessing configuration 2: "median" filter, "clahe" equalization, no gamma.
print("=========  Applying Preprocessing to Images =========")

# Define target image size for resizing
target_size = (128, 128)

# Preprocessing parameters
filter_method = "median"
equalization_method = "clahe"
apply_gamma = None
normalize_output = True

# Resize and preprocess straight from original_images. The previous intermediate list
# of copies was bound to this same name and overwritten immediately; it is not needed
# because the pipeline is applied to the array returned by cv2.resize, not to the
# source image itself.
preprocessed_images2 = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        method=filter_method,
        equalization=equalization_method,
        gamma=apply_gamma,
        normalize=normalize_output
    )
    for img in original_images
])

# Ensure images are in grayscale (avoid issues with SIFT)
preprocessed_images2 = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img for img in preprocessed_images2])

# Ensure images are in uint8 format (SIFT requires this).
# The * 255 scaling assumes preprocess_image(normalize=True) yields values in [0, 1]
# (TODO confirm). Under that assumption the clip changes nothing; it only prevents
# out-of-range floats from wrapping around when cast straight to uint8.
preprocessed_images2 = np.clip(preprocessed_images2 * 255, 0, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images2 = feature_extraction.FeatureExtraction(preprocessed_images2)



In [None]:
# ---------------------------------------------------------------- SIFT
sift_run_label = "SIFT + BoW (Preprocessed - 2)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images2.method_SIFT())

print("Performing Classification")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validate, then select a classifier from the test-fold means using F1-score.
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_run_label] = best_method_name

# Evaluate the selected model and record its metrics under this run's label.
metrics_results, predictions, models = env_SIFT.TrainTest()
selected_model = models[best_method_name]
env_SIFT.evaluate_model(selected_model)
metrics_results_best_methods = env_SIFT.get_metrics(selected_model, sift_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- ORB
orb_run_label = "ORB + BoW (Preprocessed - 2)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images2.method_ORB())

print("Performing Classification")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for ORB: {best_method_name}")
name_best_models[orb_run_label] = best_method_name

metrics_results, predictions, models = env_ORB.TrainTest()
selected_model = models[best_method_name]
env_ORB.evaluate_model(selected_model)
metrics_results_best_methods = env_ORB.get_metrics(selected_model, orb_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- Edge detection
# method_EDGE()'s result is used directly, without the pd.DataFrame wrapping applied above.
edge_run_label = "EDGE (Preprocessed - 2)"
df_features_edge = env_feature_extraction_preprocessed_images2.method_EDGE()

print("Performing Classification")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models[edge_run_label] = best_method_name

metrics_results, predictions, models = env_edge.TrainTest()
selected_model = models[best_method_name]
env_edge.evaluate_model(selected_model)
metrics_results_best_methods = env_edge.get_metrics(selected_model, edge_run_label, metrics_results_best_methods)

[1mExtracting SIFT Features[0;0m
Performing Classification
Best method name for SIFT: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.73      0.79        86
           1       0.63      0.80      0.71        50

    accuracy                           0.76       136
   macro avg       0.75      0.77      0.75       136
weighted avg       0.78      0.76      0.76       136

[1mExtracting ORB Features[0;0m
Performing Classification
Best method name for ORB: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.53      0.60        86
           1       0.41      0.56      0.47        50

    accuracy                           0.54       136
   macro avg       0.54      0.55      0.54       136
weighted avg       0.58      0.54      0.55       136

[1mExtracting Edge features[0;0m
Performing Classification
Best method name for Edge Detection: Lo

## 3. Third configuration

In [13]:
# Preprocessing configuration 3: "bilateral" filter, "clahe" equalization, gamma 1.5.
print("=========  Applying Preprocessing to Images =========")

# Define target image size for resizing
target_size = (128, 128)

# Preprocessing parameters
filter_method = "bilateral"
equalization_method = "clahe"
# Gamma correction factor. Original note: "adjust gamma to brighten images" -- whether
# 1.5 brightens or darkens depends on preprocess_image's convention (TODO confirm).
apply_gamma = 1.5
normalize_output = True

# Resize and preprocess straight from original_images. The previous intermediate list
# of copies was bound to this same name and overwritten immediately; it is not needed
# because the pipeline is applied to the array returned by cv2.resize, not to the
# source image itself.
preprocessed_images3 = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        method=filter_method,
        equalization=equalization_method,
        gamma=apply_gamma,
        normalize=normalize_output
    )
    for img in original_images
])

# Ensure images are in grayscale (avoid issues with SIFT)
preprocessed_images3 = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img for img in preprocessed_images3])

# Ensure images are in uint8 format (SIFT requires this).
# The * 255 scaling assumes preprocess_image(normalize=True) yields values in [0, 1]
# (TODO confirm). Under that assumption the clip changes nothing; it only prevents
# out-of-range floats from wrapping around when cast straight to uint8.
preprocessed_images3 = np.clip(preprocessed_images3 * 255, 0, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images3 = feature_extraction.FeatureExtraction(preprocessed_images3)



In [None]:
# ---------------------------------------------------------------- SIFT
sift_run_label = "SIFT + BoW (Preprocessed - 3)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images3.method_SIFT())

print("Performing Classification")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validate, then select a classifier from the test-fold means using F1-score.
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_run_label] = best_method_name

# Evaluate the selected model and record its metrics under this run's label.
metrics_results, predictions, models = env_SIFT.TrainTest()
selected_model = models[best_method_name]
env_SIFT.evaluate_model(selected_model)
metrics_results_best_methods = env_SIFT.get_metrics(selected_model, sift_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- ORB
orb_run_label = "ORB + BoW (Preprocessed - 3)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images3.method_ORB())

print("Performing Classification")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for ORB: {best_method_name}")
name_best_models[orb_run_label] = best_method_name

metrics_results, predictions, models = env_ORB.TrainTest()
selected_model = models[best_method_name]
env_ORB.evaluate_model(selected_model)
metrics_results_best_methods = env_ORB.get_metrics(selected_model, orb_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- Edge detection
# method_EDGE()'s result is used directly, without the pd.DataFrame wrapping applied above.
edge_run_label = "EDGE (Preprocessed - 3)"
df_features_edge = env_feature_extraction_preprocessed_images3.method_EDGE()

print("Performing Classification")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models[edge_run_label] = best_method_name

metrics_results, predictions, models = env_edge.TrainTest()
selected_model = models[best_method_name]
env_edge.evaluate_model(selected_model)
metrics_results_best_methods = env_edge.get_metrics(selected_model, edge_run_label, metrics_results_best_methods)

[1mExtracting SIFT Features[0;0m
Performing Classification
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.79      0.80        86
           1       0.65      0.66      0.65        50

    accuracy                           0.74       136
   macro avg       0.72      0.73      0.72       136
weighted avg       0.74      0.74      0.74       136

[1mExtracting ORB Features[0;0m
Performing Classification
Best method name for ORB: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.53      0.60        86
           1       0.42      0.58      0.49        50

    accuracy                           0.55       136
   macro avg       0.55      0.56      0.54       136
weighted avg       0.59      0.55      0.56       136

[1mExtracting Edge features[0;0m
Performing Classification
Best method name for Edge Detection: LogReg
C

## 4. Fourth configuration

In [15]:
# Preprocessing configuration 4: "bilateral" filter, "hist" equalization, gamma 1.2.
print("=========  Applying Preprocessing to Images =========")

# Target image size for resizing
target_size = (128, 128)

# Preprocessing parameters
filter_method = "bilateral"
equalization_method = "hist"
apply_gamma = 1.2
normalize_output = True

# Resize and preprocess straight from original_images. The previous intermediate list
# of copies was bound to this same name and overwritten immediately; it is not needed
# because the pipeline is applied to the array returned by cv2.resize, not to the
# source image itself.
preprocessed_images4 = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        method=filter_method,
        equalization=equalization_method,
        gamma=apply_gamma,
        normalize=normalize_output
    )
    for img in original_images
])

# Ensure images are in grayscale (avoid issues with SIFT)
preprocessed_images4 = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img for img in preprocessed_images4])

# Ensure images are in uint8 format (SIFT requires this).
# The * 255 scaling assumes preprocess_image(normalize=True) yields values in [0, 1]
# (TODO confirm). Under that assumption the clip changes nothing; it only prevents
# out-of-range floats from wrapping around when cast straight to uint8.
preprocessed_images4 = np.clip(preprocessed_images4 * 255, 0, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images4 = feature_extraction.FeatureExtraction(preprocessed_images4)



In [None]:
# ---------------------------------------------------------------- SIFT
sift_run_label = "SIFT + BoW (Preprocessed - 4)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images4.method_SIFT())

print("Performing Classification")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validate, then select a classifier from the test-fold means using F1-score.
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_run_label] = best_method_name

# Evaluate the selected model and record its metrics under this run's label.
metrics_results, predictions, models = env_SIFT.TrainTest()
selected_model = models[best_method_name]
env_SIFT.evaluate_model(selected_model)
metrics_results_best_methods = env_SIFT.get_metrics(selected_model, sift_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- ORB
orb_run_label = "ORB + BoW (Preprocessed - 4)"
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images4.method_ORB())

print("Performing Classification")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for ORB: {best_method_name}")
name_best_models[orb_run_label] = best_method_name

metrics_results, predictions, models = env_ORB.TrainTest()
selected_model = models[best_method_name]
env_ORB.evaluate_model(selected_model)
metrics_results_best_methods = env_ORB.get_metrics(selected_model, orb_run_label, metrics_results_best_methods)

# ---------------------------------------------------------------- Edge detection
# method_EDGE()'s result is used directly, without the pd.DataFrame wrapping applied above.
edge_run_label = "EDGE (Preprocessed - 4)"
df_features_edge = env_feature_extraction_preprocessed_images4.method_EDGE()

print("Performing Classification")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models[edge_run_label] = best_method_name

metrics_results, predictions, models = env_edge.TrainTest()
selected_model = models[best_method_name]
env_edge.evaluate_model(selected_model)
metrics_results_best_methods = env_edge.get_metrics(selected_model, edge_run_label, metrics_results_best_methods)

[1mExtracting SIFT Features[0;0m
Performing Classification
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.71      0.73        86
           1       0.55      0.62      0.58        50

    accuracy                           0.68       136
   macro avg       0.66      0.66      0.66       136
weighted avg       0.69      0.68      0.68       136

[1mExtracting ORB Features[0;0m
Performing Classification
Best method name for ORB: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.59      0.65        86
           1       0.46      0.60      0.52        50

    accuracy                           0.60       136
   macro avg       0.59      0.60      0.59       136
weighted avg       0.62      0.60      0.60       136

[1mExtracting Edge features[0;0m
Performing Classification
Best method name for Edge Detection: LogReg
C

# III. Overall results

In [17]:
# Summary table built from the metrics accumulated by the experiment cells
# (metric names become columns; the inner keys of each metric dict become the rows).
df_best_models_all_datasets_macro_f1 = pd.DataFrame(metrics_results_best_methods)

# Insert the selected classifier names as the first column. They are passed as a Series
# so that they are matched to the rows by experiment label explicitly, instead of
# relying on how DataFrame.insert coerces a plain dict.
df_best_models_all_datasets_macro_f1.insert(0, 'Best models', pd.Series(name_best_models))

# Highlight the best value of every metric column (everything except 'Best models').
metric_columns = df_best_models_all_datasets_macro_f1.columns[1:]
df_best_models_all_datasets_macro_f1.style.highlight_max(axis=0, subset=metric_columns)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc
SIFT + BoW,LogReg,0.705882,0.671498,0.666977,0.681864,0.666977
ORB + BoW,LogReg,0.507353,0.50281,0.518372,0.517178,0.518372
EDGE,LogReg,0.845588,0.835928,0.840233,0.832576,0.840233
SIFT + BoW (Preprocessed - 1),RandomForest,0.735294,0.729443,0.748837,0.731602,0.748837
ORB + BoW (Preprocessed - 1),LogReg,0.669118,0.645649,0.646279,0.645098,0.646279
EDGE (Preprocessed - 1),LogReg,0.808824,0.804598,0.827907,0.805195,0.827907
SIFT + BoW (Preprocessed - 2),RandomForest,0.757353,0.750209,0.766279,0.748967,0.766279
ORB + BoW (Preprocessed - 2),RandomForest,0.544118,0.535989,0.547442,0.544118,0.547442
EDGE (Preprocessed - 2),LogReg,0.823529,0.814503,0.822791,0.809821,0.822791
SIFT + BoW (Preprocessed - 3),LogReg,0.742647,0.724393,0.725349,0.723529,0.725349
