In [1]:
from Data_processing import import_images as i
from Data_processing import data_preprocessing as dp
from Classification import classification_class as classification
from Features_extraction import feature_extraction_class as feature_extraction
from Data_processing import import_images as i

import pandas as pd
import numpy as np
import cv2

In [2]:
# Accumulators filled by every experiment cell below.
# metrics_results_best_methods maps metric name -> per-experiment values;
# each metric gets its own, independent empty dict.
metrics_results_best_methods = {
    metric_name: {}
    for metric_name in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Experiment label -> name of the model selected for that experiment.
name_best_models = dict()

In [21]:
# Load the two classes from their folders (paths are relative to the notebook's working directory).
images_normal = i.load_images_cv2("Data/normal/")
images_potholes = i.load_images_cv2("Data/potholes/")

# Normal images come first, pothole images second; the labels below follow the same order.
images = images_normal + images_potholes

# Binary target: 0 for images from Data/normal/, 1 for images from Data/potholes/.
Y = [0] * len(images_normal)
Y += [1] * len(images_potholes)

# Single-column label frame shared by every classification cell.
df_Y = pd.DataFrame(data=Y)

# I. Unprocessed images

In [4]:
# Feature extractor built on the raw (unprocessed) images; the SIFT, ORB and
# edge-detection cells of section I all draw their features from this object.
env_feature_extraction_unprocessed_images = feature_extraction.FeatureExtraction(images)

## 1. Point of interest detection

In [5]:
# SIFT features on the raw images; results are stored under this label.
sift_label = "SIFT + BoW"
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_SIFT()
)

print("\033[1mStarting Classification\033[0;0m")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validation scores for every candidate model, averaged into train/test tables.
metrics_results = env_SIFT.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
# NOTE(review): this Styler is built but never rendered, because only the last
# expression of a cell is displayed.
results_train_KFold.style.highlight_max(axis=0)

# Model selection is driven by the "F1-score" column of the test table.
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models[sift_label] = best_method_name

# Train/test run, then report and record the metrics of the selected model only.
metrics_results, predictions, models = env_SIFT.TrainTest()
best_model = models[best_method_name]
env_SIFT.evaluate_model(best_model)
metrics_results_best_methods = env_SIFT.get_metrics(best_model, sift_label, metrics_results_best_methods)

Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.81      0.78        86
           1       0.62      0.52      0.57        50

    accuracy                           0.71       136
   macro avg       0.68      0.67      0.67       136
weighted avg       0.70      0.71      0.70       136



In [7]:
# ORB features on the raw images; results are stored under this label.
orb_label = "ORB + BoW"
df_features = pd.DataFrame(
    env_feature_extraction_unprocessed_images.method_ORB()
)

print("\033[1mStarting Classification\033[0;0m")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

# Cross-validation scores for every candidate model, averaged into train/test tables.
metrics_results = env_ORB.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
# NOTE(review): this Styler is built but never rendered, because only the last
# expression of a cell is displayed.
results_train_KFold.style.highlight_max(axis=0)

# Model selection is driven by the "F1-score" column of the test table.
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models[orb_label] = best_method_name

# Train/test run, then report and record the metrics of the selected model only.
metrics_results, predictions, models = env_ORB.TrainTest()
best_model = models[best_method_name]
env_ORB.evaluate_model(best_model)
metrics_results_best_methods = env_ORB.get_metrics(best_model, orb_label, metrics_results_best_methods)

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.38      0.52        86
           1       0.44      0.82      0.57        50

    accuracy                           0.54       136
   macro avg       0.61      0.60      0.54       136
weighted avg       0.66      0.54      0.54       136



## 2. Edge detection

In [8]:
# Edge-detection features on the raw images; results are stored under this label.
edge_label = "EDGE"
df_features_edge = env_feature_extraction_unprocessed_images.edge_detection_methods()

print("\033[1mStarting Classification\033[0;0m")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

# Cross-validation scores for every candidate model, averaged into train/test tables.
metrics_results = env_edge.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# NOTE(review): this Styler is built but never rendered, because only the last
# expression of a cell is displayed.
results_train_KFold.style.highlight_max(axis=0)

# Model selection is driven by the "F1-score" column of the test table.
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models[edge_label] = best_method_name

# Train/test run, then report and record the metrics of the selected model only.
metrics_results, predictions, models = env_edge.TrainTest()
best_model = models[best_method_name]
env_edge.evaluate_model(best_model)
metrics_results_best_methods = env_edge.get_metrics(best_model, edge_label, metrics_results_best_methods)

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.86      0.88        86
           1       0.77      0.82      0.80        50

    accuracy                           0.85       136
   macro avg       0.83      0.84      0.84       136
weighted avg       0.85      0.85      0.85       136



# II. Processed images

In [9]:
# Independent copy of every loaded image; each preprocessing configuration
# below starts from this list instead of from `images`.
original_images = list(map(np.copy, images))

## 1. First configuration

In [10]:
# Preprocessing configuration 1: filter "gaussian", equalization "hist", no gamma.
print("=========  Applying Preprocessing to Images =========")

# Every image is resized to this size before being preprocessed.
target_size = (128, 128)

# Options forwarded to dp.preprocess_image.
filter_method = "gaussian"
equalization_method = "hist"
apply_gamma = None
normalize_output = True

# Work on fresh copies so original_images is left untouched.
images2 = list(map(np.copy, original_images))

preprocess_options = dict(
    method=filter_method,
    equalization=equalization_method,
    gamma=apply_gamma,
    normalize=normalize_output,
)

# Resize + preprocess each image, then stack the results into one array.
preprocessed_images = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        **preprocess_options
    )
    for img in images2
])

# Any image that still has a channel axis is converted to grayscale (avoids issues with SIFT).
preprocessed_images = np.array([
    cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    for img in preprocessed_images
])

# SIFT requires uint8 input.
# NOTE(review): the x255 scaling assumes preprocess_image returned values in
# [0, 1] (normalize=True) - TODO confirm; anything outside that range would not
# survive the uint8 cast intact.
preprocessed_images = np.multiply(preprocessed_images, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images1 = feature_extraction.FeatureExtraction(preprocessed_images)



In [11]:
# ----------------------------- SIFT, configuration 1 -----------------------------
sift_label = "SIFT + BoW (Preprocessed - 1)"
print("========= Extracting SIFT Features After Preprocessing =========")
df_features = pd.DataFrame(
    env_feature_extraction_preprocessed_images1.method_SIFT()
)

print("\033[1mStarting Classification\033[0;0m")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validation scores, averaged per model; selection uses the test "F1-score".
metrics_results = env_SIFT.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_label] = best_method_name

# Train/test run; only the selected model is reported and recorded.
metrics_results, predictions, models = env_SIFT.TrainTest()
best_model = models[best_method_name]
env_SIFT.evaluate_model(best_model)
metrics_results_best_methods = env_SIFT.get_metrics(best_model, sift_label, metrics_results_best_methods)

# ----------------------------- ORB, configuration 1 ------------------------------
orb_label = "ORB + BoW (Preprocessed - 1)"
print("========= Extracting ORB Features After Preprocessing =========")
df_features = pd.DataFrame(
    env_feature_extraction_preprocessed_images1.method_ORB()
)

print("\033[1mStarting Classification\033[0;0m")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

metrics_results = env_ORB.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for ORB: {best_method_name}")
name_best_models[orb_label] = best_method_name

metrics_results, predictions, models = env_ORB.TrainTest()
best_model = models[best_method_name]
env_ORB.evaluate_model(best_model)
metrics_results_best_methods = env_ORB.get_metrics(best_model, orb_label, metrics_results_best_methods)

# ------------------------ Edge detection, configuration 1 ------------------------
edge_label = "EDGE (Preprocessed - 1)"
print("========= Extracting Edge Detection Features After Preprocessing =========")

# In this cell the banner is printed before the edge features are extracted.
print("\033[1mStarting Classification\033[0;0m")
df_features_edge = env_feature_extraction_preprocessed_images1.edge_detection_methods()

data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

metrics_results = env_edge.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models[edge_label] = best_method_name

metrics_results, predictions, models = env_edge.TrainTest()
best_model = models[best_method_name]
env_edge.evaluate_model(best_model)
metrics_results_best_methods = env_edge.get_metrics(best_model, edge_label, metrics_results_best_methods)

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for SIFT: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.70      0.77        86
           1       0.61      0.80      0.69        50

    accuracy                           0.74       136
   macro avg       0.73      0.75      0.73       136
weighted avg       0.76      0.74      0.74       136

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for ORB: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.73      0.74        86
           1       0.55      0.56      0.55        50

    accuracy                           0.67       136
   macro avg       0.65      0.65      0.65       136
weighted a

## 2. Second configuration

In [12]:
# Preprocessing configuration 2: filter "median", equalization "clahe", no gamma.
print("=========  Applying Preprocessing to Images =========")

# Every image is resized to this size before being preprocessed.
target_size = (128, 128)

# Options forwarded to dp.preprocess_image.
filter_method = "median"
equalization_method = "clahe"
apply_gamma = None
normalize_output = True

# Start from fresh copies so original_images is left untouched.
preprocessed_images2 = list(map(np.copy, original_images))

preprocess_options = dict(
    method=filter_method,
    equalization=equalization_method,
    gamma=apply_gamma,
    normalize=normalize_output,
)

# Resize + preprocess each image, then stack the results into one array.
preprocessed_images2 = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        **preprocess_options
    )
    for img in preprocessed_images2
])

# Any image that still has a channel axis is converted to grayscale (avoids issues with SIFT).
preprocessed_images2 = np.array([
    cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    for img in preprocessed_images2
])

# SIFT requires uint8 input.
# NOTE(review): the x255 scaling assumes preprocess_image returned values in
# [0, 1] (normalize=True) - TODO confirm; anything outside that range would not
# survive the uint8 cast intact.
preprocessed_images2 = np.multiply(preprocessed_images2, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images2 = feature_extraction.FeatureExtraction(preprocessed_images2)



In [13]:
# ----------------------------- SIFT, configuration 2 -----------------------------
sift_label = "SIFT + BoW (Preprocessed - 2)"
print("========= Extracting SIFT Features After Preprocessing =========")
df_features = pd.DataFrame(
    env_feature_extraction_preprocessed_images2.method_SIFT()
)

print("\033[1mStarting Classification\033[0;0m")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validation scores, averaged per model; selection uses the test "F1-score".
metrics_results = env_SIFT.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_label] = best_method_name

# Train/test run; only the selected model is reported and recorded.
metrics_results, predictions, models = env_SIFT.TrainTest()
best_model = models[best_method_name]
env_SIFT.evaluate_model(best_model)
metrics_results_best_methods = env_SIFT.get_metrics(best_model, sift_label, metrics_results_best_methods)

# ----------------------------- ORB, configuration 2 ------------------------------
orb_label = "ORB + BoW (Preprocessed - 2)"
print("========= Extracting ORB Features After Preprocessing =========")
df_features = pd.DataFrame(
    env_feature_extraction_preprocessed_images2.method_ORB()
)

print("\033[1mStarting Classification\033[0;0m")
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")

metrics_results = env_ORB.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for ORB: {best_method_name}")
name_best_models[orb_label] = best_method_name

metrics_results, predictions, models = env_ORB.TrainTest()
best_model = models[best_method_name]
env_ORB.evaluate_model(best_model)
metrics_results_best_methods = env_ORB.get_metrics(best_model, orb_label, metrics_results_best_methods)

# ------------------------ Edge detection, configuration 2 ------------------------
edge_label = "EDGE (Preprocessed - 2)"
print("========= Extracting Edge Detection Features After Preprocessing =========")

df_features_edge = env_feature_extraction_preprocessed_images2.edge_detection_methods()

print("\033[1mStarting Classification\033[0;0m")
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

metrics_results = env_edge.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models[edge_label] = best_method_name

metrics_results, predictions, models = env_edge.TrainTest()
best_model = models[best_method_name]
env_edge.evaluate_model(best_model)
metrics_results_best_methods = env_edge.get_metrics(best_model, edge_label, metrics_results_best_methods)

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.72      0.78        86
           1       0.62      0.80      0.70        50

    accuracy                           0.75       136
   macro avg       0.74      0.76      0.74       136
weighted avg       0.77      0.75      0.75       136

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for ORB: RandomForest
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.53      0.60        86
           1       0.41      0.56      0.47        50

    accuracy                           0.54       136
   macro avg       0.54      0.55      0.54       136
weighted a

## 3. Third configuration

In [14]:
# Preprocessing configuration 3: filter "bilateral", equalization "clahe", gamma 1.5.
print("=========  Applying Preprocessing to Images =========")

# Every image is resized to this size before being preprocessed.
target_size = (128, 128)

# Options forwarded to dp.preprocess_image.
filter_method = "bilateral"
equalization_method = "clahe"
apply_gamma = 1.5  # Adjust gamma to brighten images
normalize_output = True

# Start from fresh copies so original_images is left untouched.
preprocessed_images3 = list(map(np.copy, original_images))

preprocess_options = dict(
    method=filter_method,
    equalization=equalization_method,
    gamma=apply_gamma,
    normalize=normalize_output,
)

# Resize + preprocess each image, then stack the results into one array.
preprocessed_images3 = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        **preprocess_options
    )
    for img in preprocessed_images3
])

# Any image that still has a channel axis is converted to grayscale (avoids issues with SIFT).
preprocessed_images3 = np.array([
    cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    for img in preprocessed_images3
])

# SIFT requires uint8 input.
# NOTE(review): the x255 scaling assumes preprocess_image returned values in
# [0, 1] (normalize=True) - TODO confirm; anything outside that range would not
# survive the uint8 cast intact.
preprocessed_images3 = np.multiply(preprocessed_images3, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images3 = feature_extraction.FeatureExtraction(preprocessed_images3)



In [None]:
# ----------------------------- SIFT, configuration 3 -----------------------------
print("========= Extracting SIFT Features After Preprocessing =========")
df_features = pd.DataFrame(env_feature_extraction_preprocessed_images3.method_SIFT())

print("\033[1mStarting Classification\033[0;0m")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for SIFT: {best_method_name}")
name_best_models["SIFT + BoW (Preprocessed - 3)"] = best_method_name
metrics_results, predictions, models = env_SIFT.TrainTest()
env_SIFT.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_SIFT.get_metrics(models[best_method_name], "SIFT + BoW (Preprocessed - 3)", metrics_results_best_methods)

# ----------------------------- ORB, configuration 3 ------------------------------
print("========= Extracting ORB Features After Preprocessing =========")
# Run ORB exactly once: the kept indices and the kept features must come from
# the same extraction (the previous code called method_ORB() three times).
orb_features = env_feature_extraction_preprocessed_images3.method_ORB()
df_features = pd.DataFrame(orb_features)

# Debugging
print(f"Shape de df_features (features extraites) : {df_features.shape}")
print(f"Shape de df_Y (labels) : {df_Y.shape}")

# Keep only entries that actually carry features, remembering their positions.
# NOTE(review): the recorded output shows 676 feature rows for 680 labels
# *before* this filter runs, so method_ORB() may already skip images without
# descriptors. If so, the positions kept here no longer identify the original
# images and the labels can be misaligned - confirm against
# FeatureExtraction.method_ORB.
valid_pairs = [
    (idx, feature)
    for idx, feature in enumerate(orb_features)
    if feature is not None and len(feature) > 0
]
valid_indices = [idx for idx, _ in valid_pairs]
df_features = pd.DataFrame([feature for _, feature in valid_pairs])

# Filtered labels live in their own variable: reassigning the shared df_Y here
# used to shrink the labels for every later cell, which then failed with
# "inconsistent numbers of samples".
df_Y_orb = df_Y.iloc[valid_indices].reset_index(drop=True)

print("\033[1mStarting Classification\033[0;0m")
data_ORB = classification.DataProcessing(df_features, df_Y_orb, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")
metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for ORB: {best_method_name}")
name_best_models["ORB + BoW (Preprocessed - 3)"] = best_method_name
metrics_results, predictions, models = env_ORB.TrainTest()
env_ORB.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_ORB.get_metrics(models[best_method_name], "ORB + BoW (Preprocessed - 3)", metrics_results_best_methods)

# ------------------------ Edge detection, configuration 3 ------------------------
print("========= Extracting Edge Detection Features After Preprocessing =========")

# df_features_edge = env_feature_extraction_preprocessed_images3.edge_detection_methods()

# TODO LISA Solve error edge detection shape Y different from shape features
# NOTE(review): df_Y is no longer overwritten by the ORB step above, so the
# block below receives the full label frame again; the shape mismatch was
# presumably caused by that overwrite - confirm by re-enabling the block.

# print("\033[1mStarting Classification\033[0;0m")
# data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
# env_edge = classification.BinaryClassification(data_edge, average="macro")
# metrics_results = env_edge.CrossValidationKFold()
# labels = list(metrics_results['f1-score'].keys())
# results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")

# print(f"Best method name for Edge Detection: {best_method_name}")
# name_best_models["EDGE (Preprocessed - 3)"] = best_method_name
# metrics_results, predictions, models = env_edge.TrainTest()
# env_edge.evaluate_model(models[best_method_name])
# metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE (Preprocessed - 3)", metrics_results_best_methods)

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.79      0.80        86
           1       0.65      0.66      0.65        50

    accuracy                           0.74       136
   macro avg       0.72      0.73      0.72       136
weighted avg       0.74      0.74      0.74       136

Shape de df_features (features extraites) : (676, 5)
Shape de df_Y (labels) : (680, 1)
[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for ORB: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.70      0.66        82
           1       0.46      0.39      0.42        54

    accuracy                         

ValueError: Found input variables with inconsistent numbers of samples: [680, 676]

## 4. Fourth configuration

In [22]:
# Preprocessing configuration 4: filter "bilateral", equalization "hist", gamma 1.2.
print("=========  Applying Preprocessing to Images =========")

# Every image is resized to this size before being preprocessed.
target_size = (128, 128)

# Options forwarded to dp.preprocess_image.
filter_method = "bilateral"
equalization_method = "hist"
apply_gamma = 1.2
normalize_output = True

# Start from fresh copies so original_images is left untouched.
preprocessed_images4 = list(map(np.copy, original_images))

preprocess_options = dict(
    method=filter_method,
    equalization=equalization_method,
    gamma=apply_gamma,
    normalize=normalize_output,
)

# Resize + preprocess each image, then stack the results into one array.
preprocessed_images4 = np.array([
    dp.preprocess_image(
        cv2.resize(img, target_size, interpolation=cv2.INTER_AREA),
        **preprocess_options
    )
    for img in preprocessed_images4
])

# Any image that still has a channel axis is converted to grayscale (avoids issues with SIFT).
preprocessed_images4 = np.array([
    cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    for img in preprocessed_images4
])

# SIFT requires uint8 input.
# NOTE(review): the x255 scaling assumes preprocess_image returned values in
# [0, 1] (normalize=True) - TODO confirm; anything outside that range would not
# survive the uint8 cast intact.
preprocessed_images4 = np.multiply(preprocessed_images4, 255).astype(np.uint8)

env_feature_extraction_preprocessed_images4 = feature_extraction.FeatureExtraction(preprocessed_images4)



In [24]:
# ----------------------------- SIFT, configuration 4 -----------------------------
sift_label = "SIFT + BoW (Preprocessed - 4)"
print("========= Extracting SIFT Features After Preprocessing =========")
df_features = pd.DataFrame(
    env_feature_extraction_preprocessed_images4.method_SIFT()
)

print("\033[1mStarting Classification\033[0;0m")
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# Cross-validation scores, averaged per model; selection uses the test "F1-score".
metrics_results = env_SIFT.CrossValidationKFold()
labels = [*metrics_results['f1-score'].keys()]
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")

print(f"Best method name for SIFT: {best_method_name}")
name_best_models[sift_label] = best_method_name

# Train/test run; only the selected model is reported and recorded.
metrics_results, predictions, models = env_SIFT.TrainTest()
best_model = models[best_method_name]
env_SIFT.evaluate_model(best_model)
metrics_results_best_methods = env_SIFT.get_metrics(best_model, sift_label, metrics_results_best_methods)

# ----------------------------- ORB, configuration 4 ------------------------------
print("========= Extracting ORB Features After Preprocessing =========")
# The ORB features are extracted, but their classification is still disabled below.
df_features = pd.DataFrame(
    env_feature_extraction_preprocessed_images4.method_ORB()
)

# NOTE(review): if the debugging filter below is re-enabled, its last line
# reassigns the shared df_Y and would shrink the labels seen by every later
# cell; store the filtered labels in a separate variable instead.
# # Debugging
# print(f"Shape de df_features (features extraites) : {df_features.shape}")
# print(f"Shape de df_Y (labels) : {df_Y.shape}")
# valid_indices = [idx for idx, feature in enumerate(env_feature_extraction_preprocessed_images4.method_ORB()) if feature is not None and len(feature) > 0]
# df_features = pd.DataFrame([feature for idx, feature in enumerate(env_feature_extraction_preprocessed_images4.method_ORB()) if idx in valid_indices])
# df_Y = df_Y.iloc[valid_indices].reset_index(drop=True)

# TODO LISA

# print("\033[1mStarting Classification\033[0;0m")
# data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
# env_ORB = classification.BinaryClassification(data_ORB, average="macro")
# metrics_results = env_ORB.CrossValidationKFold()
# labels = list(metrics_results['f1-score'].keys())
# results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
# best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")

# print(f"Best method name for ORB: {best_method_name}")
# name_best_models["ORB + BoW (Preprocessed - 4)"] = best_method_name
# metrics_results, predictions, models = env_ORB.TrainTest()
# env_ORB.evaluate_model(models[best_method_name])
# metrics_results_best_methods = env_ORB.get_metrics(models[best_method_name], "ORB + BoW (Preprocessed - 4)", metrics_results_best_methods)


# ------------------------ Edge detection, configuration 4 ------------------------
print("========= Extracting Edge Detection Features After Preprocessing =========")

# TODO LISA

# df_features_edge = env_feature_extraction_preprocessed_images4.edge_detection_methods()

# print("\033[1mStarting Classification\033[0;0m")
# data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
# env_edge = classification.BinaryClassification(data_edge, average="macro")
# metrics_results = env_edge.CrossValidationKFold()
# labels = list(metrics_results['f1-score'].keys())
# results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")

# print(f"Best method name for Edge Detection: {best_method_name}")
# name_best_models["EDGE (Preprocessed - 4)"] = best_method_name
# metrics_results, predictions, models = env_edge.TrainTest()
# env_edge.evaluate_model(models[best_method_name])
# metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE (Preprocessed - 4)", metrics_results_best_methods)

[1mStarting Classification[0;0m
Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.71      0.73        86
           1       0.55      0.62      0.58        50

    accuracy                           0.68       136
   macro avg       0.66      0.66      0.66       136
weighted avg       0.69      0.68      0.68       136



# III. Overall results

In [None]:
# Summary table: one row per experiment label, one column per recorded metric.
df_best_models_all_datasets_macro_f1 = pd.DataFrame(data=metrics_results_best_methods)

# Put the selected model's name in the first column, ahead of the metric columns.
df_best_models_all_datasets_macro_f1.insert(
    loc=0,
    column='Best models',
    value=name_best_models,
)

# Last expression of the cell, so the styled table is rendered; the highlight
# covers only the metric columns (everything after 'Best models').
df_best_models_all_datasets_macro_f1.style.highlight_max(
    subset=df_best_models_all_datasets_macro_f1.columns[1:],
    axis=0,
)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc
SIFT + BoW,LogReg,0.705882,0.674874,0.671163,0.681818,0.671163
ORB + BoW,LogReg,0.507353,0.50281,0.518372,0.517178,0.518372
EDGE,LogReg,0.845588,0.835928,0.840233,0.832576,0.840233
SIFT + BoW (Preprocessed - 1),LogReg,0.75,0.738934,0.747907,0.735632,0.747907
ORB + BoW (Preprocessed - 1),LogReg,0.669118,0.645649,0.646279,0.645098,0.646279
EDGE (Preprocessed - 1),LogReg,0.808824,0.804598,0.827907,0.805195,0.827907
SIFT + BoW (Preprocessed - 2),RandomForest,0.727941,0.722526,0.743023,0.726044,0.743023
ORB + BoW (Preprocessed - 2),RandomForest,0.595588,0.578995,0.583953,0.579463,0.583953
EDGE (Preprocessed - 2),LogReg,0.823529,0.814503,0.822791,0.809821,0.822791
SIFT + BoW (Preprocessed - 3),LogReg,0.786765,0.775078,0.781163,0.77138,0.781163
