In [1]:
from Data_processing import import_images as i
from Classification import classification_class as classification
from Features_extraction import interest_point_detection as interest_point_detection
import pandas as pd

In [2]:
# Accumulator for the scores of the best model on each dataset.
# Outer keys are metric names; each inner dict starts empty and is passed to
# (and reassigned from) `get_metrics` in the later cells.
metrics_results_best_methods = {
    metric_name: {}
    for metric_name in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Dataset label -> name of the model selected for that dataset.
name_best_models = dict()

# I. Point of interest detection

In [3]:
# Feature table from the SIFT extractor and label table from the import module.
df_features = pd.DataFrame(interest_point_detection.method_SIFT(i.images))
df_Y = pd.DataFrame(i.Y)

# Run the K-fold cross-validation helper and build its train/test summary frames.
data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
# A bare Styler expression is rendered only when it is the last line of a cell.
# It sits mid-cell here, so it has to be displayed explicitly
# (`display` is injected into the namespace by the IPython kernel).
display(results_train_KFold.style.highlight_max(axis=0))

# Pick the model by its "F1-score" on the "Test" summary, then record and evaluate it.
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["SIFT + BoW"] = best_method_name
# NOTE: `metrics_results` is rebound here to the TrainTest() result.
metrics_results, predictions, models = env_SIFT.TrainTest()
env_SIFT.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_SIFT.get_metrics(models[best_method_name], "SIFT + BoW", metrics_results_best_methods)

Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.80      0.78        86
           1       0.61      0.54      0.57        50

    accuracy                           0.71       136
   macro avg       0.68      0.67      0.67       136
weighted avg       0.70      0.71      0.70       136



In [4]:
# Feature table from the ORB extractor and label table from the import module.
df_features = pd.DataFrame(interest_point_detection.method_ORB(i.images))
df_Y = pd.DataFrame(i.Y)

# Run the K-fold cross-validation helper and build its train/test summary frames.
data_ORB = classification.DataProcessing(df_features, df_Y, stratified=False)
env_ORB = classification.BinaryClassification(data_ORB, average="macro")
metrics_results = env_ORB.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_ORB.createMeansDataframe(metrics_results, labels)
# A bare Styler expression is rendered only when it is the last line of a cell.
# It sits mid-cell here, so it has to be displayed explicitly
# (`display` is injected into the namespace by the IPython kernel).
display(results_train_KFold.style.highlight_max(axis=0))

# Pick the model by its "F1-score" on the "Test" summary, then record and evaluate it.
best_method_name = env_ORB.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["ORB + BoW"] = best_method_name
# NOTE: `metrics_results` is rebound here to the TrainTest() result.
metrics_results, predictions, models = env_ORB.TrainTest()
env_ORB.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_ORB.get_metrics(models[best_method_name], "ORB + BoW", metrics_results_best_methods)

Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: ClassificationDecisionTree
Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.34      0.43        86
           1       0.34      0.60      0.44        50

    accuracy                           0.43       136
   macro avg       0.47      0.47      0.43       136
weighted avg       0.50      0.43      0.43       136



# II. Edge detection

In [5]:
from Features_extraction import edge_detection as edges

# Edge features are exposed by the module as a ready-made attribute.
df_features_edge = edges.features_df
# Build the labels here, as the other pipeline cells do, rather than relying on
# `df_Y` left over from an earlier cell.
df_Y = pd.DataFrame(i.Y)

# Run the K-fold cross-validation helper and build its train/test summary frames.
data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")
metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# A bare Styler expression is rendered only when it is the last line of a cell.
# It sits mid-cell here, so it has to be displayed explicitly
# (`display` is injected into the namespace by the IPython kernel).
display(results_train_KFold.style.highlight_max(axis=0))

# Pick the model by its "F1-score" on the "Test" summary, then record and evaluate it.
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["EDGE"] = best_method_name
# NOTE: `metrics_results` is rebound here to the TrainTest() result.
metrics_results, predictions, models = env_edge.TrainTest()
env_edge.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE", metrics_results_best_methods)


Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.83      0.86        86
           1       0.74      0.84      0.79        50

    accuracy                           0.83       136
   macro avg       0.82      0.83      0.82       136
weighted avg       0.84      0.83      0.83       136



# III. Comparison after preprocessing

In [6]:
from Data_processing import data_preprocessing as dp
import cv2
import os
import numpy as np

# Apply preprocessing to all images
print("=========  Applying Preprocessing to Images =========")

# Size every image is resized to before preprocessing
target_size = (128,128)

# This cell overwrites `i.images` at the end. Stash the untouched images on the
# module the first time through so that re-running the cell preprocesses the
# originals again instead of the already preprocessed arrays.
if not hasattr(i, "_notebook_raw_images"):
    i._notebook_raw_images = i.images

preprocessed_images = np.array([
    dp.preprocess_image(cv2.resize(img, target_size, interpolation=cv2.INTER_AREA))
    for img in i._notebook_raw_images
])

# Update dataset with preprocessed images.
# NOTE: earlier cells also read `i.images`; re-running them after this cell
# will make them operate on the preprocessed images.
i.images = preprocessed_images



In [7]:
def _select_and_evaluate(df_features, df_Y, short_name, dataset_name, best_models, metrics_by_dataset):
    """Run the model-selection and evaluation pipeline on one feature table.

    Mirrors the steps previously copy-pasted per feature type: build the
    `DataProcessing` / `BinaryClassification` objects, run the K-fold helper,
    pick the best method by "F1-score" on the "Test" summary, then call
    `TrainTest`, `evaluate_model` and `get_metrics` for that method.

    Parameters
    ----------
    df_features : feature table handed to `classification.DataProcessing`.
    df_Y : label table handed to `classification.DataProcessing`.
    short_name : name used in the "Best method name for ..." message.
    dataset_name : key under which results are recorded.
    best_models : dict updated in place with `dataset_name -> best method name`.
    metrics_by_dataset : metrics collection forwarded to `get_metrics`.

    Returns
    -------
    tuple
        `(env, metrics)` where `env` is the `BinaryClassification` object that
        was built and `metrics` is whatever `get_metrics` returned.
    """
    data = classification.DataProcessing(df_features, df_Y, stratified=False)
    env = classification.BinaryClassification(data, average="macro")
    cv_metrics = env.CrossValidationKFold()
    method_labels = list(cv_metrics['f1-score'].keys())
    # Only the test summary is needed for model selection.
    _, results_test = env.createMeansDataframe(cv_metrics, method_labels)
    best_method = env.get_best_method(results_test, "F1-score", ens="Test")

    print(f"Best method name for {short_name}: {best_method}")
    best_models[dataset_name] = best_method
    _, _, trained_models = env.TrainTest()
    env.evaluate_model(trained_models[best_method])
    updated_metrics = env.get_metrics(trained_models[best_method], dataset_name, metrics_by_dataset)
    return env, updated_metrics


# Labels are shared by the three feature sets below.
df_Y = pd.DataFrame(i.Y)

# Read `preprocessed_images` directly (the same object the preprocessing cell
# assigns to `i.images`) so this cell fails loudly if preprocessing has not run,
# instead of silently scoring raw images under a "(Preprocessed)" label.
print("========= Extracting SIFT Features After Preprocessing =========")
df_features = pd.DataFrame(interest_point_detection.method_SIFT(preprocessed_images))
env_SIFT, metrics_results_best_methods = _select_and_evaluate(
    df_features, df_Y, "SIFT", "SIFT + BoW (Preprocessed)",
    name_best_models, metrics_results_best_methods,
)

print("========= Extracting ORB Features After Preprocessing =========")
df_features = pd.DataFrame(interest_point_detection.method_ORB(preprocessed_images))
env_ORB, metrics_results_best_methods = _select_and_evaluate(
    df_features, df_Y, "ORB", "ORB + BoW (Preprocessed)",
    name_best_models, metrics_results_best_methods,
)

print("========= Extracting Edge Detection Features After Preprocessing =========")
df_features_edge = edges.extract_features_from_images(preprocessed_images)  # Recalculate features
env_edge, metrics_results_best_methods = _select_and_evaluate(
    df_features_edge, df_Y, "Edge Detection", "EDGE (Preprocessed)",
    name_best_models, metrics_results_best_methods,
)

Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for SIFT: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.76      0.79        86
           1       0.64      0.74      0.69        50

    accuracy                           0.75       136
   macro avg       0.74      0.75      0.74       136
weighted avg       0.76      0.75      0.75       136

Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for ORB: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.73      0.74        86
           1       0.55      0.56      0.55        50

    accuracy                           0.67       136
   macro avg       0.65      0.65      0.65       136
weighted avg       0.67      0.67      0.67       136

Optimization Logistic Regress

# IV. Overall results

In [8]:
# Summary table built from the accumulated per-metric results, with the name of
# the selected model inserted as the first column.
df_best_models_all_datasets_macro_f1 = pd.DataFrame(metrics_results_best_methods)
df_best_models_all_datasets_macro_f1.insert(loc=0, column='Best models', value=name_best_models)

# Highlight the per-column maximum on every column except the model-name column.
metric_columns = df_best_models_all_datasets_macro_f1.columns[1:]
df_best_models_all_datasets_macro_f1.style.highlight_max(subset=metric_columns, axis=0)

Unnamed: 0,Best models,accuracy,f1-score,recall,precision,roc_auc
SIFT + BoW,LogReg,0.705882,0.674874,0.671163,0.681818,0.671163
ORB + BoW,ClassificationDecisionTree,0.433824,0.433793,0.468605,0.468332,0.468605
EDGE,LogReg,0.830882,0.822826,0.832791,0.817788,0.832791
SIFT + BoW (Preprocessed),LogReg,0.75,0.738934,0.747907,0.735632,0.747907
ORB + BoW (Preprocessed),LogReg,0.669118,0.645649,0.646279,0.645098,0.646279
EDGE (Preprocessed),LogReg,0.816176,0.812517,0.837907,0.814298,0.837907
