In [1]:
from Data_processing import import_images as i
from Classification import classification_class as classification
import pandas as pd

In [2]:
# Accumulators shared by the experiment cells below.
# metrics_results_best_methods maps metric name -> {experiment label -> score};
# each experiment cell passes it to get_metrics() and rebinds the returned value.
metrics_results_best_methods = {
    metric_name: {}
    for metric_name in ("accuracy", "f1-score", "recall", "precision", "roc_auc")
}

# Experiment label -> name of the model selected for that experiment.
name_best_models = dict()

In [3]:
# SIFT + BoW experiment: cross-validate the candidate classifiers on the SIFT
# feature vectors, pick the best one by test F1-score, then retrain/evaluate it
# and record its metrics under the "SIFT + BoW" label.
from Features_extraction import SIFT as SIFT
print("========= 5. Classification =========")
df_features = pd.DataFrame(SIFT.feature_vectors)
df_Y = pd.DataFrame(i.Y)
# Same guard as the edge-detection cells: one feature row per label.
assert len(df_features) == len(df_Y), "Mismatch between features and labels !"

data_SIFT = classification.DataProcessing(df_features, df_Y, stratified=False)
env_SIFT = classification.BinaryClassification(data_SIFT, average="macro")

# K-fold cross-validation; the keys of the 'f1-score' entry are used as the
# method labels for the summary frames.
metrics_results = env_SIFT.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_SIFT.createMeansDataframe(metrics_results, labels)
# A bare expression in the middle of a cell is evaluated and thrown away, so
# the styled table must be passed to display() to actually be rendered.
display(results_train_KFold.style.highlight_max(axis=0))

# Model selection is based on the test-side results (ens="Test"), F1-score column.
best_method_name = env_SIFT.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["SIFT + BoW"] = best_method_name

# NOTE: metrics_results is rebound here and no longer holds the K-fold results.
metrics_results, predictions, models = env_SIFT.TrainTest()
env_SIFT.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_SIFT.get_metrics(models[best_method_name], "SIFT + BoW", metrics_results_best_methods)

Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.80      0.78        86
           1       0.61      0.54      0.57        50

    accuracy                           0.71       136
   macro avg       0.68      0.67      0.67       136
weighted avg       0.70      0.71      0.70       136



In [4]:
# Show the metrics recorded so far (metric name -> {experiment label -> score}).
metrics_results_best_methods

{'accuracy': {'SIFT + BoW': 0.7058823529411765},
 'f1-score': {'SIFT + BoW': 0.6748744919913938},
 'recall': {'SIFT + BoW': 0.6711627906976745},
 'precision': {'SIFT + BoW': 0.6818181818181819},
 'roc_auc': {'SIFT + BoW': 0.6711627906976745}}

In [3]:
# Edge-detection experiment: cross-validate the candidate classifiers on the
# edge features, pick the best one by test F1-score, then retrain/evaluate it
# and record its metrics under the "EDGE" label.
from Features_extraction import edge_detection as edges
print("========= 4. Classification =========")
df_features_edge = edges.features_df
df_Y = pd.DataFrame(i.Y)
# One feature row per label is required before building the dataset.
assert len(df_features_edge) == len(df_Y), "Mismatch between features and labels !"

data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

# K-fold cross-validation; the keys of the 'f1-score' entry are used as the
# method labels for the summary frames.
metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# A bare expression in the middle of a cell is evaluated and thrown away, so
# the styled table must be passed to display() to actually be rendered.
display(results_train_KFold.style.highlight_max(axis=0))

# Model selection is based on the test-side results (ens="Test"), F1-score column.
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name: {best_method_name}")
name_best_models["EDGE"] = best_method_name

# NOTE: metrics_results is rebound here and no longer holds the K-fold results.
metrics_results, predictions, models = env_edge.TrainTest()
env_edge.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE", metrics_results_best_methods)



Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.83      0.86        86
           1       0.74      0.84      0.79        50

    accuracy                           0.83       136
   macro avg       0.82      0.83      0.82       136
weighted avg       0.84      0.83      0.83       136



In [4]:
# Show the metrics recorded so far (metric name -> {experiment label -> score}).
metrics_results_best_methods

{'accuracy': {'EDGE': 0.8308823529411765},
 'f1-score': {'EDGE': 0.8228263947890115},
 'recall': {'EDGE': 0.8327906976744186},
 'precision': {'EDGE': 0.8177881412391739},
 'roc_auc': {'EDGE': 0.8327906976744185}}

# Comparison after preprocessing

In [6]:
from Data_processing import data_preprocessing as dp
import cv2
import os
import numpy as np


# Preprocessing step: resize every image held by the import_images module to
# one common size and stack the results into a single NumPy array.
print("=========  Applying Preprocessing to Images =========")

# Output size handed to cv2.resize as its dsize argument.
target_size = (128, 128)

# One np.array() conversion is sufficient to turn the list of resized images
# into an array; INTER_AREA is the interpolation mode used for the resize.
preprocessed_images = np.array(
    [cv2.resize(img, target_size, interpolation=cv2.INTER_AREA) for img in i.images]
)

# NOTE: this overwrites the images stored on the import_images module, so the
# original-resolution images are no longer reachable through i.images after
# this cell has run. Later cells read i.images and therefore see the resized set.
i.images = preprocessed_images




In [7]:
# Recompute edge-detection features from the current contents of i.images and
# repeat the classification experiment, recording it as "EDGE (Preprocessed)".
# NOTE: this cell rebinds df_features_edge, data_edge and env_edge, replacing
# the objects created by the earlier edge-detection cell.
print("========= Extracting Edge Detection Features After Preprocessing =========")

df_features_edge = edges.extract_features_from_images(i.images)  # Recalculate features
df_Y = pd.DataFrame(i.Y)

# One feature row per label is required before building the dataset.
assert len(df_features_edge) == len(df_Y), "Mismatch between features and labels!"

data_edge = classification.DataProcessing(df_features_edge, df_Y, stratified=False)
env_edge = classification.BinaryClassification(data_edge, average="macro")

# K-fold cross-validation; the keys of the 'f1-score' entry are used as the
# method labels for the summary frames.
metrics_results = env_edge.CrossValidationKFold()
labels = list(metrics_results['f1-score'].keys())
results_train_KFold, results_test_KFold = env_edge.createMeansDataframe(metrics_results, labels)
# A bare expression in the middle of a cell is evaluated and thrown away, so
# the styled table must be passed to display() to actually be rendered.
display(results_train_KFold.style.highlight_max(axis=0))

# Model selection is based on the test-side results (ens="Test"), F1-score column.
best_method_name = env_edge.get_best_method(results_test_KFold, "F1-score", ens="Test")
print(f"Best method name for Edge Detection: {best_method_name}")
name_best_models["EDGE (Preprocessed)"] = best_method_name

# NOTE: metrics_results is rebound here and no longer holds the K-fold results.
metrics_results, predictions, models = env_edge.TrainTest()
env_edge.evaluate_model(models[best_method_name])
metrics_results_best_methods = env_edge.get_metrics(models[best_method_name], "EDGE (Preprocessed)", metrics_results_best_methods)


Optimization Logistic Regression
Optimization Classification Decision Tree
Optimization Random Forest
Best method name for Edge Detection: LogReg
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.83      0.88        86
           1       0.75      0.92      0.83        50

    accuracy                           0.86       136
   macro avg       0.85      0.87      0.86       136
weighted avg       0.88      0.86      0.86       136



In [8]:
# Tabulate the recorded metrics: one column per metric, one row per experiment
# label. Only experiments that were run in the current kernel session appear.
df_results = pd.DataFrame.from_dict(metrics_results_best_methods)
print("\n========= Comparison of Classification Results =========", df_results, sep="\n")


                     accuracy  f1-score    recall  precision   roc_auc
EDGE                 0.830882  0.822826  0.832791   0.817788  0.832791
EDGE (Preprocessed)  0.860294  0.855408  0.872791   0.850383  0.872791
