In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error

import os

# Relative paths such as "Data/..." and the `Sources` package are resolved
# from the working directory, so the notebook moves two levels up before
# importing the project helpers (presumably to the project root - confirm).
#
# os.chdir() is relative to the *current* directory, so an unguarded call
# would climb two more levels every time this cell is re-run, breaking both
# the import below and every relative path later in the notebook. Only move
# when the `Sources` folder is not already reachable, which makes the cell
# idempotent while behaving the same on a fresh kernel.
if not os.path.isdir("Sources"):
    os.chdir("../../")

# Star import kept as-is. load_infos, get_best_hyperparameters_optuna and
# compute_classification_metrics are used below and are not defined or
# imported anywhere else in this notebook, so they presumably come from here.
from Sources.tools import *

# Load data

In [2]:
# Semicolon-separated table of pre-extracted features; the path is relative to
# the working directory set in the first cell.
# NOTE(review): the rendered preview of this frame lists an "Unnamed: 0"
# column. If that is a real column (a saved row index) rather than the frame's
# index, it is handed to the model as a feature further down - confirm, and
# load with index_col=0 if so.
X = pd.read_csv(
    "Data/Databases/exctracted_features_MF_and_CLAHE.csv",
    sep=";",
)

In [3]:
# Display the loaded feature table (pandas renders a truncated view).
X

Unnamed: 0,autocorrelation,contrast,cluster_prominence,entropy,SRE,LRE,GLNU,SRLGE,LRLGE
0,1.107479e+10,104485570.0,-1.041509e+18,-3.442912e+07,0.826441,1465.745404,1267.426498,0.000117,10.225427
1,1.330231e+10,141993511.0,-7.869720e+17,-3.293486e+07,0.817172,1099.559048,1458.301789,0.000118,7.666694
2,1.277266e+10,81951861.0,5.317789e+17,-3.396351e+07,0.814548,1351.084634,1270.915475,0.000121,9.410225
3,1.445555e+10,86446093.5,2.932962e+18,-3.285042e+07,0.827747,1056.612211,1509.088403,0.000123,7.370075
4,1.839991e+10,254145354.5,2.460722e+18,-2.995867e+07,0.773066,595.005095,2199.838882,0.000107,4.134429
...,...,...,...,...,...,...,...,...,...
317,8.720863e+09,157410801.5,3.051055e+18,-3.763372e+07,0.753639,3144.379611,786.612662,0.000110,21.869717
318,1.882581e+10,109407390.5,3.279692e+18,-2.980832e+07,0.800996,568.213141,2077.092000,0.000107,3.960973
319,1.873385e+10,112199978.0,-2.256802e+18,-3.054695e+07,0.791133,597.366292,1660.331536,0.000125,4.151681
320,1.459246e+10,185159291.0,9.736272e+16,-3.275639e+07,0.781222,1047.454377,1514.665393,0.000098,7.306552


In [4]:
# Load the annotation table through the project helper. It takes no arguments
# here; where it reads from is not visible in this notebook.
infos = load_infos()

In [5]:
# Display the annotation table (pandas renders a truncated view).
infos

Unnamed: 0,image_idx,tissue,abnormality,severity,x_coord,y_coord,radius
0,mdb001,G,CIRC,B,535,425,197.0
1,mdb002,G,CIRC,B,522,280,69.0
2,mdb003,D,NORM,0,0,0,0.0
3,mdb004,D,NORM,0,0,0,0.0
4,mdb005,F,CIRC,B,477,133,30.0
...,...,...,...,...,...,...,...
325,mdb318,D,NORM,0,0,0,0.0
326,mdb319,D,NORM,0,0,0,0.0
327,mdb320,D,NORM,0,0,0,0.0
328,mdb321,D,NORM,0,0,0,0.0


In [6]:
# Keep a single annotation row per image (pandas keeps the first occurrence
# by default).
infos = infos.drop_duplicates(subset=["image_idx"])

# Binary target as a plain list of ints: 1 when the abnormality is "NORM",
# 0 for every other value.
# NOTE(review): X and y are later paired purely by row position; nothing here
# checks that row i of X and row i of infos describe the same image.
y = infos["abnormality"].eq("NORM").astype(int).tolist()

# Preprocessing

In [7]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): no `stratify=` is passed, so the class proportions of the two
# splits are left to chance - consider stratify=y if the classes are imbalanced.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Model

## Optuna optimization

In [8]:
# Hyper-parameter search on the training split only, limited to 500 trials.
# The helper is project code: judging by its log output it runs an Optuna
# study and reports a leave-one-out accuracy, but its internals (search space,
# validation scheme) are not visible from this notebook - confirm there.
best_params = get_best_hyperparameters_optuna(
    X_train,
    y_train,
    n_trials=500,
)

[I 2025-02-25 15:29:25,385] A new study created in memory with name: no-name-dee00e6f-c898-4d5c-a169-017ce0d49920
[I 2025-02-25 15:29:25,484] Trial 0 finished with value: 0.5735294117647058 and parameters: {'n_estimators': 90, 'max_depth': 19, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.5735294117647058.
[I 2025-02-25 15:29:25,528] Trial 1 finished with value: 0.6176470588235294 and parameters: {'n_estimators': 50, 'max_depth': 2, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 1 with value: 0.6176470588235294.
[I 2025-02-25 15:29:25,711] Trial 2 finished with value: 0.5882352941176471 and parameters: {'n_estimators': 160, 'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': None}. Best is trial 1 with value: 0.6176470588235294.
[I 2025-02-25 15:29:25,874] Trial 3 finished with value: 0.5882352941176471 and parameters: {'n_estimators': 150, 'max_depth': 19, 'min_sam


Meilleurs hyperparamètres trouvés : {'n_estimators': 100, 'max_depth': 4, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'}
Meilleure accuracy (LOO) : 0.6471


## Create and fit the model

In [9]:
# Random forest configured with the hyper-parameters returned by the search;
# the fixed seed makes the fitted forest reproducible.
model = RandomForestClassifier(
    random_state=42,
    **best_params,
)

# Fit on the training split. Left as the last expression so the notebook
# still displays the fitted estimator.
model.fit(X_train, y_train)

# Results

In [10]:
# Predict on the same rows the model was fitted on, so these scores are an
# optimistic reference point rather than an estimate of generalisation.
y_pred_train = model.predict(X_train)

# Project helper; it prints the metric report shown below and its return value
# is kept in metrics_train (return type not visible from this notebook).
metrics_train = compute_classification_metrics(y_train, y_pred_train)

Accuracy: 0.782
Sensitivity (Recall): 1.000
Specificity: 0.380
PPV (Precision): 0.749
NPV: 1.000
F1-score: 0.856
AUC: 0.690
Balanced Accuracy: 0.690


In [11]:
# Predict on the held-out split; these are the scores that matter.
y_pred = model.predict(X_test)

# Project helper; it receives hard class predictions, not probabilities.
# NOTE(review): in both printed reports AUC equals Balanced Accuracy, which
# suggests the AUC is computed from these hard labels rather than from
# predict_proba scores - confirm in compute_classification_metrics.
# NOTE(review): with the label encoding used earlier in this notebook
# (1 = "NORM"), the reported test specificity of 0.000 presumably means no
# abnormal test sample was predicted as abnormal - verify before relying on
# this model.
metrics_test = compute_classification_metrics(y_test, y_pred)

Accuracy: 0.608
Sensitivity (Recall): 0.937
Specificity: 0.000
PPV (Precision): 0.634
NPV: 0.000
F1-score: 0.756
AUC: 0.468
Balanced Accuracy: 0.468
