In [1]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
# from ROI_extraction import preprocess_image
import cv2
import os
# Path to the dataset root; each known class lives in its own sub-directory.
dataset_path = '../images/3regimes/'

ids = []      # image paths, in deterministic (sorted) order
labels = {}   # image path -> integer class label
classes = {'excess':1,'normal':0,'insufficient':2}
image_extensions = (".jpg", ".jpeg", ".png")

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore any unshuffled train/test split) reproducible.
for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    # Skip stray files and directories that are not a known class
    # (e.g. .ipynb_checkpoints) instead of failing with a KeyError.
    if class_name not in classes or not os.path.isdir(class_path):
        continue
    for filename in sorted(os.listdir(class_path)):
        # Compare case-insensitively so files such as "IMG_01.JPG" are kept.
        if filename.lower().endswith(image_extensions):
            img_path = os.path.join(class_path, filename)
            ids.append(img_path)
            labels[img_path] = classes[class_name]

# Input image geometry: square 800x800 frames with three colour channels
n_channels = 3
img_width = 800
img_height = 800

# Number of target classes
num_classes = 3

# Generator-style settings bundle; 'dim' is stored as (height, width)
params = dict(
    dim=(img_height, img_width),
    batch_size=64,
    n_classes=3,
    n_channels=n_channels,
    shuffle=False,
)

def data_generation(list_IDs_temp):
    """Compute per-image HSV statistics and labels for a list of image paths.

    Each image is read with OpenCV, converted from BGR to HSV, and summarised
    by the mean and standard deviation of each of the three HSV channels.

    Parameters
    ----------
    list_IDs_temp : sequence of str
        Image paths. Every path must be a key of the module-level ``labels``
        dict, which supplies its integer class label.

    Returns
    -------
    features : pandas.DataFrame
        One row per path, with columns
        ``H_mean, H_std, S_mean, S_std, V_mean, V_std``.
    y_categorical : numpy.ndarray
        One-hot encoding of the labels with ``params['n_classes']`` classes.
    y : numpy.ndarray
        Integer labels, in the same order as ``list_IDs_temp``.

    Raises
    ------
    OSError
        If OpenCV cannot read one of the paths (``cv2.imread`` returned None).
    KeyError
        If a path has no entry in ``labels``.
    """
    feature_names = ['H_mean','H_std','S_mean','S_std','V_mean','V_std']
    y = np.empty((len(list_IDs_temp)), dtype=int)
    X = []
    for i, ID in enumerate(list_IDs_temp):
        img = cv2.imread(ID)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        if img is None:
            raise OSError(f"Could not read image: {ID}")
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # Channels 0, 1, 2 are H, S, V; emit (mean, std) per channel so the
        # row matches the order of feature_names.
        row = []
        for channel in range(3):
            values = hsv[:, :, channel]
            row.extend([np.mean(values), np.std(values)])
        X.append(row)
        y[i] = labels[ID]
    features = pd.DataFrame(X, columns=feature_names)
    y_categorical = keras.utils.to_categorical(y, num_classes=params['n_classes'])
    return features, y_categorical, y

In [2]:
# Build the HSV feature table and both label encodings for every collected image path.
# data_generation reads each image from disk, so this cell's cost grows with len(ids).
X,y_categorical,y = data_generation(ids)

In [11]:
# Show the feature table returned by data_generation (one row per image path).
X

Unnamed: 0,H_mean,H_std,S_mean,S_std,V_mean,V_std
0,14.758906,33.948031,148.830005,87.145388,45.801897,47.203750
1,14.556730,33.592003,148.381962,87.335365,45.624836,47.234723
2,14.837844,33.925269,148.214169,87.023253,45.590538,47.581999
3,14.748334,33.839303,148.248800,86.980995,45.550758,47.703246
4,14.679506,33.763385,148.464030,87.164434,45.587787,47.762653
...,...,...,...,...,...,...
5005,24.348134,45.770313,154.926159,77.465459,130.112372,96.289801
5006,24.162250,45.672516,155.431187,77.409717,130.604225,96.665978
5007,23.694936,45.112664,155.725892,77.489974,131.253547,96.947176
5008,23.444848,44.932243,155.743709,77.861103,131.076917,96.623173


In [3]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Number of folds for the k-fold cross-validation
k = 5

# Stratified splitter with k folds. No shuffling is requested, so each fold
# is taken in sample order within every class.
skf = StratifiedKFold(n_splits=k)


In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.tree import export_text

# Per-fold results, keyed by fold index; filled in by the loop below
results = {i:{} for i in range(k)}

# Accuracy of each fold, in fold order
accuracy_scores = []

for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"  Test:  index={test_index}")

    # Split features and labels into the training and validation parts of this fold
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y[train_index], y[test_index]

    # No feature scaling: decision-tree splits are threshold comparisons on a
    # single feature, so standardising the inputs would not change the tree.
    # If a scaler is ever added, fit it on X_train only and call
    # scaler.transform(X_val) so validation statistics do not leak in.

    # Build and fit a shallow decision tree (fixed seed for reproducibility)
    decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)
    decision_tree.fit(X_train, y_train)

    # Predict on the held-out part of this fold and score it
    y_val_pred = decision_tree.predict(X_val)
    accuracy = metrics.accuracy_score(y_val, y_val_pred)

    print(f'Fold {i}: {accuracy}')
    accuracy_scores.append(accuracy)

    # Print the feature importances. The inner loop uses its own index name so
    # the fold index `i` is not overwritten.
    feature_names = X_train.columns.values.tolist()
    for feature_number, poids in enumerate(decision_tree.feature_importances_, start=1):
        print(f"Poids caractéristique {feature_number}: {poids}")
    r = export_text(decision_tree, feature_names=feature_names)
    print(r)

    # Keep the per-fold numbers so later cells can use them without re-running the loop
    results[i] = {
        'accuracy': accuracy,
        'feature_importances': dict(zip(feature_names, decision_tree.feature_importances_)),
    }

# Average accuracy over the folds that were actually evaluated
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)

# Print the average accuracy
print("Average Accuracy:", average_accuracy)

Fold 0:
  Train: index=[ 334  335  336 ... 5007 5008 5009]
  Test:  index=[   0    1    2 ... 3671 3672 3673]
Fold 0: 0.8992015968063872
Poids caractéristique 1: 0.0
Poids caractéristique 2: 0.7375235024622421
Poids caractéristique 3: 0.0
Poids caractéristique 4: 0.2624764975377579
Poids caractéristique 5: 0.0
Poids caractéristique 6: 0.0
|--- H_std <= 39.91
|   |--- S_std <= 84.54
|   |   |--- class: 2
|   |--- S_std >  84.54
|   |   |--- class: 1
|--- H_std >  39.91
|   |--- class: 0

Fold 1:
  Train: index=[   0    1    2 ... 5007 5008 5009]
  Test:  index=[ 334  335  336 ... 4005 4006 4007]
Fold 1: 0.8183632734530938
Poids caractéristique 1: 0.0
Poids caractéristique 2: 0.7137465763943994
Poids caractéristique 3: 0.0
Poids caractéristique 4: 0.2862534236056005
Poids caractéristique 5: 0.0
Poids caractéristique 6: 0.0
|--- H_std <= 39.85
|   |--- S_std <= 84.82
|   |   |--- class: 2
|   |--- S_std >  84.82
|   |   |--- class: 1
|--- H_std >  39.85
|   |--- class: 0

Fold 2:
  Train:

In [5]:
# Feature names as a plain list (X_train is the training frame left over from the last CV fold)
list(X_train.columns)

['H_mean', 'H_std', 'S_mean', 'S_std', 'V_mean', 'V_std']

In [6]:
# Per-feature mean over the rows labelled 0 ('normal' in the classes mapping)
X.loc[y == 0].mean()

H_mean     23.552977
H_std      45.330900
S_mean    155.847251
S_std      78.091102
V_mean    128.596100
V_std      94.845919
dtype: float64

In [7]:
# Per-feature mean over the rows labelled 1 ('excess' in the classes mapping)
X.loc[y == 1].mean()

H_mean     14.994256
H_std      34.438097
S_mean    144.734035
S_std      85.391781
V_mean     38.784786
V_std      39.090767
dtype: float64

In [8]:
# Per-feature mean over the rows labelled 2 ('insufficient' in the classes mapping)
X.loc[y == 2].mean()

H_mean     15.343963
H_std      34.983598
S_mean    142.652974
S_std      83.594886
V_mean     32.886668
V_std      30.133873
dtype: float64

In [9]:
# Build a depth-3 decision tree. random_state is fixed, as in the
# cross-validation cell, so that re-running the cell yields the same tree.
decision_tree = DecisionTreeClassifier(random_state=0, max_depth=3)

# Fit on the full data set
decision_tree.fit(X, y)

# Predict on the same samples the tree was fitted on. The name y_val_pred is
# kept for compatibility, but these are NOT held-out predictions.
y_val_pred = decision_tree.predict(X)

# Training (resubstitution) accuracy: an optimistic figure, not a validation score
accuracy = metrics.accuracy_score(y, y_val_pred)
print(accuracy)

0.9003992015968064


In [10]:
# Text rendering of the current decision_tree. Feature names are taken from X,
# the frame the tree was fitted on, rather than from the X_train variable left
# behind by the cross-validation loop.
r = export_text(decision_tree, feature_names=X.columns.tolist())
print(r)

|--- V_mean <= 82.15
|   |--- S_std <= 84.54
|   |   |--- H_mean <= 15.00
|   |   |   |--- class: 1
|   |   |--- H_mean >  15.00
|   |   |   |--- class: 2
|   |--- S_std >  84.54
|   |   |--- H_mean <= 15.25
|   |   |   |--- class: 1
|   |   |--- H_mean >  15.25
|   |   |   |--- class: 2
|--- V_mean >  82.15
|   |--- class: 0

