In [1]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
# from ROI_extraction import preprocess_image
import cv2
import os
# Set the path to dataset (expected layout: one sub-directory per class)
dataset_path = '../images/3regimes/'

# Sub-directory name -> integer class label
classes = {'excess': 1, 'normal': 0, 'insufficient': 2}

ids = []      # image paths, in a deterministic (sorted) order
labels = {}   # image path -> integer class label

# os.listdir() returns entries in arbitrary order; sorting keeps the sample
# order (and therefore any unshuffled fold split) identical between runs.
for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    # Skip plain files and folders that are not a known class (for example
    # ".ipynb_checkpoints"): looking them up in `classes` would raise KeyError.
    if not os.path.isdir(class_path) or class_name not in classes:
        continue
    for filename in sorted(os.listdir(class_path)):
        if filename.endswith((".jpg", ".jpeg", ".png")):
            img_path = os.path.join(class_path, filename)
            ids.append(img_path)
            labels[img_path] = classes[class_name]

# Set the input image dimensions
img_width, img_height = 100, 100
n_channels = 3

# Set the number of classes (derived from the mapping so the two cannot drift apart)
num_classes = len(classes)

params = {'dim': (img_height, img_width),
          'batch_size': 64,
          'n_classes': num_classes,
          'n_channels': n_channels,
          'shuffle': False}

def correct_gamma(image):
    """Apply a brightness-dependent gamma correction to an image.

    The image is scaled by 1/255 (so 8-bit input is assumed), the mean of
    channel index 2 (red, for a BGR image) is measured, and every pixel is
    raised to the power 1 / G with G = 0.74 * exp(-3.97 * mean_red).

    Args:
        image: array indexed as [row, column, channel] with at least 3 channels.

    Returns:
        float32 array with the same shape as `image`.
    """
    scaled = image.astype(float) / 255.0

    # Channel 2 is red because the image is in BGR order
    red_mean = np.mean(scaled[:, :, 2])

    gamma = 0.74 * np.exp(-3.97 * red_mean)

    return np.power(scaled, 1 / gamma).astype(np.float32)

def extract_ROI(original_image, roi_size=None):
    """Locate a square region of interest centred on the largest thresholded region.

    Pipeline: grayscale -> 5x5 median blur -> Otsu threshold -> morphological
    close then open (5x5 rectangle) -> external contours -> centroid of the
    contour with the largest area.

    Args:
        original_image: BGR image with values scaled to [0, 1] (it is
            multiplied by 255 and cast to uint8 before processing).
        roi_size: side length of the square ROI in pixels. Defaults to the
            notebook-level `img_height`.

    Returns:
        dict with keys 'contours' (all external contours found), 'roi_x' and
        'roi_y' (top-left corner of the ROI, clamped so the square stays
        inside the image whenever the image is at least `roi_size` wide/high)
        and 'roi_size'.
    """
    if roi_size is None:
        roi_size = img_height

    # Convert to grayscale
    gray_image = cv2.cvtColor((original_image * 255).astype(np.uint8), cv2.COLOR_BGR2GRAY)
    image_h, image_w = gray_image.shape[:2]

    # Apply median filtering
    filtered_image = cv2.medianBlur(gray_image, 5)

    # Apply Otsu's thresholding
    _, thresholded_image = cv2.threshold(filtered_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Apply morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    closed_image = cv2.morphologyEx(thresholded_image, cv2.MORPH_CLOSE, kernel)
    opened_image = cv2.morphologyEx(closed_image, cv2.MORPH_OPEN, kernel)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in both.
    contours = cv2.findContours(opened_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) == 0:
        # Nothing survived thresholding: fall back to the image centre
        # instead of letting max() fail on an empty sequence.
        center_x, center_y = image_w // 2, image_h // 2
    else:
        # Find the contour with the largest area
        contour = max(contours, key=cv2.contourArea)

        # Calculate the moments of the contour
        M = cv2.moments(contour)

        if M["m00"] != 0:
            # Calculate the center of the contour
            center_x = int(M["m10"] / M["m00"])
            center_y = int(M["m01"] / M["m00"])
        else:
            # Zero-area contour (single point or line): the moment-based
            # centroid is undefined, so use the mean of the contour points.
            mean_x, mean_y = contour.reshape(-1, 2).mean(axis=0)
            center_x, center_y = int(mean_x), int(mean_y)

    # Calculate the coordinates of the square ROI
    roi_x = center_x - roi_size // 2
    roi_y = center_y - roi_size // 2

    # Keep the square inside the image. A negative origin would wrap around
    # when used as a NumPy slice start, and an origin too close to the far
    # border would silently produce a truncated crop.
    roi_x = min(max(roi_x, 0), max(image_w - roi_size, 0))
    roi_y = min(max(roi_y, 0), max(image_h - roi_size, 0))

    return {'contours': contours, 'roi_x': roi_x, 'roi_y': roi_y, 'roi_size': roi_size}

def data_generation(list_IDs_temp):
    """Compute per-image HSV statistics and labels for a list of image paths.

    For each path the image is read with OpenCV, gamma-corrected to locate
    the ROI, and the ROI is cropped from the *uncorrected* image, converted
    to HSV and summarised by the mean and standard deviation of each channel.

    Args:
        list_IDs_temp: iterable of image paths; each must be a key of the
            notebook-level `labels` dict.

    Returns:
        Tuple of (features, y_categorical, y):
        - features: DataFrame with columns H_mean, H_std, S_mean, S_std,
          V_mean, V_std (one row per path, same order as the input);
        - y_categorical: one-hot labels from keras.utils.to_categorical;
        - y: integer label array.

    Raises:
        OSError: if an image cannot be read.
    """
    y = np.empty((len(list_IDs_temp)), dtype=int)
    feature_rows = []
    for i, ID in enumerate(list_IDs_temp):
        image = cv2.imread(ID)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising
            raise OSError(f"Could not read image: {ID}")

        img_gamma_correct = correct_gamma(image)
        roi = extract_ROI(img_gamma_correct)

        # A negative origin would wrap around as a slice start, so start at 0
        top = max(roi['roi_y'], 0)
        left = max(roi['roi_x'], 0)
        size = roi['roi_size']
        roi_pixels = image[top:top + size, left:left + size]

        hsv = cv2.cvtColor(roi_pixels, cv2.COLOR_BGR2HSV)
        H_val, S_val, V_val = hsv[:, :, 0], hsv[:, :, 1], hsv[:, :, 2]

        # Mean and standard deviation of hue, saturation and value
        feature_rows.append([np.mean(H_val), np.std(H_val),
                             np.mean(S_val), np.std(S_val),
                             np.mean(V_val), np.std(V_val)])
        y[i] = labels[ID]

    features = pd.DataFrame(feature_rows,
                            columns=['H_mean', 'H_std', 'S_mean', 'S_std', 'V_mean', 'V_std'])
    return features, keras.utils.to_categorical(y, num_classes=params['n_classes']), y


In [2]:
# Build the feature table plus one-hot and integer labels for every image path in `ids`
X, y_categorical, y = data_generation(ids)

In [3]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Number of folds for the k-fold cross-validation
k = 5

# Stratified splitter producing k folds
skf = StratifiedKFold(n_splits=k)


In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.tree import export_text

# Per-fold result placeholders (not filled in by this cell)
results = {i:{} for i in range(k)}

# Initialize a list to store the accuracy scores
accuracy_scores = []

for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"  Test:  index={test_index}")

    # Split the features and labels into training and validation parts for this fold
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y[train_index], y[test_index]

    # Optional scaling. If re-enabled, fit the scaler on the training fold
    # only and reuse it on the validation fold to avoid leaking statistics:
    # scaler = StandardScaler()
    # X_train = scaler.fit_transform(X_train)
    # X_val = scaler.transform(X_val)

    # Create the decision tree (fixed seed so every run builds the same tree)
    decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)

    # Train the decision tree
    decision_tree.fit(X_train, y_train)

    # Predict on the validation fold
    y_val_pred = decision_tree.predict(X_val)

    # Calculate the accuracy score
    accuracy = metrics.accuracy_score(y_val, y_val_pred)

    print(f'Fold {i}: {accuracy}')
    # Append the accuracy score to the list
    accuracy_scores.append(accuracy)

    # Print the feature importances. A separate index name is used so the
    # fold counter `i` is not overwritten inside the fold loop.
    for feature_idx, poids in enumerate(decision_tree.feature_importances_):
        print(f"Poids caractéristique {feature_idx+1}: {poids}")
    r = export_text(decision_tree, feature_names=X_train.columns.values.tolist())
    print(r)

# Compute the average accuracy across all folds actually evaluated
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)

# Print the average accuracy
print("Average Accuracy:", average_accuracy)

Fold 0:
  Train: index=[ 334  335  336 ... 5007 5008 5009]
  Test:  index=[   0    1    2 ... 3671 3672 3673]
Fold 0: 0.8133732534930139
Poids caractéristique 1: 0.8759076624947696
Poids caractéristique 2: 0.0
Poids caractéristique 3: 0.0026273905015996523
Poids caractéristique 4: 0.0
Poids caractéristique 5: 0.12146494700363059
Poids caractéristique 6: 0.0
|--- H_mean <= 8.80
|   |--- V_mean <= 177.96
|   |   |--- class: 2
|   |--- V_mean >  177.96
|   |   |--- class: 1
|--- H_mean >  8.80
|   |--- S_mean <= 215.93
|   |   |--- class: 0
|   |--- S_mean >  215.93
|   |   |--- class: 1

Fold 1:
  Train: index=[   0    1    2 ... 5007 5008 5009]
  Test:  index=[ 334  335  336 ... 4005 4006 4007]
Fold 1: 0.7524950099800399
Poids caractéristique 1: 0.8585769180228076
Poids caractéristique 2: 0.0
Poids caractéristique 3: 0.002575404846762941
Poids caractéristique 4: 0.0
Poids caractéristique 5: 0.13884767713042942
Poids caractéristique 6: 0.0
|--- H_mean <= 8.80
|   |--- V_mean <= 173.14
| 

In [None]:
# Feature names, read from the full feature table so this cell does not
# depend on `X_train` left over from the last cross-validation fold.
X.columns.tolist()

['H_mean', 'H_std', 'S_mean', 'S_std', 'V_mean', 'V_std']

|--- H_mean <= 8.80
|   |--- H_mean <= 6.99
|   |   |--- class: 2
|   |--- H_mean >  6.99
|   |   |--- class: 1
|--- H_mean >  8.80
|   |--- S_mean <= 215.93
|   |   |--- class: 0
|   |--- S_mean >  215.93
|   |   |--- class: 1



In [None]:
# Per-feature mean over the samples labelled 0
X.loc[y == 0].mean()

H_mean     21.952644
H_std       3.349832
S_mean    182.340762
S_std       8.038009
V_mean    253.856298
V_std       1.065612
dtype: float64

In [None]:
# Per-feature mean over the samples labelled 1
X.loc[y == 1].mean()

H_mean      7.376497
H_std       0.523833
S_mean    218.690415
S_std       3.015926
V_mean    171.870881
V_std      23.962581
dtype: float64

In [None]:
# Per-feature mean over the samples labelled 2
X.loc[y == 2].mean()

H_mean      6.977261
H_std       0.519825
S_mean    216.664462
S_std       3.428547
V_mean    133.904695
V_std      19.126893
dtype: float64

In [None]:
# Create the decision tree. The seed is fixed, as in the cross-validation
# cell, so that ties between equally good splits are broken the same way on
# every run and the printed tree is reproducible.
decision_tree = DecisionTreeClassifier(random_state=0, max_depth=3)

# Train the decision tree on the complete data set
decision_tree.fit(X, y)

# Predict on the same samples the tree was fitted on
y_val_pred = decision_tree.predict(X)

# This is the training (resubstitution) accuracy, not a held-out estimate
accuracy = metrics.accuracy_score(y, y_val_pred)
print(accuracy)

0.7980039920159681


In [None]:
# Take the feature names from `X`, the frame the tree was fitted on, rather
# than from `X_train` left over from the last cross-validation fold.
r = export_text(decision_tree, feature_names=X.columns.tolist())
print(r)

|--- H_mean <= 8.80
|   |--- H_mean <= 7.16
|   |   |--- H_std <= 0.46
|   |   |   |--- class: 1
|   |   |--- H_std >  0.46
|   |   |   |--- class: 2
|   |--- H_mean >  7.16
|   |   |--- V_mean <= 194.49
|   |   |   |--- class: 1
|   |   |--- V_mean >  194.49
|   |   |   |--- class: 1
|--- H_mean >  8.80
|   |--- S_mean <= 215.93
|   |   |--- class: 0
|   |--- S_mean >  215.93
|   |   |--- class: 1

