In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import Sequence
from sklearn.model_selection import KFold
from keras.applications.vgg19 import VGG19
from keras.layers import Flatten, Dense
from keras.models import Model
from keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import pandas as pd
from tensorflow.keras.models import load_model

class CustomDataGenerator(Sequence):
    """Keras ``Sequence`` that serves batches of images with binary age labels.

    Every sample is an ``.npz`` archive containing an ``'image'`` array and an
    ``'Age'`` value. Images are resized, min-max scaled to ``[0, 1]`` and, when
    two-dimensional, replicated to three channels. The label of a sample is
    ``1`` when its ``'Age'`` is greater than 18 and ``0`` otherwise.

    Args:
        image_paths: Sliceable collection of paths to ``.npz`` samples.
        ages: Labels associated with ``image_paths``. Kept on the instance for
            reference only; batch labels are re-read from each file's ``'Age'``
            entry.
        batch_size: Maximum number of samples per batch.
        image_size: Target size, passed as ``dsize`` to ``cv2.resize``.
        augment: When truthy, every image goes through a random
            ``ImageDataGenerator`` transform.
    """

    def __init__(self, image_paths, ages, batch_size, image_size, augment):
        # Initialise the Sequence base class before adding our own state.
        super().__init__()
        self.image_paths = image_paths
        self.ages = ages
        self.batch_size = batch_size
        self.image_size = image_size
        self.augment = augment
        # Always define the attribute so it exists even without augmentation.
        self.datagen = None
        if self.augment:
            self.datagen = ImageDataGenerator(
                rotation_range=30,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                horizontal_flip=True,
                vertical_flip=True,
                fill_mode='nearest'
            )

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Build batch number ``index``.

        Returns:
            Tuple ``(images, labels)``: the stacked preprocessed (and possibly
            augmented) images, and a float32 array of 0/1 labels.
        """
        start = index * self.batch_size
        batch_image_paths = self.image_paths[start:start + self.batch_size]
        batch_images = []
        batch_ages = []

        for image_path in batch_image_paths:
            image, age = self.load_and_preprocess_image(image_path)
            batch_images.append(self.apply_image_augmentation(image))
            # Binary target: strictly older than 18 -> 1, otherwise 0.
            batch_ages.append(1 if age > 18 else 0)

        return np.array(batch_images), np.array(batch_ages, dtype=np.float32)

    def load_and_preprocess_image(self, image_path):
        """Load one ``.npz`` sample and return ``(image, age)``.

        The image is resized to ``self.image_size``, converted to float32 and
        min-max scaled. A constant image (no value range) is returned as all
        zeros instead of dividing by zero.
        """
        # Open the archive once and close it deterministically.
        with np.load(image_path) as npzfile:
            image = npzfile['image']
            age = npzfile['Age']

        # Convert to float before computing the range so that integer inputs
        # cannot overflow in ``max - min``.
        image_resized = cv2.resize(image, self.image_size).astype('float32')
        lowest = image_resized.min()
        value_range = image_resized.max() - lowest
        if value_range > 0:
            image_norm = (image_resized - lowest) / value_range
        else:
            image_norm = np.zeros_like(image_resized)

        # Decide on the array that is actually returned rather than on the raw
        # input. NOTE(review): cv2.resize is believed to drop a trailing
        # singleton channel, in which case the raw shape would be misleading.
        if image_norm.ndim == 2:
            image_tri = np.repeat(image_norm[:, :, np.newaxis], 3, axis=2)
        else:
            image_tri = image_norm

        return image_tri, age

    def apply_image_augmentation(self, image):
        """Return a randomly transformed copy of ``image`` when augmentation is on.

        With augmentation disabled the input is returned unchanged.
        """
        if self.augment:
            return self.datagen.random_transform(image)
        return image


if __name__ == "__main__":
    # Train and evaluate a frozen-VGG19 binary classifier (Age > 18) with
    # k-fold cross-validation on the training directory; every fold's model is
    # also scored on the separate test directory.

    # --- Configuration ---------------------------------------------------
    train_path = r'E:/Papier/AIAGE/Base/reseau de neurones/save/train'
    test_path = r'E:/Papier/AIAGE/Base/reseau de neurones/save/test'
    batch_size = 32
    image_size = (224, 224)
    num_epochs = 10  # epochs per fold
    k = 5  # number of cross-validation folds
    seed = 7
    np.random.seed(seed)

    def _load_age(npz_path):
        """Read the 'Age' entry of one .npz sample and close the archive."""
        with np.load(npz_path) as sample:
            return sample['Age']

    # --- Load image paths and ages -----------------------------------------
    # os.walk yields files in arbitrary order; sorting keeps the seeded
    # KFold split identical from one run / machine to the next.
    train_image_paths = sorted(
        os.path.join(root, f) for root, dirs, files in os.walk(train_path) for f in files
    )
    train_ages = [_load_age(image_path) for image_path in train_image_paths]
    train_ages_binary = [1 if age > 18 else 0 for age in train_ages]

    test_image_paths = sorted(
        os.path.join(root, f) for root, dirs, files in os.walk(test_path) for f in files
    )
    test_ages = [_load_age(image_path) for image_path in test_image_paths]
    test_ages_binary = [1 if age > 18 else 0 for age in test_ages]

    # Loop-invariant objects: build them once instead of once per fold.
    train_paths_array = np.array(train_image_paths)
    train_labels_array = np.array(train_ages_binary)
    test_data_generator = CustomDataGenerator(test_image_paths, test_ages_binary, batch_size, image_size, augment=False)

    # NOTE(review): KFold does not stratify on the labels passed to split().
    kfold = KFold(n_splits=k, shuffle=True, random_state=seed)

    # One row of performance metrics per fold
    fold_metrics = pd.DataFrame(columns=['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc_score'])

    for i, (train_index, test_index) in enumerate(kfold.split(train_image_paths, train_ages_binary)):
        print("======================================")
        print("Iteration = ", i+1)

        # Split the data into train and validation sets
        X_train, X_val = train_paths_array[train_index], train_paths_array[test_index]
        y_train, y_val = train_labels_array[train_index], train_labels_array[test_index]

        # Augmentation is applied to the training fold only
        train_data_generator = CustomDataGenerator(X_train, y_train, batch_size, image_size, augment=True)
        val_data_generator = CustomDataGenerator(X_val, y_val, batch_size, image_size, augment=False)

        # Define model architecture: VGG19 convolutional base with frozen weights
        pretrained_model = VGG19(weights='//chu-lyon.fr/bureautique/TOUS_COMMUNS/BIOSTAT_COMMUN/etude60/6458 stage Perla El Khoueiry/analyse/AI_LA/Perla/reseaux-Age_binaire/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                                 include_top=False, input_shape=(224, 224, 3))
        for layer in pretrained_model.layers:
            layer.trainable = False

        # Trainable head: flatten -> Dense(512, relu) -> Dense(1, sigmoid).
        # (An earlier variant, previously kept here as commented-out code,
        # resumed from a saved 'VGG19_continue.h5' model instead.)
        x = pretrained_model.output
        x = Flatten()(x)
        x = Dense(512, activation='relu')(x)
        new_output = Dense(1, activation='sigmoid')(x)

        model = Model(inputs=pretrained_model.input, outputs=new_output)

        # Compile model (`learning_rate` replaces the deprecated `lr` argument)
        model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

        model.summary()

        # Train the model
        history = model.fit(train_data_generator, epochs=num_epochs, validation_data=val_data_generator)
        scores = model.evaluate(val_data_generator)

        # Evaluate this fold's model on the test set. The (N, 1) sigmoid output
        # is flattened so the sklearn metrics receive 1-D arrays.
        y_pred = model.predict(test_data_generator).ravel()
        y_pred_binary = np.round(y_pred).astype(np.int32)

        accuracy = accuracy_score(test_ages_binary, y_pred_binary)
        precision = precision_score(test_ages_binary, y_pred_binary)
        recall = recall_score(test_ages_binary, y_pred_binary)
        f1 = f1_score(test_ages_binary, y_pred_binary)
        roc_auc = roc_auc_score(test_ages_binary, y_pred)
        cm = confusion_matrix(test_ages_binary, y_pred_binary)

        # Append metrics to fold_metrics dataframe
        fold_metrics.loc[i] = [accuracy, precision, recall, f1, roc_auc]

        # Print the confusion matrix
        print("Confusion matrix:")
        print(cm)

        # Print performance metrics
        print("Accuracy: %.2f" % (accuracy))
        print("Precision: %.2f" % (precision))
        print("Recall: %.2f" % (recall))
        print("F1-score: %.2f" % (f1))
        print("ROC AUC score: %.2f" % (roc_auc))

    # Calculate the mean and standard deviation of the performance metrics
    mean_metrics = fold_metrics.mean()
    std_metrics = fold_metrics.std()

    # Display mean and standard deviation of the performance metrics
    print("======================================")
    print("Mean metrics:")
    print(mean_metrics)
    print("======================================")
    print("Standard deviation of metrics:")
    print(std_metrics)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Score the current `model` on the held-out test files
test_data_generator = CustomDataGenerator(
    test_image_paths, test_ages_binary, batch_size, image_size, augment=False
)
y_pred = model.predict(test_data_generator)
y_pred_binary = np.round(y_pred).astype(np.int32)

# Threshold-based metrics use the rounded predictions; ROC AUC uses the raw scores
accuracy = accuracy_score(test_ages_binary, y_pred_binary)
precision = precision_score(test_ages_binary, y_pred_binary)
recall = recall_score(test_ages_binary, y_pred_binary)
f1 = f1_score(test_ages_binary, y_pred_binary)
roc_auc = roc_auc_score(test_ages_binary, y_pred)

# Report every metric with two decimals
print("Test data performance:")
for template, value in (
    ("Accuracy: %.2f", accuracy),
    ("Precision: %.2f", precision),
    ("Recall: %.2f", recall),
    ("F1-score: %.2f", f1),
    ("ROC AUC score: %.2f", roc_auc),
):
    print(template % value)

# Confusion matrix of the test predictions, drawn as an annotated heatmap
cm = confusion_matrix(test_ages_binary, y_pred_binary)
sns.set(font_scale=1.4)
sns.heatmap(cm, annot=True, annot_kws={"size": 16}, cmap='Blues', fmt='g')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Loss curves stored in `history` (training vs. validation), one point per epoch
plt.figure(figsize=(8, 6))
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()


In [None]:
# Accuracy curves stored in `history` (training vs. validation), one point per epoch
plt.figure(figsize=(8, 6))
for history_key, curve_label in (('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.show()

In [None]:
# ROC curve of the test-set scores, with its AUC shown in the legend
from sklearn.metrics import roc_curve, auc

fpr, tpr, thresholds = roc_curve(test_ages_binary, y_pred)
roc_auc = auc(fpr, tpr)
roc_label = 'ROC curve (AUC = %0.2f)' % roc_auc

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=roc_label)
plt.plot([0, 1], [0, 1], 'k--')  # dashed diagonal for reference
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Mark the ROC operating point that maximises (TPR - FPR), i.e. Youden's J statistic.
from sklearn.metrics import roc_curve, auc

# Compute the curve first: the index below must come from these arrays,
# not from whatever `fpr` / `tpr` an earlier cell left in the kernel.
fpr, tpr, thresholds = roc_curve(test_ages_binary, y_pred)
roc_auc = auc(fpr, tpr)

# Index of the point with the largest gap between sensitivity and false-positive rate
index = np.argmax(tpr - fpr)

# Sensitivity and specificity at that point
sensitivity = tpr[index]
specificity = 1 - fpr[index]

# Draw the ROC curve
plt.plot(fpr, tpr, label='ROC curve (AUC = %0.2f)' % roc_auc)

# Highlight the selected operating point
plt.plot(fpr[index], tpr[index], 'ro', label='Sensitivity = %0.2f, Specificity = %0.2f' % (sensitivity, specificity))

# Dashed diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axis labels, title and legend
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend()

# Show the figure
plt.show()