In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix, classification_report
from tensorflow.keras.callbacks import EarlyStopping
print("imported")

imported


In [4]:
# Report how many GPU devices TensorFlow detects.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


In [6]:
# Image size (pixels) every input is resized to before entering the network.
IMG_HEIGHT = 128
IMG_WIDTH = 128

# Training schedule.
BATCH_SIZE = 32  # images per batch produced by the data iterators
EPOCHS = 50      # maximum number of epochs per training run
FOLDS = 5        # number of splits requested from KFold

# Root folder of the training images.
DATA_DIR = '../../../chest_xray/train'

print("hypermarameters loaded...")

hypermarameters loaded...


In [8]:
def load_data(data_dir):
    """Build the training and validation image iterators for ``data_dir``.

    Pixel values are rescaled by 1/255 and the images are divided into a
    training subset and a validation subset (``validation_split=0.2``).

    Args:
        data_dir: Path handed to ``flow_from_directory``.

    Returns:
        ``(train_data, val_data)``: iterators producing batches of
        ``BATCH_SIZE`` images resized to ``(IMG_HEIGHT, IMG_WIDTH)`` with
        binary labels. The validation iterator is not shuffled.
    """
    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

    # Settings shared by both subsets.
    flow_options = dict(
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )

    train_data = datagen.flow_from_directory(
        data_dir,
        subset='training',
        **flow_options
    )

    # flow_from_directory shuffles by default. The validation iterator must
    # keep a fixed order so that `val_data.classes` lines up row for row with
    # `model.predict(val_data)`; otherwise ROC/AUC are computed against
    # mismatched labels.
    val_data = datagen.flow_from_directory(
        data_dir,
        subset='validation',
        shuffle=False,
        **flow_options
    )

    return train_data, val_data

train_data, val_data = load_data(DATA_DIR)

Found 4173 images belonging to 2 classes.
Found 1043 images belonging to 2 classes.


In [10]:
# Step 2: model construction
def create_model(num_conv_layers):
    """Assemble and compile a small CNN for binary classification.

    The network takes ``(IMG_HEIGHT, IMG_WIDTH, 3)`` inputs, stacks
    ``num_conv_layers`` stages of Conv2D(32, 3x3, ReLU) followed by 2x2
    max-pooling, then Flatten -> Dense(128, ReLU) -> Dense(1, sigmoid).
    A progress message is printed before each part is added.

    Args:
        num_conv_layers: Number of convolution/pooling stages to stack.

    Returns:
        The model, compiled with the Adam optimizer, binary cross-entropy
        loss and the accuracy metric.
    """
    network = models.Sequential()

    print("Création de la couche d'entrée...")
    input_spec = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
    network.add(input_spec)

    print(f"Ajout de {num_conv_layers} couches de convolution...")
    for stage in range(1, num_conv_layers + 1):
        print(f"Ajout de la couche de convolution {stage}...")
        network.add(layers.Conv2D(32, (3, 3), activation='relu'))
        network.add(layers.MaxPooling2D((2, 2)))

    # Classifier head: each entry pairs the progress message with a factory,
    # so the layer is only created after its message has been printed.
    head_stages = [
        ("Ajout de la couche Flatten...",
         lambda: layers.Flatten()),
        ("Ajout de la couche Dense avec 128 unités...",
         lambda: layers.Dense(128, activation='relu')),
        ("Ajout de la couche de sortie avec activation sigmoid...",
         lambda: layers.Dense(1, activation='sigmoid')),
    ]
    for message, build_layer in head_stages:
        print(message)
        network.add(build_layer())

    print("Compilation du modèle...")
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    print("Modèle créé avec succès.")
    return network

In [10]:
# Step 3: K-fold cross-validation
def k_fold_cross_validation(num_conv_layers, train_data, val_data):
    """Cross-validate a CNN with ``num_conv_layers`` convolution stages.

    The image files behind ``train_data`` and ``val_data`` are pooled and
    partitioned into ``FOLDS`` folds. For each fold a fresh model is trained
    on the remaining files (with early stopping on the validation loss) and
    scored on the held-out files.

    The split has to be made on the files: ``KFold.split`` applied to a batch
    iterator only yields batch indices, and feeding the same two iterators to
    every fold would just repeat one fixed split ``FOLDS`` times.

    Args:
        num_conv_layers: Depth passed to ``create_model``.
        train_data: Directory iterator; supplies file paths, labels, the
            class-index mapping and the image preprocessing to reuse.
        val_data: Directory iterator whose files are added to the pool.

    Returns:
        ``(mean_accuracy, mean_auc, all_histories)``. ``all_histories`` holds
        one dict per fold: the Keras training history plus that fold's ROC
        data under the keys ``'fpr'``, ``'tpr'`` and ``'roc_auc'``.
    """
    kf = KFold(n_splits=FOLDS, shuffle=True)
    accuracy_list = []
    auc_list = []
    all_histories = []  # one history dict per fold

    # Pool the labelled files of both iterators so KFold partitions images,
    # not batches.
    pooled_paths = list(train_data.filepaths) + list(val_data.filepaths)
    pooled_labels = list(train_data.classes) + list(val_data.classes)

    # flow_from_dataframe expects string labels for class_mode='binary'; map
    # the integer indices back to class names and pin the same index order.
    index_to_name = {index: name for name, index in train_data.class_indices.items()}
    class_names = [index_to_name[index] for index in sorted(index_to_name)]
    samples = pd.DataFrame({
        'filename': pooled_paths,
        'class': [index_to_name[int(label)] for label in pooled_labels],
    })

    # Reuse the caller's preprocessing (e.g. rescaling). No `subset` is
    # requested below, so every row of the fold's frame is used.
    datagen = train_data.image_data_generator
    flow_options = dict(
        x_col='filename',
        y_col='class',
        classes=class_names,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )

    print(f"Début de la validation croisée K-Fold avec {FOLDS} plis...")

    # Loop over the folds
    for fold, (train_index, val_index) in enumerate(kf.split(samples)):
        print(f"Plis {fold+1}/{FOLDS}")

        # Release the state of the previous fold's model so memory does not
        # grow with every model built in this loop.
        tf.keras.backend.clear_session()

        fold_train = datagen.flow_from_dataframe(
            samples.iloc[train_index], shuffle=True, **flow_options
        )
        # Unshuffled, so predictions come back in the order of `.classes`.
        fold_val = datagen.flow_from_dataframe(
            samples.iloc[val_index], shuffle=False, **flow_options
        )

        # Build a fresh model
        print("Création du modèle...")
        model = create_model(num_conv_layers)

        # Train with EarlyStopping to limit overfitting
        early_stopping = EarlyStopping(monitor='val_loss', patience=5)
        print("Début de l'entraînement...")
        history = model.fit(
            fold_train,
            validation_data=fold_val,
            epochs=EPOCHS,
            callbacks=[early_stopping],
            verbose=0,
        )

        # Evaluation on the held-out fold
        print("Évaluation du modèle...")
        val_loss, val_accuracy = model.evaluate(fold_val, verbose=0)
        accuracy_list.append(val_accuracy)

        # ROC curve of the held-out fold
        y_true = fold_val.classes
        y_scores = model.predict(fold_val).ravel()
        fpr, tpr, _ = roc_curve(y_true, y_scores)
        roc_auc = auc(fpr, tpr)
        auc_list.append(roc_auc)

        # Store the training history together with the fold's ROC data so
        # the curves can be plotted later without the trained model.
        fold_history = dict(history.history)
        fold_history['fpr'] = fpr
        fold_history['tpr'] = tpr
        fold_history['roc_auc'] = roc_auc
        all_histories.append(fold_history)

        print(f"Plis {fold+1} terminé. Accuracy: {val_accuracy}, AUC: {roc_auc}\n")

    print("Validation croisée terminée.")
    mean_accuracy = np.mean(accuracy_list)
    mean_auc = np.mean(auc_list)
    print(f"Accuracy moyenne: {mean_accuracy}")
    print(f"AUC moyenne: {mean_auc}")

    return mean_accuracy, mean_auc, all_histories

In [None]:
# Step 4: compare networks with different numbers of convolution stages
conv_layers_list = [2, 3, 4, 5]  # evaluate depths 2 through 5
results = {}

for num_layers in conv_layers_list:
    # The cross-validation helper requires both data iterators; calling it
    # with the layer count alone raises TypeError.
    accuracy, roc_auc, histories = k_fold_cross_validation(num_layers, train_data, val_data)

    # Keep the per-fold histories next to the metrics: the final plotting
    # cell reads results[num_layers]['Histories'].
    results[num_layers] = {'Accuracy': accuracy, 'AUC': roc_auc, 'Histories': histories}
    print(f'Layers: {num_layers}, Accuracy: {accuracy:.4f}, AUC: {roc_auc:.4f}')

    # Training curves: one accuracy figure and one loss figure per fold
    for history in histories:
        for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
            plt.plot(history[metric], label=label)
            plt.plot(history[f'val_{metric}'], label=f'Validation {label}')
            plt.title(f'Model {label} for {num_layers} Convolutional Layers')
            plt.xlabel('Epoch')
            plt.ylabel(label)
            plt.legend(loc='upper left')
            plt.grid()
            plt.show()

model created


  self._warn_if_super_not_called()


In [None]:
# Tabulate only the scalar metrics. Entries of `results` may also carry
# non-numeric payloads (the per-configuration plotting loop looks up a
# 'Histories' key), and those must not reach the bar chart.
METRIC_COLUMNS = ['Accuracy', 'AUC']
results_df = pd.DataFrame(results).T[METRIC_COLUMNS].astype(float)

# One group of bars per layer count, one bar per metric.
ax = results_df.plot(kind='bar', figsize=(10, 6), rot=0)
ax.set_title('Impact of Number of Convolutional Layers on Model Performance')
ax.set_xlabel('Number of Convolutional Layers')
ax.set_ylabel('Performance Metrics')
ax.legend(loc='upper left')
ax.grid(axis='y')
plt.show()

In [None]:
def plot_learning_curves(histories):
    """Plot per-fold training and validation curves for accuracy and loss.

    Two panels are drawn side by side (accuracy on the left, loss on the
    right). Each fold contributes a solid training line and a dashed
    validation line in the same colour, labelled with the fold number.

    Args:
        histories: Iterable of history dicts, each holding the per-epoch
            sequences 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    # Materialise once: the folds are traversed for each panel.
    histories = list(histories)

    panels = (
        ('accuracy', 'Learning Curves - Accuracy', 'Accuracy'),
        ('loss', 'Learning Curves - Loss', 'Loss'),
    )
    fig, axes = plt.subplots(1, len(panels), figsize=(14, 5))

    for ax, (metric, title, ylabel) in zip(axes, panels):
        for fold_number, history in enumerate(histories, start=1):
            # Number the legend entries: identical 'Train' / 'Validation'
            # labels repeated for every fold cannot be told apart.
            train_line, = ax.plot(history[metric], label=f'Train (fold {fold_number})')
            ax.plot(
                history[f'val_{metric}'],
                label=f'Validation (fold {fold_number})',
                linestyle='--',
                color=train_line.get_color(),
            )
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(ylabel)
        if histories:
            ax.legend()
        ax.grid()

    plt.show()

In [None]:
def plot_roc_curves(num_layers, histories, model=None, eval_data=None):
    """Plot validation ROC curves for the ``num_layers`` configuration.

    Curve data comes from the first source that is available:

    1. ROC points stored in the history dicts under ``'fpr'`` and ``'tpr'``
       (one curve per history that carries them).
    2. A trained ``model`` scored on ``eval_data`` (a single curve).

    Args:
        num_layers: Number of convolution layers, used in the legend.
        histories: Iterable of per-fold history dicts.
        model: Optional trained model used when no history carries ROC
            points. Defaults to a module-level ``model`` if one exists.
        eval_data: Optional batch iterator of ``(images, labels)`` to score
            the model on. Defaults to the module-level ``val_data``.

    Raises:
        ValueError: If no history carries ROC points and no model is
            available.
    """
    curves = [
        (f'fold {fold_number}', history['fpr'], history['tpr'])
        for fold_number, history in enumerate(histories, start=1)
        if 'fpr' in history and 'tpr' in history
    ]

    if not curves:
        # Per-epoch training metrics cannot produce a ROC curve; a trained
        # model is needed to obtain prediction scores.
        if model is None:
            model = globals().get('model')
        if model is None:
            raise ValueError(
                "plot_roc_curves needs ROC points ('fpr'/'tpr') in the "
                "histories or a trained model passed as model=..."
            )
        if eval_data is None:
            eval_data = val_data

        # Score batch by batch so every prediction is paired with the labels
        # delivered in the same batch; this stays correct even when the
        # iterator shuffles its samples.
        batch_labels, batch_scores = [], []
        for batch_index in range(len(eval_data)):
            images, labels = eval_data[batch_index]
            batch_labels.append(np.ravel(labels))
            batch_scores.append(np.ravel(model.predict_on_batch(images)))
        fpr, tpr, _ = roc_curve(np.concatenate(batch_labels), np.concatenate(batch_scores))
        curves.append(('validation', fpr, tpr))

    plt.figure(figsize=(8, 6))

    for source, fpr, tpr in curves:
        plt.plot(
            fpr, tpr, lw=1,
            label=f'ROC curve for {num_layers} layers, {source} (AUC = {auc(fpr, tpr):.2f})',
        )

    # Diagonal = performance of a random classifier
    plt.plot([0, 1], [0, 1], 'k--', lw=2)
    plt.title('Receiver Operating Characteristic')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.grid()
    plt.show()

In [None]:
# For every tested depth, draw the learning curves and then the ROC curves
# from the fold histories stored under results[num_layers]['Histories'].
for num_layers in conv_layers_list:
    fold_histories = results[num_layers]['Histories']
    plot_learning_curves(fold_histories)
    plot_roc_curves(num_layers, fold_histories)