In [None]:
import os
import random
import zipfile

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow
from google.colab import files
from keras.applications import ResNet50
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from PIL import Image
from skimage.transform import resize
from sklearn.metrics import (
    accuracy_score,
    auc,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

ImportError: cannot import name 'GlobalAveragePooling2D' from 'keras.applications.vgg16' (/usr/local/lib/python3.10/dist-packages/keras/applications/vgg16/__init__.py)

In [None]:
# Unpack the dataset archive stored on Google Drive into its extraction folder.
# NOTE(review): the archive path spells the Drive root "MyDrive" while the
# extraction folder spells it "My Drive" - confirm both resolve on the runtime.
zip_file_path = '/content/drive/MyDrive/Colab Notebooks/malaria_hematie_dataset.zip'
extract_folder = '/content/drive/My Drive/Colab Notebooks/malaria_hematie_dataset/'

# Create the destination first; an already existing folder is not an error.
os.makedirs(extract_folder, exist_ok=True)

with zipfile.ZipFile(zip_file_path, mode='r') as dataset_archive:
    dataset_archive.extractall(path=extract_folder)

print("Décompression terminée.")

Décompression terminée.


In [None]:
# Folders holding the two classes, resolved relative to the extraction folder.
parasitized_path, uninfected_path = (
    os.path.join(extract_folder, class_subfolder)
    for class_subfolder in (
        'malaria_hematie_dataset/parasitized',
        'malaria_hematie_dataset/uninfected',
    )
)

In [None]:
# Size every image is resized to before the arrays are stacked.
image_size = (128, 128)


def load_image(image_path, size):
    """Load one image file as a fixed-size, three-channel NumPy array.

    Args:
        image_path: Path of the image file, opened with ``PIL.Image.open``.
        size: Two-element size tuple forwarded to ``Image.resize``
            (Pillow documents it as ``(width, height)``).

    Returns:
        ``numpy.ndarray`` with the resized RGB pixels; the last axis always
        has length 3.
    """
    with Image.open(image_path) as img:
        # Force RGB so that grayscale, palette or RGBA files cannot produce an
        # array whose channel count differs from the 3 channels the models use.
        img = img.convert('RGB').resize(size)
        return np.array(img)

In [None]:
# Load the images and their labels
images = []
labels = []

# (folder, label) pairs: 1 = parasitized red blood cells, 0 = uninfected ones.
labelled_folders = (
    (parasitized_path, 1),
    (uninfected_path, 0),
)

for class_folder, class_label in labelled_folders:
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the stacked arrays - and therefore the seeded subset drawn from them
    # further down - identical from one run or machine to the next.
    for filename in sorted(os.listdir(class_folder)):
        if filename.endswith(('.png', '.jpg')):
            image_path = os.path.join(class_folder, filename)
            images.append(load_image(image_path, image_size))
            labels.append(class_label)

In [None]:
# Turn the Python lists of per-image arrays and labels into NumPy arrays.
images, labels = np.array(images), np.array(labels)

In [None]:
# Sanity check: report the shape of the stacked images and of the label vector.
print(
    f'Images shape: {images.shape}',
    f'Labels shape: {labels.shape}',
    sep='\n',
)

Images shape: (27558, 128, 128, 3)
Labels shape: (27558,)


In [None]:
# Work on 25% of the dataset (the author's stated goal: easier visualization of the code).
# NOTE(review): images/labels are overwritten by their own subset, so running
# this cell a second time keeps 25% of the already reduced data.
random.seed(42)

indices = list(range(len(images)))
random.shuffle(indices)

subset_size = int(0.25 * len(images))
subset_indices = indices[:subset_size]

# Index both arrays with the same positions so images and labels stay aligned.
images, labels = images[subset_indices], labels[subset_indices]

print(
    f'My_new images shape: {images.shape}',
    f'My_new labels shape: {labels.shape}',
    sep='\n',
)

Subset images shape: (6889, 128, 128, 3)
Subset labels shape: (6889,)


In [None]:
# Hold out 20% of the samples for testing. A fixed random_state makes the split
# repeatable across runs, and stratifying on the labels keeps the
# parasitized/uninfected ratio the same in the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# Cast to float32 but keep the raw 0-255 pixel range. The VGG16
# `preprocess_input` applied in the next cell is documented to expect 0-255
# inputs and subtracts the ImageNet channel means itself; dividing by 255 first
# would squash every pixel into [0, 1] and leave almost no signal once those
# means are removed. float32 also halves the memory of the float64 result that
# a plain division produces.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [None]:
# Apply the VGG16 `preprocess_input` to both splits; the results replace
# X_train and X_test.
# NOTE(review): these same arrays are later fed to model_1 (from-scratch CNN)
# and to the ResNet50-based model_3 as well - confirm this is intended.
X_train, X_test = preprocess_input(X_train), preprocess_input(X_test)

In [None]:
# Keep the targets as a flat 0/1 vector. Every model in this notebook ends in a
# single sigmoid unit trained with binary cross-entropy, and the evaluation
# cells pass y_test to binary scikit-learn metrics and unpack a 2x2 confusion
# matrix. One-hot encoding with to_categorical would turn the targets into
# shape (n, 2), which matches neither the (n, 1) model output nor those metrics.
y_train = np.asarray(y_train).astype('int64').ravel()
y_test = np.asarray(y_test).astype('int64').ravel()

In [None]:
# Random geometric augmentation, meant for the training subset only.
data_augmentation_generator = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.3,
    shear_range=0.2,
    validation_split=0.15,
)

# Validation batches must not be augmented, otherwise the validation metrics
# are measured on distorted images. A second generator with no transforms but
# the same validation_split is used for them.
# NOTE(review): this relies on `flow` partitioning the array by position from
# validation_split alone, so both generators agree on the split - confirm
# against the installed Keras version.
validation_data_generator = ImageDataGenerator(validation_split=0.15)

train_generator = data_augmentation_generator.flow(
    X_train, y_train, batch_size=64, subset="training"
)
val_generator = validation_data_generator.flow(
    X_train, y_train, batch_size=64, subset="validation", shuffle=False
)

In [None]:
# Training callbacks; both watch the validation loss.
monitored_quantity = 'val_loss'

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor=monitored_quantity, patience=10, restore_best_weights=True)

# Multiply the learning rate by 0.2 after 5 stagnant epochs, never going below 1e-4.
learning_rate_decay = ReduceLROnPlateau(monitor=monitored_quantity, factor=0.2, patience=5, min_lr=0.0001)

In [None]:
# From-scratch CNN assembled with Sequential
def _conv_stage(filters, **conv_options):
    """Return one Conv2D(3x3, relu) -> MaxPooling2D(2x2) -> Dropout(0.25) stage as a list."""
    return [
        keras.layers.Conv2D(filters, (3, 3), activation='relu', **conv_options),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Dropout(0.25),
    ]


model_1 = keras.Sequential(
    # Three convolutional stages with 32, 64 and 128 filters; only the first
    # one declares the input shape.
    _conv_stage(32, input_shape=(image_size[0], image_size[1], 3))
    + _conv_stage(64)
    + _conv_stage(128)
    # Dense classifier ending in a single sigmoid unit.
    + [
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
)

In [None]:
# VGG16 convolutional base with ImageNet weights and no classification top.
# It is frozen (trainable = False), so only the layers stacked on it are trained.
encodeur_for_VGG16 = VGG16(
    weights="imagenet",
    include_top=False,
    # Use both components of image_size, as the other models' input_shape does;
    # repeating index 0 was only correct because the configured size is square.
    input_shape=(image_size[0], image_size[1], 3),
)
encodeur_for_VGG16.trainable = False

In [None]:
# Binary classification head stacked on the VGG16 encoder.
model_2 = Sequential()
for _layer in (
    encodeur_for_VGG16,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
):
    model_2.add(_layer)

In [None]:
# ResNet50 convolutional base with ImageNet weights and no classification top;
# it is frozen right after construction.
resnet_input_shape = (image_size[0], image_size[1], 3)
encodeur_for_ResNet50 = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=resnet_input_shape,
)
encodeur_for_ResNet50.trainable = False

In [None]:
# Binary classification head stacked on the ResNet50 encoder; global average
# pooling reduces the feature maps before the dense layers.
model_3 = Sequential()
for _layer in (
    encodeur_for_ResNet50,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
):
    model_3.add(_layer)

In [None]:
# Compile each model with its own Adam optimizer, binary cross-entropy loss and
# accuracy metric, then print its summary.
for _network in (model_1, model_2, model_3):
    _network.compile(
        optimizer=Adam(),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    _network.summary()

In [None]:
# Train the three models with identical settings
def _fit_on_training_set(network):
    """Fit `network` on X_train/y_train and return the Keras History object.

    Uses 3 epochs, batches of 64, a 15% validation split and the shared
    learning-rate-decay and early-stopping callbacks.
    """
    return network.fit(
        x=X_train,
        y=y_train,
        epochs=3,
        batch_size=64,
        validation_split=0.15,
        callbacks=[learning_rate_decay, early_stopping],
    )


history_1 = _fit_on_training_set(model_1)

history_2 = _fit_on_training_set(model_2)

history_3 = _fit_on_training_set(model_3)

In [None]:
# Persist the weights of each trained model.
# NOTE(review): all three file names carry the "resnet50" prefix although only
# model_3 is built on ResNet50 - confirm before renaming, other code may load
# these files by name.
for _network, _weights_file in (
    (model_1, 'resnet50_weights_1.h5'),
    (model_2, 'resnet50_weights_2.h5'),
    (model_3, 'resnet50_weights_3.h5'),
):
    _network.save_weights(_weights_file)


In [None]:
# Confusion matrices
# Predict on the test set with each model, threshold at 0.5, compare to y_test.
def _predict_and_threshold(network):
    """Return (raw outputs of `network` on X_test, those outputs > 0.5 as int64)."""
    scores = network.predict(X_test)
    return scores, (scores > 0.5).astype("int64")


y_prod_1, y_pred_1 = _predict_and_threshold(model_1)
cm1 = confusion_matrix(y_test, y_pred_1)

y_prod_2, y_pred_2 = _predict_and_threshold(model_2)
cm2 = confusion_matrix(y_test, y_pred_2)

y_prod_3, y_pred_3 = _predict_and_threshold(model_3)
cm3 = confusion_matrix(y_test, y_pred_3)

In [None]:
# Metrics for model_1 on the test set
accuracy = accuracy_score(y_test, y_pred_1)

precision = precision_score(y_test, y_pred_1)

recall = recall_score(y_test, y_pred_1)

f1 = f1_score(y_test, y_pred_1)

# Unpack model_1's own confusion matrix (cm1); the name `cm` is never defined.
tn, fp, fn, tp = cm1.ravel()
sensibility = recall  # sensitivity is the recall of the positive class
specificity = tn / (tn + fp)

# ROC and AUC, computed from the predicted probabilities rather than from the
# thresholded labels, which would reduce the curve to a single operating point.
fpr, tpr, thresholds = roc_curve(y_test, y_prod_1.ravel())
roc_auc_1 = auc(fpr, tpr)

In [None]:
# Metrics for model_2 on the test set
accuracy = accuracy_score(y_test, y_pred_2)

precision = precision_score(y_test, y_pred_2)

recall = recall_score(y_test, y_pred_2)

f1 = f1_score(y_test, y_pred_2)

# Unpack model_2's own confusion matrix (cm2); the name `cm` is never defined.
tn, fp, fn, tp = cm2.ravel()
sensibility = recall  # sensitivity is the recall of the positive class
specificity = tn / (tn + fp)

# ROC and AUC, computed from the predicted probabilities rather than from the
# thresholded labels, which would reduce the curve to a single operating point.
fpr, tpr, thresholds = roc_curve(y_test, y_prod_2.ravel())
roc_auc_2 = auc(fpr, tpr)
roc_auc = roc_auc_2  # original variable name, kept for code that still reads it

In [None]:
# Metrics for model_3 on the test set
accuracy = accuracy_score(y_test, y_pred_3)

precision = precision_score(y_test, y_pred_3)

recall = recall_score(y_test, y_pred_3)

f1 = f1_score(y_test, y_pred_3)

# Unpack model_3's own confusion matrix (cm3); the name `cm` is never defined.
tn, fp, fn, tp = cm3.ravel()
sensibility = recall  # sensitivity is the recall of the positive class
specificity = tn / (tn + fp)

# ROC and AUC, computed from the predicted probabilities rather than from the
# thresholded labels, which would reduce the curve to a single operating point.
fpr, tpr, thresholds = roc_curve(y_test, y_prod_3.ravel())
roc_auc_3 = auc(fpr, tpr)
roc_auc = roc_auc_3  # original variable name, kept for code that still reads it