In [55]:
# Monter Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Chemins des dossiers dans Google Drive
covid_path = '/content/drive/My Drive/COVID-19_Radiography_Dataset/COVID/images'
normal_path = '/content/drive/My Drive/COVID-19_Radiography_Dataset/Normal/images'


# Vérifier que les chemins existent
import os

paths = [covid_path, normal_path]

for path in paths:
    if os.path.exists(path):
        print(f"Path exists: {path}")
    else:
        print(f"Path does not exist: {path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Path exists: /content/drive/My Drive/COVID-19_Radiography_Dataset/COVID/images
Path exists: /content/drive/My Drive/COVID-19_Radiography_Dataset/Normal/images


In [56]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import matplotlib.pyplot as plt
import joblib
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.pipeline import Pipeline

In [57]:
# Paramètres
IMG_HEIGHT = 28
IMG_WIDTH = 28
NUM_CLASSES = 2

In [58]:
# Définir les répertoires d'images
image_dirs = [
    r'/content/drive/My Drive/COVID-19_Radiography_Dataset/COVID/images',
    r'/content/drive/My Drive/COVID-19_Radiography_Dataset/Normal/images',]

labels = ['COVID', 'Normal']

In [60]:
# Fonction pour charger les images
def load_images(image_dir, label, limit=None):
    image_data = []
    label_data = []
    files = os.listdir(image_dir)
    if limit:
        files = files[:limit]
    for file_name in files:
        if file_name.endswith('.png'):
            img_path = os.path.join(image_dir, file_name)
            try:
                img = cv2.imread(img_path)
                img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH))
                img = img / 255.0
                image_data.append(img)
                label_data.append(label)
            except Exception as e:
                print(f"Erreur de chargement de l'image {file_name} : {e}")
    return np.array(image_data), np.array(label_data)

In [61]:
# Fonction pour charger les données de plusieurs répertoires
def load_data(image_dirs, labels, limit=None):
    all_images = []
    all_labels = []
    for i, image_dir in enumerate(image_dirs):
        images, label_data = load_images(image_dir, labels[i], limit)
        all_images.append(images)
        all_labels.append(label_data)
    return np.concatenate(all_images), np.concatenate(all_labels)

In [62]:
# Charger et prétraiter les données
images, labels = load_data(image_dirs, labels, limit=2500)

In [63]:
# Aplatir les images
X = images.reshape(images.shape[0], -1)
y = labels

In [64]:
# Encoder les étiquettes
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [65]:
# Diviser les données
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [66]:
# Augmentation des données avec ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [67]:
# Ajuster et transformer les données d'entraînement
datagen.fit(X_train.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 3))

In [68]:
# Convertir les données augmentées en format adéquat pour sklearn
train_generator = datagen.flow(X_train.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 3), y_train, batch_size=32)

In [17]:
# Mettre à l'échelle les caractéristiques et appliquer PCA dans un pipeline
from sklearn.pipeline import Pipeline

In [69]:
# Mettre à l'échelle les caractéristiques et appliquer PCA dans un pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=50, random_state=42)),
    ('rf', RandomForestClassifier(random_state=42, class_weight='balanced'))
])

In [70]:
# Définir une grille de recherche réduite pour GridSearchCV
param_grid = {
    'rf__n_estimators': [100, 200],
    'rf__max_depth': [10, 20],
    'rf__min_samples_split': [2, 5],
    'rf__min_samples_leaf': [1, 2],
    'rf__bootstrap': [True, False]
}

In [71]:
# Configurer GridSearchCV
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid,
                           cv=3, n_jobs=-1, verbose=2, scoring='accuracy')

In [72]:
# Entraîner le modèle avec Grid Search et les données augmentées
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 32 candidates, totalling 96 fits


  pid = os.fork()


In [73]:
# Meilleurs paramètres
print(f"Meilleurs paramètres : {grid_search.best_params_}")

Meilleurs paramètres : {'rf__bootstrap': False, 'rf__max_depth': 20, 'rf__min_samples_leaf': 2, 'rf__min_samples_split': 2, 'rf__n_estimators': 200}


In [74]:
# Meilleur modèle
best_pipeline = grid_search.best_estimator_

In [75]:
# Enregistrer le pipeline entier
joblib.dump(best_pipeline, 'best_pipeline.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')


['label_encoder.pkl']

In [76]:
# Évaluer le meilleur modèle Random Forest
X_test_scaled = best_pipeline.named_steps['scaler'].transform(X_test)
X_test_pca = best_pipeline.named_steps['pca'].transform(X_test_scaled)
y_pred = best_pipeline.named_steps['rf'].predict(X_test_pca)
y_pred = best_pipeline.named_steps['rf'].predict(X_test_pca)


In [80]:
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)


In [81]:
print(f'Accuracy : {accuracy}')
print(f'Rapport de classification :\n{class_report}')

Accuracy : 0.865
Rapport de classification :
              precision    recall  f1-score   support

       COVID       0.84      0.90      0.87       500
      Normal       0.90      0.83      0.86       500

    accuracy                           0.86      1000
   macro avg       0.87      0.86      0.86      1000
weighted avg       0.87      0.86      0.86      1000

