In [2]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import matplotlib.pyplot as plt
import joblib

In [4]:
#Paramètres
IMG_HEIGHT = 64
IMG_WIDTH = 64
NUM_CLASSES = 4  # Nombre de classes mis à jour

In [6]:

# Définir les répertoires d'images
image_dirs = [
    r'/Users/eloicadamjee/Downloads/COVID-19_Radiography_Dataset/COVID/images',
    r'/Users/eloicadamjee/Downloads/COVID-19_Radiography_Dataset/Normal/images',
    r'/Users/eloicadamjee/Downloads/COVID-19_Radiography_Dataset/Lung_Opacity/images',
    r'/Users/eloicadamjee/Downloads/COVID-19_Radiography_Dataset/Viral Pneumonia/images']

In [7]:
labels = ['COVID', 'Normal', 'Lung Opacity', 'Viral Pneumonia']

In [8]:
# Fonction pour charger les images
def load_images(image_dir, label, limit=None):
    image_data = []
    label_data = []
    files = os.listdir(image_dir)
    if limit:
        files = files[:limit]
    for file_name in files:
        if file_name.endswith('.png'):
            img_path = os.path.join(image_dir, file_name)
            try:
                img = cv2.imread(img_path)
                img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH))
                img = img / 255.0
                image_data.append(img)
                label_data.append(label)
            except Exception as e:
                print(f"Erreur de chargement de l'image {file_name} : {e}")
    return np.array(image_data), np.array(label_data)

In [9]:
# Fonction pour charger les données de plusieurs répertoires
def load_data(image_dirs, labels, limit=None):
    all_images = []
    all_labels = []
    for i, image_dir in enumerate(image_dirs):
        images, label_data = load_images(image_dir, labels[i], limit)
        all_images.append(images)
        all_labels.append(label_data)
    return np.concatenate(all_images), np.concatenate(all_labels)

In [10]:
# Charger et prétraiter les données
images, labels = load_data(image_dirs, labels, limit=25000)


In [11]:
# Aplatir les images
X = images.reshape(images.shape[0], -1)
y = labels

In [12]:
# Encoder les étiquettes
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [14]:
# Calculer les poids des classes
class_counts = np.bincount(y)
total_samples = len(y)
class_weights = {i: total_samples / (NUM_CLASSES * class_counts[i]) for i in range(NUM_CLASSES)}
train_weights = np.array([class_weights[i] for i in y])
print(f"Poids des classes calculés : {class_weights}")

Poids des classes calculés : {0: np.float64(1.4632881637168142), 1: np.float64(0.8801147704590818), 2: np.float64(0.5191571821036107), 3: np.float64(3.934014869888476)}


In [15]:
# Diviser les données
X_train, X_test, y_train, y_test, train_weights, test_weights = train_test_split(X, y, train_weights, test_size=0.2, random_state=42)

In [16]:
# Mettre à l'échelle les caractéristiques
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
# Appliquer PCA
pca = PCA(n_components=75, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [18]:
# Entraîner le modèle Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight=class_weights)
rf_model.fit(X_train_pca, y_train)

In [19]:
# Enregistrer les modèles et objets de prétraitement
joblib.dump(rf_model, 'rf_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(pca, 'pca.pkl')

['pca.pkl']

In [20]:
# Évaluer le modèle Random Forest
y_pred = rf_model.predict(X_test_pca)
y_pred_proba = rf_model.predict_proba(X_test_pca)

In [21]:
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')


In [22]:
print(f'Accuracy : {accuracy}')
print(f'Rapport de classification :\n{class_report}')
print(f'AUC-ROC : {auc_roc}')

Accuracy : 0.7573824710607134
Rapport de classification :
                 precision    recall  f1-score   support

          COVID       0.90      0.37      0.53       701
   Lung Opacity       0.75      0.70      0.72      1185
         Normal       0.73      0.93      0.82      2085
Viral Pneumonia       0.95      0.66      0.77       262

       accuracy                           0.76      4233
      macro avg       0.83      0.67      0.71      4233
   weighted avg       0.78      0.76      0.74      4233

AUC-ROC : 0.9404318489053559


In [23]:
# Fonction pour prétraiter et prédire une nouvelle image
def predict_and_display_images(test_image_dir):
    rf_model = joblib.load('rf_model.pkl')
    scaler = joblib.load('scaler.pkl')
    label_encoder = joblib.load('label_encoder.pkl')
    pca = joblib.load('pca.pkl')

    for file_name in os.listdir(test_image_dir):
        if file_name.endswith('.png'):
            img_path = os.path.join(test_image_dir, file_name)
            img = cv2.imread(img_path)
            img_resized = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH))
            img_normalized = img_resized / 255.0
            img_flattened = img_normalized.reshape(1, -1)

            img_scaled = scaler.transform(img_flattened)
            img_pca = pca.transform(img_scaled)

            prediction_proba = rf_model.predict_proba(img_pca)
            prediction = np.argmax(prediction_proba, axis=1)
            predicted_class = label_encoder.inverse_transform(prediction)[0]

            plt.figure()
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title(f'Fichier : {file_name} - Classe prédite : {predicted_class}')
            plt.axis('off')
            plt.show()

In [24]:
# Répertoire contenant les images de test
test_image_dir = r'/Users/eloicadamjee/Downloads/COVID-19_Radiography_Dataset/Test/Images'

In [25]:
# Prédire et afficher les images de test
predict_and_display_images(test_image_dir)