# TP INF4248 - Classification d'images de déchets avec CNN

# Entraînement d'un CNN avec Keras
Basé sur le dataset realwaste :  

**Membres du groupe** :  
1. FETUE FOKO NATHANAEL - 21T2382  
2. DJONTHU DJONTHU MAURICE JUNIOR - 21T2557  
3. Essouma Mbarga Valerie constance - 18T2880  

# 🗑️ Entraînement d'un CNN sur RealWaste avec Keras  
**Objectif** : Classifier des images de déchets authentiques dans un environnement de décharge avec un réseau de neurones convolutif  

**Dataset** : RealWaste  
- 9 classes (types de matériaux : Cardboard, Food Organics, Glass, Metal, Miscellaneous Trash, Paper, Plastic, Textile Trash, Vegetation)  
- 4 752 images au total (524x524 pixels en couleur), capturées dans une décharge  
- Distribution inégale : de 318 (Textile Trash) à 921 (Plastic) images par classe  

**Approche** :  
1. Architecture CNN personnalisée (plusieurs couches convolutives avec dropout)  
2. Entraînement from scratch avec augmentation de données  
3. Évaluation des performances sur un ensemble de test  

In [6]:
import os
import shutil
import random
from pathlib import Path

# Chemins (à ajuster selon où tu décompresses RealWaste sur Kaggle ou localement)
data_dir = Path("/kaggle/input/realwaste/realwaste-main/RealWaste")  # Suppose que tu importes le dataset dans Kaggle
dataset_dir = Path("/kaggle/working/realwaste")
train_dir = dataset_dir / "train"
test_dir = dataset_dir / "test"

train_dir.mkdir(parents=True, exist_ok=True)
test_dir.mkdir(parents=True, exist_ok=True)

# Liste des classes de RealWaste
class_names = ["cardboard", "food_organics", "glass", "metal", "miscellaneous_trash", 
               "paper", "plastic", "textile_trash", "vegetation"]

# Parcourir les classes et diviser en train/test (80/20)
for class_name in class_names:
    class_dir = data_dir / class_name
    if class_dir.is_dir():
        images = list(class_dir.glob("*.jpg"))  # ou autre extension selon le dataset
        random.shuffle(images)
        split_idx = int(0.8 * len(images))
        train_images, test_images = images[:split_idx], images[split_idx:]
        
        (train_dir / class_name).mkdir(parents=True, exist_ok=True)
        (test_dir / class_name).mkdir(parents=True, exist_ok=True)
        
        for img in train_images:
            shutil.copy(img, train_dir / class_name / img.name)
        for img in test_images:
            shutil.copy(img, test_dir / class_name / img.name)

print("Dataset RealWaste structuré avec succès !!!")

Dataset RealWaste structuré avec succès !!!


In [2]:
from pathlib import Path

download_dir = Path('/kaggle/working')
print("a")

a


In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image augmentations
example_generator = ImageDataGenerator(
    rescale=1 / 255.,           # normalize pixel values between 0-1
    vertical_flip=True,         # vertical transposition
    horizontal_flip=True,       # horizontal transposition
    rotation_range=90,          # random rotation at 90 degrees
    height_shift_range=0.3,     # shift the height of the image 30%
    brightness_range=[0.1, 0.9] # specify the range in which to decrease/increase brightness
)

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Générateur d'entraînement avec augmentations
train_datagen = ImageDataGenerator(
    rescale=1/255.,
    brightness_range=[0.1, 0.7],
    width_shift_range=0.5,
    rotation_range=90,
    horizontal_flip=True,
    vertical_flip=True
)

In [None]:
import os

class_subset = sorted(os.listdir('/kaggle/input/realwate/images'))[:10]

In [5]:
BATCH_SIZE = 32
traingen = train_datagen.flow_from_directory(
    directory=str(train_dir),
    target_size=(524, 524),  # Taille native de RealWaste
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    classes=class_subset,
    subset = 'training',
    shuffle=True,
    seed=42
)


validgen = valid_datagen.flow_from_directory(
    directory=str(test_dir),
    target_size=(524, 524),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    #classes=class_names,
    classes=class_subset
    subset = 'validation',
    shuffle=True,
    seed=42
)

Found 0 images belonging to 9 classes.
Found 0 images belonging to 9 classes.


In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from keras.regularizers import l1_l2

model = Sequential()

#### Input Layer ####
model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same',
                 activation='relu', input_shape=(128, 128, 3)))

#### Convolutional Layers ####
model.add(Conv2D(32, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))  # Pooling
model.add(Dropout(0.2)) # Dropout

model.add(Conv2D(64, (3,3), padding='same', activation='relu'))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
model.add(Conv2D(128, (3,3), activation='relu'))
model.add(Activation('relu'))
model.add(MaxPooling2D((2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(512, (5,5), padding='same', activation='relu'))
model.add(Conv2D(512, (5,5), activation='relu'))
model.add(MaxPooling2D((4,4)))
model.add(Dropout(0.2))

#### Fully-Connected Layer ####
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(len(class_subset), activation='softmax'))

model.summary() # a handy way to inspect the architecture

In [None]:
%%time

from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt

# Vérification des générateurs (ajouté pour diagnostic)
print("Nombre d'échantillons dans traingen :", traingen.samples)
print("Nombre d'échantillons dans validgen :", validgen.samples)
print("Taille du batch (BATCH_SIZE) :", BATCH_SIZE)

steps_per_epoch = traingen.samples // BATCH_SIZE
val_steps = validgen.samples // BATCH_SIZE

# Assure au moins 1 step si le nombre d'échantillons est faible
steps_per_epoch = max(1, steps_per_epoch)
val_steps = max(1, val_steps)

print("Steps per epoch :", steps_per_epoch)
print("Validation steps :", val_steps)

n_epochs = 100

optimizer = RMSprop(learning_rate=0.0001)

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Sauvegarde des poids au format .weights.h5 (conforme aux versions récentes)
checkpointer = ModelCheckpoint(filepath='img_model.weights.best.weights.h5', 
                               verbose=1, 
                               save_best_only=True,
                               save_weights_only=True)

# Arrêt anticipé
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10,
                           restore_best_weights=True,
                           mode='min')

# Entraînement du modèle
history = model.fit(traingen,
                    epochs=n_epochs, 
                    steps_per_epoch=steps_per_epoch,
                    validation_data=validgen,
                    validation_steps=val_steps,
                    callbacks=[early_stop, checkpointer],
                    verbose=1)  # verbose=1 pour voir la progression

# Visualisation avec matplotlib
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.show()

In [None]:
test_generator = ImageDataGenerator(rescale=1/255.)

testgen = test_generator.flow_from_directory(download_dir/'realwaste/test',
                                             target_size=(128, 128),
                                             batch_size=1,
                                             class_mode=None,
                                             classes=class_subset, 
                                             shuffle=False,
                                             seed=42)

In [None]:
import numpy as np

# Charger les poids (assure-toi que le fichier existe)
model.load_weights('img_model.weights.best.weights.h5')

# Faire les prédictions (probabilités pour chaque classe)
predictions = model.predict(testgen)

# Extraire les classes prédites en prenant l'indice de la probabilité maximale
predicted_classes = np.argmax(predictions, axis=1)

# Récupérer les indices et noms des classes
class_indices = traingen.class_indices
class_indices = dict((v, k) for k, v in class_indices.items())
true_classes = testgen.classes

# Afficher quelques exemples pour vérification
print("Classes prédites (5 premières) :", [class_indices[i] for i in predicted_classes[:5]])
print("Classes réelles (5 premières) :", [class_indices[i] for i in true_classes[:5]])

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def display_results(y_true, y_preds, class_labels):
    
    results = pd.DataFrame(precision_recall_fscore_support(y_true, y_preds),
                          columns=class_labels).T

    results.rename(columns={0: 'Precision', 1: 'Recall',
                            2: 'F-Score', 3: 'Support'}, inplace=True)
    
    results.sort_values(by='F-Score', ascending=False, inplace=True)                           
    global_acc = accuracy_score(y_true, y_preds)
    
    print("Overall Categorical Accuracy: {:.2f}%".format(global_acc*100))
    return results

def plot_predictions(y_true, y_preds, test_generator, class_indices):

    fig = plt.figure(figsize=(20, 10))
    for i, idx in enumerate(np.random.choice(test_generator.samples, size=20, replace=False)):
        ax = fig.add_subplot(4, 5, i + 1, xticks=[], yticks=[])
        ax.imshow(np.squeeze(test_generator[idx]))
        pred_idx = y_preds[idx]
        true_idx = y_true[idx]
                
        plt.tight_layout()
        ax.set_title("{}\n({})".format(class_indices[pred_idx], class_indices[true_idx]),
                     color=("green" if pred_idx == true_idx else "red"))

In [None]:
plot_predictions(true_classes, predicted_classes, testgen, class_indices)

In [None]:
display_results(true_classes, predicted_classes, class_indices.values())