In [None]:
import os
from scipy.io import loadmat
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import matplotlib.pyplot as plt
# --- Paths ---
# Directory holding the raw dataset files (the .mat split lists live here).
raw_data_dir = "../data/raw/"
# Root directory that the relative image paths from the split lists are joined onto.
processed_images_dir = "../data/processed/Images"


# --- Chargement fichiers .mat ---
def load_file_list(mat_path):
    """Read the ``file_list`` entry of a MATLAB ``.mat`` file.

    Args:
        mat_path: Path of the ``.mat`` file to load.

    Returns:
        A list with one element per row of ``file_list``; each element is the
        value stored at ``row[0][0]``.

    Raises:
        KeyError: If the loaded file has no ``file_list`` entry.
    """
    print(f"⚙️ Chargement du fichier .mat depuis : {mat_path}")
    file_list = loadmat(mat_path)["file_list"]

    # Each row wraps its value in two levels of nesting; unwrap both.
    entries = []
    for row in file_list:
        entries.append(row[0][0])
    return entries

# Load the train/test split lists. The paths are built from raw_data_dir so
# the raw-data location is configured in a single place instead of being
# repeated as a literal here.
train_list = load_file_list(os.path.join(raw_data_dir, "train_list.mat"))
test_list = load_file_list(os.path.join(raw_data_dir, "test_list.mat"))

# --- Extraction des labels ---
def get_label_from_path(path):
    """Return the part of ``path`` that precedes its first ``/``.

    Args:
        path: A ``/``-separated path string.

    Returns:
        The text before the first ``/``, or the whole string when ``path``
        contains no ``/``.
    """
    label, _, _ = path.partition('/')
    return label

# The class label of each image is the first component of its relative path.
train_labels = list(map(get_label_from_path, train_list))
test_labels = list(map(get_label_from_path, test_list))

# Prefix every relative path with the processed-images directory.
train_paths = [os.path.join(processed_images_dir, rel_path) for rel_path in train_list]
test_paths = [os.path.join(processed_images_dir, rel_path) for rel_path in test_list]

# One row per image: its path on disk and its class label.
frame_columns = ['filename', 'class']
train_df = pd.DataFrame(list(zip(train_paths, train_labels)), columns=frame_columns)
test_df = pd.DataFrame(list(zip(test_paths, test_labels)), columns=frame_columns)

# Quick summary of what was loaded.
print(f"📊 Nombre d'images d'entraînement : {train_df.shape[0]}")
print(f"📊 Nombre d'images de test : {test_df.shape[0]}")
print(f"🔍 Exemple d'image train : {train_list[0]}")


# Sanity checks: show the first few rows, then confirm the first training
# file actually exists on disk.
print(train_df.iloc[:3])
print(os.path.exists(train_paths[0]))

# --- Build the data generators ---
# Input resolution and batch size shared by both generators.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Fix the label -> index mapping once, from the training labels, and hand it to
# both generators. Without an explicit `classes` list each flow_from_dataframe
# call infers its own mapping from its own dataframe, so the train and test
# indices would silently disagree if the test frame were missing a class.
# Sorting reproduces the order the generator would infer by default.
class_names = sorted(train_df['class'].unique())

# Only multiplies pixel values by 1/255; no augmentation is configured.
datagen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filename',
    y_col='class',
    classes=class_names,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# shuffle=False keeps the test batches in dataframe order.
test_generator = datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col='class',
    classes=class_names,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

class_to_idx = train_generator.class_indices
num_classes = len(class_to_idx)

# --- Show the detected classes ---
print(f"🏷️  Nombre de classes : {len(train_generator.class_indices)}")
print(f"📁 Classes : {list(train_generator.class_indices.keys())}")


# Small CNN classifier: three conv blocks followed by a dense head.
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first Conv2D, which Keras warns against for
# Sequential models. The layer stack itself is unchanged.
model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),

    # Block 1
    layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Block 2
    layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),

    # Block 3
    layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.4),

    # Fully connected head
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),

    # Output: one softmax unit per class
    layers.Dense(num_classes, activation='softmax'),
])

# Compilation: categorical cross-entropy matches the one-hot labels produced
# by class_mode='categorical' generators.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# --- Callback that saves the best model ---
# Keeps only the weights with the lowest validation loss seen so far.
checkpoint_callback = ModelCheckpoint(
    'best_model.h5',
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

# --- Training ---
# No manual step counts: Keras derives the number of batches from the length
# of each generator. The previous `len(test_list) // 32` floor division
# dropped the final partial batch, so the last few test images were never
# used for validation.
# NOTE(review): the test generator doubles as validation data here, so the
# checkpoint is selected on the test set; consider a separate validation split.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator,
    callbacks=[checkpoint_callback]
)

# --- Evaluation on the test data ---
# Evaluate over the whole generator so every test image counts toward the
# reported accuracy.
# NOTE(review): this scores the last-epoch weights, not the checkpointed
# 'best_model.h5' — confirm that this is intended.
test_loss, test_acc = model.evaluate(test_generator)
print(f"✅ Test Accuracy: {test_acc:.4f}")

⚙️ Chargement du fichier .mat depuis : ../data/raw/train_list.mat
⚙️ Chargement du fichier .mat depuis : ../data/raw/test_list.mat
📊 Nombre d'images d'entraînement : 12000
📊 Nombre d'images de test : 8580
🔍 Exemple d'image train : n02085620-Chihuahua/n02085620_5927.jpg
                                            filename                class
0  ../data/processed/Images\n02085620-Chihuahua/n...  n02085620-Chihuahua
1  ../data/processed/Images\n02085620-Chihuahua/n...  n02085620-Chihuahua
2  ../data/processed/Images\n02085620-Chihuahua/n...  n02085620-Chihuahua
True
Found 12000 validated image filenames belonging to 120 classes.
Found 8580 validated image filenames belonging to 120 classes.
🏷️  Nombre de classes : 120
📁 Classes : ['n02085620-Chihuahua', 'n02085782-Japanese_spaniel', 'n02085936-Maltese_dog', 'n02086079-Pekinese', 'n02086240-Shih-Tzu', 'n02086646-Blenheim_spaniel', 'n02086910-papillon', 'n02087046-toy_terrier', 'n02087394-Rhodesian_ridgeback', 'n02088094-Afghan_hound', 'n0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 785ms/step - accuracy: 0.0139 - loss: 5.4293
Epoch 1: val_loss improved from inf to 5.20017, saving model to best_model.h5




[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m331s[0m 875ms/step - accuracy: 0.0139 - loss: 5.4286 - val_accuracy: 0.0188 - val_loss: 5.2002
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0425 - loss: 4.5967
Epoch 2: val_loss improved from 5.20017 to 4.75487, saving model to best_model.h5




[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m432s[0m 1s/step - accuracy: 0.0426 - loss: 4.5967 - val_accuracy: 0.0323 - val_loss: 4.7549
Epoch 3/20
[1m252/375[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1:37[0m 797ms/step - accuracy: 0.0839 - loss: 4.1911

KeyboardInterrupt: 