# **Analyse du DataSet**


## **Télécharger les données**

In [1]:
import os
import random
import shutil
import tensorflow as tf
import numpy as np
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [2]:
# Root folder of the dataset; its entries are listed as categories by set_category().
path_to_dataset = 'dataset_equilibre'

In [3]:
def set_category(path_to_dataset):
    """Return the category names found in a dataset folder.

    A category is a sub-directory of ``path_to_dataset``. Plain files
    (for example ``.DS_Store`` or a README) are ignored, and the names are
    returned sorted so that the position of a category -- later used as its
    integer label -- is the same on every run and every machine.

    Args:
        path_to_dataset: Path of the dataset root folder.

    Returns:
        Sorted list of sub-directory names (empty if there are none).

    Raises:
        OSError: If ``path_to_dataset`` cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(path_to_dataset)
        if os.path.isdir(os.path.join(path_to_dataset, entry))
    )

In [4]:
def load_category(path_to_dataset, category):
    """Return the path of a category folder inside the dataset folder.

    The path is built with ``os.path.join`` so that the platform separator
    is used and a dataset path that already ends with a separator does not
    produce a doubled one.

    Args:
        path_to_dataset: Path of the dataset root folder.
        category: Name of the category sub-directory.

    Returns:
        The joined path as a string. The path is not checked for existence.
    """
    return os.path.join(path_to_dataset, category)

In [None]:
# Sanity check: print the path built for the "Academic_Art" category.
print(load_category(path_to_dataset,"Academic_Art"))

In [None]:
# Resolve every category name found in the dataset folder to its directory path.
categories = set_category(path_to_dataset)
path_to_categories = [
    load_category(path_to_dataset, category_name)
    for category_name in categories
]

print(path_to_categories)

In [None]:
# Global parameters
IMG_SIZE = (224, 224)  # target size passed to load_img; also sets the model's input height/width
BATCH_SIZE = 64  # number of images per batch produced by the data generators

# Load a single image from disk and preprocess it for the model.
def load_image(img_path):
    """Load the image at ``img_path`` and return it as a scaled array.

    The image is loaded with ``target_size=IMG_SIZE``, converted to an
    array, and divided by 255.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The image array after division by 255.
    """
    pil_image = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
    pixels = tf.keras.preprocessing.image.img_to_array(pil_image)
    # Normalisation: divide the pixel values by 255.
    return pixels / 255.0

# Collect the image paths and integer labels for every category directory.
def prepare_data(path_to_categories):
    """List the image files of each category and label them by position.

    The label of an image is the index of its category directory in
    ``path_to_categories``. Files are selected by extension (``.png``,
    ``.jpg``, ``.jpeg``), compared case-insensitively so that ``.JPG`` or
    ``.PNG`` files are not silently dropped. Each directory listing is
    sorted so the returned order does not depend on the file system.

    Args:
        path_to_categories: Sequence of category directory paths.

    Returns:
        A tuple ``(image_paths, labels, class_names)`` where ``image_paths``
        and ``labels`` are parallel lists and ``class_names[i]`` is the
        directory name of the category labelled ``i``.

    Raises:
        OSError: If one of the category directories cannot be listed.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    # normpath drops a trailing separator, which would make basename return "".
    class_names = [
        os.path.basename(os.path.normpath(cat_path))
        for cat_path in path_to_categories
    ]

    image_paths = []
    labels = []
    for label, category_path in enumerate(path_to_categories):
        images = [
            os.path.join(category_path, file_name)
            for file_name in sorted(os.listdir(category_path))
            if file_name.lower().endswith(image_extensions)
        ]
        image_paths.extend(images)
        labels.extend([label] * len(images))

    return image_paths, labels, class_names

# Build the full list of image paths and labels, then report what was found.
image_paths, labels, class_names = prepare_data(path_to_categories)
print(f"Classes disponibles : {class_names}")
print(f"Nombre total d'images : {len(image_paths)}")

## **Création du DataGenerator**

In [8]:
# Custom data generator built on keras Sequence (calls super().__init__()).
class CustomDataGenerator(Sequence):
    """Batch generator that loads images from disk on demand.

    Each batch is a pair ``(images, labels)`` where ``images`` is the array
    of the images returned by ``load_image`` and ``labels`` is the one-hot
    encoding of the corresponding integer labels.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of integer class labels, parallel to ``image_paths``.
        batch_size: Maximum number of images per batch (must be positive).
        shuffle: When true, the samples are shuffled at construction time
            and each time ``on_epoch_end`` is called.
        num_classes: Width of the one-hot encoding. Defaults to
            ``max(labels) + 1`` so that every label present can be encoded
            even when some class is missing from this subset.
        **kwargs: Forwarded unchanged to the ``Sequence`` constructor.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length or
            ``batch_size`` is not positive.
    """

    def __init__(self, image_paths, labels, batch_size, shuffle=True,
                 num_classes=None, **kwargs):
        super().__init__(**kwargs)
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length "
                f"({len(image_paths)} != {len(labels)})"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        # Private copies: shuffling must not reorder the caller's lists.
        self.image_paths = list(image_paths)
        self.labels = list(labels)
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Computed once here instead of scanning every label for each batch.
        if num_classes is None:
            num_classes = int(max(self.labels)) + 1 if self.labels else 0
        self.num_classes = num_classes
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches (the last one may be smaller)."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Load and return batch number ``index`` as ``(images, labels)``.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``. Without
                this check an out-of-range index would silently produce an
                empty batch, and plain ``for batch in generator`` iteration
                (which stops on IndexError) might never terminate.
        """
        if not 0 <= index < len(self):
            raise IndexError(
                f"batch index {index} out of range for {len(self)} batches"
            )
        start = index * self.batch_size
        end = start + self.batch_size
        batch_image_paths = self.image_paths[start:end]
        batch_labels = self.labels[start:end]

        images = np.array([load_image(img_path) for img_path in batch_image_paths])
        labels = tf.keras.utils.to_categorical(batch_labels, num_classes=self.num_classes)
        return images, labels

    def on_epoch_end(self):
        """Reshuffle paths and labels together when shuffling is enabled."""
        if not self.shuffle or not self.image_paths:
            return
        # A single permutation applied to both lists keeps them aligned.
        order = np.random.permutation(len(self.image_paths))
        self.image_paths = [self.image_paths[i] for i in order]
        self.labels = [self.labels[i] for i in order]


In [None]:
# Split the data into training and validation sets (stratified by class).
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, random_state=123, stratify=labels
)

# Instantiate the generators. The validation generator does not shuffle:
# evaluation does not depend on sample order, and a fixed order makes the
# batches reproducible.
generator_train = CustomDataGenerator(train_paths, train_labels, BATCH_SIZE)
generator_val = CustomDataGenerator(val_paths, val_labels, BATCH_SIZE, shuffle=False)

# Preview up to nine images from the first training batch.
# Dedicated names are used so the global `labels` list (needed by
# train_test_split above) is not overwritten when this cell is re-run.
if len(generator_train) > 0:
    sample_images, sample_labels = generator_train[0]
    plt.figure(figsize=(10, 10))
    # A batch can hold fewer than nine images on a small dataset.
    for i in range(min(9, len(sample_images))):
        plt.subplot(3, 3, i + 1)
        plt.imshow(sample_images[i])
        plt.title(class_names[np.argmax(sample_labels[i])])
        plt.axis("off")
    plt.show()


# **Création du modèle**

In [None]:
# Build the CNN: three convolution / max-pooling stages followed by a
# dense classifier head with dropout.
cnn_model = Sequential()
cnn_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Conv2D(64, (3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Conv2D(128, (3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.5))
# One softmax output per class.
cnn_model.add(Dense(len(class_names), activation='softmax'))

# Compile the model: Adam optimizer, categorical cross-entropy on the
# one-hot labels produced by the generators.
cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Train the model, going through every batch of both generators each epoch.
cnn_history = cnn_model.fit(
    generator_train,
    epochs=6,
    steps_per_epoch=len(generator_train),
    validation_data=generator_val,
    validation_steps=len(generator_val),
)

In [None]:
# Evaluate the trained model on the validation generator and report loss and accuracy.
val_loss, val_accuracy = cnn_model.evaluate(generator_val)
print(f"Validation Loss (CNN): {val_loss}")
print(f"Validation Accuracy (CNN): {val_accuracy}")

In [None]:
# Collect true and predicted class indices on the validation set, then plot
# the confusion matrix.
y_true = []
y_pred = []
num_batches = 150  # Maximum number of validation batches to evaluate

# Index the generator explicitly and stop at its real length, so no empty
# out-of-range batch is ever requested. Batch-local names keep the global
# `labels` list intact.
for batch_index in range(min(num_batches, len(generator_val))):
    batch_images, batch_labels = generator_val[batch_index]
    predictions = cnn_model.predict(batch_images)
    y_true.extend(np.argmax(batch_labels, axis=1))
    y_pred.extend(np.argmax(predictions, axis=1))

# Passing every class index keeps the matrix the same size as `class_names`
# even if a class is absent from the evaluated batches.
conf_matrix = confusion_matrix(
    y_true, y_pred, labels=list(range(len(class_names)))
)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=class_names)
# Draw on an explicit axes: disp.plot() would otherwise open its own figure
# and leave the sized one empty.
fig, ax = plt.subplots(figsize=(12, 8))
disp.plot(cmap=plt.cm.Blues, ax=ax)
plt.xticks(rotation=90)
plt.show()
