# **Analyse du DataSet**


## **Télécharger les données**

In [1]:
import os
import random
import shutil
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Root folder of the image dataset; its entries are listed below to obtain the categories.
path_to_dataset = 'dataset_equilibre'

In [3]:
def set_category(path_to_dataset):
  """Return the sorted names of the class sub-folders of *path_to_dataset*.

  Only non-hidden directories are kept, so stray files and tool folders
  (e.g. ``.DS_Store``, ``.ipynb_checkpoints``) are not mistaken for classes.
  The result is sorted because ``os.listdir`` returns entries in arbitrary
  order, and the position of a category is later used as its label index.

  Raises:
      OSError: if *path_to_dataset* cannot be listed.
  """
  return sorted(
      entry
      for entry in os.listdir(path_to_dataset)
      if not entry.startswith(".")
      and os.path.isdir(os.path.join(path_to_dataset, entry))
  )

In [4]:
def load_category(path_to_dataset, category):
  """Build the path of *category* inside *path_to_dataset*, joined by a forward slash."""
  return "{}/{}".format(path_to_dataset, category)

In [5]:
# List the entries of the dataset folder, then build one path per entry.
categories = set_category(path_to_dataset)
path_to_categories = [load_category(path_to_dataset, category) for category in categories]

In [6]:
# Global parameters
IMG_SIZE = (128, 128)  # target size every image is resized to when loaded
BATCH_SIZE = 16  # batch size given to the data generators

# Load and preprocess a single image
def load_image(img_path):
    """Load the image at *img_path*, resized to ``IMG_SIZE``, as an array divided by 255."""
    keras_image = tf.keras.preprocessing.image
    pil_image = keras_image.load_img(img_path, target_size=IMG_SIZE)
    pixels = keras_image.img_to_array(pil_image)
    return pixels / 255.0  # normalisation

# Collect the image paths and their labels
def prepare_data(path_to_categories):
    """Collect image paths, integer labels and class names.

    Args:
        path_to_categories: sequence of directory paths, one per class. The
            position of a directory in the sequence is used as its label.

    Returns:
        A ``(image_paths, labels, class_names)`` tuple. ``image_paths`` and
        ``labels`` are parallel lists; ``class_names[i]`` is the base name of
        ``path_to_categories[i]``.

    Raises:
        OSError: if one of the directories cannot be listed.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []
    labels = []
    # normpath drops a trailing separator, which would otherwise make
    # basename return an empty class name.
    class_names = [
        os.path.basename(os.path.normpath(cat_path))
        for cat_path in path_to_categories
    ]

    for label, category_path in enumerate(path_to_categories):
        # os.listdir order is arbitrary: sort so that a seeded split of the
        # returned lists is reproducible. Extensions are matched with their
        # dot and case-insensitively ("photo.JPG" is kept, "fakepng" is not).
        images = [
            os.path.join(category_path, file_name)
            for file_name in sorted(os.listdir(category_path))
            if file_name.lower().endswith(image_extensions)
        ]
        image_paths.extend(images)
        labels.extend([label] * len(images))

    return image_paths, labels, class_names

# Index the dataset once and report what was found.
image_paths, labels, class_names = prepare_data(path_to_categories)
print(f"Classes disponibles : {class_names}")
print(f"Nombre total d'images : {len(image_paths)}")

Classes disponibles : ['Academic_Art', 'Art_Nouveau', 'Baroque', 'Expressionism', 'Japanese_Art', 'Neoclassicism', 'Primitivism', 'Realism', 'Renaissance', 'Rococo', 'Romanticism', 'Symbolism', 'Western_Medieval']
Nombre total d'images : 7800


## **Création du DataGenerator**

In [7]:
# Custom data generator; extra keyword arguments are forwarded to Sequence.__init__()
class CustomDataGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of images with one-hot labels.

    Images are read from disk batch by batch with ``load_image``.

    Args:
        image_paths: paths of the image files.
        labels: non-negative integer class index of each image, parallel to
            ``image_paths``.
        batch_size: number of samples per batch (the last batch may be
            smaller).
        shuffle: when true, the samples are reshuffled at construction and
            every time ``on_epoch_end`` is called.
        num_classes: width of the one-hot encoding. Defaults to
            ``max(labels) + 1``; pass it explicitly when this subset of the
            data may not contain the highest class index.
        **kwargs: forwarded unchanged to ``Sequence.__init__``.

    Raises:
        ValueError: if ``batch_size`` is not positive or if ``image_paths``
            and ``labels`` have different lengths.
    """

    def __init__(self, image_paths, labels, batch_size, shuffle=True, num_classes=None, **kwargs):
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        # Private copies: shuffling must not reorder the caller's lists.
        self.image_paths = list(image_paths)
        self.labels = list(labels)
        if len(self.image_paths) != len(self.labels):
            raise ValueError(
                f"image_paths and labels differ in length: "
                f"{len(self.image_paths)} != {len(self.labels)}"
            )
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Computed once here rather than for every batch.
        if num_classes is None:
            num_classes = int(max(self.labels)) + 1 if self.labels else 0
        self.num_classes = num_classes
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (ceiling division)."""
        return (len(self.image_paths) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number *index* as ``(images, one_hot_labels)``."""
        start = index * self.batch_size
        end = start + self.batch_size
        batch_image_paths = self.image_paths[start:end]
        batch_labels = self.labels[start:end]

        images = np.array([load_image(img_path) for img_path in batch_image_paths])
        labels = tf.keras.utils.to_categorical(batch_labels, num_classes=self.num_classes)
        return images, labels

    def on_epoch_end(self):
        """Reshuffle paths and labels together when shuffling is enabled."""
        if self.shuffle and self.image_paths:
            # One permutation applied to both lists keeps them aligned.
            order = np.random.permutation(len(self.image_paths))
            self.image_paths = [self.image_paths[i] for i in order]
            self.labels = [self.labels[i] for i in order]


In [8]:
# Split the data into training and validation sets (stratified by label)
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, random_state=123, stratify=labels
)

# Instantiate the generators. Only the training data is shuffled: validation
# metrics do not depend on sample order, and a fixed order keeps the batches
# aligned with val_paths / val_labels for later inspection.
generator_train = CustomDataGenerator(train_paths, train_labels, BATCH_SIZE)
generator_val = CustomDataGenerator(val_paths, val_labels, BATCH_SIZE, shuffle=False)

# **Création du modèle**

In [9]:
# Generate hyperparameter combinations
def generate_hyperparameter_combinations(
    learning_rates,
    dropout_options=(0.2, 0.4, 0.5),
    batch_sizes=(16,),
    epochs=(5, 6),
    batch_norm_options=(True,),
):
    """Return the full grid of hyperparameter combinations.

    Args:
        learning_rates: iterable of learning rates to try.
        dropout_options: iterable of dropout rates to try.
        batch_sizes: iterable of batch sizes to try.
        epochs: iterable of epoch counts to try.
        batch_norm_options: iterable of booleans (use batch normalisation).

    Returns:
        A list with one dict per combination, with keys ``learning_rate``,
        ``dropout``, ``batch_size``, ``epochs`` and ``batch_norm``. The
        combinations are in ``itertools.product`` order, i.e. the last
        argument varies fastest.
    """
    keys = ("learning_rate", "dropout", "batch_size", "epochs", "batch_norm")
    grid = itertools.product(
        learning_rates, dropout_options, batch_sizes, epochs, batch_norm_options
    )
    return [dict(zip(keys, combination)) for combination in grid]

# Define learning rates to test
learning_rates = [0.001, 0.0005, 0.0001]
hyperparameter_grid = generate_hyperparameter_combinations(learning_rates)

# Results accumulator: the training loop appends one dict per trained configuration
results = []

In [10]:
# Build and train models
def build_cnn(filters, dropout, learning_rate, batch_norm, input_shape, num_classes):
    """Build and compile a small CNN classifier.

    Args:
        filters: iterable of filter counts. One block
            Conv2D(3x3, relu) -> [BatchNormalization] -> MaxPooling2D(2x2)
            is added per entry.
        dropout: rate of the Dropout layer placed before the output layer.
        learning_rate: learning rate given to the Adam optimizer.
        batch_norm: when true, add BatchNormalization after each convolution.
        input_shape: shape of one input sample, e.g. (height, width, channels).
        num_classes: number of units of the softmax output layer.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    model = Sequential()
    # Declare the input once, up front, instead of passing `input_shape` to
    # every Conv2D: the argument is only meaningful on the first layer, and
    # an explicit Input is the form Keras recommends for Sequential models.
    model.add(tf.keras.Input(shape=input_shape))
    for filter_size in filters:
        model.add(Conv2D(filter_size, (3, 3), activation='relu'))
        if batch_norm:
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Grid search: train one model per hyperparameter combination and record its
# validation curves in `results`.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# The architecture is fixed for this search; only the grid values vary.
filters = [64]
input_shape = (IMG_SIZE[0], IMG_SIZE[1], 3)

for config in hyperparameter_grid:
    dropout = config["dropout"]
    learning_rate = config["learning_rate"]
    batch_size = config["batch_size"]
    epochs = config["epochs"]
    batch_norm = config["batch_norm"]

    # Drop the state left by the previous model so memory does not grow with
    # every configuration.
    tf.keras.backend.clear_session()

    # With a Sequence the batch size is decided by the generator, not by
    # fit(): build the generators from the configuration so that the
    # "batch_size" hyperparameter is the one actually used.
    run_generator_train = CustomDataGenerator(train_paths, train_labels, batch_size)
    run_generator_val = CustomDataGenerator(val_paths, val_labels, batch_size, shuffle=False)

    model = build_cnn(filters, dropout, learning_rate, batch_norm, input_shape, len(class_names))

    # Steps per epoch are taken from len() of each generator.
    history = model.fit(
        run_generator_train,
        validation_data=run_generator_val,
        epochs=epochs,
        verbose=1,
        callbacks=[early_stopping]
    )

    results.append({
        "filters": list(filters),
        "dropout": dropout,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "epochs": epochs,
        "batch_norm": batch_norm,
        "val_loss": history.history['val_loss'],
        "val_accuracy": history.history['val_accuracy']
    })

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
130/390 ━━━━━━━━━━━━━━━━━━━━ 5:00 1s/step - accuracy: 0.1265 - loss: 35.3722

In [None]:
# Convert results to DataFrame (one row per entry of `results`)
results_df = pd.DataFrame(results)

# Display results
print(results_df)

In [None]:
# Plot evolution of performance by hyperparameters.
# Every loop iterates over `results` (each entry stores its own
# hyperparameters) rather than indexing it by position in
# `hyperparameter_grid`: `results` is shorter than the grid when the search
# was interrupted, and positional indexing would then raise IndexError.
plt.figure(figsize=(20, 15))

# Validation loss per epoch, one curve per trained configuration
plt.subplot(3, 1, 1)
for run_number, run in enumerate(results, start=1):
    run_val_loss = run['val_loss']
    epoch_axis = range(1, len(run_val_loss) + 1)
    plt.plot(epoch_axis, run_val_loss, marker='o', label=f'Config {run_number}: Epochs={run["epochs"]}')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.title('Validation Loss by Epochs')
plt.legend()

# Last recorded validation accuracy against dropout rate
plt.subplot(3, 1, 2)
for run_number, run in enumerate(results, start=1):
    plt.scatter(run['dropout'], run['val_accuracy'][-1], label=f'Config {run_number}')
plt.xlabel('Dropout Rate')
plt.ylabel('Final Validation Accuracy')
plt.title('Final Validation Accuracy by Dropout Rate')
plt.legend()

# Last recorded validation accuracy against learning rate (log-scaled axis)
plt.subplot(3, 1, 3)
for run_number, run in enumerate(results, start=1):
    plt.scatter(run['learning_rate'], run['val_accuracy'][-1], label=f'Config {run_number}')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Final Validation Accuracy')
plt.title('Final Validation Accuracy by Learning Rate')
plt.legend()

plt.tight_layout()
plt.show()
