In [1]:
import os

import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from skimage import io, color, transform
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Locations of the training images and of the CSV files.
# The dataset root defaults to the original local folder; set the
# PROJECT_DATA_DIR environment variable to run the notebook on another
# machine without editing this cell.
DATA_DIR = os.environ.get(
    "PROJECT_DATA_DIR",
    "C:/Users/user/OneDrive/DATASIENCETEST/PROJET/Data/Update",
).rstrip("/\\")  # avoid a doubled separator if the override ends with one

images_path = f"{DATA_DIR}/images/image_train"
X_csv_path = f"{DATA_DIR}/X_train_update.csv"
y_csv_path = f"{DATA_DIR}/Y_train_CVw08PX.csv"

# Load the feature table (X) and the label table (y).
X_df = pd.read_csv(X_csv_path)
y_df = pd.read_csv(y_csv_path)



In [3]:
# Fail fast if the label table does not expose the target column
# 'prdtypecode' that the label-encoding step reads.
if not {'prdtypecode'}.issubset(y_df.columns):
    raise ValueError(
        "La colonne 'prdtypecode' n'existe pas dans le DataFrame y_df. Veuillez vérifier les données."
    )

In [4]:

# The label column is copied from y_df to X_df by index alignment, so the two
# tables must describe the same rows. Check the row counts up front instead of
# letting a mismatch silently produce missing labels.
if len(X_df) != len(y_df):
    raise ValueError(
        f"X_df et y_df n'ont pas le même nombre de lignes ({len(X_df)} contre {len(y_df)})."
    )

# Map the raw 'prdtypecode' values to contiguous integer labels 0..N-1.
encoder = LabelEncoder()
y_df['encoded_labels'] = encoder.fit_transform(y_df['prdtypecode'])
X_df['labels'] = y_df['encoded_labels']

# Index alignment fills unmatched rows with NaN; such labels would be passed
# as-is to the model (the generators use class_mode='raw'), so stop here.
if X_df['labels'].isna().any():
    raise ValueError(
        "Des labels sont manquants après l'alignement de X_df et y_df sur leur index."
    )

In [5]:
# Build the two-column frame (image path, label) consumed by the Keras
# image generators.
# File names follow image_<imageid>_product_<productid>.jpg. The path is built
# with vectorized string concatenation rather than a row-wise apply: it avoids
# one Python call per row and formats each id from its own column, independent
# of the dtypes of the other columns.
X_df['image_path'] = (
    images_path
    + "/image_" + X_df['imageid'].astype(str)
    + "_product_" + X_df['productid'].astype(str)
    + ".jpg"
)
data_df = X_df[['image_path', 'labels']]

In [6]:
# Image generator: rescales pixel values by 1/255 and reserves 20% of the
# rows for validation.
# NOTE(review): flow_from_dataframe is understood to take the validation
# subset as a contiguous slice of the dataframe, not a random sample — confirm
# that data_df is not ordered by class.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Fixed seed so the training shuffle order is reproducible across runs.
SHUFFLE_SEED = 42

# Arguments shared by the training and validation generators, kept in one
# place so the two cannot drift apart.
common_flow_args = dict(
    dataframe=data_df,
    x_col='image_path',
    y_col='labels',
    target_size=(64, 64),    # images are resized to 64x64
    color_mode='grayscale',  # single channel, matching the model input
    class_mode='raw',        # labels are passed through as the integer column values
    batch_size=32,
)

# Training generator: shuffled with a fixed seed.
train_generator = datagen.flow_from_dataframe(
    **common_flow_args,
    shuffle=True,
    seed=SHUFFLE_SEED,
    subset='training'
)

# Validation generator: not shuffled, so batches come in a stable order and
# predictions can be matched back to the dataframe rows.
validation_generator = datagen.flow_from_dataframe(
    **common_flow_args,
    shuffle=False,
    subset='validation'
)

Found 67933 validated image filenames.
Found 16983 validated image filenames.


In [7]:
# CNN definition: two convolution + max-pooling stages, then a dense
# classifier with dropout.
# The input shape is declared with an explicit Input layer rather than an
# input_shape= argument on the first Conv2D; the latter triggers a warning on
# recent Keras versions.
model = Sequential([
    Input(shape=(64, 64, 1)),  # 64x64 grayscale images, as produced by the generators
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(encoder.classes_), activation='softmax')  # one output unit per encoded class
])

  super().__init__(


In [8]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy
# (the labels are integer class indices), accuracy as the tracked metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Train the model.
# The returned History object is kept so the per-epoch loss and accuracy
# stay available for plotting after the run; epochs are long (the recorded
# log shows roughly 13 minutes to several hours each).
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    verbose=2,
)


Epoch 1/20


  self._warn_if_super_not_called()


2123/2123 - 3742s - 2s/step - accuracy: 0.2343 - loss: 2.6868 - val_accuracy: 0.3297 - val_loss: 2.3250
Epoch 2/20
2123/2123 - 3491s - 2s/step - accuracy: 0.3115 - loss: 2.3953 - val_accuracy: 0.3703 - val_loss: 2.1991
Epoch 3/20
2123/2123 - 13280s - 6s/step - accuracy: 0.3442 - loss: 2.2774 - val_accuracy: 0.3876 - val_loss: 2.1394
Epoch 4/20
2123/2123 - 2360s - 1s/step - accuracy: 0.3643 - loss: 2.2009 - val_accuracy: 0.4065 - val_loss: 2.0773
Epoch 5/20
2123/2123 - 778s - 366ms/step - accuracy: 0.3794 - loss: 2.1319 - val_accuracy: 0.4083 - val_loss: 2.0470
Epoch 6/20


In [None]:
# Score the trained model on the validation generator and report the
# accuracy as a percentage.
(val_loss, val_accuracy) = model.evaluate(x=validation_generator)
print(f"Validation Accuracy: {val_accuracy*100:.2f}%")

531/531 ━━━━━━━━━━━━━━━━━━━━ 456s 859ms/step - accuracy: 0.4430 - loss: 1.9754
Validation Accuracy: 44.08%
