In [9]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report, f1_score

In [18]:
df=pd.read_csv('X_train_update.csv')
df1=pd.read_csv("y_train_CVw08PX.csv")


# Nous allons changer les valeurs de notre colonne chemin_images du dataFrame par le nom des images correspondantes à productid et imageid
for mmm, mmm1 in zip(df.imageid, df.productid):    
        nom_img = f"image_{mmm}_product_{mmm1}.jpg"
        df.loc[df['imageid'] == mmm, 'image_name'] = nom_img
 
# On peut supprimer les colonnes productid et imageid de notre data Frame enfin de l'alleger.
df=df.drop(["designation","productid", "imageid","description"], axis=1)


In [11]:
df["categ"] = df1.prdtypecode
df.head()

Unnamed: 0.1,Unnamed: 0,image_name,categ
0,0,image_1263597046_product_3804725264.jpg,10
1,1,image_1008141237_product_436067568.jpg,2280
2,2,image_938777978_product_201115110.jpg,50
3,3,image_457047496_product_50418756.jpg,1280
4,4,image_1077757786_product_278535884.jpg,2705


In [12]:
# Chemins
DATA_DIR = "C:/Users/Bureau/Desktop/projetRakuten/images/train/image_train"

# Paramètres
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Préparer les données
X = df['image_name'].tolist()
y = df['categ'].tolist()

# Encodage des labels
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


In [13]:
# Split train/val
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)


In [14]:
def generator(df_subset, labels_subset):
    while True:
        for i in range(0, len(df_subset), BATCH_SIZE):
            batch_imgs = []
            batch_labels = []
            batch_files = df_subset[i:i+BATCH_SIZE]
            batch_labels_raw = labels_subset[i:i+BATCH_SIZE]
            
            for img_name, label in zip(batch_files, batch_labels_raw):
                img_path = os.path.join(DATA_DIR, img_name).replace('\\', '/')
                if not os.path.exists(img_path):
                    print(f"Image not found: {img_path}")
                    continue  # saute cette image si elle n'existe pas
                try:
                    img = load_img(img_path, target_size=IMG_SIZE)
                    img_array = img_to_array(img)
                    img_array = tf.keras.applications.vgg16.preprocess_input(img_array)
                    batch_imgs.append(img_array)
                    batch_labels.append(label)
                except Exception as e:
                    print(f"Erreur lors du chargement de {img_path}: {e}")
                    continue

            if batch_imgs:  # seulement si le batch n'est pas vide
                yield np.array(batch_imgs), tf.keras.utils.to_categorical(batch_labels, num_classes=len(label_encoder.classes_))
            else:
                print("Batch vide sauté.")

In [15]:
# Générateurs
train_gen = generator(X_train, y_train)
val_gen = generator(X_val, y_val)

# Calcul du nombre de batches
steps_per_epoch = len(X_train) // BATCH_SIZE
validation_steps = len(X_val) // BATCH_SIZE

# Modèle VGG16 
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False  

x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dense(len(label_encoder.classes_), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=x)
model.compile(
    optimizer='adam', 
    loss='categorical_crossentropy', 
    metrics=['accuracy']
)

In [8]:
# Entraînement
model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=20,
    validation_data=val_gen,
    validation_steps=validation_steps
)

# Récupération de toutes les vraies classes et les prédictions
y_true = []
y_pred = []

# Le générateur
val_gen = generator(X_val, y_val)  

for i in range(validation_steps):
    X_batch, y_batch = next(val_gen)
    preds = model.predict(X_batch)
    
    y_true.extend(np.argmax(y_batch, axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

# Convertir en tableaux numpy
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Calcul du F1-score et de l'accuracy
f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)

print(f"F1-score (macro): {f1:.4f}")
print(f"Accuracy: {acc:.4f}")
# Sauvegarder le modèle
model.save('rakuten1_vgg16_model1.h5')

Epoch 1/20
[1m2122/2122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6068s[0m 3s/step - accuracy: 0.3225 - loss: 3.6063 - val_accuracy: 0.4363 - val_loss: 2.0848
Epoch 2/20
[1m2122/2122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6125s[0m 3s/step - accuracy: 0.4573 - loss: 1.9031 - val_accuracy: 0.4581 - val_loss: 1.9281
Epoch 3/20
[1m2122/2122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5943s[0m 3s/step - accuracy: 0.5048 - loss: 1.6929 - val_accuracy: 0.4641 - val_loss: 1.9661
Epoch 4/20
[1m2122/2122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5794s[0m 3s/step - accuracy: 0.5326 - loss: 1.5506 - val_accuracy: 0.4618 - val_loss: 1.9967
Epoch 5/20
[1m2122/2122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5774s[0m 3s/step - accuracy: 0.5551 - loss: 1.4591 - val_accuracy: 0.4802 - val_loss: 2.0684
Epoch 6/20
[1m2122/2122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5772s[0m 3s/step - accuracy: 0.5706 - loss: 1.3787 - val_accuracy: 0.4663 - val_loss: 2.1208
Epoc



F1-score (macro): 0.3625
Accuracy: 0.4662


## Calcul du F1-score moyen (macro, micro ou weighted selon le besoin)

In [16]:
MODEL_PATH = 'rakuten1_vgg16_model1.h5'
model1 = load_model(MODEL_PATH)



In [17]:
# Récupérer toutes les vraies classes et les prédictions
y_true = []
y_pred = []

# Le générateur 
val_gen = generator(X_val, y_val)  

for i in range(validation_steps):
    X_batch, y_batch = next(val_gen)
    preds = model1.predict(X_batch)
    
    y_true.extend(np.argmax(y_batch, axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

# Convertir en tableaux numpy
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Calcul du F1-score et de l'accuracy
for f1_sc in ["macro", "micro", "weighted"]:
    f1 = f1_score(y_true, y_pred, average=f1_sc)  
    print(f"F1-score moyen {f1_sc}:", f1)
acc = accuracy_score(y_true, y_pred)
print(f"Accuracy: {acc:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━