In [72]:
import os
import numpy as np
import random as rn
import tensorflow as tf

os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(0)
rn.seed(1)
tf.random.set_seed(42)

os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=0'

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [73]:
import pandas as pd

df = pd.read_csv("/content/drive/MyDrive/Flipkart/flipkart_com-ecommerce_sample_1050.csv")

In [74]:
df["category_name"] = df["product_category_tree"].str.split(r">>", regex=False, expand=True)[0]
df["category_name"] = df["category_name"].str.replace('["', '')

In [75]:
df['category_name'].value_counts()

Unnamed: 0_level_0,count
category_name,Unnamed: 1_level_1
Home Furnishing,150
Baby Care,150
Watches,150
Home Decor & Festive Needs,150
Kitchen & Dining,150
Beauty and Personal Care,150
Computers,150


In [76]:
category_list = {'Home Furnishing' : 0 ,
                 'Baby Care' : 1 ,
                 'Watches' : 2 ,
                 'Home Decor & Festive Needs' : 3 ,
                 'Kitchen & Dining' : 4	,
                 'Beauty and Personal Care' : 5 ,
                 'Computers' : 6}

df['category_name'].replace(category_list, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['category_name'].replace(category_list, inplace=True)


In [77]:
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertModel
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Lambda, concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import MobileNetV3Small

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

bert_model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")

for layer in bert_model.layers:
  layer.trainable = False

max_sequence_length = 512

X = np.array(df['description'])
y = df['category_name']

# Division des données
X_train_text, X_test_text, y_train_text, y_test_text = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y, shuffle=True)

inputs_text = Input(shape=(max_sequence_length,), dtype=tf.int32, name="inputs_text")

def extract_token(tensor):
    outputs_text = bert_model(tensor)
    token = outputs_text.last_hidden_state[:, 0, :]
    return token

bert_model_output = Lambda(extract_token, output_shape=(768,))(inputs_text)

text_output = Dense(128, activation='relu', name="text_output")(bert_model_output)


img_input_shape = (224, 224, 3)
img_input = Input(shape=img_input_shape, name="img_input")

cv_model = MobileNetV3Small(weights='imagenet', include_top=False, input_shape=img_input_shape)

cv_model.trainable = False

x = cv_model(img_input, training=False)
x = GlobalAveragePooling2D()(x)
cv_output = Dense(128, activation='relu', name="cv_output")(x)

# Fusion des sorties
fusion = concatenate([text_output, cv_output])

# Couche de sortie
output_layer = Dense(7, activation='softmax')(fusion)

# Définition du modèle final
model_final = Model(
    inputs=[inputs_text, img_input],
    outputs=output_layer
)

# Compilation du modèle
model_final.compile(optimizer=Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [78]:
model_final.summary()

In [79]:
example_text = "serves as both a practical necessity and a fashion accessory"

encoding = tokenizer(example_text, return_tensors='tf', padding="max_length", truncation=True, max_length=max_sequence_length)
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

def text_encoding(text, tokenizer, max_sequence_length):
    input_ids = []
    attention_masks = []
    for t in text:
        encoding = tokenizer(t, return_tensors='tf', padding="max_length", truncation=True, max_length=max_sequence_length)
        input_ids.append(encoding['input_ids'][0])
    return np.array(input_ids)

X_train_text = text_encoding(X_train_text, tokenizer, max_sequence_length)
X_test_text = text_encoding(X_test_text, tokenizer, max_sequence_length)

X_train_text = tf.convert_to_tensor(X_train_text, dtype=tf.int32)
X_test_text = tf.convert_to_tensor(X_test_text, dtype=tf.int32)

In [80]:
from keras.applications.mobilenet_v3 import preprocess_input
from keras.preprocessing.image import load_img, img_to_array

img_path = '/content/drive/MyDrive/Flipkart/Images/'
image_filenames = list(df.image)

# Split the filenames and labels first
X_train_filenames, X_test_filenames, y_train, y_test = train_test_split(
    image_filenames, y, test_size=0.2, random_state=0, stratify=y, shuffle=True
)

def load_and_preprocess_image(path=img_path, list_img = image_filenames):
    img_array = []
    for img in list_img:
      image = load_img(path + '/' + img, target_size=(224, 224), interpolation='nearest')
      array = img_to_array(image)
      array = preprocess_input(array)
      img_array.append(array)
    return np.array(img_array)

data_img = load_and_preprocess_image()
array_img = data_img[0]





In [82]:
# Load and preprocess images for training and testing sets separately
X_train_img = load_and_preprocess_image(path=img_path, list_img=X_train_filenames)
X_test_img = load_and_preprocess_image(path=img_path, list_img=X_test_filenames)

from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit the encoder on your training labels
label_encoder.fit(y_train_text)

# Transform the training and testing labels
y_train_encoded = label_encoder.transform(y_train_text)
y_test_encoded = label_encoder.transform(y_test_text)

# Convert the encoded labels to TensorFlow tensors
y_train_text = tf.convert_to_tensor(y_train_encoded, dtype=tf.int32)
y_test_text = tf.convert_to_tensor(y_test_encoded, dtype=tf.int32)


history = model_final.fit(
    [X_train_text, X_train_img],
    y_train_text,
    batch_size=32,
    verbose="auto",
    validation_split=0.2,
    shuffle=True,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None,
    validation_batch_size=32,
    validation_freq=1,
    epochs=10
)



Epoch 1/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - accuracy: 0.1387 - loss: 2.1018 - val_accuracy: 0.3750 - val_loss: 1.7378
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 751ms/step - accuracy: 0.4215 - loss: 1.6591 - val_accuracy: 0.5417 - val_loss: 1.4395
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 650ms/step - accuracy: 0.6137 - loss: 1.3661 - val_accuracy: 0.6845 - val_loss: 1.2170
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 637ms/step - accuracy: 0.6991 - loss: 1.1503 - val_accuracy: 0.7202 - val_loss: 1.0540
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 758ms/step - accuracy: 0.7459 - loss: 0.9890 - val_accuracy: 0.7440 - val_loss: 0.9365
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 749ms/step - accuracy: 0.7978 - loss: 0.8661 - val_accuracy: 0.7560 - val_loss: 0.8510
Epoch 7/10
[1m21/21[0m 

In [83]:
model_final.evaluate([X_test_text, X_test_img], y_test_text)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.8140 - loss: 0.7068


[0.7577314972877502, 0.776190459728241]

///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////

**IMAGE CLASSIFICATION**  (Ne pas utiliser à partir d'ici, résultats médiocres voir non fonctionnels)

///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Charger les données depuis le CSV
data_path = '/content/drive/MyDrive/Flipkart/flipkart_com-ecommerce_sample_1050.csv'
data = pd.read_csv(data_path)

# Extraire les chemins d'images et les catégories
data['image_path'] = '/content/drive/MyDrive/Flipkart/Images/' + data['image']
data['category'] = data['product_category_tree'].apply(lambda x: eval(x)[0].split(" >> ")[0])
data["category"] = data["category"].str.replace('["', '')

# Paramètres de l'image et du modèle
img_height, img_width = 224, 224
batch_size = 16

# Préparation des données avec des augmentations d'images
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Générateur pour l'entraînement et la validation
train_generator = datagen.flow_from_dataframe(
    data,
    x_col='image_path',
    y_col='category',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

validation_generator = datagen.flow_from_dataframe(
    data,
    x_col='image_path',
    y_col='category',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Charger MobileNetV3
base_model = MobileNetV3Small(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
base_model.trainable = False  # Garder les couches de MobileNetV3 gelées pour l'apprentissage par transfert

# Construire le modèle
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(len(train_generator.class_indices), activation='softmax')
])

# Compiler le modèle
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])



In [None]:
# Calcul des poids de classe
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes
)
class_weights = dict(enumerate(class_weights))

# Callback pour le taux d'apprentissage
lr_reduction = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

# Entraîner le modèle
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_steps=validation_generator.samples // batch_size,
    class_weight=class_weights,
    callbacks=[lr_reduction]  # Ajouter le callback
)


In [None]:
import matplotlib.pyplot as plt

# Tracer la précision
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Précision d\'entraînement')
plt.plot(history.history['val_accuracy'], label='Précision de validation')
plt.title('Précision du modèle')
plt.xlabel('Époques')
plt.ylabel('Précision')
plt.legend(loc='lower right')

# Tracer la perte
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Perte d\'entraînement')
plt.plot(history.history['val_loss'], label='Perte de validation')
plt.title('Perte du modèle')
plt.xlabel('Époques')
plt.ylabel('Perte')
plt.legend(loc='upper right')

plt.tight_layout()
plt.show()


In [None]:
model.summary()

In [None]:
from tensorflow.keras.layers import Input, Concatenate, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

# Fusion des sorties image et texte
combined = Concatenate()([image_output, text_output])
combined_output = Dense(64, activation='relu')(combined)
combined_output = Dropout(0.3)(combined_output)
final_output = Dense(len(train_generator.class_indices), activation='softmax')(combined_output)

# Modèle multi-input final
final_model = Model(inputs=[resnet_base.input, text_input], outputs=final_output)

# Compilation du modèle
final_model.compile(optimizer=SGD(learning_rate=1e-4, momentum=0.9),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

# Entraînement du modèle avec les générateurs et les données textuelles
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes
)
class_weights = dict(enumerate(class_weights))

final_model.fit(
    [train_generator, text_features],
    validation_data=(validation_generator, X_test_text),
    epochs=10,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_steps=validation_generator.samples // batch_size,
    class_weight=class_weights
)

**Multi inputs, combinaison des deux modèles**

In [None]:
from sklearn.preprocessing import LabelEncoder

# Création du modèle combiné
text_model_output = tf.keras.layers.Input(shape=(768,), name='text_input')  # La taille du vecteur de DistilBERT est 768
image_model_output = tf.keras.layers.Input(shape=(img_height[0], img_width[1], 3), name='image_input')

# Passer l'entrée d'image à EfficientNet
base_output = base_model(image_model_output)

# Aplatir la sortie d'EfficientNet
base_output_flat = layers.Flatten()(base_output)

# Fusion des sorties
x_combined = layers.Concatenate()([text_model_output, base_output_flat])

# Ajout de couches supplémentaires pour le modèle de fusion
x_combined = layers.Dense(128, activation='relu')(x_combined)
x_combined = layers.Dropout(0.2)(x_combined)
final_output = layers.Dense(train_dataset.cardinality().numpy(), activation='softmax')(x_combined)  # Nombre de classes

# Construction du modèle combiné
combined_model = Model(inputs=[text_model_output, image_model_output], outputs=final_output)

# Compilation du modèle combiné
combined_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Préparer les données d'entraînement et de validation en tant que tableaux NumPy
X_image_train = np.concatenate([x.numpy() for x, _ in train_dataset])
y_image_train = np.concatenate([y.numpy() for _, y in train_dataset])

X_image_val = np.concatenate([x.numpy() for x, _ in validation_dataset])
y_image_val = np.concatenate([y.numpy() for _, y in validation_dataset])

# Encoder les étiquettes
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Entraîner le modèle combiné
combined_model.fit(
    [X_train, X_image_train],  # Entrées : vecteurs de texte et tableaux d'images
    y_train_encoded,  # Utiliser les étiquettes encodées
    validation_data=([X_test, X_image_val], y_test_encoded),  # Validation avec les données de validation
    epochs=10  # Vous pouvez ajuster le nombre d'époques
)


In [None]:
# Évaluation du modèle sur l'ensemble de test
test_loss, test_accuracy = combined_model.evaluate([X_test, X_image_val], y_test_encoded)

print(f'Perte sur l\'ensemble de test : {test_loss:.4f}')
print(f'Précision sur l\'ensemble de test : {test_accuracy:.4f}')


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Prédire les étiquettes pour l'ensemble de test
y_pred = combined_model.predict([X_test, X_image_val])
y_pred_classes = np.argmax(y_pred, axis=1)

# Créer une matrice de confusion
cm = confusion_matrix(y_test_encoded, y_pred_classes)

# Afficher la matrice de confusion
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(cmap=plt.cm.Blues)
plt.title('Matrice de confusion')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.show()


In [None]:
from sklearn.metrics import classification_report

# Afficher le rapport de classification
report = classification_report(y_test_encoded, y_pred_classes, target_names=label_encoder.classes_)
print(report)
