In [2]:
%%capture
%run preprocessing.ipynb

In [3]:
# Affiche les variables et modules importés depuis le notebook preprocessing.ipynb
%whos

Variable                          Type         Data/Info
--------------------------------------------------------
EfficientNetB0                    function     <function EfficientNetB0 at 0x000001530C552AC0>
efficientnet_decode_predictions   function     <function decode_predicti<...>ns at 0x000001530C553060>
efficientnet_preprocess_input     function     <function preprocess_input at 0x000001530C552FC0>
flipkart                          DataFrame                             <...>\n[1050 rows x 4 columns]
gc                                module       <module 'gc' (built-in)>
msno                              module       <module 'missingno' from <...>\missingno\\__init__.py'>
os                                module       <module 'os' (frozen)>
pd                                module       <module 'pandas' from 'c:<...>es\\pandas\\__init__.py'>
plt                               module       <module 'matplotlib.pyplo<...>\\matplotlib\\pyplot.py'>


In [5]:
# Autres imports
from tqdm import tqdm
import numpy as np 
import pandas as pd
import os
import gc

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output, display

import cv2
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input as efficientnet_preprocess_input
from tensorflow.keras.applications.efficientnet import decode_predictions as efficientnet_decode_predictions

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from tensorflow.keras.models import Model

# Définit le nombre de coeurs utilisés à 10
os.environ['LOKY_MAX_CPU_COUNT'] = '10'

Le dataframe flipkart a été chargé lors du préprocessing dans les globals, réassignons le au nom flipkart pour plus de clarté.

In [6]:
image_df = globals()['flipkart']
image_df.drop(columns=['product_name', 'description'], inplace=True)
print(image_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1050 entries, 0 to 1049
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   image   1050 non-null   object  
 1   class   1050 non-null   category
dtypes: category(1), object(1)
memory usage: 9.7+ KB
None


**INITIALISE LE MODELE AVEC TOUTES LES LAYERS SANS AUCUN POIDS + CHANGEMENT DE LA COUCHE DE SORTIE A 7 CLASSES**

In [51]:
n_classes = image_df['class'].nunique()

In [85]:
model = EfficientNetB0(
    include_top=True,
    weights=None,
    input_shape=(224, 224, 3),
    classes=7
)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [86]:
model.output

<KerasTensor shape=(None, 7), dtype=float32, sparse=False, name=keras_tensor_2159>

In [13]:
from tensorflow.data import Dataset

In [14]:
from sklearn.model_selection import train_test_split

In [55]:
path = 'Images/'

In [56]:
def extract_img_matrix(model, images, path, preprocess_input):
    input_shape = model.input.shape
    input_size = (input_shape[1], input_shape[2])
    print(f"Input size: {input_size}")
    images_preprocessed = []
    for img in tqdm(images):
        image = cv2.imread(path + img)
        image = cv2.resize(image, input_size)
        image = np.expand_dims(image, axis=0)
        image = preprocess_input(image)
        images_preprocessed.append(image)
    images_preprocessed = np.vstack(images_preprocessed)
    return images_preprocessed

In [57]:
X = extract_img_matrix(model, image_df['image'], path, efficientnet_preprocess_input)

Input size: (224, 224)


100%|██████████| 1050/1050 [00:15<00:00, 67.01it/s]


In [58]:
X.shape

(1050, 224, 224, 3)

In [75]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    image_df[['class']],
    test_size=0.2,
    random_state=42,
    stratify=image_df['class']
)

In [76]:
X_train.shape, X_test.shape


((840, 224, 224, 3), (210, 224, 224, 3))

In [77]:
y_train.shape, y_test.shape

((840, 1), (210, 1))

In [78]:
# Créer des datasets TensorFlow
ds_train = Dataset.from_tensor_slices((np.expand_dims(X_train, -1), np.expand_dims(y_train, -1)))
ds_test = Dataset.from_tensor_slices((np.expand_dims(X_test, -1), np.expand_dims(y_test, -1)))

In [79]:
ds_train

<_TensorSliceDataset element_spec=(TensorSpec(shape=(224, 224, 3, 1), dtype=tf.uint8, name=None), TensorSpec(shape=(1, 1), dtype=tf.string, name=None))>

In [80]:
# Diviser le dataset d'entraînement en ensembles d'entraînement et de validation
val_size = int(0.2 * len(X_train))
train_size = len(X_train) - val_size

ds_train = ds_train.shuffle(buffer_size=1024).take(train_size)
ds_val = ds_train.skip(train_size)

# Afficher les tailles des datasets
print(f"Taille de l'ensemble d'entraînement : {train_size}")
print(f"Taille de l'ensemble de validation : {val_size}")
print(f"Taille de l'ensemble de test : {len(X_test)}")

Taille de l'ensemble d'entraînement : 672
Taille de l'ensemble de validation : 168
Taille de l'ensemble de test : 210


In [81]:
BATCH_SIZE = 32

In [43]:
from tensorflow.data import AUTOTUNE

In [82]:
batch_train = ds_train.batch(batch_size=BATCH_SIZE, drop_remainder=True)

batch_val = ds_val.batch(batch_size=BATCH_SIZE, drop_remainder=True)

batch_test = ds_test.batch(batch_size=BATCH_SIZE, drop_remainder=True)

In [83]:
epochs = 10  # @param {type: "slider", min:10, max:100}
hist = model.fit(batch_train, epochs=epochs, validation_data=batch_val)

Epoch 1/10


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(32, 1), output.shape=(32, 7)