In [1]:
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Affiche uniquement les warnings et les erreurs

In [2]:
labels_df = pd.read_csv('data/trainLabels.csv')
labels_df

Unnamed: 0,id,label
0,1,frog
1,2,truck
2,3,truck
3,4,deer
4,5,automobile
...,...,...
49995,49996,bird
49996,49997,frog
49997,49998,truck
49998,49999,automobile


In [19]:
image_ids = labels_df['id'].values
labels_name = labels_df['label'].values

In [20]:
# Encoding du label
labelEncoder = LabelEncoder()
labels = labelEncoder.fit_transform(labels_name)

In [21]:
df = pd.DataFrame({
    'id' : labels,
    'name' : labels_name
})

df = df.drop_duplicates(keep='first')
df = df.sort_values(by='id', ascending=True)
df.to_csv('data/labels.csv', index=False)
df

Unnamed: 0,id,name
29,0,airplane
4,1,automobile
6,2,bird
9,3,cat
3,4,deer
27,5,dog
0,6,frog
7,7,horse
8,8,ship
1,9,truck


In [5]:
image_dir = 'data/train'
image_paths = [f'{image_dir}/{image_id}.png' for image_id in image_ids]

In [10]:
dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))

In [11]:
def load_and_preprocess_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [32, 32])
    return image, label

dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

In [14]:
dataset = dataset.shuffle(buffer_size=len(image_paths))

In [15]:
train_size = int(0.8 * len(image_paths))
train_dataset = dataset.take(train_size)
test_dataset = dataset.skip(train_size)

In [16]:
batch_size = 32
train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [18]:
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

In [19]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [22]:
epochs = 100
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(train_dataset, epochs=epochs, validation_data=test_dataset, callbacks=[callback])

Epoch 1/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 12ms/step - accuracy: 0.6523 - loss: 0.9869 - val_accuracy: 0.7001 - val_loss: 0.8694
Epoch 2/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 11ms/step - accuracy: 0.6739 - loss: 0.9266 - val_accuracy: 0.6885 - val_loss: 0.8899
Epoch 3/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 11ms/step - accuracy: 0.6979 - loss: 0.8676 - val_accuracy: 0.7217 - val_loss: 0.7955
Epoch 4/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 11ms/step - accuracy: 0.7120 - loss: 0.8287 - val_accuracy: 0.7314 - val_loss: 0.7825
Epoch 5/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 11ms/step - accuracy: 0.7256 - loss: 0.7829 - val_accuracy: 0.7405 - val_loss: 0.7585
Epoch 6/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 12ms/step - accuracy: 0.7394 - loss: 0.7441 - val_accuracy: 0.7215 - val_loss: 0.804

In [24]:
val_loss, val_accuracy = model.evaluate(test_dataset)
print(f'Validation accuracy: {val_accuracy}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9719 - loss: 0.0811
Validation accuracy: 0.9682999849319458


In [28]:
model.save('model/my_cifar10_model.h5')

