---
# 1. Paramètrage environnement et import des données

In [1]:
# Activer l'environnement Colab
env = 'colab'
#env = None

In [None]:
if env == 'colab':
  from google.colab import drive
  drive.mount('/content/drive')

  import os
  os.chdir('/content/drive/My Drive/Formation/Informatique - Digital/OpenClassroom/IML/P6_Classez_des_images_à_l_aide_d_algorithmes_de_deep_learning/dev')

  import tensorflow as tf
  tf.test.gpu_device_name()

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("default")

from sklearn.model_selection import train_test_split

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from keras.callbacks import EarlyStopping

In [4]:
dogs_raw = pd.read_csv('src/dogs.csv')
print(f'Nous avons {len(dogs_raw)} photos de chiens comprenant {dogs_raw.breeds.nunique()} races.')

Nous avons 20579 photos de chiens comprenant 120 races.


---
# Choix du nombre de races à tester

In [5]:
# Choix du nombre de races à tester
n_breeds = 120
top_breeds = dogs_raw.groupby('breeds').count().sort_values(by='uri', ascending=False).head(n_breeds).reset_index().breeds.to_list()

dogs = pd.DataFrame()
for b in top_breeds:
    dogs = dogs.append(dogs_raw[dogs_raw.breeds == b])

dogs = dogs.reset_index(drop=True)
dogs

Unnamed: 0,uri,breeds
0,src\img\n02085936-Maltese_dog\n02085936_10073.jpg,Maltese_dog
1,src\img\n02085936-Maltese_dog\n02085936_10130.jpg,Maltese_dog
2,src\img\n02085936-Maltese_dog\n02085936_10148.jpg,Maltese_dog
3,src\img\n02085936-Maltese_dog\n02085936_10197.jpg,Maltese_dog
4,src\img\n02085936-Maltese_dog\n02085936_10199.jpg,Maltese_dog
...,...,...
20574,src\img\n02090379-redbone\n02090379_855.jpg,redbone
20575,src\img\n02090379-redbone\n02090379_859.jpg,redbone
20576,src\img\n02090379-redbone\n02090379_91.jpg,redbone
20577,src\img\n02090379-redbone\n02090379_957.jpg,redbone


In [6]:
if env == 'colab':
  dogs.uri = dogs.uri.str.replace('\\', '/')

---
# 3. Préparation des données

In [7]:
# Séparation des données d'entrainements et de tests
train, test = train_test_split(dogs, test_size=0.2, random_state=42)

In [8]:
# Séparation des données d'entrainement et de validations
datagen = ImageDataGenerator(
            rescale=1/255,
            validation_split=0.2)

train_train = datagen.flow_from_dataframe(
              train,
              x_col = 'uri',
              y_col = 'breeds',
              seed=123,
              subset='training'
            )
val_train = datagen.flow_from_dataframe(
              train,
              x_col = 'uri',
              y_col = 'breeds',
              seed=123,
              subset='validation'
            )

Found 13171 validated image filenames belonging to 120 classes.
Found 3292 validated image filenames belonging to 120 classes.


In [9]:
datagen_test = ImageDataGenerator(
                    rescale=1/255)

test = datagen.flow_from_dataframe(
              test,
              x_col = 'uri',
              y_col = 'breeds',
              seed=123,
            )


Found 4116 validated image filenames belonging to 120 classes.


---
# 4. Formation du CNN

In [10]:
model = Sequential()

# Base d'un CNN
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Spécifique au données
model.add(Flatten())
model.add(Dense(len(train_train.class_indices), activation='softmax'))

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 127, 127, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 125, 125, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 60, 60, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 30, 30, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 57600)             0

In [12]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

early_stopping_monitor = EarlyStopping(patience=5)

history = model.fit(train_train, validation_data=val_train, epochs=100, callbacks = [early_stopping_monitor])

Epoch 1/100
  6/412 [..............................] - ETA: 1:49:08 - loss: 0.0523 - accuracy: 0.0052

KeyboardInterrupt: ignored

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

ax[0].plot(history.history['accuracy'], label='accuracy')
ax[0].plot(history.history['val_accuracy'], label = 'val_accuracy')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend()

ax[1].plot(history.history['loss'], label='loss')
ax[1].plot(history.history['val_loss'], label = 'val_loss')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Loss')
ax[1].legend()

plt.savefig('img/cnn_retention_map.png',
            transparent=True)

In [None]:
model.evaluate(test)