In [25]:
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns

In [26]:
from _params import params

In [27]:
dataset_folder_name  = params().dataset_folder_name()
TRAIN_TEST_SPLIT     = params().TRAIN_TEST_SPLIT()
WIDTH = HEIGHT       = params().IM_WIDTH()
dataset_dict         = params().dataset_dict()

In [28]:
dataset_dict['espece_alias'] = dict((e, i) for i, e in dataset_dict['espece_id'].items())
dataset_dict['race_alias']   = dict((r, i) for i, r in dataset_dict['race_id'].items())
dataset_dict['color_alias']  = dict((c, i) for i, c in dataset_dict['color_id'].items())

In [29]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Utilisé pour extraire des informations sur notre ensemble de données.
    Il itère sur toutes les images et renvoie un DataFrame avec les données
    (espece, race et couleur) de tous les fichiers.
    """

    def parse_info_from_file(path):
        """
        Analyse des informations à partir d'un seul fichier
        """
        try:
            filename = os.path.split(path)[1]
            filename = os.path.splitext(filename)[0]
            espece, race, color, _ = filename.split('_')
            return dataset_dict['espece_id'][int(espece)], dataset_dict['race_id'][int(race)], dataset_dict['color_id'][int(color)]
        except Exception as ex:
            return None, None, None

    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))

    records = []
    for file in files:
        info = parse_info_from_file(file)
        records.append(info)

    df = pd.DataFrame(records)
    df['file'] = files
    df.columns = ['espece', 'race', 'color', 'file']
    df = df.dropna()

    return df

In [30]:
df = parse_dataset(dataset_folder_name)
df.head()

Unnamed: 0,espece,race,color,file
0,Chien,Inconnu,default,dataset\0_0_0_1.jpg
1,Chien,Inconnu,default,dataset\0_0_0_10.jpg
2,Chien,Inconnu,default,dataset\0_0_0_100.jpg
3,Chien,Inconnu,default,dataset\0_0_0_101.jpg
4,Chien,Inconnu,default,dataset\0_0_0_102.jpg


In [31]:
import plotly.graph_objects as go

In [32]:
def plot_distribution(pd_series):
    labels = pd_series.value_counts().index.tolist()
    counts = pd_series.value_counts().values.tolist()

    pie_plot = go.Pie(labels=labels, values=counts, hole=.3)
    fig = go.Figure(data=[pie_plot])
    fig.update_layout(title_text='Distribution for %s' % pd_series.name)

    fig.show()

In [33]:
plot_distribution(df['espece'])

In [34]:
plot_distribution(df['race'])

In [35]:
plot_distribution(df['color'])

In [36]:
from tensorflow.keras.utils import to_categorical
from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [37]:
class AnimalDataGenerator():
    """
    Générateur de données pour le jeu de données AntLefAnimals.
    Cette classe doit être utilisée pour la formation du modèle Keras à sorties multiples.
    """

    def __init__(self, df):
        self.df = df

    def generate_split_indexes(self):
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * TRAIN_TEST_SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]
        train_up_to = int(train_up_to * TRAIN_TEST_SPLIT)
        train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]

        # convertit les alias en id
        self.df['espece_id'] = self.df['espece'].map(lambda gender: dataset_dict['espece_alias'][gender])
        self.df['race_id'] = self.df['race'].map(lambda gender: dataset_dict['race_alias'][gender])
        self.df['color_id'] = self.df['color'].map(lambda race: dataset_dict['color_alias'][race])

        return train_idx, valid_idx, test_idx

    def preprocess_image(self, img_path):
        """
        Utilisé pour effectuer un prétraitement mineur sur l'image avant de l'entrer dans le réseau.
        """
        
        
        im = Image.open(img_path)
        im.convert('RGB')
        im = im.resize((WIDTH, HEIGHT))
        im = np.array(im) / 255.0
        """
        im = mpimg.imread(img_path)
        plt.imshow(im)
        plt.axis("off")
        im = im/255"""
        
        print(f"Image shape: {im.shape} - img_path : {img_path}")

        return im

    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Utilisé pour générer un lot avec des images lors de la formation/test/validation de notre modèle Keras.
        """

        # tableaux pour stocker les données par lots
        images, especes, races, colors = [], [], [], []
        while True:
            for idx in image_idx:
                animal = self.df.iloc[idx]

                espece = animal['espece_id']
                race = animal['race_id']
                color = animal['color_id']
                _file = animal['file']

                im = self.preprocess_image(_file)

                especes.append(to_categorical(espece, len(dataset_dict['espece_id'])))
                races.append(to_categorical(race, len(dataset_dict['race_id'])))
                colors.append(to_categorical(color, len(dataset_dict['color_id'])))
                images.append(im)

                # condition de rendement
                if len(images) >= batch_size:
                    yield np.array(images), [np.array(especes), np.array(races), np.array(colors)]
                    images, especes, races, colors = [], [], [], []

            if not is_training:
                break

In [38]:
data_generator = AnimalDataGenerator(df)
train_idx, valid_idx, test_idx = data_generator.generate_split_indexes()

------

In [39]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
import tensorflow as tf

In [40]:
class AnimalMultiOutputModel():
    """
    Utilisé pour générer notre modèle multi-sorties. 
    Ce CNN contient trois branches : l'espece, la race & la couleur. 
    Chaque branche contient une séquence de couches convolutives qui 
    est définiesur la méthode make_default_hidden_layers.
    """

    def make_default_hidden_layers(self, inputs):
        """
        Utilisé pour générer un ensemble par défaut de calques masqués. 
        La structure utilisée dans ce réseau est définie comme :
        Conv2D -> Normalisation par lots -> Mise en commun -> Abandon
        """
        x = Conv2D(8, (3, 3), padding="same")(inputs)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)
        
        x = Conv2D(16, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)
        
        x = Conv2D(16, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)
        return x

    def build_espece_branch(self, inputs, num_espece):
        """
        Utilisé pour construire la branche course de notre réseau de reconnaissance d'animaux.
        Cette branche est composée de trois blocs Conv -> BN -> Pool -> Dropout,
        suivi de la couche de sortie dense.
        """
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(16)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_espece)(x)
        x = Activation("softmax", name="espece_output")(x)
        return x

    def build_race_branch(self, inputs, num_race):
        """
        Utilisé pour construire la branche course de notre réseau de reconnaissance d'animaux.
        Cette branche est composée de trois blocs Conv -> BN -> Pool -> Dropout,
        suivi de la couche de sortie dense.
        """
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(16)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_race)(x)
        x = Activation("softmax", name="race_output")(x)
        return x

    def build_color_branch(self, inputs, num_color):
        """
        Utilisé pour construire la branche course de notre réseau de reconnaissance d'animaux.
        Cette branche est composée de trois blocs Conv -> BN -> Pool -> Dropout,
        suivi de la couche de sortie dense.
        """
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(16)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_color)(x)
        x = Activation("softmax", name="color_output")(x)
        return x

    def assemble_full_model(self, width, height, num_espece, num_race, num_color):
        """
        Utilisé pour assembler notre modèle multi-sorties CNN.
        """

        # Activation = softmax or sigmoid or linear
        # verify in Keras documentation

        input_shape = (height, width, 3)
        inputs      = Input(shape=input_shape)

        espece_branch = self.build_espece_branch(inputs, num_espece)
        race_branch   = self.build_race_branch(inputs, num_race)
        color_branch  = self.build_color_branch(inputs, num_color)
        
        model = Model(inputs=inputs,
                      outputs=[espece_branch, race_branch, color_branch],
                      name="loc_animal")
        return model

In [41]:
model = AnimalMultiOutputModel().assemble_full_model(
    WIDTH,
    HEIGHT,
    num_espece=len(dataset_dict['espece_alias']),
    num_race=len(dataset_dict['race_alias']),
    num_color=len(dataset_dict['color_alias']))

In [42]:
# model.summary()

Model: "loc_animal"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 250, 250, 3) 0                                            
__________________________________________________________________________________________________
conv2d_9 (Conv2D)               (None, 250, 250, 8)  224         input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_12 (Conv2D)              (None, 250, 250, 8)  224         input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_15 (Conv2D)              (None, 250, 250, 8)  224         input_2[0][0]                    
_________________________________________________________________________________________

In [43]:
from tensorflow.keras.optimizers import Adam

In [44]:
init_lr = 1e-4
epochs = 100
opt = Adam(learning_rate=init_lr, decay=init_lr / epochs)
model.compile(optimizer=opt,
              loss={
                  'espece_output': 'categorical_crossentropy',
                  'race_output': 'categorical_crossentropy',
                  'color_output': 'categorical_crossentropy'},
              loss_weights={
                  'espece_output': 1.5,
                  'race_output': 1.5,
                  'color_output': 1.5},
              metrics={
                  'espece_output': 'accuracy',
                  'race_output': 'accuracy',
                  'color_output': 'accuracy'})

In [45]:
from tensorflow.keras.callbacks import ModelCheckpoint

In [46]:
batch_size = 32
valid_batch_size = 32

train_generator      = data_generator.generate_images(train_idx, is_training=True, batch_size=batch_size)
validation_generator = data_generator.generate_images(valid_idx, is_training=True, batch_size=valid_batch_size)

In [47]:
callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]

In [48]:
history = model.fit(train_generator,
                    steps_per_epoch=len(train_idx) // batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_data=validation_generator)

In [None]:
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['race_output_acc'],
                    name='Train'))
fig.add_trace(go.Scatter(
                    y=history.history['val_race_output_acc'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for race feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

NameError: name 'history' is not defined

In [None]:
history.save('locAnimal.h5')

NameError: name 'history' is not defined