In [1]:
import numpy as np
import pandas as pd

import string

import matplotlib.pyplot as plt
import cv2

import tensorflow 
from tensorflow import keras

# fonctions de récupération et préparation des données
import traintestsplit as tts
from words_txt_to_df import txt_to_df
from nettoyage_fichiers import clean_data, error_image
from harmonisation import harmony_clean
from keep_n_chars import max_n_chars

# Générateur de batchs
from  generator_rnn import DatasetGenerator

# Modèlisation
import tensorflow  
import string

# Modèlisation
from tensorflow.keras import Model

from tensorflow.keras.layers import  Input, Activation, BatchNormalization
from tensorflow.keras.layers import Conv2D, LSTM, Dense

from tensorflow.keras.layers import  MaxPooling2D, Dropout, Bidirectional

from tensorflow.keras.layers import  Lambda, Reshape
from tensorflow import squeeze

# Loss
from tensorflow.keras.backend import ctc_batch_cost

# Décodage
import tensorflow.keras.backend as K
# Décodage
import rnn_pred 

# Reproductibilité
from numpy.random import seed
seed(64)
from tensorflow import random
random.set_seed(8)


Init Plugin
Init Graph Optimizer
Init Kernel


# Préparation des données 

In [2]:
print("Lecture de words.txt et transformation en dataframe") 
df_words = txt_to_df('words.txt')

print("Suppression des erreurs de lecture d'image du df")
df_words = clean_data(df_words)

print("génération d'un dataframe contenant la répartition du dataset [line_id,set]")
df_tts = tts.text_to_splitDataframe()

print("Split des données")
trainset, testset, validationset = tts.split_data(df_tts, df_words)


# Variables utiles
TARGET_SIZE = (128,32)
BATCH_SIZE = 100
EPOCHS = 25
MAX_LENGTH = 21



# Suppression des mots plus longs que MAX_LENGTH 
trainset = max_n_chars(trainset, MAX_LENGTH)
testset = max_n_chars(testset, MAX_LENGTH)
validationset = max_n_chars(validationset, MAX_LENGTH)
    
# Préparation des données
print("Préparation des données")
    
# Tranformation des images par lots
train_generator = DatasetGenerator(dataframe=trainset,
                                    directory="",
                                    x_col = "data_path",
                                    y_col = "transcript",
                                    targetSize = TARGET_SIZE,
                                    nb_canaux = 1, # images grayscale par défaut
                                    batchSize = BATCH_SIZE,
                                    shuffle = False,
                                    max_y_length = MAX_LENGTH)

# Idem pour le jeu de validation
valid_generator = DatasetGenerator(dataframe=validationset,
                                    x_col = "data_path",
                                    y_col = "transcript", 
                                    targetSize = TARGET_SIZE,  
                                    shuffle = False, 
                                    max_y_length = MAX_LENGTH)

# Et le jeu de test
test_generator = DatasetGenerator(dataframe=testset,
                                    x_col = "data_path",
                                    y_col = "transcript", 
                                    targetSize = TARGET_SIZE,  
                                    shuffle = False, 
                                    max_y_length = MAX_LENGTH)
    
print("Construction du modèle")

Lecture de words.txt et transformation en dataframe
Suppression des erreurs de lecture d'image du df
Erreur de lecture sur l'image: a01-117-05-02.png
Erreur de lecture sur l'image: r06-022-03-05.png
génération d'un dataframe contenant la répartition du dataset [line_id,set]
Split des données
Préparation des données
Construction du modèle


# Construction du modèle et entraînement

In [3]:
# Construction de la couche CTC
class CTCLayer(tensorflow.keras.layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred, y_lengths):
        # Calcul de la loss value et ajouter à la couche avec fonction 'self.add_loss()'
        batch_len = tensorflow.cast(tensorflow.shape(y_true)[0], dtype="int64")
        input_length = tensorflow.cast(tensorflow.shape(y_pred)[1], dtype="int64")       

        input_length = input_length * tensorflow.ones(shape=(batch_len, 1), dtype="int64")
        label_length = y_lengths * tensorflow.ones(shape = [1], dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # Retourner seulement les prédiction calculées au final
        return y_pred

In [4]:
def build_model_rnn(target_size):
    """
    La fonction build_model_rnn construit un modèle rnn pour obtenir la transcription des écritures manuscrites sur une image.
    Paramètres :
        target_size : tuple correspondant aux dimensions de l'image souhaitées
    Renvoie:
        probabilités pour chaque classe
    """
    # Inputs
    inputs_data = Input(shape = (target_size[1],target_size[0], 1), name = 'input_im', dtype = 'float32')
    labels = Input(shape = (None,), name = 'labels', dtype = 'float32')
    y_lengths = Input(name = 'label_length', shape = (None,), dtype = 'int64')
    
    # CNN
    x = Conv2D(filters=64, kernel_size=(9,9),strides=(1,1), padding="same", name = 'conv_1')(inputs_data)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    x = Conv2D(filters=128, kernel_size=(5,5), strides=(1,1), padding="valid", name = 'conv_2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2,2), name = 'pool2')(x)
    
    
    x = Conv2D(filters=128, kernel_size=(3,3), strides=(1,1), padding="valid", name = 'conv_3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    output_cnn = MaxPooling2D(pool_size=(2,2), name = 'max_pool2')(x)
    
    # reshape to enter RNN
    x = Reshape((1,output_cnn.shape[2],-1))(output_cnn)
    reshape_cnn = Lambda(lambda x: squeeze(x, 1))(x)
    
    # Couche dense 
    dense = Dense(256, name = 'dense_1')(reshape_cnn)
    dense = Activation("relu")(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(0.2)(dense)
    
    # RNN
    blstm = Bidirectional(LSTM(64, activation='relu', return_sequences=True, dropout=0.2,
                               name="blstm1"))(dense)
    blstm2 = Bidirectional(LSTM(64, activation='relu', return_sequences=True, dropout=0.2,
                               name="blstm2"))(blstm)
    
    # output layer
    y_pred = Dense(len(list(string.printable[:-17]))+1, activation='softmax', name="dense")(blstm2)
    
    # ctc layer pour calcul de la CTC loss à chaque step
    output_ctc = CTCLayer(name="ctc_batch_cost")(labels, y_pred, y_lengths)
    
    # Définission du modèle
    model = Model(inputs=[inputs_data, labels, y_lengths], outputs=output_ctc, name="rnn")
    
    # compiler le model
    model.compile(optimizer=tensorflow.keras.optimizers.Adam())
    
    print(model.summary())
    
    return model

In [5]:
rnn = build_model_rnn(TARGET_SIZE)
print("Entrainement")
current_pred = keras.models.Model(rnn.get_layer(name="input_im").input, rnn.get_layer(name="dense").output)
callbacks = [tensorflow.keras.callbacks.ModelCheckpoint(filepath = 'rnn4.weights.h5', 
                                                        monitor = 'val_loss', 
                                                        mode = 'min',
                                                        save_best_only=True), 
             tensorflow.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                          patience=5,
                                                          factor=0.1,
                                                          verbose=2,
                                                          mode='min')]
history = rnn.fit(train_generator,
                  steps_per_epoch = len(trainset)//train_generator.batchSize,
                  validation_data = valid_generator,
                  validation_steps = len(validationset)//valid_generator.batchSize,
                  epochs = EPOCHS, callbacks = callbacks)

Metal device set to: Apple M1


2021-12-16 14:17:30.605445: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-12-16 14:17:30.605525: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "rnn"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_im (InputLayer)           [(None, 32, 128, 1)] 0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 32, 128, 64)  5248        input_im[0][0]                   
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 128, 64)  256         conv_1[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 128, 64)  0           batch_normalization[0][0]        
________________________________________________________________________________________________

2021-12-16 14:17:30.879805: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2021-12-16 14:17:30.879919: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2021-12-16 14:17:31.959890: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2021-12-16 14:57:42.250894: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


# Prédictions et évaluation sur le jeu de données test

In [7]:
import evaluation
print("prédictions")
rnn.load_weights('rnn4.weights.h5')
predictions = rnn.predict(test_generator)
pred_key = rnn_pred.pred_top5(predictions, MAX_LENGTH)
pred_words = rnn_pred.df_bilan_top5(testset, pred_key)

pred_words['transcript'] = testset.transcript
pred_words['transcript_is_pred1'] = None
for i in range(pred_words.shape[0]):
    pred_words.transcript_is_pred1[i] = evaluation.transcript_in_pred1(pred_words, i)

pred_words['transcript_in_top5'] = None
for i in range(pred_words.shape[0]):
    pred_words.transcript_in_top5[i] = evaluation.transcript_in_top5(pred_words, i)
pred_words.head()

prédictions


Unnamed: 0,data_path,predict_1,predict_2,predict_3,predict_4,predict_5,transcript,transcript_is_pred1,transcript_in_top5
0,data/words/m01/m01-049/m01-049-00-00.png,He,he,te,Hhe,be,He,True,True
1,data/words/m01/m01-049/m01-049-00-01.png,1e,se,1ele,le,1ae,rose,False,False
2,data/words/m01/m01-049/m01-049-00-02.png,fice,fice,fie,fie,fire,from,False,False
3,data/words/m01/m01-049/m01-049-00-03.png,In,Is,bn,bs,hn,his,False,False
4,data/words/m01/m01-049/m01-049-00-04.png,hertes,herted,herte,herates,herated,breakfast-nook,False,False


In [8]:
print("accuracy top 5:", pred_words["transcript_in_top5"].value_counts(normalize = True))
print("accuracy top 1:", pred_words["transcript_is_pred1"].value_counts(normalize = True))

accuracy top 5: False    0.577032
True     0.422968
Name: transcript_in_top5, dtype: float64
accuracy top 1: False    0.67399
True     0.32601
Name: transcript_is_pred1, dtype: float64


In [9]:
mean_cer_pred1 = pred_words.apply(evaluation.cer, axis = 1).mean()
print("Moyenne CER sur pred 1: ", mean_cer_pred1)

Moyenne CER sur pred 1:  0.4806142143505904


**Les résultats sur le test sans restriction sur la longueur des mots satisfaisant, nous sommes proches de nos résultats avec les restrictions à 10 caractères. Le test a été effectué sur 25 epochs, mais il semblerait qu'il aurait été plus bénéfique d'entraîner le modèle sur un nombre d'epoch plus important.**