In [7]:
## Modules nécessaires
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sys, os
from pathlib import Path
import glob

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TerminateOnNaN, EarlyStopping, TensorBoard

sys.path.insert(0, str(Path.cwd().parent))
import leukopy_lib as leuko
from importlib import reload

reload(leuko)

<module 'leukopy_lib' from '/home/marie-anne/code/Leukopy/notebooks/leukopy_lib.py'>

In [2]:
# Root directory of the image dataset, relative to this notebook.
path_name = '../../data/PBC_dataset_normal_DIB'

# Class labels passed to the loader as its selection list.
selected_label = ['MY', 'SNE', 'MMY', 'BNE', 'IG', 'PMY', 'NEUTROPHIL']

# The project helper returns three dataframes: train / validation / test.
dataframes = leuko.load_df_tf_dir(path_name, selection_list=selected_label)
df_train, df_valid, df_test = dataframes

In [3]:
# Parameters shared by the generators and tf.data pipelines below.
x_col, y_col = 'img_paths', 'label'   # dataframe columns: image file path / class label
batch_size = 32
img_height = img_width = 256          # target image size in pixels

In [17]:
# Augmentation only, no normalisation: EfficientNet models expect their inputs
# to be float tensors of pixels with values in the [0-255] range.
# NOTE(review): brightness_range = [0, 2] includes a factor of 0, which
# presumably yields fully black images — confirm this is intended.
train_generator = ImageDataGenerator(rotation_range = 90,
                                     horizontal_flip = True,
                                     vertical_flip = True,
                                     zoom_range = 0.6,
                                     brightness_range = [0, 2])

# No augmentation for the evaluation splits.
valid_generator = ImageDataGenerator()
test_generator = ImageDataGenerator()


def _flow_from_df(generator, dataframe, shuffle):
    """Build a batched image iterator from a dataframe of file paths and labels.

    Args:
        generator: the ImageDataGenerator providing (or not) the augmentation.
        dataframe: dataframe holding the `x_col` (paths) and `y_col` (labels) columns.
        shuffle: whether the iterator shuffles the samples.

    Returns:
        The iterator returned by `generator.flow_from_dataframe`.
    """
    return generator.flow_from_dataframe(dataframe,
                                         directory = None,   # paths are read from x_col
                                         x_col = x_col,
                                         y_col = y_col,
                                         target_size = (img_height, img_width),
                                         color_mode = 'rgb',
                                         classes = None,     # classes are inferred from y_col
                                         class_mode = 'categorical',
                                         batch_size = batch_size,
                                         shuffle = shuffle)


# Batching, resizing and loading of the images.
# Only the training split is shuffled: a fixed order on validation/test keeps
# predictions aligned with the iterator's `.classes` / `.filenames`, and does
# not change the metrics computed by `evaluate`.
training_set = _flow_from_df(train_generator, df_train, shuffle = True)
validation_set = _flow_from_df(valid_generator, df_valid, shuffle = False)
testing_set = _flow_from_df(test_generator, df_test, shuffle = False)

Found 4381 validated image filenames belonging to 7 classes.
Found 598 validated image filenames belonging to 7 classes.
Found 1245 validated image filenames belonging to 7 classes.


In [8]:
@tf.function
def load_image(filepath, resize=(img_height, img_width)):
    """Read a JPEG file from disk and resize it.

    Args:
        filepath: path of the image file to read.
        resize: (height, width) passed to `tf.image.resize`; defaults to the
            notebook-level `(img_height, img_width)`.

    Returns:
        The decoded 3-channel image, resized to `resize`.
    """
    raw_bytes = tf.io.read_file(filepath)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)   # force RGB
    resized = tf.image.resize(pixels, resize)
    return resized

In [9]:
# tf.data pipeline over the training dataframe: (file path, label) pairs ...
dataset_train = tf.data.Dataset.from_tensor_slices((df_train[x_col], df_train[y_col]))
# ... each path is replaced by the decoded, resized image (-1 lets tf.data pick the parallelism) ...
dataset_train = dataset_train.map(lambda path, lbl: (load_image(path), lbl),
                                  num_parallel_calls=-1)
# ... and the examples are grouped into batches.
dataset_train = dataset_train.batch(batch_size)

In [None]:
def data_generation(image, label):
    """Apply random augmentations to an image and pass its label through.

    Args:
        image: image tensor to augment.
        label: label associated with the image; returned unchanged.

    Returns:
        Tuple `(augmented_image, label)`.
    """
    x = image
    # The seed-less `random_*` ops are used here: their `stateless_random_*`
    # counterparts require an explicit `seed` argument, which was never supplied.
    x = tf.image.random_flip_left_right(x)
    x = tf.image.random_flip_up_down(x)
    # NOTE(review): the colour ops below presumably expect pixel values in
    # [0, 1]; confirm the scale of `image` (load_image does not rescale).
    x = tf.image.random_hue(x, 0.08)
    x = tf.image.random_saturation(x, 0.6, 1.4)
    x = tf.image.random_brightness(x, 0.05)
    x = tf.image.random_contrast(x, 0.7, 1.3)
    # NOTE(review): this crop is deterministic and halves the spatial size of
    # the image — confirm the downstream model accepts the reduced shape.
    x = tf.image.central_crop(x, central_fraction=0.5)
    # Random rotation by 0, 1, 2 or 3 quarter turns (`tf.random_uniform` is the
    # TF1 name; TF2 exposes it as `tf.random.uniform`).
    quarter_turns = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    x = tf.image.rot90(x, quarter_turns)

    return x, label



# NOTE(review): scratch experiment on CIFAR-10, unrelated to the leukocyte data.
# `flip`, `color`, `zoom`, `rotate` and `plot_images` are not defined anywhere
# in the visible notebook, so this code raises NameError on a fresh kernel —
# define them or remove this experiment.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# First 8 images, rescaled to float32 in [0, 1].
data = (x_train[0:8] / 255).astype(np.float32)
dataset = tf.data.Dataset.from_tensor_slices(data)

# Add augmentations
augmentations = [flip, color, zoom, rotate]

# Each augmentation is applied with probability 0.25.
# `tf.random.uniform` replaces the TF1-only `tf.random_uniform`.
for f in augmentations:
    dataset = dataset.map(lambda x: tf.cond(tf.random.uniform([], 0, 1) > 0.75, lambda: f(x), lambda: x), num_parallel_calls=4)
# Keep pixel values inside [0, 1] after the augmentations.
dataset = dataset.map(lambda x: tf.clip_by_value(x, 0, 1))

plot_images(dataset, n_images=8, samples_per_image=10)


In [10]:
# Let tf.data choose the parallelism / prefetch buffer sizes
# (`autotune` was previously used without being defined).
autotune = tf.data.AUTOTUNE

dataset_train_augmented = (
    dataset_train
    # dataset_train is already batched: return to single examples so that
    # shuffling and augmentation act per image and batching happens only once.
    .unbatch()
    # Cache the decoded images BEFORE the random ops; caching after them would
    # replay the same augmented, identically ordered batches every epoch.
    # NOTE(review): this is an in-memory cache of every decoded image — pass a
    # filename to `.cache()` if memory is tight.
    .cache()
    .shuffle(1024)
    .map(data_generation, num_parallel_calls=autotune)
    .batch(batch_size)
    # Prefetch last so input preparation overlaps with training.
    .prefetch(autotune)
)

<BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.string)>

In [19]:
# Index <-> label mapping of each split (alphanumeric order):
for split_name, split_set in (('Train :', training_set),
                              ('Valid :', validation_set),
                              ('Test  :', testing_set)):
    print(split_name, split_set.class_indices)

# The training split's mapping is kept as the reference.
label_map = training_set.class_indices

Train : {'BNE': 0, 'IG': 1, 'MMY': 2, 'MY': 3, 'NEUTROPHIL': 4, 'PMY': 5, 'SNE': 6}
Valid : {'BNE': 0, 'IG': 1, 'MMY': 2, 'MY': 3, 'NEUTROPHIL': 4, 'PMY': 5, 'SNE': 6}
Test  : {'BNE': 0, 'IG': 1, 'MMY': 2, 'MY': 3, 'NEUTROPHIL': 4, 'PMY': 5, 'SNE': 6}


In [29]:
# Training callbacks

# Stop training when the loss becomes NaN.
TON = TerminateOnNaN()

# Checked every epoch: save the weights only, and only when val_loss improves.
save_model = ModelCheckpoint(
    filepath = '/content/drive/MyDrive/Leukopy/EfficientNetModel/B0_augment',
    monitor = 'val_loss',
    save_best_only = True,
    save_weights_only = True,
    save_freq = 'epoch',
)

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement.
control_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.1,
    patience = 3,
    verbose = 1,
    mode = 'min',
)

# Stop after 6 epochs without val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(
    monitor = "val_loss",
    patience = 6,
    mode = 'min',
    restore_best_weights = True,
)

# TensorBoard logging
# %load_ext tensorboard
log_dir = Path('./tf/log')
tensorboard = TensorBoard(log_dir = log_dir)


In [None]:
## Model: EfficientNet for transfer learning

# EfficientNetB0 is not among the names imported at the top of the notebook
# (only EfficientNetB3 is), so it is reached through the `keras` module.
# The input shape must match the image size produced by the generators.
base_model = keras.applications.EfficientNetB0(include_top = False,
                                               weights = 'imagenet',
                                               input_shape = (img_height, img_width, 3),
                                               pooling = 'avg')

# Freeze the pretrained backbone: only the new classification head is trained.
for layer in base_model.layers:
    layer.trainable = False

# The softmax layer needs exactly one unit per class found by the generator,
# otherwise its output does not match the one-hot labels.
n_classes = len(training_set.class_indices)

model = Sequential()
model.add(base_model)
model.add(layers.Dense(units = 1280, activation = 'relu'))
model.add(layers.Dense(units = 640, activation = 'relu'))
model.add(layers.Dense(units = n_classes, activation = 'softmax'))

# Compilation
model.compile(optimizer = 'adam',
              loss = "categorical_crossentropy",
              metrics = ["accuracy"])

# Training (may stop before `epochs` because of the early-stopping / NaN callbacks).
epochs = 30
history = model.fit(training_set,
                    epochs = epochs,
                    validation_data = validation_set,
                    callbacks = [save_model, control_lr, TON, early_stopping])

In [None]:
# Training curve:
training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']

# Use the number of epochs actually recorded: training can end before `epochs`
# (callbacks, interruption), and plotting against np.arange(epochs) would then
# fail on mismatched lengths.
epochs_run = len(training_accuracy)
epoch_axis = np.arange(epochs_run)

plt.figure()
plt.plot(epoch_axis, training_accuracy, label = 'Training Set')
plt.plot(epoch_axis, validation_accuracy, label = 'Validation Set')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
# Span the recorded epochs (at least a unit-wide axis when only one epoch ran).
plt.xlim([0, max(epochs_run - 1, 1)])
plt.ylim([0.5,1])
plt.legend()
plt.show()

In [None]:
# Evaluation on the test data:

model.evaluate(x = testing_set)

PosixPath('tf/log')

In [22]:
!mkdir tf_log