# ETL with tf.data

## Libraries

In [1]:
import os

import matplotlib.pyplot as plt
import tensorflow as tf
tf.__version__

'2.4.1'

## Paths & Config

In [2]:
# --- Paths -----------------------------------------------------------------
# Dataset root. Overridable through the DATA_DIR environment variable so the
# notebook is not tied to one machine; the original location stays the default.
DATA_DIR = os.environ.get(
    "DATA_DIR",
    "/home/joseph/idermatoDeepLearning/data/raw_Data/images_medicales",
)
CHECKPOINT = "models/classifier-weights.hdf5"  # weights-only checkpoint file
SAVE_MODEL_PATH = "models/classifier.h5"       # full model written after training
TENSORBOARD = "logs"                           # TensorBoard log directory

# --- Data ------------------------------------------------------------------
VALIDATION_SPLIT = 0.2  # fraction of the images reserved for validation

WIDTH = 150
HEIGHT = 150
NUM_CHANNELS = 3
# NOTE(review): Keras documents image sizes as (height, width); the
# (WIDTH, HEIGHT) order used throughout this notebook is only harmless while
# both values are equal — revisit before using non-square inputs.
INPUT_SHAPE = (WIDTH, HEIGHT, NUM_CHANNELS)

# --- Optimisation ----------------------------------------------------------
LEARNING_RATE = 1e-4

EPOCHS = 30
BATCH_SIZE = 16

## Data

In [3]:
# One generator serves both subsets: it multiplies pixel values by 1/255 and
# holds back VALIDATION_SPLIT of the images for the 'validation' subset.
generator = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=VALIDATION_SPLIT,
    rescale=1./255,
)

# Arguments common to the training and validation iterators.
_flow_options = dict(
    directory=DATA_DIR,
    target_size=(WIDTH, HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_set = generator.flow_from_directory(
    subset='training',
    shuffle=True,
    **_flow_options
)
val_set = generator.flow_from_directory(
    subset='validation',
    **_flow_options
)

# class_indices maps each class name to its label index.
num_classes = len(train_set.class_indices)
print("Total classes:", num_classes)

Found 592 images belonging to 10 classes.
Found 144 images belonging to 10 classes.
Total classes: 10


In [4]:
def _iterator_to_dataset(iterator):
    """Wrap a Keras directory iterator in a `tf.data.Dataset`.

    Args:
        iterator: object returned by `flow_from_directory`, yielding
            `(images, one_hot_labels)` batches.

    Returns:
        A dataset of `(float32 images, float32 labels)` batches.
    """
    return tf.data.Dataset.from_generator(
        lambda: iterator,
        output_types=(tf.float32, tf.float32),
        # The batch dimension is left unknown (None): when the number of
        # images is not a multiple of BATCH_SIZE the iterator emits a shorter
        # final batch, which a fixed BATCH_SIZE dimension would reject.
        output_shapes=(
            [None, WIDTH, HEIGHT, NUM_CHANNELS],
            [None, num_classes],
        ),
    )


train_ds = _iterator_to_dataset(train_set)
val_ds = _iterator_to_dataset(val_set)

## Model Transfer Learning

In [5]:
def build_model(input_shape, num_classes, fine_tune_from=249):
    """Build an InceptionV3-based classifier for transfer learning.

    Args:
        input_shape: shape of one input image, passed to InceptionV3 as
            `input_shape`.
        num_classes: number of units of the final softmax layer.
        fine_tune_from: index of the first InceptionV3 layer left trainable;
            every layer before it is frozen. Defaults to 249, the value this
            notebook has always used.

    Returns:
        An uncompiled `tf.keras.Sequential` model: InceptionV3 (ImageNet
        weights, no top) -> global average pooling -> Dense(1024, relu) ->
        Dense(num_classes, softmax).
    """
    # NOTE(review): this sets a global Keras flag (not scoped to this model)
    # and the API is marked deprecated in TF 2.x; kept because the original
    # relies on it as an inference workaround — confirm it is still needed.
    tf.keras.backend.set_learning_phase(0) # fix bug during inference
    base_model = tf.keras.applications.inception_v3.InceptionV3(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape)

    # Freeze the first `fine_tune_from` layers (the ones closest to the
    # input); the remaining layers are fine-tuned.
    for layer in base_model.layers[:fine_tune_from]:
        layer.trainable = False
    for layer in base_model.layers[fine_tune_from:]:
        layer.trainable = True

    # Stack the new classification head on top of the convolutional base.
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(name='globalaverage2d1'),
        tf.keras.layers.Dense(1024, activation='relu', name='dense1024'),
        tf.keras.layers.Dense(units=num_classes, activation='softmax', name='classifier')
    ])

    return model

model = build_model(INPUT_SHAPE, num_classes)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Functional)    (None, 3, 3, 2048)        21802784  
_________________________________________________________________
globalaverage2d1 (GlobalAver (None, 2048)              0         
_________________________________________________________________
dense1024 (Dense)            (None, 1024)              2098176   
_________________________________________________________________
classifier (Dense)           (None, 10)                10250     
Total params: 23,911,210
Trainable params: 13,223,306
Non-trainable params: 10,687,904
_________________________________________________________________
None


## Restore checkpoints

In [6]:
# Resume from the weights of a previous run when a checkpoint file exists;
# otherwise the model keeps the weights it was built with.
if os.path.exists(CHECKPOINT):
    # The result is not bound to `_`: that name is IPython's last-output
    # variable, and a statement inside an `if` body displays nothing anyway.
    model.load_weights(CHECKPOINT, by_name=False)

### Loss & Optimizer

In [7]:
# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical'. The metric is named "acc" on purpose: the
# plotting cell reads history.history['acc'] / ['val_acc'].
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    # `learning_rate` is the supported argument name; `lr` is only a
    # deprecated backward-compatibility alias.
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
    metrics=["acc"],
)

### Callbacks

In [8]:
# TensorBoard logs go to the TENSORBOARD directory, with write_images enabled.
callback_tensorboard = tf.keras.callbacks.TensorBoard(
    write_images=True,
    log_dir=TENSORBOARD,
)

# Checkpoint callback: stores weights only (not the full model) at CHECKPOINT,
# the same file the restore cell looks for.
callback_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    verbose=0,
    filepath=CHECKPOINT,
)

# Order handed to model.fit: checkpoint first, then TensorBoard.
callbacks = [callback_checkpoint, callback_tensorboard]

## Training

In [None]:
# The datasets wrap Keras directory iterators, which keep yielding batches
# without ever signalling exhaustion, so both the training and the validation
# pass need an explicit step count. Without `validation_steps`, validation at
# the end of the first epoch would never finish.
steps_per_epoch = train_set.samples // BATCH_SIZE
validation_steps = max(1, val_set.samples // BATCH_SIZE)

history = model.fit(
    train_ds,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=callbacks
)

Epoch 1/30
 7/37 [====>.........................] - ETA: 3:30 - loss: 2.3735 - acc: 0.1942





In [None]:
def _plot_curves(x_values, train_values, val_values, metric_name):
    """Draw one figure with the training (dots) and validation (line) curves.

    Args:
        x_values: epoch numbers used for the x axis.
        train_values: per-epoch values measured on the training data.
        val_values: per-epoch values measured on the validation data.
        metric_name: capitalised metric name used in labels and title.
    """
    plt.figure(figsize=(8, 4))
    plt.plot(x_values, train_values, 'bo', label='Training ' + metric_name)
    plt.plot(x_values, val_values, 'b', label='Validation ' + metric_name)
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.title('Training and validation ' + metric_name.lower())
    plt.legend(loc='best')
    plt.show()


# Per-epoch metrics recorded by model.fit.
metrics_log = history.history
acc, val_acc = metrics_log['acc'], metrics_log['val_acc']
loss, val_loss = metrics_log['loss'], metrics_log['val_loss']
epochs = range(1, len(acc) + 1)  # epochs are numbered from 1 on the x axis

_plot_curves(epochs, acc, val_acc, 'Accuracy')
_plot_curves(epochs, loss, val_loss, 'Loss')

## Save model

In [None]:
# Make sure the target directory exists before writing, so that a missing
# `models/` folder cannot make the save fail after a full training run.
save_dir = os.path.dirname(SAVE_MODEL_PATH)
if save_dir:  # empty when SAVE_MODEL_PATH has no directory component
    os.makedirs(save_dir, exist_ok=True)
model.save(SAVE_MODEL_PATH)

# References
* https://github.com/damienpontifex/mobilenet-classifier-transfer/blob/master/binary_classifier_train.py