# ETL with tf.data

## Librairies

In [1]:
import os

import glob
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.__version__

'2.4.1'

## Paths & Config

In [2]:
DATA_DIR = "/home/joseph/idermatoDeepLearning/data/raw_Data/images_medicales"
CHECKPOINT = "models/classifier-weights.hdf5"
SAVE_MODEL_PATH = "models/classifier.h5"
TENSORBOARD = "logs"

VALIDATION_SPLIT = 0.2

WIDTH = 150
HEIGHT = 150
NUM_CHANNELS = 3
INPUT_SHAPE = (WIDTH, HEIGHT, NUM_CHANNELS)

LEARNING_RATE = 1e-4

EPOCHS = 10
BATCH_SIZE = 16

## Data

In [3]:
# listing all images paths
images = glob.glob(DATA_DIR+'/*/*.*')
np.random.shuffle(images)
n_samples = len(images)
print("Total images:", n_samples)

Total images: 736


In [4]:
# associate label to each image
labels = [path.split('/')[-2] for path in images]
len(labels)

736

### Encode label into one hot

In [5]:
# encode labels into numeric values
label_encoder = LabelEncoder()
label_encoder.fit(labels)
labels = label_encoder.transform(labels).reshape(-1, 1)

# encode labels into onehot array
one_hot_encoder = OneHotEncoder()
one_hot_encoder.fit(labels)
labels = one_hot_encoder.transform(labels).toarray()
labels.shape

(736, 10)

In [6]:
num_classes = len(label_encoder.classes_)
print("Total classes:", num_classes)

Total classes: 10


### Split train, val

In [7]:
train_images, val_images, train_labels, val_labels = train_test_split(images,
                                                                      labels,
                                                                      test_size=VALIDATION_SPLIT,
                                                                      stratify=labels)

### Build Data Generator

In [8]:
def process_path(file_path):
    label = tf.strings.split(file_path, os.sep)[-2]
#     print(label)
#     label = str(file_path.numpy().decode("utf-8")).split("/")[-2]
#     label = label_encoder.transform(label)
#     label = one_hot_encoder.transform(label)
    
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize_with_pad(img, WIDTH, HEIGHT)

    return img, label

In [9]:
# Pistes: utiliser from_range et se servir des idices pour fair le mapping avec les entrées
train_ds = tf.data.Dataset.list_files(train_images)
val_ds = tf.data.Dataset.list_files(val_images)

train_ds = train_ds.map(process_path).shuffle(1024).batch(BATCH_SIZE).repeat(None).prefetch(tf.data.experimental.AUTOTUNE)
val_ds = val_ds.map(process_path)

## Model Transfer Learning

In [10]:
def build_model(input_shape, num_classes):
    base_model = tf.keras.applications.inception_v3.InceptionV3(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape)

    # Freeze base model
    base_model.trainable = False

    # Build model
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(name='globalaverage2d1'),
        tf.keras.layers.Dense(1024, activation='relu', name='dense1024'),
        tf.keras.layers.Dense(units=num_classes, activation='softmax', name='classifier')
    ])

    return model

model = build_model(INPUT_SHAPE, num_classes)
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Functional)    (None, 3, 3, 2048)        21802784  
_________________________________________________________________
globalaverage2d1 (GlobalAver (None, 2048)              0         
_________________________________________________________________
dense1024 (Dense)            (None, 1024)              2098176   
_________________________________________________________________
classifier (Dense)           (None, 10)                10250     
Total params: 23,911,210
Trainable params: 2,108,426
Non-trainable params: 21,802,784
_________________________________________________________________
None


### Loss & Optimizer

In [11]:
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.RMSprop(lr=LEARNING_RATE),
              metrics=["acc"])

### Callbacks

In [12]:
callback_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=CHECKPOINT,
                                                         verbose=0,
                                                         save_weights_only=True)
callback_tensorboard = tf.keras.callbacks.TensorBoard(log_dir=TENSORBOARD,
                                                      write_images=True)

callbacks = [callback_checkpoint, callback_tensorboard]

## Training

In [13]:
steps_per_epoch = len(train_images) // BATCH_SIZE
validation_steps = len(val_images) // BATCH_SIZE
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=callbacks
)

Epoch 1/10


ValueError: in user code:

    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/joseph/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 10) are incompatible


## Save model

In [None]:
model.save(SAVE_MODEL_PATH)

# Références
* https://github.com/damienpontifex/mobilenet-classifier-transfer/blob/master/binary_classifier_train.py