In [13]:
"""This module implements data feeding and training loop to create model
to classify X-Ray chest images as a lab example for BSU students.
"""

__author__ = 'Alexander Soroka, soroka.a.m@gmail.com'
__copyright__ = """Copyright 2020 Alexander Soroka"""


import argparse
import glob
import numpy as np
import tensorflow as tf
import time
from tensorflow.python import keras as keras
from tensorflow.python.keras.callbacks import LearningRateScheduler


# Directory name inserted into the TensorBoard log path built in main().
LOG_DIR = 'logs'
# NOTE(review): presumably a shuffle buffer size; the shuffle call in
# create_aug_dataset uses 5 * batch_size instead -- confirm this is still needed.
SHUFFLE_BUFFER = 4
# Batch size that main() passes to both dataset builders.
BATCH_SIZE = 32
# Depth of the one-hot labels and number of units in the final Dense layer.
NUM_CLASSES = 6
# NOTE(review): presumably intended as `num_parallel_calls` for the dataset
# map stages -- confirm.
PARALLEL_CALLS=4
# Side length (in pixels) that images are resized to: RESIZE_TO x RESIZE_TO.
RESIZE_TO = 224
# NOTE(review): presumably the number of examples in the train / validation
# splits -- verify against the actual dataset.
TRAINSET_SIZE = 14034
VALSET_SIZE = 3000


def parse_proto_example(proto):
    """Decode one serialized Example record into an (image, label) pair.

    :proto: serialized Example holding the features 'image/encoded'
        (JPEG bytes) and 'image/class/label' (int64 class index)
    :returns: tuple of a float32 RGB image resized to RESIZE_TO x RESIZE_TO
        and a one-hot label vector of length NUM_CLASSES
    """
    feature_spec = {
        'image/encoded': tf.io.FixedLenFeature((), tf.string, default_value=''),
        'image/class/label': tf.io.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }
    parsed = tf.io.parse_single_example(proto, feature_spec)

    # Decode as 3-channel image, switch to float32, then bring to the
    # common spatial size.
    pixels = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    pixels = tf.image.convert_image_dtype(pixels, dtype=tf.float32)
    pixels = tf.image.resize(pixels, tf.constant([RESIZE_TO, RESIZE_TO]))

    one_hot_label = tf.one_hot(parsed['image/class/label'], depth=NUM_CLASSES)
    return pixels, one_hot_label


def normalize(image, label):
    """Apply per-image standardization; the label passes through untouched.

    :returns: (standardized image, label)
    """
    standardized = tf.image.per_image_standardization(image)
    return standardized, label

def resize(image, label):
    """Resize the image to RESIZE_TO x RESIZE_TO; the label is unchanged.

    :returns: (resized image, label)
    """
    target_size = tf.constant([RESIZE_TO, RESIZE_TO])
    resized = tf.image.resize(image, target_size)
    return resized, label

def create_dataset(filenames, batch_size):
    """Build the non-augmented input pipeline from TFRecord files.

    :filenames: TFRecord file path(s) to read serialized examples from
    :batch_size: number of examples per batch; also used as the prefetch
        buffer size
    :returns: tf.data.Dataset yielding batched (image, one-hot label) pairs
    """
    dataset = tf.data.TFRecordDataset(filenames)
    # Per-example stages, applied in this exact order.
    for stage in (parse_proto_example, resize, normalize):
        dataset = dataset.map(stage)
    batched = dataset.batch(batch_size)
    return batched.prefetch(batch_size)

def create_aug_dataset(filenames, batch_size):
    """Build the augmented, shuffled input pipeline from TFRecord files.

    :filenames: TFRecord file path(s) to read serialized examples from
    :batch_size: number of examples per batch; the shuffle buffer holds
        5 * batch_size examples and the prefetch buffer is 2 * batch_size
    :returns: tf.data.Dataset yielding batched (image, one-hot label) pairs
    """
    dataset = tf.data.TFRecordDataset(filenames)
    # Per-example stages, applied in this exact order.
    for stage in (parse_proto_example, augmentation, resize, normalize):
        dataset = dataset.map(stage)
    # Shuffle individual examples before they are grouped into batches.
    shuffled = dataset.shuffle(buffer_size=5 * batch_size)
    batched = shuffled.batch(batch_size)
    return batched.prefetch(2 * batch_size)

def augmentation(image, label):
    """Randomly mirror the image horizontally; the label is left as-is.

    :image: image tensor; converted to float32 before flipping
    :label: passed through unchanged
    :returns: (augmented float32 image, label)
    """
    as_float = tf.image.convert_image_dtype(image, tf.float32)
    flipped = tf.image.random_flip_left_right(as_float)
    return flipped, label

def build_model():
    """Build a MobileNetV2-based classifier with a new softmax head.

    The ImageNet-pretrained MobileNetV2 backbone (without its original
    classification top) is followed by global average pooling, dropout and
    a Dense softmax layer with NUM_CLASSES units.

    :returns: uncompiled tf.keras.Model mapping (RESIZE_TO, RESIZE_TO, 3)
        images to NUM_CLASSES class probabilities
    """
    # Keep the model input in sync with the size produced by the data pipeline.
    input_shape = (RESIZE_TO, RESIZE_TO, 3)

    # `classes` only applies together with include_top=True, so it is not
    # passed here; the Dense head below defines the number of outputs.
    backbone = tf.keras.applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet',
    )

    inputs = tf.keras.Input(shape=input_shape)
    x = backbone(inputs)
    # Global pooling already yields a (batch, channels) tensor, so no
    # additional Flatten layer is required before the Dense head.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = tf.keras.layers.Dense(
        NUM_CLASSES, activation=tf.keras.activations.softmax)(x)

    return tf.keras.Model(inputs, outputs)


def main():
    """Train the classifier and checkpoint the best-performing weights.

    Collects the train/validation TFRecord files, builds and compiles the
    model, then runs 100 epochs of training. TensorBoard logs go to a
    timestamped directory, and weights are saved whenever
    'val_categorical_accuracy' improves.

    :raises FileNotFoundError: if no TFRecord files match the train or
        validation pattern
    """
    train_path = '/content/drive/My Drive/SMOMI/dataset/train*'
    test_path = '/content/drive/My Drive/SMOMI/dataset/val*'

    # glob returns files in arbitrary, filesystem-dependent order; sort so
    # that the record order is the same on every run.
    train_files = sorted(glob.glob(train_path))
    validation_files = sorted(glob.glob(test_path))

    # An empty file list would yield an empty dataset and an obscure failure
    # deep inside fit(); fail early with a message naming the pattern.
    for pattern, files in ((train_path, train_files),
                           (test_path, validation_files)):
        if not files:
            raise FileNotFoundError(
                'No TFRecord files match pattern: {}'.format(pattern))

    train_dataset = create_aug_dataset(train_files, BATCH_SIZE)
    validation_dataset = create_dataset(validation_files, BATCH_SIZE)

    model = build_model()

    model.compile(
        # `learning_rate` is the supported argument name; `lr` is a
        # deprecated alias that newer Keras releases reject.
        optimizer=tf.keras.optimizers.Adam(learning_rate=4e-6),
        loss=tf.keras.losses.categorical_crossentropy,
        metrics=[tf.keras.metrics.categorical_accuracy],
    )

    # The timestamp keeps the logs of separate runs in separate directories.
    log_dir = '/content/drive/My Drive/SMOMI/{}/lab4_1/ilcd-{}'.format(LOG_DIR, time.time())
    weights_file = "/content/drive/My Drive/SMOMI/w-l-4_1new.hdf5"

    model.fit(
        train_dataset,
        epochs=100,
        validation_data=validation_dataset,
        callbacks=[
            tf.keras.callbacks.TensorBoard(log_dir),
            # Keep only the weights with the best validation accuracy so far.
            tf.keras.callbacks.ModelCheckpoint(
                filepath=weights_file,
                monitor='val_categorical_accuracy',
                mode='max',
                save_best_only=True,
                save_weights_only=True,
                verbose=1,
            ),
        ]
    )

# Start training only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Epoch 1/100
    439/Unknown - 48s 110ms/step - loss: 1.8281 - categorical_accuracy: 0.0938
Epoch 00001: val_categorical_accuracy improved from -inf to 0.12900, saving model to /content/drive/My Drive/SMOMI/w-l-4_1new.hdf5
Epoch 2/100
Epoch 00002: val_categorical_accuracy improved from 0.12900 to 0.15400, saving model to /content/drive/My Drive/SMOMI/w-l-4_1new.hdf5
Epoch 3/100
Epoch 00003: val_categorical_accuracy improved from 0.15400 to 0.18733, saving model to /content/drive/My Drive/SMOMI/w-l-4_1new.hdf5
Epoch 4/100
Epoch 00004: val_categorical_accuracy improved from 0.18733 to 0.22333, saving model to /content/drive/My Drive/SMOMI/w-l-4_1new.hdf5
Epoch 5/100
Epoch 00005: val_categorical_accuracy improved from 0.22333 to 0.25433, saving model to /content/drive/My Drive/SMOMI/w-l-4_1new.hdf5
Epoch 6/100
Epoch 00006: val_categorical_accuracy improved from 0.25433 to 0.28433, saving model to /content/drive/My Drive/SMOMI/w-l-4_1new.hdf5
Epoch 7/100
Epoch 00007: val_categorical_accurac