In [1]:
import os
import keras
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import librosa
import librosa.display
import re
from sklearn.model_selection import train_test_split
import math
from keras import backend as K

In [2]:
ds_dir = '/arch/mnist_data'
filenames = []

for root, dirs, files in os.walk(ds_dir):
    for file in files:
        filenames.append(os.path.join(root,file))
filenames = [re.sub(r'(\\)', '/', name) for name in filenames[1:]]

samples_number = len(filenames)
print('Total number of samples:', samples_number)

Total number of samples: 30000


In [3]:
labels = np.array(["0","1","2","3","4","5","6","7","8","9"])
train_files,test_files = train_test_split(filenames, test_size=0.3, shuffle=True, random_state=1)
test_files,val_files = train_test_split(test_files, test_size=0.33, shuffle=True, random_state=1)

print('Training data size', len(train_files))
print('Test data size', len(test_files))
print('Validation data size', len(val_files))

Training data size 21000
Test data size 6030
Validation data size 2970


In [4]:
def get_label(file_path):
    label = re.findall(r'/([0-9])_', file_path)[0]
    index = np.argmax(label == labels)
    label_id = np.zeros(len(labels), dtype=np.float64)
    label_id[index] = 1
    return label_id

In [5]:
def get_ms(file_path, input_shape):
    y, sr = librosa.load(file_path)
    ms = librosa.feature.melspectrogram(y=y, sr=sr,n_fft=2048, hop_length=512)
    ms = np.resize(ms, input_shape)
    ms = ms.astype(np.float64, copy=False)
    
    ms = np.log(ms + 1e-9)
    ms = librosa.util.normalize(ms)
    
    ms = np.repeat(ms[..., np.newaxis], 3, -1)
    return ms

In [6]:
def preprocess_dataset(files, input_shape):
    output_labels = np.array([get_label(file) for file in files])
    output_ms = np.array([get_ms(file, input_shape) for file in files])
    return output_ms, output_labels

In [7]:
batch_size = 64
epoch=15

In [8]:
tf.compat.v1.enable_eager_execution()

In [9]:
input_shape = (128,44)
train_ms,train_labels = preprocess_dataset(train_files, input_shape)
test_ms,test_labels = preprocess_dataset(test_files, input_shape)
val_ms,val_labels = preprocess_dataset(val_files, input_shape)

train_dataset = tf.data.Dataset.from_tensor_slices((train_ms, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_ms, test_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((val_ms, val_labels))

train_batches = train_dataset.batch(batch_size)
test_batches = test_dataset.batch(batch_size)
val_batches = val_dataset.batch(batch_size)

In [10]:
def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_score(y_true, y_pred):
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    return 2*((prec*rec)/(prec+rec+K.epsilon()))

In [11]:
import sklearn
def metric_calc(model):
    y_true=test_labels
    y_pred = model.predict(test_batches, verbose=2)
    
    y_pred = np.array([np.argmax(i) for i in y_pred])   
    y_true = np.array([np.argmax(i) for i in y_true])
    
    accuracy = sklearn.metrics.accuracy_score(y_true, y_pred)
    precision = sklearn.metrics.precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = sklearn.metrics.recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = sklearn.metrics.f1_score(y_true, y_pred, average='weighted', zero_division=0)
    
    return accuracy, precision, recall, f1

# DenseNet121

In [64]:
model_densenet121 = tf.keras.applications.densenet.DenseNet121(input_shape=input_shape+(3,), include_top=False, weights=None)

densenet121 = keras.Sequential([
    model_densenet121,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))               
])

densenet121.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_densenet121 = densenet121.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches,
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [65]:
metric_calc(densenet121)

95/95 - 69s - 69s/epoch - 729ms/step


(0.982089552238806, 0.9821046468661228, 0.982089552238806, 0.9820814800019442)

In [66]:
test_loss, test_acc, test_f1, test_prec, test_rec = densenet121.evaluate(test_batches, verbose=2)

95/95 - 67s - loss: 0.0659 - accuracy: 0.9821 - f1_score: 0.4508 - precision: 0.2911 - recall: 0.9998 - 67s/epoch - 700ms/step


In [67]:
densenet121.save("models/densenet121_no_weights")



INFO:tensorflow:Assets written to: models/densenet121_no_weights\assets


INFO:tensorflow:Assets written to: models/densenet121_no_weights\assets


# DenseNet169

In [68]:
model_densenet169 = tf.keras.applications.densenet.DenseNet169(input_shape=input_shape+(3,), include_top=False, weights=None)

densenet169 = keras.Sequential([
    model_densenet169,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                  
])

densenet169.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_densenet169 = densenet169.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches,
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [70]:
metric_calc(densenet169)

95/95 - 102s - 102s/epoch - 1s/step


(0.9817578772802653,
 0.9817573768670986,
 0.9817578772802653,
 0.9817387830120752)

In [69]:
test_loss, test_acc, test_f1, test_prec, test_rec = densenet169.evaluate(test_batches, verbose=2)

95/95 - 101s - loss: 0.0660 - accuracy: 0.9818 - f1_score: 0.4737 - precision: 0.3105 - recall: 0.9995 - 101s/epoch - 1s/step


In [71]:
densenet169.save("models/densenet169_no_weights")



INFO:tensorflow:Assets written to: models/densenet169_no_weights\assets


INFO:tensorflow:Assets written to: models/densenet169_no_weights\assets


# DenseNet201

In [72]:
model_densenet201 = tf.keras.applications.densenet.DenseNet201(input_shape=input_shape+(3,), include_top=False, weights=None)

densenet201 = keras.Sequential([
    model_densenet201,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                  
])

densenet201.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_densenet201 = densenet201.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches,
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [74]:
metric_calc(densenet201)

95/95 - 147s - 147s/epoch - 2s/step


(0.982089552238806, 0.9820771970363731, 0.982089552238806, 0.9820772018039268)

In [73]:
test_loss, test_acc, test_f1, test_prec, test_rec = densenet201.evaluate(test_batches, verbose=2)

95/95 - 147s - loss: 0.0638 - accuracy: 0.9821 - f1_score: 0.4491 - precision: 0.2896 - recall: 0.9998 - 147s/epoch - 2s/step


In [75]:
densenet201.save("models/densenet201_no_weights")



INFO:tensorflow:Assets written to: models/densenet201_no_weights\assets


INFO:tensorflow:Assets written to: models/densenet201_no_weights\assets


# ShuffleNetV2

In [23]:
# Source code: https://github.com/opconty/keras-shufflenetV2/blob/master/shufflenetv2.py
import keras_applications
import keras.backend as K

def channel_split(x, name=''):
    # equipartition
    in_channles = x.shape.as_list()[-1]
    ip = in_channles // 2
    c_hat = keras.layers.Lambda(lambda z: z[:, :, :, 0:ip], name='%s/sp%d_slice' % (name, 0))(x)
    c = keras.layers.Lambda(lambda z: z[:, :, :, ip:], name='%s/sp%d_slice' % (name, 1))(x)
    return c_hat, c

def channel_shuffle(x):
    height, width, channels = x.shape.as_list()[1:]
    channels_per_split = channels // 2
    x = K.reshape(x, [-1, height, width, 2, channels_per_split])
    x = K.permute_dimensions(x, (0,1,2,4,3))
    x = K.reshape(x, [-1, height, width, channels])
    return x

def shuffle_unit(inputs, out_channels, bottleneck_ratio,strides=2,stage=1,block=1):
    if K.image_data_format() == 'channels_last':
        bn_axis = -1
    else:
        raise ValueError('Only channels last supported')

    prefix = 'stage{}/block{}'.format(stage, block)
    bottleneck_channels = int(out_channels * bottleneck_ratio)
    if strides < 2:
        c_hat, c = channel_split(inputs, '{}/spl'.format(prefix))
        inputs = c

    x = keras.layers.Conv2D(bottleneck_channels, kernel_size=(1,1), strides=1, padding='same', 
                            name='{}/1x1conv_1'.format(prefix))(inputs)
    x = keras.layers.BatchNormalization(axis=bn_axis, name='{}/bn_1x1conv_1'.format(prefix))(x)
    x = keras.layers.Activation('relu', name='{}/relu_1x1conv_1'.format(prefix))(x)
    x = keras.layers.DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', name='{}/3x3dwconv'.format(prefix))(x)
    x = keras.layers.BatchNormalization(axis=bn_axis, name='{}/bn_3x3dwconv'.format(prefix))(x)
    x = keras.layers.Conv2D(bottleneck_channels, kernel_size=1,strides=1,padding='same', name='{}/1x1conv_2'.format(prefix))(x)
    x = keras.layers.BatchNormalization(axis=bn_axis, name='{}/bn_1x1conv_2'.format(prefix))(x)
    x = keras.layers.Activation('relu', name='{}/relu_1x1conv_2'.format(prefix))(x)
    
    if strides < 2:
        ret = keras.layers.Concatenate(axis=bn_axis, name='{}/concat_1'.format(prefix))([x, c_hat])
    else:
        s2 = keras.layers.DepthwiseConv2D(kernel_size=3, strides=2, padding='same', 
                                name='{}/3x3dwconv_2'.format(prefix))(inputs)
        s2 = keras.layers.BatchNormalization(axis=bn_axis, name='{}/bn_3x3dwconv_2'.format(prefix))(s2)
        s2 = keras.layers.Conv2D(bottleneck_channels, kernel_size=1,strides=1,padding='same', 
                                 name='{}/1x1_conv_3'.format(prefix))(s2)
        s2 = keras.layers.BatchNormalization(axis=bn_axis, name='{}/bn_1x1conv_3'.format(prefix))(s2)
        s2 = keras.layers.Activation('relu', name='{}/relu_1x1conv_3'.format(prefix))(s2)
        ret = keras.layers.Concatenate(axis=bn_axis, name='{}/concat_2'.format(prefix))([x, s2])

    ret = keras.layers.Lambda(channel_shuffle, name='{}/channel_shuffle'.format(prefix))(ret)

    return ret

def block(x, channel_map, bottleneck_ratio, repeat=1, stage=1):
    x = shuffle_unit(x, out_channels=channel_map[stage-1],
                      strides=2,bottleneck_ratio=bottleneck_ratio,stage=stage,block=1)

    for i in range(1, repeat+1):
        x = shuffle_unit(x, out_channels=channel_map[stage-1],strides=1,
                          bottleneck_ratio=bottleneck_ratio,stage=stage, block=(1+i))

    return x

def ShuffleNetV2(include_top=True,
                 input_tensor=None,
                 scale_factor=1.0,
                 pooling='max',
                 input_shape=(3,128,44),
                 load_model=None,
                 num_shuffle_units=[3,7,3],
                 bottleneck_ratio=1,
                 classes=len(labels)):
    name = 'ShuffleNetV2_{}_{}_{}'.format(scale_factor, bottleneck_ratio, "".join([str(x) for x in num_shuffle_units]))
    input_shape = keras_applications.imagenet_utils._obtain_input_shape(input_shape, default_size=224, 
                                    min_size=28, require_flatten=include_top, data_format=K.image_data_format())
    out_dim_stage_two = {0.5:48, 1:116, 1.5:176, 2:244}

    if pooling not in ['max', 'avg']:
        raise ValueError('Invalid value for pooling')
    if not (float(scale_factor)*4).is_integer():
        raise ValueError('Invalid value for scale_factor, should be x over 4')
    exp = np.insert(np.arange(len(num_shuffle_units), dtype=np.float32), 0, 0)  # [0., 0., 1., 2.]
    out_channels_in_stage = 2**exp
    out_channels_in_stage *= out_dim_stage_two[bottleneck_ratio]  #  calculate output channels for each stage
    out_channels_in_stage[0] = 24  # first stage has always 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    if input_tensor is None:
        img_input = keras.layers.Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = keras.layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # create shufflenet architecture
    x = keras.layers.Conv2D(filters=out_channels_in_stage[0], kernel_size=(3, 3), padding='same', 
                            use_bias=False, strides=(2, 2), activation='relu', name='conv1')(img_input)
    x = keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same', name='maxpool1')(x)

    # create stages containing shufflenet units beginning at stage 2
    for stage in range(len(num_shuffle_units)):
        repeat = num_shuffle_units[stage]
        x = block(x, out_channels_in_stage,
                   repeat=repeat,
                   bottleneck_ratio=bottleneck_ratio,
                   stage=stage + 2)

    if bottleneck_ratio < 2:
        k = 1024
    else:
        k = 2048
    x = keras.layers.Conv2D(k, kernel_size=1, padding='same', strides=1, name='1x1conv5_out', activation='relu')(x)

    if pooling == 'avg':
        x = keras.layers.GlobalAveragePooling2D(name='global_avg_pool')(x)
    elif pooling == 'max':
        x = keras.layers.GlobalMaxPooling2D(name='global_max_pool')(x)

    if include_top:
        x = keras.layers.Dense(classes, name='fc')(x)
        x = keras.layers.Activation('softmax', name='softmax')(x)
        
    if input_tensor:
        inputs = keras.utils.get_source_inputs(input_tensor)

    else:
        inputs = img_input

    model = keras.models.Model(inputs, x, name=name)

    if load_model:
        model.load_weights('', by_name=True)

    return model

In [24]:
shufflenetv2 = ShuffleNetV2(include_top=True, input_shape=input_shape+(3,), load_model=None, classes=len(labels))

shufflenetv2.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', f1_score, precision, recall])

history_shufflenetv2 = shufflenetv2.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches,
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [25]:
test_loss, test_acc, test_f1, test_prec, test_rec = shufflenetv2.evaluate(test_batches, verbose=2)

95/95 - 33s - loss: 0.1694 - accuracy: 0.9478 - f1_score: 0.9478 - precision: 0.9519 - recall: 0.9439 - 33s/epoch - 352ms/step


In [26]:
shufflenetv2.save("models/shufflenetv2_no_weights")



INFO:tensorflow:Assets written to: models/shufflenetv2_no_weights\assets


INFO:tensorflow:Assets written to: models/shufflenetv2_no_weights\assets


# InceptionResNetV2

In [76]:
input_shapeIR = (128,75)

train_msIR,train_labelsIR = preprocess_dataset(train_files, input_shapeIR)
test_msIR,test_labelsIR = preprocess_dataset(test_files, input_shapeIR)
val_msIR,val_labelsIR = preprocess_dataset(val_files, input_shapeIR)

train_datasetIR = tf.data.Dataset.from_tensor_slices((train_msIR, train_labelsIR))
test_datasetIR = tf.data.Dataset.from_tensor_slices((test_msIR, test_labelsIR))
val_datasetIR = tf.data.Dataset.from_tensor_slices((val_msIR, val_labelsIR))

train_batchesIR = train_datasetIR.shuffle(shuffle_buffer_size).batch(batch_size)
test_batchesIR = test_datasetIR.batch(batch_size)
val_batchesIR = val_datasetIR.batch(batch_size)

In [77]:
model_inceptionresnetv2 = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(input_shape=input_shapeIR+(3,), 
                                                                                      include_top=False, weights=None)

inceptionresnetv2 = keras.Sequential([
    model_inceptionresnetv2,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

inceptionresnetv2.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_inceptionresnetv2 = inceptionresnetv2.fit(train_batchesIR,
                    epochs=epoch,
                    validation_data=val_batchesIR, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [82]:
metric_calc(inceptionresnetv2)

95/95 - 207s - 207s/epoch - 2s/step


(0.9683250414593698,
 0.9685709623150446,
 0.9683250414593698,
 0.9683220455428999)

In [83]:
test_loss, test_acc, test_f1, test_prec, test_rec = inceptionresnetv2.evaluate(test_batchesIR, verbose=2)

95/95 - 203s - loss: 0.1008 - accuracy: 0.9683 - f1_score: 0.4269 - precision: 0.2715 - recall: 0.9995 - 203s/epoch - 2s/step


In [84]:
inceptionresnetv2.save("models/inceptionresnetv2_no_weights")



INFO:tensorflow:Assets written to: models/inceptionresnetv2_no_weights\assets


INFO:tensorflow:Assets written to: models/inceptionresnetv2_no_weights\assets


# Xception

In [86]:
input_shapeX = (128,71)

train_msX,train_labelsX = preprocess_dataset(train_files, input_shapeX)
test_msX,test_labelsX = preprocess_dataset(test_files, input_shapeX)
val_msX,val_labelsX = preprocess_dataset(val_files, input_shapeX)

train_datasetX = tf.data.Dataset.from_tensor_slices((train_msX, train_labelsX))
test_datasetX = tf.data.Dataset.from_tensor_slices((test_msX, test_labelsX))
val_datasetX = tf.data.Dataset.from_tensor_slices((val_msX, val_labelsX))

train_batchesX = train_datasetX.shuffle(shuffle_buffer_size).batch(batch_size)
test_batchesX = test_datasetX.batch(batch_size)
val_batchesX = val_datasetX.batch(batch_size)

In [87]:
model_xception = tf.keras.applications.xception.Xception(input_shape=input_shapeX+(3,), include_top=False, weights=None)

xception = keras.Sequential([
    model_xception,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

xception.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_xception = xception.fit(train_batchesX,
                    epochs=epoch,
                    validation_data=val_batchesX, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [88]:
metric_calc(xception)

95/95 - 148s - 148s/epoch - 2s/step


(0.9714759535655058,
 0.9715861116605584,
 0.9714759535655058,
 0.9714719244720276)

In [89]:
test_loss, test_acc, test_f1, test_prec, test_rec = xception.evaluate(test_batchesX, verbose=2)

95/95 - 147s - loss: 0.0934 - accuracy: 0.9715 - f1_score: 0.4727 - precision: 0.3097 - recall: 0.9987 - 147s/epoch - 2s/step


In [90]:
xception.save("models/xception_no_weights")



INFO:tensorflow:Assets written to: models/xception_no_weights\assets


INFO:tensorflow:Assets written to: models/xception_no_weights\assets


# MobileNet

In [97]:
model_mobilenet = tf.keras.applications.mobilenet.MobileNet(input_shape=input_shape+(3,), include_top=False, weights=None)

mobilenet = keras.Sequential([
    model_mobilenet,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

mobilenet.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_mobilenet = mobilenet.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches,
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [98]:
metric_calc(mobilenet)

95/95 - 21s - 21s/epoch - 222ms/step


(0.9646766169154228, 0.964804337226236, 0.9646766169154228, 0.9646772641826247)

In [99]:
test_loss, test_acc, test_f1, test_prec, test_rec = mobilenet.evaluate(test_batches, verbose=2)

95/95 - 21s - loss: 0.1363 - accuracy: 0.9647 - f1_score: 0.4800 - precision: 0.3161 - recall: 0.9980 - 21s/epoch - 218ms/step


In [100]:
mobilenet.save("models/mobilenet_no_weights")



INFO:tensorflow:Assets written to: models/mobilenet_no_weights\assets


INFO:tensorflow:Assets written to: models/mobilenet_no_weights\assets


# MobileNetV2

In [42]:
input_shapeMN2 = (128,64)

train_msMN2,train_labelsMN2 = preprocess_dataset(train_files, input_shapeMN2)
test_msMN2,test_labelsMN2 = preprocess_dataset(test_files, input_shapeMN2)
val_msMN2,val_labelsMN2 = preprocess_dataset(val_files, input_shapeMN2)

train_datasetMN2 = tf.data.Dataset.from_tensor_slices((train_msMN2, train_labelsMN2))
test_datasetMN2 = tf.data.Dataset.from_tensor_slices((test_msMN2, test_labelsMN2))
val_datasetMN2 = tf.data.Dataset.from_tensor_slices((val_msMN2, val_labelsMN2))

train_batchesMN2 = train_datasetMN2.shuffle(shuffle_buffer_size).batch(batch_size)
test_batchesMN2 = test_datasetMN2.batch(batch_size)
val_batchesMN2 = val_datasetMN2.batch(batch_size)

In [43]:
# https://github.com/marload/ConvNets-TensorFlow2
from tensorflow.keras import layers, Sequential, Model


def ReLU6():
    return layers.Lambda(lambda x: tf.nn.relu6(x))


class LinearBottleNeck(layers.Layer):
    def __init__(self, in_channels, out_channels, strides=1, t=6):
        super(LinearBottleNeck, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.strides = strides

        self.residual = Sequential([
            layers.Conv2D(in_channels * t,
                          (1, 1),
                          strides=1,
                          padding='same'),
            layers.BatchNormalization(),
            ReLU6(),
            layers.DepthwiseConv2D((3, 3),
                                   strides=strides,
                                   padding='same'),
            layers.BatchNormalization(),
            ReLU6(),
            layers.Conv2D(out_channels,
                          (1, 1),
                          strides=1,
                          padding='same'),
            layers.BatchNormalization(),
        ])

    def call(self, x, training=False):
        residual = self.residual(x, training=training)

        if self.strides == 1 and self.in_channels == self.out_channels:
            residual += x

        return residual


class MobileNetV2(Model):
    def __init__(self, num_classes, input_shape):
        super(MobileNetV2, self).__init__()

        self.front = Sequential([
            layers.Input(input_shape),
            layers.BatchNormalization(),
            ReLU6()
        ])
        self.stage1 = LinearBottleNeck(32, 16, 1, 1)
        self.stage2 = self._make_stage(2, 16, 24, 2, 6)
        self.stage3 = self._make_stage(3, 24, 32, 2, 6)
        self.stage4 = self._make_stage(4, 32, 64, 2, 6)
        self.stage5 = self._make_stage(3, 64, 96, 1, 6)
        self.stage6 = self._make_stage(3, 96, 160, 1, 6)
        self.stage7 = LinearBottleNeck(160, 320, 1, 6)

        self.conv1 = layers.Conv2D(filters=1280,
                                   kernel_size=(1, 1),
                                   strides=1,
                                   padding="same")
        self.ap = layers.AveragePooling2D((7, 7))
        self.fc = layers.Dense(num_classes, activation='softmax')
    def _make_stage(self, repeat, in_channels, out_channels, strides, t):
        nets = Sequential()
        nets.add(LinearBottleNeck(in_channels, out_channels, strides, t))

        while repeat - 1:
            nets.add(LinearBottleNeck(out_channels, out_channels, 1, t))
            repeat -= 1
        return nets

    def call(self, inputs, training=False):
        x = self.front(inputs)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        x = self.stage6(x)
        x = self.stage7(x)
        x = self.conv1(x)
        x = self.ap(x)
        #x = tf.reshape(x, (x.shape[0], -1))
        x = keras.layers.GlobalAveragePooling2D()(x)
        x = self.fc(x)
        return x


def mobilenetv2(num_classes, input_shape):
    return MobileNetV2(num_classes, input_shape)

In [44]:
mobilenetv2 = mobilenetv2(len(labels), input_shapeMN2+(3,))

mobilenetv2.compile(optimizer=keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', f1_score, precision, recall])

history_mobilenetv2 = mobilenetv2.fit(train_batchesMN2,
                             epochs=epoch,
                             validation_data=val_batchesMN2,
                             verbose=1,
                             shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [45]:
test_loss, test_acc, test_f1, test_prec, test_rec = mobilenetv2.evaluate(test_batchesMN2, verbose=2)

95/95 - 146s - loss: 0.0594 - accuracy: 0.9831 - f1_score: 0.9835 - precision: 0.9844 - recall: 0.9826 - 146s/epoch - 2s/step


In [46]:
mobilenetv2.save("models/mobilenetv2_no_weights")



INFO:tensorflow:Assets written to: models/mobilenetv2_no_weights\assets


INFO:tensorflow:Assets written to: models/mobilenetv2_no_weights\assets


# MobileNetV3Small

In [56]:
def _make_divisible(v, divisor, min_value=None):
    """https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)

    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor

    return new_v

def get_layer(layer_name, layer_dict, default_layer):
    if layer_name is None:
        return default_layer

    if layer_name in layer_dict.keys():
        return layer_dict.get(layer_name)
    else:
        raise NotImplementedError(f"Layer [{layer_name}] is not implemented")
        
class LayerNamespaceWrapper(tf.keras.layers.Layer):
    """`NameWrapper` defines auxiliary layer that wraps given `layer`
    with given `name`. This is useful for better visualization of network
    in TensorBoard.
    Default behavior of namespaces defined with nested `tf.keras.Sequential`
    layers is to keep only the most high-level `tf.keras.Sequential` name.
    """
    def __init__(
            self,
            layer: tf.keras.layers.Layer,
            name: str,
    ):
        super().__init__(name=name)
        self.wrapped_layer = tf.keras.Sequential(
            [
                layer,
            ],
            name=name,
        )

    def call(self, input):
        return self.wrapped_layer(input)

In [57]:
class Identity(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__(name="Identity")

    def call(self, input):
        return input


class ReLU6(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__(name="ReLU6")
        self.relu6 = tf.keras.layers.ReLU(max_value=6, name="ReLU6")

    def call(self, input):
        return self.relu6(input)


class HardSigmoid(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__(name="HardSigmoid")
        self.relu6 = ReLU6()

    def call(self, input):
        return self.relu6(input + 3.0) / 6.0


class HardSwish(tf.keras.layers.Layer):
    def __init__(self, name="HardSwish"):
        super().__init__(name=name)
        self.hard_sigmoid = HardSigmoid()

    def call(self, input):
        return input * self.hard_sigmoid(input)


class Squeeze(tf.keras.layers.Layer):
    """Squeeze the second and third dimensions of given tensor.
    (batch, 1, 1, channels) -> (batch, channels)
    """
    def __init__(self):
        super().__init__(name="Squeeze")

    def call(self, input):
        x = tf.keras.backend.squeeze(input, 1)
        x = tf.keras.backend.squeeze(x, 1)
        return x


class GlobalAveragePooling2D(tf.keras.layers.Layer):
    """Return tensor of output shape (batch_size, rows, cols, channels)
    where rows and cols are equal to 1. Output shape of
    `tf.keras.layer.GlobalAveragePooling2D` is (batch_size, channels),
    """
    def __init__(self):
        super().__init__(name="GlobalAveragePooling2D")

    def build(self, input_shape):
        pool_size = tuple(map(int, input_shape[1:3]))
        self.gap = tf.keras.layers.AveragePooling2D(
            pool_size=pool_size,
            name=f"AvgPool{pool_size[0]}x{pool_size[1]}",
        )

        super().build(input_shape)

    def call(self, input):
        return self.gap(input)


class BatchNormalization(tf.keras.layers.Layer):
    """Searching fo MobileNetV3: All our convolutional layers
    use batch-normalization layers with average decay of 0.99.
    """
    def __init__(
            self,
            momentum: float=0.99,
            name="BatchNormalization",
    ):
        super().__init__(name=name)

        self.bn = tf.keras.layers.BatchNormalization(
            momentum=0.99,
            name="BatchNormalization",
        )

    def call(self, input):
        return self.bn(input)


class ConvNormAct(tf.keras.layers.Layer):
    def __init__(
            self,
            filters: int,
            kernel_size: int=3,
            stride: int=1,
            padding: int=0,
            norm_layer: str=None,
            act_layer: str="relu",
            use_bias: bool=True,
            l2_reg: float=1e-5,
            name: str="ConvNormAct",
    ):
        super().__init__(name=name)

        if padding > 0:
            self.pad = tf.keras.layers.ZeroPadding2D(
                padding=padding,
                name=f"Padding{padding}x{padding}",
            )
        else:
            self.pad = Identity()

        self.conv = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=stride,
            name=f"Conv{kernel_size}x{kernel_size}",
            kernel_regularizer=tf.keras.regularizers.l2(l2_reg),
            use_bias=use_bias,
        )

        _available_normalization = {
            "bn": BatchNormalization(),
            }
        self.norm = get_layer(norm_layer, _available_normalization, Identity())

        _available_activation = {
            "relu": tf.keras.layers.ReLU(name="ReLU"),
            "relu6": ReLU6(),
            "hswish": HardSwish(),
            "hsigmoid": HardSigmoid(),
            "softmax": tf.keras.layers.Softmax(name="Softmax"),
        }
        self.act = get_layer(act_layer, _available_activation, Identity())

    def call(self, input):
        x = self.pad(input)
        x = self.conv(x)
        x = self.norm(x)
        x = self.act(x)
        return x


class Bneck(tf.keras.layers.Layer):
    def __init__(
            self,
            out_channels: int,
            exp_channels: int,
            kernel_size: int,
            stride: int,
            use_se: bool,
            act_layer: str,
            l2_reg: float=1e-5,
    ):
        super().__init__(name="Bneck")

        self.out_channels = out_channels
        self.stride = stride
        self.use_se = use_se

        # Expand
        self.expand = ConvNormAct(
            exp_channels,
            kernel_size=1,
            norm_layer="bn",
            act_layer=act_layer,
            use_bias=False,
            l2_reg=l2_reg,
            name="Expand",
        )

        # Depthwise
        dw_padding = (kernel_size - 1) // 2
        self.pad = tf.keras.layers.ZeroPadding2D(
            padding=dw_padding,
            name=f"Depthwise/Padding{dw_padding}x{dw_padding}",
        )
        self.depthwise = tf.keras.layers.DepthwiseConv2D(
            kernel_size=kernel_size,
            strides=stride,
            name=f"Depthwise/DWConv{kernel_size}x{kernel_size}",
            depthwise_regularizer=tf.keras.regularizers.l2(l2_reg),
            use_bias=False,
        )
        self.bn = BatchNormalization(name="Depthwise/BatchNormalization")
        if self.use_se:
            self.se = SEBottleneck(
                l2_reg=l2_reg,
                name="Depthwise/SEBottleneck",
            )

        _available_activation = {
            "relu": tf.keras.layers.ReLU(name="Depthwise/ReLU"),
            "hswish": HardSwish(name="Depthwise/HardSwish"),
        }
        self.act = get_layer(act_layer, _available_activation, Identity())

        # Project
        self.project = ConvNormAct(
            out_channels,
            kernel_size=1,
            norm_layer="bn",
            act_layer=None,
            use_bias=False,
            l2_reg=l2_reg,
            name="Project",
        )

    def build(self, input_shape):
        self.in_channels = int(input_shape[3])
        super().build(input_shape)

    def call(self, input):
        x = self.expand(input)
        x = self.pad(x)
        x = self.depthwise(x)
        x = self.bn(x)
        if self.use_se:
            x = self.se(x)
        x = self.act(x)
        x = self.project(x)

        if self.stride == 1 and self.in_channels == self.out_channels:
            return input + x
        else:
            return x


class SEBottleneck(tf.keras.layers.Layer):
    def __init__(
            self,
            reduction: int=4,
            l2_reg: float=0.01,
            name: str="SEBottleneck",
    ):
        super().__init__(name=name)

        self.reduction = reduction
        self.l2_reg = l2_reg

    def build(self, input_shape):
        input_channels = int(input_shape[3])
        self.gap = GlobalAveragePooling2D()
        self.conv1 = ConvNormAct(
            input_channels // self.reduction,
            kernel_size=1,
            norm_layer=None,
            act_layer="relu",
            use_bias=False,
            l2_reg=self.l2_reg,
            name="Squeeze",
        )
        self.conv2 = ConvNormAct(
            input_channels,
            kernel_size=1,
            norm_layer=None,
            act_layer="hsigmoid",
            use_bias=False,
            l2_reg=self.l2_reg,
            name="Excite",
        )

        super().build(input_shape)

    def call(self, input):
        x = self.gap(input)
        x = self.conv1(x)
        x = self.conv2(x)
        return input * x


class LastStage(tf.keras.layers.Layer):
    def __init__(
            self,
            penultimate_channels: int,
            last_channels: int,
            num_classes: int,
            l2_reg: float,
    ):
        super().__init__(name="LastStage")

        self.conv1 = ConvNormAct(
            penultimate_channels,
            kernel_size=1,
            stride=1,
            norm_layer="bn",
            act_layer="hswish",
            use_bias=False,
            l2_reg=l2_reg,
        )
        self.gap = GlobalAveragePooling2D()
        self.conv2 = ConvNormAct(
            last_channels,
            kernel_size=1,
            norm_layer=None,
            act_layer="hswish",
            l2_reg=l2_reg,
        )
        self.dropout = tf.keras.layers.Dropout(
            rate=0.2,
            name="Dropout",
        )
        self.conv3 = ConvNormAct(
            num_classes,
            kernel_size=1,
            norm_layer=None,
            act_layer="softmax",
            l2_reg=l2_reg,
        )
        self.squeeze = Squeeze()

    def call(self, input):
        x = self.conv1(input)
        x = self.gap(x)
        x = self.conv2(x)
        x = self.dropout(x)
        x = self.conv3(x)
        x = self.squeeze(x)
        return x

In [58]:
class MobileNetV3Small(tf.keras.Model):
    def __init__(
            self,
            num_classes: int=1001,
            width_multiplier: float=1.0,
            name: str="MobileNetV3_Small",
            divisible_by: int=8,
            l2_reg: float=1e-5,
    ):
        super().__init__(name=name)

        # First layer
        self.first_layer = ConvNormAct(
            16,
            kernel_size=3,
            stride=2,
            padding=1,
            norm_layer="bn",
            act_layer="hswish",
            use_bias=False,
            l2_reg=l2_reg,
            name="FirstLayer",
        )

        # Bottleneck layers
        self.bneck_settings = [
            # k   exp   out  SE      NL         s
            [ 3,  16,   16,  True,   "relu",    2 ],
            [ 3,  72,   24,  False,  "relu",    2 ],
            [ 3,  88,   24,  False,  "relu",    1 ],
            [ 5,  96,   40,  True,   "hswish",  2 ],
            [ 5,  240,  40,  True,   "hswish",  1 ],
            [ 5,  240,  40,  True,   "hswish",  1 ],
            [ 5,  120,  48,  True,   "hswish",  1 ],
            [ 5,  144,  48,  True,   "hswish",  1 ],
            [ 5,  288,  96,  True,   "hswish",  2 ],
            [ 5,  576,  96,  True,   "hswish",  1 ],
            [ 5,  576,  96,  True,   "hswish",  1 ],
        ]

        self.bneck = tf.keras.Sequential(name="Bneck")
        for idx, (k, exp, out, SE, NL, s) in enumerate(self.bneck_settings):
            out_channels = _make_divisible(out * width_multiplier, divisible_by)
            exp_channels = _make_divisible(exp * width_multiplier, divisible_by)

            self.bneck.add(
                LayerNamespaceWrapper(
                    Bneck(
                        out_channels=out_channels,
                        exp_channels=exp_channels,
                        kernel_size=k,
                        stride=s,
                        use_se=SE,
                        act_layer=NL,
                    ),
                    name=f"Bneck{idx}")
            )

        # Last stage
        penultimate_channels = _make_divisible(576 * width_multiplier, divisible_by)
        last_channels = _make_divisible(1_280 * width_multiplier, divisible_by)

        self.last_stage = LastStage(
            penultimate_channels,
            last_channels,
            num_classes,
            l2_reg=l2_reg,
        )

    def call(self, input):
        x = self.first_layer(input)
        x = self.bneck(x)
        x = self.last_stage(x)
        return x

In [59]:
from typing import Tuple

def build_mobilenetv3(
        model_type: str,
        input_shape: Tuple[int, int, int]=(224, 224, 3),
        num_classes: int=1001,
        width_multiplier: float=1.0,
        l2_reg: float=1e-5,
):
    assert len(input_shape) == 3, "`input_shape` should be a tuple representing input data shape (height, width, channels)"

    if model_type == 'small':
        model = MobileNetV3Small(
            num_classes=num_classes,
            width_multiplier=width_multiplier,
            l2_reg=l2_reg)
    elif model_type == 'large':
        model = MobileNetV3Large(
            num_classes=num_classes,
            width_multiplier=width_multiplier,
            l2_reg=l2_reg)

    input_tensor = tf.keras.layers.Input(shape=input_shape)
    output_tensor = model(input_tensor)

    model = tf.keras.Model(
        inputs=[model.input],
        outputs=[model.output],
    )

    return model

In [60]:
mobilenetv3small = build_mobilenetv3(
    "small",
    input_shape=input_shape+(3,),
    num_classes=len(labels),
    width_multiplier=1.0)

mobilenetv3small.compile(optimizer=keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', f1_score, precision, recall])

history_mobilenetv3small = mobilenetv3small.fit(train_batches,
                             epochs=epoch,
                             validation_data=val_batches,
                             verbose=1,
                             shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [61]:
metric_calc(mobilenetv3small)

95/95 - 13s - 13s/epoch - 141ms/step


(0.9781094527363184,
 0.9781833517010187,
 0.9781094527363184,
 0.9780986652581752)

In [62]:
test_loss, test_acc, test_f1, test_prec, test_rec = mobilenetv3small.evaluate(test_batches, verbose=2)

95/95 - 11s - loss: 0.1320 - accuracy: 0.9781 - f1_score: 0.9779 - precision: 0.9790 - recall: 0.9769 - 11s/epoch - 118ms/step


In [63]:
mobilenetv3small.save("models/mobilenetv3small_no_weights")



INFO:tensorflow:Assets written to: models/mobilenetv3small_no_weights\assets


INFO:tensorflow:Assets written to: models/mobilenetv3small_no_weights\assets






# MobileNetV3Large

In [56]:
class MobileNetV3Large(tf.keras.Model):
    def __init__(
            self,
            num_classes: int=1001,
            width_multiplier: float=1.0,
            name: str="MobileNetV3_Large",
            divisible_by: int=8,
            l2_reg: float=1e-5,
    ):
        super().__init__(name=name)

        # First layer
        self.first_layer = ConvNormAct(
            16,
            kernel_size=3,
            stride=2,
            padding=1,
            norm_layer="bn",
            act_layer="hswish",
            use_bias=False,
            l2_reg=l2_reg,
            name="FirstLayer",
        )

        # Bottleneck layers
        self.bneck_settings = [
            # k   exp   out   SE      NL         s
            [ 3,  16,   16,   False,  "relu",    1 ],
            [ 3,  64,   24,   False,  "relu",    2 ],
            [ 3,  72,   24,   False,  "relu",    1 ],
            [ 5,  72,   40,   True,   "relu",    2 ],
            [ 5,  120,  40,   True,   "relu",    1 ],
            [ 5,  120,  40,   True,   "relu",    1 ],
            [ 3,  240,  80,   False,  "hswish",  2 ],
            [ 3,  200,  80,   False,  "hswish",  1 ],
            [ 3,  184,  80,   False,  "hswish",  1 ],
            [ 3,  184,  80,   False,  "hswish",  1 ],
            [ 3,  480,  112,  True,   "hswish",  1 ],
            [ 3,  672,  112,  True,   "hswish",  1 ],
            [ 5,  672,  160,  True,   "hswish",  2 ],
            [ 5,  960,  160,  True,   "hswish",  1 ],
            [ 5,  960,  160,  True,   "hswish",  1 ],
        ]

        self.bneck = tf.keras.Sequential(name="Bneck")
        for idx, (k, exp, out, SE, NL, s) in enumerate(self.bneck_settings):
            out_channels = _make_divisible(out * width_multiplier, divisible_by)
            exp_channels = _make_divisible(exp * width_multiplier, divisible_by)

            self.bneck.add(
                LayerNamespaceWrapper(
                    Bneck(
                        out_channels=out_channels,
                        exp_channels=exp_channels,
                        kernel_size=k,
                        stride=s,
                        use_se=SE,
                        act_layer=NL,
                    ),
                    name=f"Bneck{idx}")
            )

        # Last stage
        penultimate_channels = _make_divisible(960 * width_multiplier, divisible_by)
        last_channels = _make_divisible(1_280 * width_multiplier, divisible_by)

        self.last_stage = LastStage(
            penultimate_channels,
            last_channels,
            num_classes,
            l2_reg=l2_reg,
        )

    def call(self, input):
        x = self.first_layer(input)
        x = self.bneck(x)
        x = self.last_stage(x)
        return x

In [57]:
mobilenetv3large = build_mobilenetv3(
    "large",
    input_shape=input_shape+(3,),
    num_classes=len(labels),
    width_multiplier=1.0)

mobilenetv3large.compile(optimizer=keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', f1_score, precision, recall])

history_mobilenetv3large = mobilenetv3large.fit(train_batches,
                             epochs=epoch,
                             validation_data=val_batches,
                             verbose=1,
                             shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [58]:
test_loss, test_acc, test_f1, test_prec, test_rec = mobilenetv3large.evaluate(test_batches, verbose=2)

95/95 - 35s - loss: 0.1532 - accuracy: 0.9806 - f1_score: 0.9806 - precision: 0.9817 - recall: 0.9794 - 35s/epoch - 370ms/step


In [59]:
mobilenetv3large.save("models/mobilenetv3large_no_weights")



INFO:tensorflow:Assets written to: models/mobilenetv3large_no_weights\assets


INFO:tensorflow:Assets written to: models/mobilenetv3large_no_weights\assets






# SqueezeNet

In [61]:
from tensorflow.keras import layers, Sequential, Model

class Fire(layers.Layer):
    def __init__(self, out_channels, squeeze_channel):
        super(Fire, self).__init__()
        self.squeeze = Sequential([
            layers.Conv2D(squeeze_channel, (1, 1)),
            layers.BatchNormalization(),
            layers.ReLU()
        ])
        self.expand_1x1 = Sequential([
            layers.Conv2D(int(out_channels / 2), (1, 1)),
            layers.BatchNormalization(),
            layers.ReLU()
        ])
        self.expand_3x3 = Sequential([
            layers.Conv2D(int(out_channels / 2), (3, 3), padding='same'),
            layers.BatchNormalization(),
            layers.ReLU()
        ])
    
    def call(self, x, training=False):
        x = self.squeeze(x, training=training)
        x = tf.concat([
            self.expand_1x1(x, training=training),
            self.expand_3x3(x, training=training)
        ], -1)

        return x
    

class SqueezeNet(Model):
    def __init__(self, num_classes, input_shape):
        super(SqueezeNet, self).__init__()
        self.stem = Sequential([
            layers.Input(input_shape),
            layers.Conv2D(96, (3, 3), padding='same'),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.MaxPooling2D((2, 2), strides=2)
        ])
        self.fire = Sequential([
            Fire(128, 16),
            Fire(128, 16),
            Fire(256, 32),
            Fire(256, 32),
            Fire(384, 48),
            Fire(384, 48),
            Fire(512, 64),
            Fire(512, 64)
        ])
        self.conv = layers.Conv2D(num_classes, 1)
        self.ap = layers.AveragePooling2D((7, 7), strides=1)
        self.mp = layers.MaxPooling2D()
        self.flat = layers.Flatten()
        self.fc = layers.Dense(num_classes, activation='softmax')
    
    def call(self, inputs, training=False):
        x = self.stem(inputs, training=training)
        x = self.fire(x, training=training)
        x = self.conv(x, training=training)
        x = self.ap(x)
        x = self.mp(x)
        x = self.flat(x)
        x = self.fc(x)
        return x

def model_squeezenet(num_classes, input_shape):
    return SqueezeNet(num_classes, input_shape)

In [62]:
squeezenet = model_squeezenet(num_classes=len(labels), input_shape=input_shape+(3,))

squeezenet.compile(optimizer=keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', f1_score, precision, recall])

history_squeezenet = squeezenet.fit(train_batches,
                             epochs=epoch,
                             validation_data=val_batches,
                             verbose=1,
                             shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [63]:
test_loss, test_acc, test_f1, test_prec, test_rec = squeezenet.evaluate(test_batches, verbose=2)

95/95 - 272s - loss: 0.1072 - accuracy: 0.9778 - f1_score: 0.9782 - precision: 0.9786 - recall: 0.9778 - 272s/epoch - 3s/step


In [64]:
squeezenet.save("models/squeezenet_no_weights")



INFO:tensorflow:Assets written to: models/squeezenet_no_weights\assets


INFO:tensorflow:Assets written to: models/squeezenet_no_weights\assets


# RegNetX002

In [93]:
model_regnetx002 = tf.keras.applications.regnet.RegNetX002(input_shape=input_shape+(3,), include_top=False, 
                                               include_preprocessing=False, weights=None)

regnetx002 = keras.Sequential([
    model_regnetx002,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

regnetx002.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_regnetx002 = regnetx002.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [94]:
metric_calc(regnetx002)

95/95 - 69s - 69s/epoch - 726ms/step


(0.9461028192371476,
 0.9461274661756428,
 0.9461028192371476,
 0.9460189873176599)

In [95]:
test_loss, test_acc, test_f1, test_prec, test_rec = regnetx002.evaluate(test_batches, verbose=2)

95/95 - 67s - loss: 0.2042 - accuracy: 0.9461 - f1_score: 0.3427 - precision: 0.2069 - recall: 0.9992 - 67s/epoch - 710ms/step


In [96]:
regnetx002.save("models/regnetx002_no_weights")



INFO:tensorflow:Assets written to: models/regnetx002_no_weights\assets


INFO:tensorflow:Assets written to: models/regnetx002_no_weights\assets


# RegNetX004

In [101]:
model_regnetx004 = tf.keras.applications.regnet.RegNetX004(input_shape=input_shape+(3,), include_top=False, 
                                               include_preprocessing=False, weights=None)

regnetx004 = keras.Sequential([
    model_regnetx004,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

regnetx004.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_regnetx004 = regnetx004.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [102]:
metric_calc(regnetx004)

95/95 - 94s - 94s/epoch - 984ms/step


(0.9383084577114428,
 0.9388423257207839,
 0.9383084577114428,
 0.9382406773344414)

In [103]:
test_loss, test_acc, test_f1, test_prec, test_rec = regnetx004.evaluate(test_batches, verbose=2)

95/95 - 91s - loss: 0.3150 - accuracy: 0.9383 - f1_score: 0.3774 - precision: 0.2328 - recall: 0.9974 - 91s/epoch - 962ms/step


In [104]:
regnetx004.save("models/regnetx004_no_weights")



INFO:tensorflow:Assets written to: models/regnetx004_no_weights\assets


INFO:tensorflow:Assets written to: models/regnetx004_no_weights\assets


# RegNetX006

In [105]:
model_regnetx006 = tf.keras.applications.regnet.RegNetX006(input_shape=input_shape+(3,), include_top=False, 
                                               include_preprocessing=False, weights=None)

regnetx006 = keras.Sequential([
    model_regnetx006,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

regnetx006.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_regnetx006 = regnetx006.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [106]:
metric_calc(regnetx006)

95/95 - 53s - 53s/epoch - 560ms/step


(0.9504145936981758,
 0.9505773683445093,
 0.9504145936981758,
 0.9504066614868186)

In [107]:
test_loss, test_acc, test_f1, test_prec, test_rec = regnetx006.evaluate(test_batches, verbose=2)

95/95 - 52s - loss: 0.2239 - accuracy: 0.9504 - f1_score: 0.3734 - precision: 0.2297 - recall: 0.9979 - 52s/epoch - 551ms/step


In [108]:
regnetx006.save("models/regnetx006_no_weights")



INFO:tensorflow:Assets written to: models/regnetx006_no_weights\assets


INFO:tensorflow:Assets written to: models/regnetx006_no_weights\assets


# RegNetY002

In [109]:
model_regnety002 = tf.keras.applications.regnet.RegNetY002(input_shape=input_shape+(3,), include_top=False, 
                                               include_preprocessing=False, weights=None)

regnety002 = keras.Sequential([
    model_regnety002,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

regnety002.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_regnety002 = regnety002.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [110]:
metric_calc(regnety002)

95/95 - 72s - 72s/epoch - 753ms/step


(0.9442786069651742,
 0.9443658501027206,
 0.9442786069651742,
 0.9441757452932362)

In [111]:
test_loss, test_acc, test_f1, test_prec, test_rec = regnety002.evaluate(test_batches, verbose=2)

95/95 - 70s - loss: 0.2258 - accuracy: 0.9443 - f1_score: 0.4131 - precision: 0.2605 - recall: 0.9985 - 70s/epoch - 735ms/step


In [112]:
regnety002.save("models/regnety002_no_weights")



INFO:tensorflow:Assets written to: models/regnety002_no_weights\assets


INFO:tensorflow:Assets written to: models/regnety002_no_weights\assets


# RegNetY004

In [12]:
model_regnety004 = tf.keras.applications.regnet.RegNetY004(input_shape=input_shape+(3,), include_top=False, 
                                               include_preprocessing=False, weights=None)

regnety004 = keras.Sequential([
    model_regnety004,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

regnety004.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_regnety004 = regnety004.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [13]:
metric_calc(regnety004)

95/95 - 19s - 19s/epoch - 195ms/step


(0.9321724709784411,
 0.9333378687274676,
 0.9321724709784411,
 0.9320707801421217)

In [14]:
test_loss, test_acc, test_f1, test_prec, test_rec = regnety004.evaluate(test_batches, verbose=2)

95/95 - 16s - loss: 0.3048 - accuracy: 0.9322 - f1_score: 0.3705 - precision: 0.2276 - recall: 0.9969 - 16s/epoch - 166ms/step


In [15]:
regnety004.save("models/regnety004_no_weights")



INFO:tensorflow:Assets written to: models/regnety004_no_weights\assets


INFO:tensorflow:Assets written to: models/regnety004_no_weights\assets


# RegNetY006

In [113]:
model_regnety006 = tf.keras.applications.regnet.RegNetY006(input_shape=input_shape+(3,), include_top=False, 
                                               include_preprocessing=False, weights=None)

regnety006 = keras.Sequential([
    model_regnety006,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(len(labels))                
])

regnety006.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy', f1_score, precision, recall])

history_regnety006 = regnety006.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [114]:
metric_calc(regnety006)

95/95 - 79s - 79s/epoch - 829ms/step


(0.9587064676616915,
 0.9587201360290123,
 0.9587064676616915,
 0.9586563954167945)

In [115]:
test_loss, test_acc, test_f1, test_prec, test_rec = regnety006.evaluate(test_batches, verbose=2)

95/95 - 77s - loss: 0.1583 - accuracy: 0.9587 - f1_score: 0.3795 - precision: 0.2342 - recall: 0.9990 - 77s/epoch - 811ms/step


In [116]:
regnety006.save("models/regnety006_no_weights")



INFO:tensorflow:Assets written to: models/regnety006_no_weights\assets


INFO:tensorflow:Assets written to: models/regnety006_no_weights\assets


# ShuffleNet

In [27]:
# https://github.com/Machine-Learning-Tokyo/CNN-Architectures/tree/master/Implementations/ShuffleNet
from tensorflow.keras.layers import Input, Conv2D, DepthwiseConv2D, \
     Dense, Concatenate, Add, ReLU, BatchNormalization, AvgPool2D, \
     MaxPool2D, GlobalAvgPool2D, Reshape, Permute, Lambda
    
def stage(x, channels, repetitions, groups):
    x = shufflenet_block(x, channels=channels, strides=2, groups=groups)
    for i in range(repetitions):
        x = shufflenet_block(x, channels=channels, strides=1, groups=groups)
    return x


def shufflenet_block(tensor, channels, strides, groups):
    x = gconv(tensor, channels=channels // 4, groups=groups)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = channel_shuffle(x, groups)
    x = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)

    if strides == 2:
        channels = channels - tensor.get_shape().as_list()[-1]
    x = gconv(x, channels=channels, groups=groups)
    x = BatchNormalization()(x)

    if strides == 1:
        x = Add()([tensor, x])
    else:
        avg = AvgPool2D(pool_size=3, strides=2, padding='same')(tensor)
        x = Concatenate()([avg, x])

    output = ReLU()(x)
    return output

def gconv(tensor, channels, groups):
    input_ch = tensor.get_shape().as_list()[-1]
    group_ch = input_ch // groups
    output_ch = channels // groups
    groups_list = []

    for i in range(groups):
        # group_tensor = tensor[:, :, :, i * group_ch: (i+1) * group_ch]
        group_tensor = Lambda(lambda x: x[:, :, :, i * group_ch: (i+1) * group_ch])(tensor)
        group_tensor = Conv2D(output_ch, 1)(group_tensor)
        groups_list.append(group_tensor)

    output = Concatenate()(groups_list)
    return output


def channel_shuffle(x, groups):  
    _, width, height, channels = x.get_shape().as_list()
    group_ch = channels // groups

    x = Reshape([width, height, group_ch, groups])(x)
    x = Permute([1, 2, 4, 3])(x)
    x = Reshape([width, height, channels])(x)
    return x

def shufflenet_model(input_shape, num_classes):

    input = Input(input_shape)
    x = Conv2D(filters=24, kernel_size=3, strides=2, padding='same')(input)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = MaxPool2D(pool_size=3, strides=2, padding='same')(x)

    repetitions = 3, 7, 3
    initial_channels = 384
    groups = 8

    for i, reps in enumerate(repetitions):
        channels = initial_channels * (2**i)
        x = stage(x, channels, reps, groups)

    x = GlobalAvgPool2D()(x)
    output = Dense(num_classes, activation='softmax')(x)

    from tensorflow.keras import Model
    model = Model(input, output)

    return model

In [28]:
shufflenet = shufflenet_model(input_shape+(3,), len(labels))

shufflenet.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9, decay=1e-2/epoch),
              loss=keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', f1_score, precision, recall])

history_shufflenet = shufflenet.fit(train_batches,
                    epochs=epoch,
                    validation_data=val_batches, 
                    verbose=1,
                    shuffle=True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [29]:
test_loss, test_acc, test_f1, test_prec, test_rec = shufflenet.evaluate(test_batches, verbose=2)

95/95 - 29s - loss: 0.3032 - accuracy: 0.9182 - f1_score: 0.9186 - precision: 0.9259 - recall: 0.9114 - 29s/epoch - 303ms/step


In [30]:
shufflenet.save("models/shufflenet_no_weights")



INFO:tensorflow:Assets written to: models/shufflenet_no_weights\assets


INFO:tensorflow:Assets written to: models/shufflenet_no_weights\assets
