In [1]:
from __future__ import absolute_import, division, print_function
import os, sys, json
current_path = os.path.abspath('.')
parent_path = os.path.dirname(current_path)
sys.path.append(parent_path)
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torchvision import datasets, transforms
from tqdm import tqdm
from torchinfo import summary

from src.data_loader import *
from src.utils import *

from src.models.simple_cnn import *
from src.models.resnet_models import *
from src.models.dscnn import *
from train_joint import *

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

############################################
########## Plot Style Declaration ##########
# Set the style globally.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so use whichever name this installation lists
# instead of hard-coding the legacy one.
# Alternatives include bmh, fivethirtyeight, ggplot, dark_background, etc.
if 'seaborn-white' in plt.style.available:
    plt.style.use('seaborn-white')
else:
    plt.style.use('seaborn-v0_8-white')

# Font, size and line defaults applied to every figure in the notebook.
plt.rcParams.update({
    'font.family': 'times new roman',
    'font.size': 15,
    'axes.labelsize': 15,
    'axes.labelweight': 'bold',
    'axes.titlesize': 15,
    'xtick.labelsize': 15,
    'ytick.labelsize': 15,
    'legend.fontsize': 14,
    'figure.titlesize': 15,
    'lines.linewidth': 3,
    # Type 42 (TrueType) font embedding for PDF / PostScript output.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

##################################
########## End of Setup ##########

import tensorflow as tf
from tensorflow import keras
#import tensorflow_addons as tfa
# from tensorflow.keras import layers

In [2]:
class ArgClass:
    """Bare attribute container.

    It defines no fields of its own; callers attach settings to an instance
    as plain attributes after constructing it.
    """

    def __init__(self):
        super().__init__()

# Gather the run settings as attributes on a single object.
# NOTE(review): within the cells visible in this review only `dataset`,
# `no_cuda` and `seed` are read back; the remaining attributes are presumably
# consumed by the imported project helpers -- TODO confirm.
args = ArgClass()
# Dataset / model selection, batch sizes and general training settings.
args.dataset="cifar10"
args.model_arch = "cnn"
args.batch_size=128
args.test_batch_size=1000
args.percent=[0.8, 0.92, 0.991, 0.93]
args.alpha=5e-4
args.rho=1e-2
args.l1=False
args.l2=False
args.num_pre_epochs=3
args.num_epochs=10
args.num_re_epochs=3
args.lr=1e-3
args.adam_epsilon=1e-8
args.no_cuda=False
args.seed=1
args.save_model=False
args.shuffle=True

# Optimizer and learning-rate schedule settings.
args.optimizer_name = 'adam'
args.lr_mode = 'multistep'
args.lr_decay = 0.1
# Stored as a comma-separated string, not as a list of integers.
args.lr_decay_epoch = '20,40'
args.target_lr = 1e-8
args.warmup_epochs = 0
args.warmup_lr = 1e-8
args.warmup_mode = 'linear'
args.momentum = 0.9
args.wd = 0.0001
args.gamma_wd_mult = 1.0
args.beta_wd_mult = 1.0
args.bias_wd_mult = 1.0
args.grad_clip = None
args.label_smoothing = False


# Remaining experiment switches.
# Note that test_fold_l and use_one_task hold strings ('[10]', 'false'),
# not a list / boolean.
args.test_fold_l = '[10]'
args.use_one_task = 'false'
args.exp_setup = ''
args.subject_idx = None
args.pretrained=False
args.best_acc = 200.0
args.mixup = False
args.mixup_alpha = 1.0
args.mixup_epoch_tail = 10
args.session = 1
args.test_vote = None

# Use the GPU only when the settings allow it and PyTorch reports one.
use_cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every random number generator in play (PyTorch, Python, NumPy and
# TensorFlow) with the same value for reproducibility.
torch.manual_seed(args.seed)
random.seed(args.seed)
np.random.seed(args.seed)
tf.random.set_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

# Extra keyword arguments for CUDA runs; not consumed in the visible cells
# (presumably intended for a torch DataLoader -- TODO confirm).
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}


In [3]:
device

device(type='cuda')

In [4]:
print(tf.__version__)
print(keras.__version__)

2.3.1
2.4.0


In [3]:
# MicroNet-AD-K
# Filter count and stride of the initial convolution.
init_block_channel = 168
init_block_stride = (1, 1)
# One single-element list per depthwise-separable block; element 0 is used as
# that block's pointwise-convolution filter count. (Wider variant: 276.)
channels = [[192] for _ in range(5)]
# Depthwise-convolution stride of each block.
strides = [2, 1, 1, 2, 2]

def get_pool_size(strides, config):
    """Return the [height, width] window used by the final average-pooling layer.

    Starting from (config["seq"], config["features"]), each axis is halved
    (rounding up) once for every entry of `strides` greater than 1, and then
    halved one additional time before being returned.

    When no stride exceeds 1 the computation simply starts from the full
    input size (a single halving) instead of failing on an unset value.

    Args:
        strides: iterable of per-block stride values.
        config: mapping providing the "seq" and "features" input sizes.

    Returns:
        Two-element list of NumPy floats (the results of np.ceil).
    """
    pool_size = [config["seq"], config["features"]]
    # One halving per downsampling block, plus the extra halving that is
    # always applied before returning.
    num_halvings = sum(1 for stride in strides if stride > 1) + 1
    for _ in range(num_halvings):
        pool_size = [np.ceil(pool_size[0] / 2), np.ceil(pool_size[1] / 2)]
    return pool_size


In [4]:
# Show the backend's current image layout, pin it to channels-last, and show
# it again to confirm.
print(tf.keras.backend.image_data_format())
channel_ordering = 'channels_last'
tf.keras.backend.set_image_data_format(channel_ordering)
print(tf.keras.backend.image_data_format())
# Axis handed to BatchNormalization: the last axis in a channels-last layout.
# (For a channels-first layout use channel_ordering='channels_first' and
# bn_axis=1 instead.)
bn_axis = -1

channels_last
channels_last


In [28]:
# from keras.datasets import cifar10
# load train and test dataset
def load_dataset():
    """Fetch CIFAR-10 through Keras and return its four arrays.

    Returns:
        Tuple (trainX, trainY, testX, testY), exactly as provided by
        keras.datasets.cifar10.load_data().
    """
    train_split, test_split = keras.datasets.cifar10.load_data()
    images_train, labels_train = train_split
    images_test, labels_test = test_split
    return images_train, labels_train, images_test, labels_test

def scale(train, test):
    """Standardise two image batches with fixed per-channel statistics.

    Each array is cast to float32 and divided by 255; then, for each of the
    first three channels on the last axis, a hard-coded mean is subtracted
    and the result is divided by a hard-coded standard deviation.

    Args:
        train: 4-D array with the colour channels on the last axis.
        test: array laid out the same way as `train`.

    Returns:
        Tuple (train_norm, test_norm) of float32 arrays. The inputs are not
        modified.
    """
    channel_means = (0.4914, 0.4822, 0.4465)
    channel_stds = (0.24703233, 0.24348505, 0.26158768)

    # astype() copies, so the in-place channel updates below never touch the
    # caller's arrays.
    scaled = [batch.astype('float32') / 255.0 for batch in (train, test)]

    for idx, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        for batch in scaled:
            batch[:, :, :, idx] = (batch[:, :, :, idx] - mean) / std

    return scaled[0], scaled[1]

# Load CIFAR-10; the label arrays are used as returned.
train_X, train_y, test_X, test_y = load_dataset()

# Rebinds train_X / test_X to their standardised copies, so re-running only
# this line without reloading would scale the data a second time.
train_X, test_X = scale(train_X, test_X)


In [5]:
# model
import tensorflow_addons as tfa
opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001)
initializer = tf.keras.initializers.HeNormal()
args.config = DATASET_CONFIGS[args.dataset]
print(args.config)
init_block_kernel=(3,3)

# Keyword arguments shared by every layer of a given type.
# NOTE(review): Keras updates BatchNormalization statistics as
# moving = moving * momentum + batch * (1 - momentum), the opposite convention
# to torch.nn.BatchNorm2d. If 0.1 was carried over from a PyTorch model the
# equivalent Keras value would be 0.9 -- confirm which is intended.
bn_kwargs = dict(axis=bn_axis, momentum=0.1, epsilon=0.00001)
conv_kwargs = dict(padding='same', use_bias=False)
preprocessing = tf.keras.layers.experimental.preprocessing

tf.keras.backend.clear_session()

# init_block: input, in-model augmentation, then a full convolution.
model_layers = [
    tf.keras.layers.Input(shape=[args.config["seq"], args.config["features"],
                                 args.config["in_channels"]]),
    preprocessing.Resizing(32, 32),
    preprocessing.RandomFlip("horizontal"),
    preprocessing.RandomRotation(factor=0.02),
    preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    tf.keras.layers.Conv2D(filters=init_block_channel, kernel_size=init_block_kernel,
                           strides=init_block_stride, kernel_initializer=initializer,
                           **conv_kwargs),
    tf.keras.layers.BatchNormalization(**bn_kwargs),
    tf.keras.layers.Activation('relu'),
]

# One depthwise-separable block per (stride, channels) pair:
# depthwise 3x3 -> BN -> ReLU -> pointwise 1x1 -> BN -> ReLU.
for block_stride, block_channels in zip(strides, channels):
    model_layers += [
        # DepthwiseConv2D initialises its kernel from `depthwise_initializer`;
        # a `kernel_initializer` argument does not reach the depthwise kernel.
        tf.keras.layers.DepthwiseConv2D(depth_multiplier=1, kernel_size=(3, 3),
                                        strides=block_stride,
                                        depthwise_initializer=initializer,
                                        **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(filters=block_channels[0], kernel_size=(1, 1),
                               kernel_initializer=initializer, **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
    ]

# Head. Note: the dropout layer sits after all blocks and before pooling.
model_layers += [
    tf.keras.layers.Dropout(rate=0.4),
    tf.keras.layers.AveragePooling2D(pool_size=get_pool_size(strides, args.config),
                                     data_format=channel_ordering),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=args.config["classes"],
                          kernel_initializer=initializer),
]
model3 = tf.keras.models.Sequential(model_layers)

# The Dense head emits raw logits, hence from_logits=True.
model3.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

{'in_channels': 3, 'features': 32, 'seq': 32, 'classes': 10}


In [6]:
model3.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resizing (Resizing)          (None, 32, 32, 3)         0         
_________________________________________________________________
random_flip (RandomFlip)     (None, 32, 32, 3)         0         
_________________________________________________________________
random_rotation (RandomRotat (None, 32, 32, 3)         0         
_________________________________________________________________
random_zoom (RandomZoom)     (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 32, 32, 168)       4536      
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 168)       672       
_________________________________________________________________
activation (Activation)      (None, 32, 32, 168)       0

In [20]:
# Main training run: 80 epochs with mini-batches of 128.
# The CIFAR-10 test split is what gets passed as validation data.
history = model3.fit(train_X, train_y,
                     batch_size=128, epochs=80,
                     validation_data=(test_X, test_y))

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80


Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [21]:
# Fine-tuning stage: 20 more epochs at learning rate 1e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
# NOTE(review): weight_decay stays at 1e-4 while the learning rate drops --
# confirm that is intended for decoupled weight decay.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model3.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0001, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model3.fit(train_X, train_y,
                     batch_size=128, epochs=20,
                     validation_data=(test_X, test_y))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# model
import tensorflow_addons as tfa
opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001)
initializer = tf.keras.initializers.HeNormal()
args.config = DATASET_CONFIGS[args.dataset]
print(args.config)
init_block_kernel=(3,3)
mmt=0.1

# Keyword arguments shared by every layer of a given type. Every
# BatchNormalization layer now takes its momentum from `mmt`; one layer of the
# fifth block used to hard-code 0.1.
# NOTE(review): Keras updates BatchNormalization statistics as
# moving = moving * momentum + batch * (1 - momentum), the opposite convention
# to torch.nn.BatchNorm2d -- confirm the small momentum is intended.
bn_kwargs = dict(axis=bn_axis, momentum=mmt, epsilon=0.00001)
conv_kwargs = dict(padding='same', use_bias=False)
preprocessing = tf.keras.layers.experimental.preprocessing

tf.keras.backend.clear_session()

# init_block: input, in-model augmentation, then a full convolution.
model_layers = [
    tf.keras.layers.Input(shape=[args.config["seq"], args.config["features"],
                                 args.config["in_channels"]]),
    preprocessing.Resizing(32, 32),
    preprocessing.RandomFlip("horizontal"),
    preprocessing.RandomRotation(factor=0.02),
    preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    tf.keras.layers.Conv2D(filters=init_block_channel, kernel_size=init_block_kernel,
                           strides=init_block_stride, kernel_initializer=initializer,
                           **conv_kwargs),
    tf.keras.layers.BatchNormalization(**bn_kwargs),
    tf.keras.layers.Activation('relu'),
]

# One depthwise-separable block per (stride, channels) pair:
# depthwise 3x3 -> BN -> ReLU -> pointwise 1x1 -> BN -> ReLU.
for block_stride, block_channels in zip(strides, channels):
    model_layers += [
        # DepthwiseConv2D initialises its kernel from `depthwise_initializer`;
        # a `kernel_initializer` argument does not reach the depthwise kernel.
        tf.keras.layers.DepthwiseConv2D(depth_multiplier=1, kernel_size=(3, 3),
                                        strides=block_stride,
                                        depthwise_initializer=initializer,
                                        **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(filters=block_channels[0], kernel_size=(1, 1),
                               kernel_initializer=initializer, **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
    ]

# Head. Note: the dropout layer sits after all blocks and before pooling.
model_layers += [
    tf.keras.layers.Dropout(rate=0.4),
    tf.keras.layers.AveragePooling2D(pool_size=get_pool_size(strides, args.config),
                                     data_format=channel_ordering),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=args.config["classes"],
                          kernel_initializer=initializer),
]
model3 = tf.keras.models.Sequential(model_layers)

# The Dense head emits raw logits, hence from_logits=True.
model3.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

{'in_channels': 3, 'features': 32, 'seq': 32, 'classes': 10}


In [None]:
model3.summary()

In [17]:
# Main training run: 80 epochs with mini-batches of 32.
# The CIFAR-10 test split is what gets passed as validation data.
history = model3.fit(train_X, train_y,
                     batch_size=32, epochs=80,
                     validation_data=(test_X, test_y))

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80


Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [18]:
# Fine-tuning stage: one more epoch at learning rate 5e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model3.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0005, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model3.fit(train_X, train_y,
                     batch_size=32, epochs=1,
                     validation_data=(test_X, test_y))



In [19]:
# Fine-tuning stage: two more epochs at learning rate 1e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model3.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0001, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model3.fit(train_X, train_y,
                     batch_size=32, epochs=2,
                     validation_data=(test_X, test_y))

Epoch 1/2
Epoch 2/2


In [8]:
# model
import tensorflow_addons as tfa
opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001)
initializer = tf.keras.initializers.HeNormal()
args.config = DATASET_CONFIGS[args.dataset]
print(args.config)
init_block_kernel=(3,3)
mmt=0.2

# Keyword arguments shared by every layer of a given type. Every
# BatchNormalization layer now takes its momentum from `mmt`; one layer of the
# fifth block used to hard-code 0.1 and so ignored the value set above.
# NOTE(review): Keras updates BatchNormalization statistics as
# moving = moving * momentum + batch * (1 - momentum), the opposite convention
# to torch.nn.BatchNorm2d -- confirm the small momentum is intended.
bn_kwargs = dict(axis=bn_axis, momentum=mmt, epsilon=0.00001)
conv_kwargs = dict(padding='same', use_bias=False)
preprocessing = tf.keras.layers.experimental.preprocessing

tf.keras.backend.clear_session()

# init_block: input, in-model augmentation, then a full convolution.
model_layers = [
    tf.keras.layers.Input(shape=[args.config["seq"], args.config["features"],
                                 args.config["in_channels"]]),
    preprocessing.Resizing(32, 32),
    preprocessing.RandomFlip("horizontal"),
    preprocessing.RandomRotation(factor=0.02),
    preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    tf.keras.layers.Conv2D(filters=init_block_channel, kernel_size=init_block_kernel,
                           strides=init_block_stride, kernel_initializer=initializer,
                           **conv_kwargs),
    tf.keras.layers.BatchNormalization(**bn_kwargs),
    tf.keras.layers.Activation('relu'),
]

# One depthwise-separable block per (stride, channels) pair:
# depthwise 3x3 -> BN -> ReLU -> pointwise 1x1 -> BN -> ReLU.
for block_stride, block_channels in zip(strides, channels):
    model_layers += [
        # DepthwiseConv2D initialises its kernel from `depthwise_initializer`;
        # a `kernel_initializer` argument does not reach the depthwise kernel.
        tf.keras.layers.DepthwiseConv2D(depth_multiplier=1, kernel_size=(3, 3),
                                        strides=block_stride,
                                        depthwise_initializer=initializer,
                                        **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(filters=block_channels[0], kernel_size=(1, 1),
                               kernel_initializer=initializer, **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
    ]

# Head. Note: the dropout layer sits after all blocks and before pooling.
model_layers += [
    tf.keras.layers.Dropout(rate=0.4),
    tf.keras.layers.AveragePooling2D(pool_size=get_pool_size(strides, args.config),
                                     data_format=channel_ordering),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=args.config["classes"],
                          kernel_initializer=initializer),
]
model3 = tf.keras.models.Sequential(model_layers)

# The Dense head emits raw logits, hence from_logits=True.
model3.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

{'in_channels': 3, 'features': 32, 'seq': 32, 'classes': 10}


In [9]:
# Main training run: 80 epochs with mini-batches of 32.
# The CIFAR-10 test split is what gets passed as validation data.
# NOTE(review): the output saved with this cell counts 40 epochs, not 80, so
# it was produced before `epochs` was changed -- re-run to refresh it.
history = model3.fit(train_X, train_y,
                     batch_size=32, epochs=80,
                     validation_data=(test_X, test_y))

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [10]:
# Fine-tuning stage: one more epoch at learning rate 5e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model3.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0005, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model3.fit(train_X, train_y,
                     batch_size=32, epochs=1,
                     validation_data=(test_X, test_y))



In [11]:
# Fine-tuning stage: two more epochs at learning rate 1e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model3.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0001, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model3.fit(train_X, train_y,
                     batch_size=32, epochs=2,
                     validation_data=(test_X, test_y))

Epoch 1/2
Epoch 2/2


In [12]:
# model
import tensorflow_addons as tfa
opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001)
initializer = tf.keras.initializers.HeNormal()
args.config = DATASET_CONFIGS[args.dataset]
print(args.config)
init_block_kernel=(3,3)
mmt=0.2

# Keyword arguments shared by every layer of a given type. Every
# BatchNormalization layer now takes its momentum from `mmt`; one layer of the
# fifth block used to hard-code 0.1 and so ignored the value set above.
# NOTE(review): Keras updates BatchNormalization statistics as
# moving = moving * momentum + batch * (1 - momentum), the opposite convention
# to torch.nn.BatchNorm2d -- confirm the small momentum is intended.
bn_kwargs = dict(axis=bn_axis, momentum=mmt, epsilon=0.00001)
conv_kwargs = dict(padding='same', use_bias=False)
preprocessing = tf.keras.layers.experimental.preprocessing

tf.keras.backend.clear_session()

# init_block: input, in-model augmentation, then a full convolution.
model_layers = [
    tf.keras.layers.Input(shape=[args.config["seq"], args.config["features"],
                                 args.config["in_channels"]]),
    preprocessing.Resizing(32, 32),
    preprocessing.RandomFlip("horizontal"),
    preprocessing.RandomRotation(factor=0.02),
    preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    tf.keras.layers.Conv2D(filters=init_block_channel, kernel_size=init_block_kernel,
                           strides=init_block_stride, kernel_initializer=initializer,
                           **conv_kwargs),
    tf.keras.layers.BatchNormalization(**bn_kwargs),
    tf.keras.layers.Activation('relu'),
]

# One depthwise-separable block per (stride, channels) pair:
# depthwise 3x3 -> BN -> ReLU -> pointwise 1x1 -> BN -> ReLU.
for block_stride, block_channels in zip(strides, channels):
    model_layers += [
        # DepthwiseConv2D initialises its kernel from `depthwise_initializer`;
        # a `kernel_initializer` argument does not reach the depthwise kernel.
        tf.keras.layers.DepthwiseConv2D(depth_multiplier=1, kernel_size=(3, 3),
                                        strides=block_stride,
                                        depthwise_initializer=initializer,
                                        **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(filters=block_channels[0], kernel_size=(1, 1),
                               kernel_initializer=initializer, **conv_kwargs),
        tf.keras.layers.BatchNormalization(**bn_kwargs),
        tf.keras.layers.Activation('relu'),
    ]

# Head. Note: the dropout layer sits after all blocks and before pooling.
model_layers += [
    tf.keras.layers.Dropout(rate=0.4),
    tf.keras.layers.AveragePooling2D(pool_size=get_pool_size(strides, args.config),
                                     data_format=channel_ordering),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=args.config["classes"],
                          kernel_initializer=initializer),
]
model4 = tf.keras.models.Sequential(model_layers)

# The Dense head emits raw logits, hence from_logits=True.
model4.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

{'in_channels': 3, 'features': 32, 'seq': 32, 'classes': 10}


In [13]:
# Main training run: 80 epochs with mini-batches of 32.
# The CIFAR-10 test split is what gets passed as validation data.
history = model4.fit(train_X, train_y,
                     batch_size=32, epochs=80,
                     validation_data=(test_X, test_y))

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80


Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [14]:
# Fine-tuning stage: one more epoch at learning rate 5e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model4.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0005, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model4.fit(train_X, train_y,
                     batch_size=32, epochs=1,
                     validation_data=(test_X, test_y))



In [15]:
# Fine-tuning stage: two more epochs at learning rate 1e-4.
# compile() receives a newly constructed AdamW instance; the model keeps its
# current weights.
finetune_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model4.compile(optimizer=tfa.optimizers.AdamW(learning_rate=0.0001, weight_decay=0.0001),
               loss=finetune_loss,
               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
history = model4.fit(train_X, train_y,
                     batch_size=32, epochs=2,
                     validation_data=(test_X, test_y))

Epoch 1/2
Epoch 2/2


# new arch

In [31]:
# MicroNet-AD-K
# Filter count and stride of the initial convolution.
init_block_channel = 168
init_block_stride = (1, 1)
# One single-element list per depthwise-separable block; element 0 is used as
# that block's pointwise-convolution filter count. (Narrower variant: 192.)
channels = [[276] for _ in range(5)]
# Depthwise-convolution stride of each block.
strides = [2, 1, 1, 2, 2]

def get_pool_size(strides, config):
    """Compute the 2-D pooling window used by the final average-pooling layer.

    Starting from ``[config["seq"], config["features"]]``, each dimension is
    halved (rounding up) once for every stride greater than 1, and then once
    more at the end.

    NOTE(review): the extra final halving is kept from the original code (the
    plain ``return pool_size`` had been commented out); presumably intentional
    -- confirm.

    Args:
        strides: iterable of per-block strides; any value > 1 counts as one
            halving, whatever its magnitude.
        config: mapping with numeric ``"seq"`` and ``"features"`` entries.

    Returns:
        ``[height, width]`` as plain Python ints. If no stride is > 1, only the
        final halving is applied (previously this case raised ``TypeError``).
    """
    pool_size = [config["seq"], config["features"]]
    # One halving per downsampling stride, plus the trailing extra halving.
    halvings = sum(1 for stride in strides if stride > 1) + 1
    for _ in range(halvings):
        pool_size = [np.ceil(dim / 2) for dim in pool_size]
    # np.ceil yields floats; return ints, which is what pool_size arguments expect.
    return [int(dim) for dim in pool_size]


In [33]:
# model
# Build the MicroNet-AD-K style network (model3) and train it in three stages
# with decreasing learning rates.
# Relies on names defined in other cells: args, DATASET_CONFIGS, channels,
# strides, init_block_channel, init_block_stride, bn_axis, channel_ordering,
# get_pool_size, train_X/train_y, test_X/test_y.
import tensorflow_addons as tfa
opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001)
initializer = tf.keras.initializers.HeNormal()
args.config = DATASET_CONFIGS[args.dataset]
print(args.config)
init_block_kernel=(3,3)
# NOTE(review): Keras BatchNormalization updates its moving statistics as
# moving = moving * momentum + batch * (1 - momentum), so 0.1 makes them track
# almost only the latest batch. If 0.1 was carried over from a PyTorch-style
# config, the Keras equivalent would be 0.9 -- confirm before changing.
mmt=0.1


def _bn_relu():
    """Return a fresh [BatchNormalization, ReLU] pair with the shared BN settings."""
    return [
        tf.keras.layers.BatchNormalization(axis=bn_axis,momentum=mmt,epsilon=0.00001),
        tf.keras.layers.Activation('relu'),
    ]


def _depthwise_separable_block(out_channels, stride):
    """Return the layers of one block: 3x3 depthwise conv, BN, ReLU, 1x1 conv, BN, ReLU."""
    block_layers = [
        # DepthwiseConv2D builds its kernel from `depthwise_initializer`; the
        # former `kernel_initializer=` keyword left HeNormal unused here.
        tf.keras.layers.DepthwiseConv2D(depth_multiplier=1,kernel_size=(3,3),
                                        strides=stride,padding='same',use_bias=False,
                                        depthwise_initializer=initializer),
    ]
    block_layers += _bn_relu()
    block_layers.append(
        tf.keras.layers.Conv2D(filters=out_channels,kernel_size=(1,1),
                               padding='same',use_bias=False,
                               kernel_initializer=initializer))
    block_layers += _bn_relu()
    return block_layers


tf.keras.backend.clear_session()

# init_block: input, resizing + augmentation layers, then the stem convolution.
model_layers = [
    tf.keras.layers.Input(shape=[args.config["seq"],args.config["features"],args.config["in_channels"]]),
    tf.keras.layers.experimental.preprocessing.Resizing(32, 32),
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
    tf.keras.layers.experimental.preprocessing.RandomRotation(factor=0.02),
    tf.keras.layers.experimental.preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    tf.keras.layers.Conv2D(filters=init_block_channel,kernel_size=init_block_kernel,
                           strides=init_block_stride,padding='same',use_bias=False,
                           kernel_initializer=initializer),
]
model_layers += _bn_relu()

# Depthwise-separable blocks, one per (channels, strides) entry, built in order
# so the auto-generated layer names match the hand-written version.
for block_channels, block_stride in zip(channels, strides):
    model_layers += _depthwise_separable_block(block_channels[0], block_stride)

# Note: Add dropout layer after all blocks and before pooling
model_layers += [
    tf.keras.layers.Dropout(rate=0.4),
    tf.keras.layers.AveragePooling2D(pool_size=get_pool_size(strides, args.config),data_format=channel_ordering),
    tf.keras.layers.Flatten(),
    # Outputs raw logits; the loss below is configured with from_logits=True.
    tf.keras.layers.Dense(units=args.config["classes"],
                          kernel_initializer=initializer),
]
model3 = tf.keras.models.Sequential(model_layers)

# Three training stages as (optimizer, epochs). Each stage recompiles the model
# with its own AdamW instance, so optimizer state is not carried over.
training_stages = [
    (opt, 50),
    (tfa.optimizers.AdamW(learning_rate=0.0005, weight_decay=0.0001), 1),
    (tfa.optimizers.AdamW(learning_rate=0.0001, weight_decay=0.0001), 2),
]
for stage_optimizer, stage_epochs in training_stages:
    model3.compile(
        optimizer=stage_optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    # `history` is overwritten each stage, so it ends up holding the last stage only.
    history = model3.fit(
        x=train_X,
        y=train_y,
        batch_size=32,
        epochs=stage_epochs,
        validation_data=(test_X, test_y)
    )

{'in_channels': 3, 'features': 32, 'seq': 32, 'classes': 10}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50


Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/2
Epoch 2/2


In [23]:
# Print the layer-by-layer summary (output shapes and parameter counts) of model3.
# NOTE(review): this cell's execution count (23) is lower than that of the cell
# that builds model3 (33), so the printed summary may predate the current model
# definition -- re-run after rebuilding the model to confirm.
model3.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resizing (Resizing)          (None, 32, 32, 3)         0         
_________________________________________________________________
random_flip (RandomFlip)     (None, 32, 32, 3)         0         
_________________________________________________________________
random_rotation (RandomRotat (None, 32, 32, 3)         0         
_________________________________________________________________
random_zoom (RandomZoom)     (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 32, 32, 168)       4536      
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 168)       672       
_________________________________________________________________
activation (Activation)      (None, 32, 32, 168)       0