1.  Please visit this link to access the state-of-art DenseNet code for reference - DenseNet - cifar10 notebook link
2.  You need to create a copy of this and "retrain" this model to achieve 90+ test accuracy. 
3.  You cannot use DropOut layers.
4.  You MUST use Image Augmentation Techniques.
5.  You cannot use an already trained model as a beginning points, you have to initilize as your own
6.  You cannot run the program for more than 300 Epochs, and it should be clear from your log, that you have only used 300 Epochs
7.  You cannot use test images for training the model.
8.  You cannot change the general architecture of DenseNet (which means you must use Dense Block, Transition and Output blocks as mentioned in the code)
9.  You are free to change Convolution types (e.g. from 3x3 normal convolution to Depthwise Separable, etc)
10. You cannot have more than 1 Million parameters in total
11. You are free to move the code from Keras to Tensorflow, Pytorch, MXNET etc. 
12. You can use any optimization algorithm you need. 
13. You can checkpoint your model and retrain the model from that checkpoint so that no need of training the model from first if you lost at any epoch while training. You can directly load that model and Train from that epoch. 

In [1]:
# import keras
# from keras.datasets import cifar10
# from keras.models import Model, Sequential
# from keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, merge, Activation
# from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
# from keras.layers import Concatenate
# from keras.optimizers import A`dam
from tensorflow.keras import models, layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from skimage.data import chelsea,astronaut
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)


In [2]:
tf.keras.backend.clear_session()

In [3]:
# Hyperparameters


num_classes = 10
num_filter = 12
compression = 0.5

In [4]:
# Load CIFAR10 Data
(X_train, Y_train), (X_test, Y_test) = tf.keras.datasets.cifar10.load_data()
img_height, img_width, channel = X_train.shape[1],X_train.shape[2],X_train.shape[3]

#X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
#X_train, X_cv, y_train, y_cv = train_test_split(x_train, Y_train, test_size=0.2, random_state=42)




# convert to one hot encoing 
Y_train = tf.keras.utils.to_categorical(Y_train, num_classes)
Y_test = tf.keras.utils.to_categorical(Y_test, num_classes) 
#y_cv = tf.keras.utils.to_categorical(y_cv, num_classes) 

In [5]:
X_train.shape,len(Y_train)

((50000, 32, 32, 3), 50000)

In [6]:
X_test.shape

(10000, 32, 32, 3)

In [7]:
img_height, img_width, channel

(32, 32, 3)

In [8]:
# Dense Block
from tensorflow.keras import regularizers
def denseblock(input, num_filter = 12, dropout_rate = 0.2):
    global compression
    temp = input
    for _ in range(l): 
        BatchNorm = layers.BatchNormalization()(temp)
        relu = layers.Activation('relu')(BatchNorm)
        Conv2D_3_3 = layers.Conv2D(int(num_filter*compression), (3,3), use_bias=False ,padding='same',
                                   kernel_initializer=tf.keras.initializers.HeNormal(),
                                   kernel_regularizer=regularizers.L2(0.0001)
                                  )(relu)
        #if dropout_rate>0:
        #    Conv2D_3_3 = layers.Dropout(dropout_rate)(Conv2D_3_3)
        concat = layers.Concatenate(axis=-1)([temp,Conv2D_3_3])
        
        temp = concat
        
    return temp

## transition Blosck
def transition(input, num_filter = 12, dropout_rate = 0.2):
    global compression
    BatchNorm = layers.BatchNormalization()(input)
    relu = layers.Activation('relu')(BatchNorm)
    Conv2D_BottleNeck = layers.Conv2D(int(num_filter*compression), (1,1), use_bias=False ,padding='same',
                                     kernel_initializer=tf.keras.initializers.HeNormal(),
                                      kernel_regularizer=regularizers.L2(0.0001)
                                     )(relu)
    #if dropout_rate>0:
    #     Conv2D_BottleNeck = layers.Dropout(dropout_rate)(Conv2D_BottleNeck)
    avg = layers.AveragePooling2D(pool_size=(2,2))(Conv2D_BottleNeck)
    return avg

#output layer
def output_layer(input):
    global compression
    BatchNorm = layers.BatchNormalization()(input)
    relu = layers.Activation('relu')(BatchNorm)
    AvgPooling = layers.AveragePooling2D(pool_size=(2,2),)(relu)
    flat = layers.Flatten()(AvgPooling)
    output = layers.Dense(num_classes, activation='softmax')(flat)
    return output

In [9]:
num_filter = 80
dropout_rate = 0.0
l = 12

from tensorflow.keras.layers import DepthwiseConv2D,SeparableConv2D,Conv2DTranspose,Conv3D,Conv3DTranspose


input = layers.Input(shape=(img_height, img_width, channel,))
First_Conv2D = layers.Conv2D(num_filter, (3,3),strides=(1,1), use_bias=False ,padding='same',activation = 'relu',
                             kernel_initializer=tf.keras.initializers.HeNormal(),kernel_regularizer=regularizers.L2(0.0001)
                            )(input)

#First_Conv2D = DepthwiseConv2D(kernel_size = (3,3), use_bias=False ,padding='same',activation = 'relu',
#                             depthwise_initializer=tf.keras.initializers.HeUniform(),
#                             depthwise_regularizer=regularizers.L2())(input)

#First_Conv2D = Conv2DTranspose(kernel_size = (10,10), filters = num_filter,activation = 'relu',
#                               use_bias=False ,padding='same',
#                               kernel_initializer=tf.keras.initializers.HeUniform(),
#                               kernel_regularizer=regularizers.L2())(input)


First_Block = denseblock(First_Conv2D, num_filter, dropout_rate)
First_Transition = transition(First_Block, num_filter, dropout_rate)

Second_Block = denseblock(First_Transition, num_filter, dropout_rate)
Second_Transition = transition(Second_Block, num_filter, dropout_rate)

Third_Block = denseblock(Second_Transition, num_filter, dropout_rate)
Third_Transition = transition(Third_Block, num_filter, dropout_rate)

Last_Block = denseblock(Third_Transition,  num_filter, dropout_rate)
output = output_layer(Last_Block)

In [10]:
First_Block.shape,Second_Block.shape,Third_Block.shape,Last_Block.shape

(TensorShape([None, 32, 32, 560]),
 TensorShape([None, 16, 16, 520]),
 TensorShape([None, 8, 8, 520]),
 TensorShape([None, 4, 4, 520]))

In [11]:
#https://arxiv.org/pdf/1608.06993.pdf
#from IPython.display import IFrame, YouTubeVideo
#YouTubeVideo(id='-W6y8xnd--U', width=600)

In [12]:
model = Model(inputs=[input], outputs=[output])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 80)   2160        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 80)   320         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 80)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [13]:
print(len(model.layers))

211


In [14]:
epochs = 30
batch_size = 128
val_batch_size = 128
steps = len(Y_train)//batch_size
val_steps = len(Y_test)//val_batch_size

In [15]:
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
      width_shift_range = 0.1,height_shift_range = 0.1,#rescale=1./255.,
    horizontal_flip = True,rotation_range = 10,
    featurewise_center=True,
    featurewise_std_normalization=True,
    zoom_range = 0.2, shear_range = 10,
)
train_datagen.fit(X_train)

In [16]:
test_datagen = ImageDataGenerator(
      width_shift_range = 0.1,height_shift_range = 0.1,#rescale=1./255.,
    horizontal_flip = True,rotation_range = 10,
    featurewise_center=True,
    featurewise_std_normalization=True,
    zoom_range = 0.2, shear_range = 10,
)

test_datagen.fit(X_test)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from skimage.data import chelsea,astronaut
import matplotlib.pyplot as plt
import numpy as np

imgs = np.stack([X_train[1111] for i in range(4*4)], axis=0)#np.stack([astronaut() for i in range(4*4)], axis=0)

data_gen = ImageDataGenerator(
    width_shift_range = 0.1,height_shift_range = 0.1,#rescale=1./255.,
    fill_mode='nearest',validation_split=0.25,horizontal_flip = True,rotation_range = 90,
    preprocessing_function=lambda x: x[..., np.random.permutation([0, 1, 2])]
)
fig = plt.figure()
plt.subplots_adjust(wspace=.2, hspace=.2)
for index, image in enumerate(next(data_gen.flow(imgs)).astype(int)):
    ax = plt.subplot(4, 4, index + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(image)
plt.show()


In [17]:
callbacks = [
    tf.keras.callbacks.ModelCheckpoint('./DenseNet_cifar10.h5', save_weights_only=False,save_best_only=True, \
                                       mode='max', monitor='val_accuracy',verbose=1),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=2,mode='max',verbose=1),
]

In [18]:
opt = tf.keras.optimizers.Adam()#SGD(learning_rate=0.1,momentum=0.9,)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [19]:
#model.compile(loss='categorical_crossentropy',
#              optimizer=tf.keras.optimizers.SGD(learning_rate = 0.1,momentum = 0.9,nesterov = True), #Adam(),
#              metrics=['accuracy'])
model.fit(train_datagen.flow(X_train, Y_train,),steps_per_epoch=steps,
          validation_data=test_datagen.flow(X_test, Y_test),validation_steps=val_steps,
          
          epochs=100,
          callbacks=callbacks,
         )


Epoch 1/100

Epoch 00001: val_accuracy improved from -inf to 0.43450, saving model to .\DenseNet_cifar10.h5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.43450 to 0.49279, saving model to .\DenseNet_cifar10.h5
Epoch 3/100

Epoch 00003: val_accuracy improved from 0.49279 to 0.52404, saving model to .\DenseNet_cifar10.h5
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.52404 to 0.59635, saving model to .\DenseNet_cifar10.h5
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.59635 to 0.64784, saving model to .\DenseNet_cifar10.h5
Epoch 6/100

Epoch 00006: val_accuracy improved from 0.64784 to 0.68409, saving model to .\DenseNet_cifar10.h5
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.68409 to 0.72216, saving model to .\DenseNet_cifar10.h5
Epoch 8/100

Epoch 00008: val_accuracy improved from 0.72216 to 0.73357, saving model to .\DenseNet_cifar10.h5
Epoch 9/100

Epoch 00009: val_accuracy did not improve from 0.73357
Epoch 10/100

Epoch 00010: val_accuracy improve


Epoch 00035: val_accuracy did not improve from 0.89463
Epoch 36/100

Epoch 00036: val_accuracy did not improve from 0.89463

Epoch 00036: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 37/100

Epoch 00037: val_accuracy did not improve from 0.89463
Epoch 38/100

Epoch 00038: val_accuracy did not improve from 0.89463

Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-09.
Epoch 39/100

Epoch 00039: val_accuracy did not improve from 0.89463
Epoch 40/100

Epoch 00040: val_accuracy did not improve from 0.89463

Epoch 00040: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-10.
Epoch 41/100

Epoch 00041: val_accuracy did not improve from 0.89463
Epoch 42/100

Epoch 00042: val_accuracy did not improve from 0.89463

Epoch 00042: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-11.
Epoch 43/100

Epoch 00043: val_accuracy did not improve from 0.89463
Epoch 44/100

Epoch 00044: val_accuracy did not improve from 0.89463


Epoch 00069: val_accuracy did not improve from 0.89463
Epoch 70/100

Epoch 00070: val_accuracy did not improve from 0.89463

Epoch 00070: ReduceLROnPlateau reducing learning rate to 1.0000001181490946e-25.
Epoch 71/100

Epoch 00071: val_accuracy did not improve from 0.89463
Epoch 72/100

Epoch 00072: val_accuracy did not improve from 0.89463

Epoch 00072: ReduceLROnPlateau reducing learning rate to 1.0000001428009978e-26.
Epoch 73/100

Epoch 00073: val_accuracy did not improve from 0.89463
Epoch 74/100

Epoch 00074: val_accuracy did not improve from 0.89463

Epoch 00074: ReduceLROnPlateau reducing learning rate to 1.000000142800998e-27.
Epoch 75/100

Epoch 00075: val_accuracy did not improve from 0.89463
Epoch 76/100

Epoch 00076: val_accuracy did not improve from 0.89463

Epoch 00076: ReduceLROnPlateau reducing learning rate to 1.0000001235416984e-28.
Epoch 77/100

Epoch 00077: val_accuracy did not improve from 0.89463
Epoch 78/100

Epoch 00078: val_accuracy did not improve from 0.89

<keras.callbacks.History at 0x23f5ecbf100>

In [20]:
model.save('./MODEL')

INFO:tensorflow:Assets written to: ./MODEL\assets


In [17]:
from keras.models import load_model
m=load_model('./MODEL')

In [18]:

callbacks = [
    tf.keras.callbacks.ModelCheckpoint('./DenseNet_cifar10.h5', save_weights_only=False,save_best_only=True, \
                                       mode='max', monitor='val_accuracy',verbose=1),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=2,mode='max',verbose=1),
]

opt = tf.keras.optimizers.Adam(learning_rate=0.0001)#SGD(learning_rate=0.1,momentum=0.9,)
m.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
m.fit(train_datagen.flow(X_train, Y_train,),steps_per_epoch=steps,
          validation_data=test_datagen.flow(X_test, Y_test),validation_steps=val_steps,
          
          epochs=100,
          callbacks=callbacks,
         )


Epoch 1/100

Epoch 00001: val_accuracy improved from -inf to 0.87059, saving model to .\DenseNet_cifar10.h5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.87059 to 0.88261, saving model to .\DenseNet_cifar10.h5
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.88261
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.88261 to 0.88982, saving model to .\DenseNet_cifar10.h5
Epoch 5/100

Epoch 00005: val_accuracy did not improve from 0.88982
Epoch 6/100

Epoch 00006: val_accuracy did not improve from 0.88982

Epoch 00006: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.88982 to 0.89183, saving model to .\DenseNet_cifar10.h5
Epoch 8/100

Epoch 00008: val_accuracy did not improve from 0.89183
Epoch 9/100

Epoch 00009: val_accuracy did not improve from 0.89183

Epoch 00009: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 10/100

Epoch 00010: val_accuracy did not improve f


Epoch 00035: val_accuracy did not improve from 0.89784
Epoch 36/100

Epoch 00036: val_accuracy did not improve from 0.89784

Epoch 00036: ReduceLROnPlateau reducing learning rate to 9.999999424161285e-20.
Epoch 37/100

Epoch 00037: val_accuracy did not improve from 0.89784
Epoch 38/100

Epoch 00038: val_accuracy did not improve from 0.89784

Epoch 00038: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-21.
Epoch 39/100

Epoch 00039: val_accuracy did not improve from 0.89784
Epoch 40/100

Epoch 00040: val_accuracy did not improve from 0.89784

Epoch 00040: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-22.
Epoch 41/100

Epoch 00041: val_accuracy did not improve from 0.89784
Epoch 42/100

Epoch 00042: val_accuracy did not improve from 0.89784

Epoch 00042: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-23.
Epoch 43/100

Epoch 00043: val_accuracy did not improve from 0.89784
Epoch 44/100

Epoch 00044: val_accuracy did not improve from 0.89784


Epoch 00069: val_accuracy did not improve from 0.89784
Epoch 70/100

Epoch 00070: val_accuracy improved from 0.89784 to 0.89824, saving model to .\DenseNet_cifar10.h5
Epoch 71/100

Epoch 00071: val_accuracy did not improve from 0.89824
Epoch 72/100

Epoch 00072: val_accuracy did not improve from 0.89824

Epoch 00072: ReduceLROnPlateau reducing learning rate to 9.999999462560281e-37.
Epoch 73/100

Epoch 00073: val_accuracy did not improve from 0.89824
Epoch 74/100

Epoch 00074: val_accuracy did not improve from 0.89824

Epoch 00074: ReduceLROnPlateau reducing learning rate to 9.99999946256028e-38.
Epoch 75/100

Epoch 00075: val_accuracy did not improve from 0.89824
Epoch 76/100

Epoch 00076: val_accuracy did not improve from 0.89824

Epoch 00076: ReduceLROnPlateau reducing learning rate to 9.99999991097579e-39.
Epoch 77/100

Epoch 00077: val_accuracy did not improve from 0.89824
Epoch 78/100

Epoch 00078: val_accuracy did not improve from 0.89824

Epoch 00078: ReduceLROnPlateau reducin

KeyboardInterrupt: 

In [19]:
m.save('./MODEL2')

INFO:tensorflow:Assets written to: ./MODEL2\assets


In [17]:
from keras.models import load_model
m1=load_model('./MODEL2')

In [19]:

callbacks = [
    tf.keras.callbacks.ModelCheckpoint('./DenseNet_cifar10.h5', save_weights_only=False,save_best_only=True, \
                                       mode='max', monitor='val_accuracy',verbose=1),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=2,mode='max',verbose=1),
]

opt = tf.keras.optimizers.Adam(learning_rate=0.000001)#SGD(learning_rate=0.1,momentum=0.9,)
m1.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
m1.fit(train_datagen.flow(X_train, Y_train,),steps_per_epoch=steps,
          validation_data=test_datagen.flow(X_test, Y_test),validation_steps=val_steps,
          epochs=100,
          callbacks=callbacks,
         )


Epoch 1/100

Epoch 00001: val_accuracy improved from -inf to 0.88662, saving model to .\DenseNet_cifar10.h5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.88662 to 0.89223, saving model to .\DenseNet_cifar10.h5
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.89223
Epoch 4/100

Epoch 00004: val_accuracy did not improve from 0.89223

Epoch 00004: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
Epoch 5/100

Epoch 00005: val_accuracy did not improve from 0.89223
Epoch 6/100

Epoch 00006: val_accuracy did not improve from 0.89223

Epoch 00006: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.89223 to 0.89263, saving model to .\DenseNet_cifar10.h5
Epoch 8/100

Epoch 00008: val_accuracy did not improve from 0.89263
Epoch 9/100

Epoch 00009: val_accuracy did not improve from 0.89263

Epoch 00009: ReduceLROnPlateau reducing learning rate to 9.999999939225292e-10.
Epoch 10/100

Epoc


Epoch 00035: val_accuracy did not improve from 0.89744
Epoch 36/100

Epoch 00036: val_accuracy did not improve from 0.89744

Epoch 00036: ReduceLROnPlateau reducing learning rate to 9.999999424161285e-20.
Epoch 37/100

Epoch 00037: val_accuracy did not improve from 0.89744
Epoch 38/100

Epoch 00038: val_accuracy did not improve from 0.89744

Epoch 00038: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-21.
Epoch 39/100

Epoch 00039: val_accuracy did not improve from 0.89744
Epoch 40/100

Epoch 00040: val_accuracy did not improve from 0.89744

Epoch 00040: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-22.
Epoch 41/100

Epoch 00041: val_accuracy did not improve from 0.89744
Epoch 42/100

Epoch 00042: val_accuracy did not improve from 0.89744

Epoch 00042: ReduceLROnPlateau reducing learning rate to 9.999999682655225e-23.
Epoch 43/100

Epoch 00043: val_accuracy did not improve from 0.89744
Epoch 44/100

Epoch 00044: val_accuracy did not improve from 0.89744


Epoch 00069: val_accuracy did not improve from 0.90865
Epoch 70/100

Epoch 00070: val_accuracy did not improve from 0.90865

Epoch 00070: ReduceLROnPlateau reducing learning rate to 9.999999319067318e-35.
Epoch 71/100

Epoch 00071: val_accuracy did not improve from 0.90865
Epoch 72/100

Epoch 00072: val_accuracy did not improve from 0.90865

Epoch 00072: ReduceLROnPlateau reducing learning rate to 9.999999319067319e-36.
Epoch 73/100

Epoch 00073: val_accuracy did not improve from 0.90865
Epoch 74/100

Epoch 00074: val_accuracy did not improve from 0.90865

Epoch 00074: ReduceLROnPlateau reducing learning rate to 9.999999462560281e-37.
Epoch 75/100

Epoch 00075: val_accuracy did not improve from 0.90865
Epoch 76/100

Epoch 00076: val_accuracy did not improve from 0.90865

Epoch 00076: ReduceLROnPlateau reducing learning rate to 9.99999946256028e-38.
Epoch 77/100

Epoch 00077: val_accuracy did not improve from 0.90865
Epoch 78/100

Epoch 00078: val_accuracy did not improve from 0.90865


<keras.callbacks.History at 0x1aefe27e040>