In [1]:
import tensorflow as tf
# Fail fast when TensorFlow cannot see the first GPU, before any heavy work starts.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  # RuntimeError instead of SystemError: SystemError is meant for internal
  # interpreter failures, not for a missing accelerator.
  raise RuntimeError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
# https://keras.io/
!pip install -q keras
import keras as k
print (k.__version__)
import tensorflow as tf
print (tf.__version__)

2.1.6
1.7.0


Using TensorFlow backend.


In [0]:
import keras
from keras.datasets import cifar10
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, merge, Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Concatenate
from keras.optimizers import Adam
import numpy as np

In [0]:
# This cell prevents TensorFlow from allocating all of the available GPU memory up front.
# backend
import tensorflow as tf
from keras import backend as k

# Let the GPU allocation grow on demand instead of being reserved up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Build a session from that configuration and hand it to the Keras backend.
session = tf.Session(config=config)
k.tensorflow_backend.set_session(session)

In [0]:
# Hyperparameters

# Training loop settings (passed to model.fit).
epochs = 30
batch_size = 32

# Number of target classes (one-hot width and size of the final Dense layer).
num_classes = 10

# Network shape.
l = 6               # conv layers per dense block (loop bound in add_denseblock)
num_filter = 128    # base filter count handed to the blocks and transitions
compression = 0.5   # multiplier applied to num_filter inside blocks/transitions
dropout_rate = 0.5  # NOTE: rebound to 0.3 in the model-building cell

# Where the best checkpoint is written and later reloaded from.
filepath = "model_best_densenet.hdf5"

In [6]:
# Fetch the CIFAR-10 train/test splits and read the image geometry from the
# training array (axes 1..3 of x_train).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
img_height, img_width, channel = x_train.shape[1:]

# Convert the integer labels of both splits to one-hot encoding.
# Pixel values are not rescaled in this cell.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [7]:
# Shuffle the training set with a fixed seed so the order is repeatable.
# The permutation covers every index, so all samples are kept (nothing is
# subsampled); only their order changes.
np.random.seed(1117)
shuffle_indices = np.random.permutation(len(y_train))
xtr_shuffled, ytr_shuffled = x_train[shuffle_indices], y_train[shuffle_indices]
print(xtr_shuffled.shape, ytr_shuffled.shape)

# Shuffle the whole test set the same way; it is passed to model.fit as the
# validation data.
shuffle_indices = np.random.permutation(len(y_test))
xte_shuffled, yte_shuffled = x_test[shuffle_indices], y_test[shuffle_indices]
print(xte_shuffled.shape, yte_shuffled.shape)

(50000, 32, 32, 3) (50000, 10)
(10000, 32, 32, 3) (10000, 10)


In [0]:
# Dense Block
def add_denseblock(input, num_filter = 12, dropout_rate = 0.2, num_layers = None):
    """Stack densely connected conv layers on ``input`` and return the result.

    Every layer applies BatchNormalization -> ReLU -> 3x3 convolution
    (followed by Dropout when enabled) to everything accumulated so far, then
    concatenates the new feature maps onto that accumulation along the last axis.

    Args:
        input: Keras tensor the block is built on.
        num_filter: Base filter count; each convolution emits
            ``int(num_filter * compression)`` feature maps, where
            ``compression`` is the module-level setting.
        dropout_rate: Rate for the Dropout after each convolution. Dropout is
            skipped unless this is greater than 0.
        num_layers: Number of conv layers in the block. ``None`` (default)
            falls back to the module-level ``l``, which is what the block
            always used before this parameter existed.

    Returns:
        Keras tensor: the block input concatenated with the output of every
        layer in the block.
    """
    if num_layers is None:
        num_layers = l

    # The per-layer filter count does not change inside the loop.
    filters_per_layer = int(num_filter * compression)

    temp = input
    for _ in range(num_layers):
        BatchNorm = BatchNormalization()(temp)
        relu = Activation('relu')(BatchNorm)
        Conv2D_3_3 = Conv2D(filters_per_layer, (3,3), use_bias=False, padding='same')(relu)
        if dropout_rate > 0:
            Conv2D_3_3 = Dropout(dropout_rate)(Conv2D_3_3)
        # Feed the next layer with everything seen so far plus the new maps.
        temp = Concatenate(axis=-1)([temp, Conv2D_3_3])

    return temp

In [9]:
# List every entry (hidden ones included) of the current working directory in
# long format with human-readable sizes.
!ls -lah

total 44K
drwxr-xr-x 1 root root 4.0K May  7 15:34 .
drwxr-xr-x 1 root root 4.0K May  7 15:32 ..
drwx------ 4 root root 4.0K May  7 15:33 .cache
drwxr-xr-x 3 root root 4.0K May  7 15:33 .config
drwxr-xr-x 1 root root 4.0K Apr 30 16:29 datalab
drwxr-xr-x 4 root root 4.0K May  7 15:33 .forever
drwxr-xr-x 5 root root 4.0K May  7 15:33 .ipython
drwxr-xr-x 3 root root 4.0K May  7 15:34 .keras
drwx------ 3 root root 4.0K May  7 15:33 .local
drwx------ 3 root root 4.0K May  7 15:34 .nv
-rw------- 1 root root 1.0K May  7 15:33 .rnd


In [0]:
def add_transition(input, num_filter = 12, dropout_rate = 0.2):
    """Build a transition stage on ``input`` and return its output tensor.

    The stage is BatchNormalization -> ReLU -> 1x1 convolution with
    ``int(num_filter * compression)`` filters (``compression`` is the
    module-level setting) -> optional Dropout -> 2x2 average pooling.
    Dropout is only inserted when ``dropout_rate`` is greater than 0.
    """
    x = BatchNormalization()(input)
    x = Activation('relu')(x)

    bottleneck_filters = int(num_filter * compression)
    x = Conv2D(bottleneck_filters, (1,1), use_bias=False, padding='same')(x)

    if dropout_rate > 0:
        x = Dropout(dropout_rate)(x)

    return AveragePooling2D(pool_size=(2,2))(x)

In [0]:
def output_layer(input):
    """Attach the classification head to ``input`` and return its output.

    The head is BatchNormalization -> ReLU -> 2x2 average pooling -> Flatten
    -> Dense softmax layer with ``num_classes`` units (``num_classes`` is read
    from module scope).
    """
    x = BatchNormalization()(input)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=(2,2))(x)
    x = Flatten()(x)
    return Dense(num_classes, activation='softmax')(x)

In [0]:
# Architecture settings used for this build. These rebind the names set in the
# hyperparameter cell: num_filter and l keep their values, dropout_rate
# becomes 0.3 (it was 0.5 there).
num_filter, dropout_rate, l = 128, 0.3, 6

# Network input and the initial 7x7 convolution.
input = Input(shape=(img_height, img_width, channel))
First_Conv2D = Conv2D(filters=num_filter, kernel_size=(7, 7), use_bias=False, padding='same')(input)

# Four dense-block + transition stages, chained one after another.
First_Block = add_denseblock(First_Conv2D, num_filter=num_filter, dropout_rate=dropout_rate)
First_Transition = add_transition(First_Block, num_filter=num_filter, dropout_rate=dropout_rate)

Second_Block = add_denseblock(First_Transition, num_filter=num_filter, dropout_rate=dropout_rate)
Second_Transition = add_transition(Second_Block, num_filter=num_filter, dropout_rate=dropout_rate)

Third_Block = add_denseblock(Second_Transition, num_filter=num_filter, dropout_rate=dropout_rate)
Third_Transition = add_transition(Third_Block, num_filter=num_filter, dropout_rate=dropout_rate)

Fourth_Block = add_denseblock(Fourth_Transition if False else Third_Transition, num_filter=num_filter, dropout_rate=dropout_rate)
Fourth_Transition = add_transition(Fourth_Block, num_filter=num_filter, dropout_rate=dropout_rate)

# A final dense block with no transition, followed by the classification head.
Last_Block = add_denseblock(Fourth_Transition, num_filter=num_filter, dropout_rate=dropout_rate)
output = output_layer(Last_Block)

In [13]:
# Wrap the tensor graph built above into a trainable Keras model, then print
# its layer-by-layer summary.
model = Model(
    inputs=[input],
    outputs=[output],
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 32, 128)  18816       input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 128)  512         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 32, 32, 128)  0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [0]:
import sys,os
# Resume from the checkpoint of an earlier run when one is already on disk.
checkpoint_found = os.path.exists(filepath)
if checkpoint_found:
    model.load_weights(filepath)

# Configure training: categorical cross-entropy loss, Adam with its default
# settings, and accuracy as the reported metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
from keras.callbacks import ModelCheckpoint
# Checkpoint callback: writes to `filepath` only when the monitored validation
# accuracy reaches a new maximum, and logs each decision.
bestModel = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [17]:
# Train on the shuffled training set, validating on the shuffled test split.
# The checkpoint callback saves the best model seen so far. The returned
# History object is kept in `history` so the per-epoch metrics stay available
# instead of being discarded (previously it only surfaced as the cell's repr).
history = model.fit(
    xtr_shuffled, ytr_shuffled,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(xte_shuffled, yte_shuffled),
    callbacks=[bestModel],
    verbose=0,
)


Epoch 00001: val_acc improved from -inf to 0.40040, saving model to model_best_densenet.hdf5

Epoch 00002: val_acc improved from 0.40040 to 0.64120, saving model to model_best_densenet.hdf5

Epoch 00003: val_acc did not improve from 0.64120

Epoch 00004: val_acc did not improve from 0.64120

Epoch 00005: val_acc improved from 0.64120 to 0.69810, saving model to model_best_densenet.hdf5

Epoch 00006: val_acc improved from 0.69810 to 0.75850, saving model to model_best_densenet.hdf5

Epoch 00007: val_acc did not improve from 0.75850

Epoch 00008: val_acc did not improve from 0.75850

Epoch 00009: val_acc improved from 0.75850 to 0.80500, saving model to model_best_densenet.hdf5

Epoch 00010: val_acc did not improve from 0.80500

Epoch 00011: val_acc did not improve from 0.80500

Epoch 00012: val_acc improved from 0.80500 to 0.82330, saving model to model_best_densenet.hdf5

Epoch 00013: val_acc did not improve from 0.82330

Epoch 00014: val_acc improved from 0.82330 to 0.85570, saving m

<keras.callbacks.History at 0x7f5a7afce8d0>

In [18]:
# Test the model
# The best checkpoint written during training must exist before scoring.
if not os.path.exists(filepath):
  # FileNotFoundError instead of SystemError: SystemError is reserved for
  # internal interpreter failures, while this is simply a missing file.
  raise FileNotFoundError('please perform training before testing the model.')
model.load_weights(filepath)

# NOTE(review): x_test (shuffled) was also the validation data that drove
# checkpoint selection during training, so this score is not an independent
# estimate of generalisation.
score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.4388419786572456
Test accuracy: 0.8806
