In [11]:
from __future__ import print_function
# To run on gpu
import os    
os.environ['THEANO_FLAGS'] = "device=gpu0"
import numpy as np
np.random.seed(1337)  # for reproducibility
from sklearn.cluster import KMeans
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.optimizers import *
from keras import callbacks
from keras.regularizers import l2, activity_l2
import keras
from pprint import *
%matplotlib inline
import matplotlib.pyplot as plt

In [12]:
opt = {
    'batch_size': 256,
    'nb_classes': 10,
    'nb_epoch':5,
    'data_augmentation': False,
    'lr':0.01,
    'momentum':0.9,
    'decay':1e-6,
    'nesterov':True,
    'init':'normal',
    'optimizer':'sgd',
    'loss':'categorical_crossentropy',
    'img_rows':28,
    'img_cols':28
}

pprint(opt)

{'batch_size': 256,
 'data_augmentation': False,
 'decay': 1e-06,
 'img_cols': 28,
 'img_rows': 28,
 'init': 'normal',
 'loss': 'categorical_crossentropy',
 'lr': 0.01,
 'momentum': 0.9,
 'nb_classes': 10,
 'nb_epoch': 5,
 'nesterov': True,
 'optimizer': 'sgd'}


In [3]:
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(X_train.shape[0], 1, opt['img_rows'], opt['img_cols'])
X_test = X_test.reshape(X_test.shape[0], 1, opt['img_rows'], opt['img_cols'])
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, opt['nb_classes'])
Y_test = np_utils.to_categorical(y_test, opt['nb_classes'])

X_train shape: (60000, 1, 28, 28)
60000 train samples
10000 test samples


In [4]:
def plotLogs(stats,name):
    epochs=list(range(1,opt['nb_epoch']+1))
    train_acc= stats['acc']
    train_loss= stats['loss']
    test_acc= stats['val_acc']
    test_loss= stats['val_loss']

    acc_plot = plt.plot()
    plt.plot(epochs, train_acc,'b-',label='Training Accuracy')
    plt.plot(epochs, test_acc, 'r-',label='Testing Accuracy')
    plt.title('Progression of Accuracy for '+name)
    plt.xlabel('No. of Epochs')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

    loss_plot = plt.plot()
    plt.plot(epochs, train_loss,'b-',label='Training Loss') 
    plt.plot(epochs, test_loss, 'r-',label='Testing Loss')
    plt.title('Progression of Loss for '+name)
    plt.xlabel('No. of Epochs')
    plt.ylabel('Loss')
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.show()

In [5]:
def reset_regularized_model(init_type,opt):
    model = Sequential()

    model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, opt['img_rows'], opt['img_cols']), init= init_type ,W_regularizer=l2(0.01)))
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3, init=  init_type,W_regularizer=l2(0.01)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(64, 3, 3, border_mode='same',init= init_type,W_regularizer=l2(0.01)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, init=init_type,W_regularizer=l2(0.01)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512,init=init_type,W_regularizer=l2(0.01)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(opt['nb_classes'], init=init_type,W_regularizer=l2(0.01)))
    model.add(Activation('softmax'))
    
    optim = SGD(momentum=opt['momentum'])

    return optim,model

In [6]:
init_type=opt['init']
optim,model = reset_regularized_model(init_type,opt)

In [7]:
optim,model = reset_regularized_model(init_type,opt)
model.compile(loss=opt['loss'],
                  optimizer=optim,
                  metrics=['accuracy'])

stats= model.fit(X_train, Y_train,
              batch_size=opt['batch_size'],
              nb_epoch=opt['nb_epoch'],
              validation_data=(X_test, Y_test),
              shuffle=True)

stats= stats.history

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test accuracy: 0.9779


In [9]:
print(model.summary())

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_5 (Convolution2D)  (None, 32, 28, 28)    320         convolution2d_input_2[0][0]      
____________________________________________________________________________________________________
activation_7 (Activation)        (None, 32, 28, 28)    0           convolution2d_5[0][0]            
____________________________________________________________________________________________________
convolution2d_6 (Convolution2D)  (None, 32, 26, 26)    9248        activation_7[0][0]               
____________________________________________________________________________________________________
activation_8 (Activation)        (None, 32, 26, 26)    0           convolution2d_6[0][0]            
___________________________________________________________________________________________

In [14]:
# Define the parameters for kmeans

kmeans = KMeans(n_clusters=256, random_state=0,  max_iter=500, precompute_distances='auto', verbose=0)


In [15]:
def non_uniform_quantize(weight):
    print("----------------------------------------------------")
    print("Number of unique parameters before quantization: " + str(len(np.unique(weight))))

    kmeans_fit = kmeans.fit(weight)
    clusters = kmeans_fit.cluster_centers_
    
    for i in range(0,len(weight)):
        dist= (clusters-weight[i])**2     
        weight[i]=clusters[np.argmin(dist)]
        
    print("Number of unique parameters after quantization: " + str(len(np.unique(weight))))
    
    return weight  

In [16]:
#Make Sure the model has been trained before accumulating the weights

mnist_weights = model.get_weights()

for i in range(len(mnist_weights)):
    weight = mnist_weights[i]
    dims = weight.shape
    weight = weight.flatten()
    weight = uniform_quantize(weight)
    weight=np.reshape(weight,dims)
    mnist_weights[i]=weight
    
model.set_weights(mnist_weights)

----------------------------------------------------
Number of unique parameters before quantization: 288
Number of unique parameters after quantization: 91
----------------------------------------------------
Number of unique parameters before quantization: 32
Number of unique parameters after quantization: 28
----------------------------------------------------
Number of unique parameters before quantization: 9216
Number of unique parameters after quantization: 207
----------------------------------------------------
Number of unique parameters before quantization: 32
Number of unique parameters after quantization: 28
----------------------------------------------------
Number of unique parameters before quantization: 18429
Number of unique parameters after quantization: 228
----------------------------------------------------
Number of unique parameters before quantization: 64
Number of unique parameters after quantization: 52
----------------------------------------------------
Num

In [17]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test accuracy: 0.9779
