In [10]:
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation,ActivityRegularization
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt

In [11]:
# Load MNIST and unpack it into training and test images/labels.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [12]:
# Build the input vectors by flattening each 28x28 image into 784 values.
# Passing -1 lets NumPy infer the flattened width from the remaining axes, which
# also makes this cell safe to re-run: on data that is already 2-D the reshape
# is a no-op, whereas indexing shape[2] would raise IndexError on a second run.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [13]:
# Divide every pixel value by 255 so the network trains on small inputs
# (presumably the raw images are 8-bit, i.e. 0-255 -> 0-1; verify if the data changes).
X_train, X_test = X_train / 255, X_test / 255

In [14]:
# One-hot encode the class labels of both splits; the models below are trained
# with the 'categorical_crossentropy' loss on these encoded targets.
Y_train, Y_test = (to_categorical(labels) for labels in (y_train, y_test))

In [15]:
#plotting function
def plotting(history):
    """Plot the accuracy and loss curves of one training run.

    Opens a new figure with two stacked panels: accuracy per epoch on top and
    loss per epoch below. The available history keys are printed first.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names to per-epoch lists (what ``model.fit`` returns). Accuracy is
            read from ``'accuracy'``/``'val_accuracy'`` and falls back to
            ``'acc'``/``'val_acc'``, the key names used by older Keras
            releases. Validation series are optional; when absent only the
            training curve is drawn.

    Raises:
        KeyError: If the history contains no training accuracy or no
            training loss.
    """
    history_dict = history.history
    print(history_dict.keys())

    # Older Keras releases record accuracy under 'acc' instead of 'accuracy'.
    accuracy_key = 'accuracy' if 'accuracy' in history_dict else 'acc'

    # (subplot row, history key, axis label, legend position) for each panel.
    panels = (
        (1, accuracy_key, 'accuracy', 'lower right'),
        (2, 'loss', 'loss', 'upper right'),
    )

    plt.figure()
    for row, key, label, legend_loc in panels:
        plt.subplot(2, 1, row)

        plt.plot(history_dict[key])
        legend_labels = ['Training Set']

        # Runs fitted without validation data have no 'val_*' series.
        val_key = 'val_' + key
        if val_key in history_dict:
            plt.plot(history_dict[val_key])
            legend_labels.append('Validation Set')

        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(legend_labels, loc=legend_loc)

    plt.tight_layout()

In [None]:
import time

# Baseline experiment: a fully connected classifier with two 512-unit ReLU
# hidden layers (dropout 0.2 after each) and a 10-way softmax output.
# The timer spans model construction, training, plotting and evaluation.
start_time = time.time()

model = Sequential([
    Dense(512, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.2),
    Dense(512),
    Activation('relu'),
    Dropout(0.2),
    Dense(10),
    Activation('softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train with 30% of the training data held out for validation; the returned
# history carries the per-epoch metrics that plotting() draws.
history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)
plotting(history)

loss, acc = model.evaluate(X_test, Y_test)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))
print("--- %s seconds ---" % (time.time() - start_time))

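This is the baseline neural network that we build: two hidden layers of 512 ReLU units with dropout of 0.2, followed by a 10-way softmax output layer.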

In [None]:
# Activation-function sweep: train the same network once per hidden-layer
# activation and collect test loss, test accuracy (percent) and elapsed seconds.
import time

activationFunctions = ["sigmoid","relu","softmax","tanh"]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for activation in activationFunctions:
    print(activation)
    start_time = time.time()

    # Both hidden layers use the swept activation; the output layer stays softmax.
    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation(activation),
        Dropout(0.2),
        Dense(512),
        Activation(activation),
        Dropout(0.2),
        Dense(10),
        Activation("softmax"),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # 30% of the training data is held out for validation during fitting.
    history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")


Here I compare different activation functions in the hidden layers.

ActivationFunctions = ["sigmoid","relu","softmax","tanh"]

ReLU is the best activation function for this dataset.

If we use softmax in the hidden layers, the accuracy is noticeably lower and the network performs poorly.



In [None]:
# Dropout sweep: train the same two-hidden-layer ReLU network once per dropout
# rate and collect test loss, test accuracy (percent) and elapsed seconds.
import time

ddropout = [0.2, 0.3, 0.5]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for rate in ddropout:
    start_time = time.time()

    # The swept rate is applied after each of the two hidden layers.
    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation("relu"),
        Dropout(rate),
        Dense(512),
        Activation("relu"),
        Dropout(rate),
        Dense(10),
        Activation("softmax"),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # 30% of the training data is held out for validation during fitting.
    history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")

Changing the dropout rate has no noticeable effect on accuracy, but the execution time was longer for dropout = 0.3.

In [None]:
# Optimizer sweep: train the same two-hidden-layer ReLU network once per
# optimizer name and collect test loss, test accuracy (percent) and elapsed seconds.
import time

optimizer = ["Adam","SGD","RMSprop","Ftrl"]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for optimizer_name in optimizer:
    start_time = time.time()

    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation("relu"),
        Dropout(0.2),
        Dense(512),
        Activation("relu"),
        Dropout(0.2),
        Dense(10),
        Activation("softmax"),
    ])
    # The optimizer is passed by name, so each one runs with its default settings.
    model.compile(optimizer=optimizer_name, loss='categorical_crossentropy', metrics=['accuracy'])

    # 30% of the training data is held out for validation during fitting.
    history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")

The optimizer has a major effect on network performance: with the Ftrl optimizer we get a very low accuracy of only about 11 percent.

In [None]:
# Batch-size sweep: train the same two-hidden-layer ReLU network once per batch
# size and collect test loss, test accuracy (percent) and elapsed seconds.
import time

batchsize = [128, 250, 500]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for batch in batchsize:
    start_time = time.time()

    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation("relu"),
        Dropout(0.2),
        Dense(512),
        Activation("relu"),
        Dropout(0.2),
        Dense(10),
        Activation("softmax"),
    ])
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Only the batch size varies; 30% of the training data is held out for validation.
    history = model.fit(X_train, Y_train, batch_size=batch, epochs=5, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")

Changing the batch size has a major effect on the execution time of our network. With a batch size of 500 the run takes just 16 seconds, compared to the 25 seconds required for the baseline run (batch size 128).

In [None]:
# Epoch-count sweep: train the same two-hidden-layer ReLU network once per
# epoch count and collect test loss, test accuracy (percent) and elapsed seconds.
import time

epochsnumber = [5, 10, 20]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for n_epochs in epochsnumber:
    start_time = time.time()

    # A new model is built for every setting, so each run trains from scratch.
    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation("relu"),
        Dropout(0.2),
        Dense(512),
        Activation("relu"),
        Dropout(0.2),
        Dense(10),
        Activation("softmax"),
    ])
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Only the epoch count varies; 30% of the training data is held out for validation.
    history = model.fit(X_train, Y_train, batch_size=128, epochs=n_epochs, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")

Changing the number of epochs does not produce any noticeable effect here because the model is already stable after 5 epochs.

In [None]:
# Weight-initialization sweep: train the same two-hidden-layer ReLU network once
# per kernel initializer and collect test loss, test accuracy (percent) and
# elapsed seconds.
from tensorflow.keras import layers
from tensorflow.keras import initializers
import tensorflow as tf
import time


def _fresh_initializer(template):
    """Return a new initializer of the same class and configuration as `template`."""
    return type(template).from_config(template.get_config())


initiationmethods = [
    tf.keras.initializers.Ones(),
    tf.keras.initializers.Zeros(),
    tf.keras.initializers.RandomNormal(mean=0., stddev=1.),
]
t = []
l = []
a = []
for ii in initiationmethods:
    start_time = time.time()

    # Every Dense layer gets its own initializer object. Sharing one unseeded
    # random initializer instance between layers makes Keras draw the same
    # random values on each call (recent releases warn about this), so the
    # RandomNormal run would start with correlated weights across layers.
    model = Sequential()
    model.add(Dense(512, input_shape=(784,), kernel_initializer=_fresh_initializer(ii)))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))

    model.add(Dense(512, kernel_initializer=_fresh_initializer(ii)))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))

    model.add(Dense(10, kernel_initializer=_fresh_initializer(ii)))
    model.add(Activation("softmax"))

    # compiling the sequential model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='Adam')
    # training the model and saving metrics in history (30% held out for validation)
    history = model.fit(X_train, Y_train,
                        batch_size=128, epochs=5,
                        validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss")
print(l)
print("accuracy")
print(a)
print("Time for execution")
print(t)

This point is very important. If every weight in the network starts at the same value, the network will not converge properly. With all-zeros and all-ones initialization it reaches an accuracy of only about 11 and 10 percent, respectively.

In [None]:
# Regularization sweep, L1 part: train the same two-hidden-layer ReLU network
# once per L1 activity-regularization strength and collect test loss, test
# accuracy (percent) and elapsed seconds.
import time

l1 = [0.02, 0.05, 0.1]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for strength in l1:
    start_time = time.time()

    # An ActivityRegularization layer follows each hidden activation, with only
    # its L1 term switched on (the L2 factor is 0).
    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation("relu"),
        ActivityRegularization(l1=strength, l2=0),
        Dense(512),
        Activation("relu"),
        ActivityRegularization(l1=strength, l2=0),
        Dense(10),
        Activation("softmax"),
    ])
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # 30% of the training data is held out for validation during fitting.
    history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")

For high L1 values such as 0.05 or 0.1, the model gives very low accuracy, which means it is unable to learn proper weights.

In [None]:
import time

# Regularization sweep, L2 part: train the same two-hidden-layer ReLU network
# once per L2 activity-regularization strength and collect test loss, test
# accuracy (percent) and elapsed seconds.
l2 = [0.2, 0.3, 0.5]
t, l, a = [], [], []  # elapsed seconds, test loss, test accuracy -- one entry per run

for strength in l2:
    start_time = time.time()

    # An ActivityRegularization layer follows each hidden activation, with only
    # its L2 term switched on (the L1 factor is 0).
    model = Sequential([
        Dense(512, input_shape=(784,)),
        Activation("relu"),
        ActivityRegularization(l1=0, l2=strength),
        Dense(512),
        Activation("relu"),
        ActivityRegularization(l1=0, l2=strength),
        Dense(10),
        Activation("softmax"),
    ])
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # 30% of the training data is held out for validation during fitting.
    history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)

    # Per-run curves are not plotted; only the final test metrics are kept.
    loss, acc = model.evaluate(X_test, Y_test)
    l.append(loss)
    a.append(100.0 * acc)
    t.append(time.time() - start_time)

print("loss", l, sep="\n")
print("accuracy", a, sep="\n")
print("Time for execution", t, sep="\n")

For high L2 values such as 0.3 or 0.5, the model accuracy decreases.

In [None]:
import time

# Hidden-layer experiment, part 1: a network with a single 512-unit ReLU hidden
# layer (dropout 0.2) feeding the 10-way softmax output.
# The timer spans model construction, training, plotting and evaluation.
start_time = time.time()

model = Sequential([
    Dense(512, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.2),
    Dense(10),
    Activation('softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train with 30% of the training data held out for validation, then plot the curves.
history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)
plotting(history)

loss, acc = model.evaluate(X_test, Y_test)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))
print("--- %s seconds ---" % (time.time() - start_time))

Using only one hidden layer reduces accuracy by 0.1 percent.

In [None]:
import time

# Hidden-layer experiment, part 2: two much narrower ReLU hidden layers
# (100 and 20 units, dropout 0.2 after each) feeding the 10-way softmax output.
# The timer spans model construction, training, plotting and evaluation.
start_time = time.time()

model = Sequential([
    Dense(100, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.2),
    Dense(20),
    Activation('relu'),
    Dropout(0.2),
    Dense(10),
    Activation('softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train with 30% of the training data held out for validation, then plot the curves.
history = model.fit(X_train, Y_train, batch_size=128, epochs=5, validation_split=0.3)
plotting(history)

loss, acc = model.evaluate(X_test, Y_test)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))
print("--- %s seconds ---" % (time.time() - start_time))

If we use very few parameters in the hidden layers, training will be very fast, but the network will not have enough capacity to learn what it needs for accurate predictions.