In [None]:
import numpy as np 
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import joblib
import time 

In [81]:
#Load and prepare the MNIST dataset to be acceptable for the model
(x_train , y_train),(x_test , y_test) = mnist.load_data()
x_train = x_train.reshape((-1 , 784)) /255.0
x_test = x_test.reshape((-1 ,784))/255.0


When loading the MNIST dataset from Keras, it is common to divide the pixel values by 255.0. This step is performed as a form of data normalization or scaling.

The pixel values in the MNIST dataset range from 0 to 255, representing different levels of grayscale intensity. Dividing the pixel values by 255.0 scales them down to a range of 0 to 1. This normalization process ensures that all pixel values are within a standardized range, which can benefit the training process of machine learning models.

In [82]:
print(x_train.shape,x_test.shape , y_train.shape , y_test.shape)

(60000, 784) (10000, 784) (60000,) (10000,)


### Train the model with batch size 8 and layers  3(128,64,32) 

In [89]:
# Build a simple ANN architecture (input layer = 1 )

cnnmodel = Sequential([
    Dense(128, activation = 'relu' , input_shape=(784 , )),
    Dense(64 , activation = 'relu'),
    Dense(32, activation= 'softmax'),
    Dense(10, activation= 'softmax') ])

In [None]:
## `ReLU (Rectified Linear Unit):`
    
 ReLU is a popular activation function used in the hidden layers of deep neural networks. It computes the element-wise maximum of 0 and the input value. Mathematically, ReLU can be defined as f(x) = max(0, x). `ReLU introduces non-linearity into the network, enabling the model to learn complex relationships between features`. It helps alleviate the vanishing gradient problem and can lead to faster convergence during training.

In [90]:
# Compile the model 
cnnmodel.compile(optimizer = 'adam' , loss = 'sparse_categorical_crossentropy' , metrics = ['accuracy'])

In [91]:
# Train the model on batchsize 8
S = time.time()
cnnmodel.fit(x_train,y_train , epochs=5, batch_size=8 , validation_data=(x_test , y_test))
#cnnmodel.fit(x_train,y_train ,epochs=5 ,  validation_data=(x_test , y_test))
E = time.time()

# if we donot mention batch size , by default it takes 32 batch size 
# There are 60,000 examples and if we mention batch size 64 , then dividing by examples by  64 , it will give 938 examples

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [92]:
#Evaluate the model 
test_loss , test_acc = cnnmodel.evaluate(x_test , y_test)

# joblib.dump(cnnmodel, 'cnn_model'.joblib)

print(f"Test Accuracy : {test_acc}") 
print("Total time taken while training " , E-S)


Test Accuracy : 0.9728000164031982
Total time taken while training  104.1966233253479


### Train the model on Batch size 16 , and hidden layers 3( 64 ,64 , 32)

In [94]:
cnnmodel_16 = Sequential([
    Dense(64, activation = 'relu' , input_shape=(784 , )),
    Dense(64 , activation = 'relu'),
    Dense(32, activation= 'softmax'),
    Dense(10, activation= 'softmax')
    
])

In [95]:
# Compile the model 
cnnmodel_16.compile(optimizer = 'adam' , loss = 'sparse_categorical_crossentropy' , metrics = ['accuracy'])

In [96]:
# Train the model on 
S = time.time()
cnnmodel_16.fit(x_train,y_train , epochs=5, batch_size=16 , validation_data=(x_test , y_test))
#cnnmodel.fit(x_train,y_train ,epochs=5 ,  validation_data=(x_test , y_test))
E = time.time()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [97]:
#Evaluate the model 
test_loss_16 , test_acc_16 = cnnmodel_16.evaluate(x_test , y_test)

# joblib.dump(cnnmodel, 'cnn_model'.joblib)

print(f"Test Accuracy : {test_acc_16}") 
print("Total time taken while training " , E-S)


Test Accuracy : 0.968500018119812
Total time taken while training  45.207777976989746


### Train the model on 32 batch size , 2 hidden layers (64 ,32)

In [98]:
cnnmodel_32 = Sequential([
    Dense(64, activation = 'relu' , input_shape=(784 , )),
    Dense(32 , activation = 'relu'),
    Dense(10, activation= 'softmax')
    
    
])

In [99]:
# Compile the model 
cnnmodel_32.compile(optimizer = 'adam' , loss = 'sparse_categorical_crossentropy' , metrics = ['accuracy'])

In [100]:
# Train the model 
S = time.time()
cnnmodel_32.fit(x_train,y_train , epochs=5, batch_size=32 , validation_data=(x_test , y_test))
#cnnmodel.fit(x_train,y_train ,epochs=5 ,  validation_data=(x_test , y_test))
E = time.time()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [101]:
#Evaluate the model 
test_loss_32 , test_acc_32 = cnnmodel_32.evaluate(x_test , y_test)

# joblib.dump(cnnmodel, 'cnn_model'.joblib)

print(f"Test Accuracy : {test_acc_32}") 
print("Total time taken while training " , E-S)


Test Accuracy : 0.9678000211715698
Total time taken while training  23.051255226135254


### Train the model on 64 batch size and  2 hidden layers 2(128,64)

In [102]:
cnnmodel_64 = Sequential([
    Dense(128, activation = 'relu' , input_shape=(784 , )),
    Dense(64 , activation = 'relu'),
    Dense(10, activation= 'softmax')
    
    
])

In [103]:
# Compile the model 
cnnmodel_64.compile(optimizer = 'adam' , loss = 'sparse_categorical_crossentropy' , metrics = ['accuracy'])

In [104]:
# Train the model 
S = time.time()
cnnmodel_64.fit(x_train,y_train , epochs=5, batch_size=64 , validation_data=(x_test , y_test))
#cnnmodel.fit(x_train,y_train ,epochs=5 ,  validation_data=(x_test , y_test))
E = time.time()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [105]:
#Evaluate the model 
test_loss_64 , test_acc_64 = cnnmodel_64.evaluate(x_test , y_test)

# joblib.dump(cnnmodel, 'cnn_model'.joblib)

print(f"Test Accuracy : {test_acc_64}") 
print("Total time taken while training " , E-S)


Test Accuracy : 0.9758999943733215
Total time taken while training  16.147433280944824


### Train the model on 128 batch size , 2 hidden layers (64 ,32)

In [106]:
cnnmodel_128 = Sequential([
    Dense(64, activation = 'relu' , input_shape=(784 , )),
    Dense(32 , activation = 'relu'),
    Dense(10, activation= 'softmax')
    
    
])

In [107]:
# Compile the model 
cnnmodel_128.compile(optimizer = 'adam' , loss = 'sparse_categorical_crossentropy' , metrics = ['accuracy'])

In [108]:
# Train the model 
S = time.time()
cnnmodel_128.fit(x_train,y_train , epochs=5, batch_size=128 , validation_data=(x_test , y_test))
#cnnmodel.fit(x_train,y_train ,epochs=5 ,  validation_data=(x_test , y_test))
E = time.time()


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [109]:
test_loss_128 , test_acc_128 = cnnmodel_64.evaluate(x_test , y_test)

# joblib.dump(cnnmodel, 'cnn_model'.joblib)

print(f"Test Accuracy : {test_acc_128}") 
print("Total time taken while training " , E-S)


Test Accuracy : 0.9758999943733215
Total time taken while training  8.488483667373657


## Comparison

In [110]:
import pandas as pd

comparison = pd.DataFrame({
'Model' : ['Batch_size = 8','Batch_size =16  ','Batch_size = 32','Batch_size = 64 ',' Batch_size = 128' ],
'Accuracy': [ test_acc, test_acc_16 , test_acc_32, test_acc_64, test_acc_128 ] })

comparison


Unnamed: 0,Model,Accuracy
0,Batch_size = 8,0.9728
1,Batch_size =16,0.9685
2,Batch_size = 32,0.9678
3,Batch_size = 64,0.9759
4,Batch_size = 128,0.9759
