<a href="https://colab.research.google.com/github/SyedaMeem/mnist/blob/main/MNIST_using_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
# Load the MNIST train/test splits and show the shape of every array.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [None]:
# Flatten every 28x28 image into one 784-element row, cast the pixels to
# float32, and divide by 255 (the maximum gray level) so the values are
# scaled into the range 0 to 1.
# NOTE: this cell overwrites x_train/x_test, so running it a second time
# divides by 255 again; re-run the loading cell first if you need to repeat it.

def _flatten_and_scale(images):
    """Return `images` reshaped to (-1, 28*28) as float32, divided by 255."""
    flat = images.reshape(-1, 28 * 28)
    return flat.astype("float32") / 255.0


x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)


#the reshaping operations above are NumPy operations; we could explicitly convert the NumPy arrays to tensors as shown below, but TensorFlow does this implicitly, so we don't have to do it manually
#x_train=tf.convert_to_tensor(x_train)
#the same can be done for x_test, y_train and y_test

# Sequential API (convenient but not flexible)

In [None]:
## Network (Sequential API)
# A plain stack of fully connected layers: 784 inputs -> 512 -> 256 -> 10.

model = keras.Sequential(
    [
        # `shape` must be a tuple: `(28*28)` is just the integer 784, which
        # newer Keras versions reject, so the trailing comma is required.
        keras.Input(shape=(28 * 28,)),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        # No activation here: the layer emits raw scores (logits), one per digit class.
        layers.Dense(10),
    ]
)
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
model.summary()

#the same model can also be built incrementally with add():
#model=keras.Sequential()
#model.add(keras.Input(shape=(28*28,))) #input spec (not counted in model.layers)
#model.add(layers.Dense(512,activation='relu',name="first_layer")) #layer index -> -3
#model.add(layers.Dense(256,activation='relu',name="second_layer")) #layer index -> -2
#model.add(layers.Dense(10)) #layer index -> -1

##how to extract a specific layer's output, which is useful for debugging
#@using layer index
#model=keras.Model(inputs=model.inputs,outputs=[model.layers[-1].output])
#feature=model.predict(x_train)
#print(feature.shape)

#@using layer name
#model=keras.Model(inputs=model.inputs,outputs=[model.get_layer("first_layer").output])
#feature=model.predict(x_train)
#print(feature.shape)

#@getting the feature output of every layer
#model=keras.Model(inputs=model.inputs,outputs=[layer.output for layer in model.layers])
#features=model.predict(x_train)
#for feature in features:
#    print(feature.shape)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_4 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_5 (Dense)              (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
## Training configuration
# The final Dense(10) layer of this model has no activation, so it outputs raw
# logits. from_logits=True tells the loss to apply softmax internally before
# computing the sparse categorical cross-entropy; it does not add a softmax
# layer to the network.

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # `lr` is a deprecated alias that newer Keras versions no longer accept;
    # `learning_rate` is the supported argument name.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

In [None]:
# Train on the training split, then report [loss, accuracy] on the test split.
# verbose=2 prints one summary line per epoch instead of a progress bar.

train_options = dict(batch_size=32, epochs=5, verbose=2)
model.fit(x_train, y_train, **train_options)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)


Epoch 1/5
1875/1875 - 8s - loss: 0.1868 - accuracy: 0.9435
Epoch 2/5
1875/1875 - 8s - loss: 0.0787 - accuracy: 0.9757
Epoch 3/5
1875/1875 - 8s - loss: 0.0538 - accuracy: 0.9828
Epoch 4/5
1875/1875 - 8s - loss: 0.0413 - accuracy: 0.9869
Epoch 5/5
1875/1875 - 8s - loss: 0.0324 - accuracy: 0.9895
313/313 - 1s - loss: 0.0808 - accuracy: 0.9770


[0.08084864169359207, 0.9769999980926514]

# Functional API (a bit more flexible; handles multiple inputs and multiple outputs)

In [None]:
## Network (Functional API)
# Same 784 -> 512 -> 256 -> 10 architecture, built by calling each layer on
# the output tensor of the previous one.

# `shape` must be a tuple: `(28*28)` is just the integer 784, which newer
# Keras versions reject, so the trailing comma is required.
inputs = keras.Input(shape=(28 * 28,))
x = layers.Dense(512, activation='relu', name="first_layer")(inputs)
x = layers.Dense(256, activation='relu', name="second_layer")(x)
# softmax turns the 10 scores into class probabilities, so the loss used with
# this model must be configured with from_logits=False.
outputs = layers.Dense(10, activation='softmax')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
first_layer (Dense)          (None, 512)               401920    
_________________________________________________________________
second_layer (Dense)         (None, 256)               131328    
_________________________________________________________________
dense_9 (Dense)              (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
## Training configuration
# from_logits=False -> the loss treats the model output as probabilities and
# computes the sparse categorical cross-entropy on it directly, which matches
# a model whose last layer applies softmax. False is the default value, so the
# argument could be omitted; it is spelled out here for clarity.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    # `lr` is a deprecated alias that newer Keras versions no longer accept;
    # `learning_rate` is the supported argument name.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

In [None]:
# Fit the model for 5 epochs in mini-batches of 32, then evaluate it on the
# held-out test split; the evaluate() result ([loss, accuracy]) is the cell output.

model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)
model.evaluate(
    x_test,
    y_test,
    batch_size=32,
    verbose=2,
)

Epoch 1/5
1875/1875 - 8s - loss: 0.1841 - accuracy: 0.9438
Epoch 2/5
1875/1875 - 8s - loss: 0.0774 - accuracy: 0.9757
Epoch 3/5
1875/1875 - 8s - loss: 0.0530 - accuracy: 0.9835
Epoch 4/5
1875/1875 - 8s - loss: 0.0415 - accuracy: 0.9872
Epoch 5/5
1875/1875 - 8s - loss: 0.0312 - accuracy: 0.9898
313/313 - 1s - loss: 0.0871 - accuracy: 0.9768


[0.08705500513315201, 0.9768000245094299]