In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

### Data

In [2]:
# Load MNIST as (image, label) pairs together with the dataset metadata.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)
mnist_info

#Read the output below carefully; we will extract several values from it.

tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_path='C:\\Users\\kunal\\tensorflow_datasets\\mnist\\3.0.1',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
)

In [3]:
# Pull the 'train' and 'test' splits out of the loaded dataset dictionary.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

#### Now getting the number of validation samples

In [4]:
# Hold out 10% of the training split for validation.
# The product is a Python float, so it is cast to tf.int64 to get a whole-number count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)
num_validation_samples

<tf.Tensor: shape=(), dtype=int64, numpy=6000>

In [5]:
# Number of samples in the test split, stored as a tf.int64 tensor
# (the same representation used for num_validation_samples).
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)
num_test_samples

<tf.Tensor: shape=(), dtype=int64, numpy=10000>

In [6]:
def scale(image, label):
    """Rescale pixel values from the 0-255 range to the 0-1 range.

    The image is cast to tf.float64 before the division; the label is
    passed through unchanged.

    Returns:
        A (scaled_image, label) tuple.
    """
    as_float = tf.cast(image, tf.float64)
    return as_float / 255.00, label


In [7]:
# Apply `scale` to every (image, label) pair of the training split.
# The result still contains both the future training and validation samples.
scaled_train_and_validation_data = mnist_train.map(scale)
scaled_train_and_validation_data

<_MapDataset element_spec=(TensorSpec(shape=(28, 28, 1), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [8]:
#similarly scaling the the test data as well.
# The test split goes through the same `scale` mapping as the training split.
test_data = mnist_test.map(scale)
test_data

<_MapDataset element_spec=(TensorSpec(shape=(28, 28, 1), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

### Shuffling the train and validation data.

In [9]:
# Number of elements held in the shuffle buffer; each output element is drawn at
# random from this buffer (it is not a training batch size).
BUFFER_SIZE = 10000

# Shuffle ONCE with a fixed order. With the default reshuffle_each_iteration=True the
# dataset is reshuffled every time it is iterated, so take()/skip() below would pick
# a different split on every epoch and validation samples would leak into training.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# The first num_validation_samples elements form the validation set ...
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
# ... and everything after them is the training set.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Re-shuffle only the training portion so each epoch still sees a new sample order
# without ever mixing in validation samples.
train_data = train_data.shuffle(BUFFER_SIZE)


### Preparing data for batching since we are using SGD.

In [10]:
BATCH_SIZE = 100

# Training data is split into mini-batches of BATCH_SIZE samples.
train_data = train_data.batch(BATCH_SIZE)

# Validation and test data are each placed in one batch holding the whole set:
# the model expects batched input, but these sets do not need to be split further.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)
test_data

<_BatchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [11]:
# Materialise the single validation batch as (inputs, targets) tensors.
validation_batch = next(iter(validation_data))
validation_input, validation_target = validation_batch
validation_input.shape, validation_target.shape

(TensorShape([6000, 28, 28, 1]), TensorShape([6000]))

## Model

In [12]:
input_size = 784
output_size = 10  # One output per digit class (0-9).
# Started at 50 and also tried 100 and 150. 80 was kept because it gave validation and
# test accuracies that were very close, which suggests the model is NOT overfitting
# to the validation data.
hidden_layer_size = 80

# Softmax on the output layer turns the 10 outputs into class probabilities.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),             # Layer 1 - input
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # Layer 2 - hidden
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # Layer 3 - hidden
    tf.keras.layers.Dense(output_size, activation='softmax'),     # Layer 4 - output
])


### Selecting optimizer and loss function.

In [13]:
# Adam optimizer; sparse categorical cross-entropy because the labels are integer
# class ids rather than one-hot vectors; accuracy is tracked as the metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Training.

In [14]:
NUM_EPOCHS = 5

# Train on the batched training set, checking the held-out validation tensors
# after each epoch; verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_input, validation_target),
    verbose=2,
)


Epoch 1/5
540/540 - 2s - loss: 0.3526 - accuracy: 0.8998 - val_loss: 0.1844 - val_accuracy: 0.9462 - 2s/epoch - 3ms/step
Epoch 2/5
540/540 - 1s - loss: 0.1491 - accuracy: 0.9552 - val_loss: 0.1260 - val_accuracy: 0.9627 - 862ms/epoch - 2ms/step
Epoch 3/5
540/540 - 1s - loss: 0.1071 - accuracy: 0.9679 - val_loss: 0.1055 - val_accuracy: 0.9690 - 847ms/epoch - 2ms/step
Epoch 4/5
540/540 - 1s - loss: 0.0845 - accuracy: 0.9744 - val_loss: 0.0865 - val_accuracy: 0.9742 - 851ms/epoch - 2ms/step
Epoch 5/5
540/540 - 1s - loss: 0.0677 - accuracy: 0.9797 - val_loss: 0.0713 - val_accuracy: 0.9765 - 835ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x1cb4c1e0c10>

# Testing the model.

In [15]:
# Evaluate the trained model on the batched test set, unpacking the result into
# the loss and the accuracy metric.
test_loss, test_acc = model.evaluate(test_data)



In [16]:
# Display the loss measured on the test set.
test_loss 

0.08749296516180038

In [17]:
# Report the test accuracy as a percentage.
test_acc_percent = test_acc*100.0
print(f"Test accuracy : {test_acc_percent}")

Test accuracy : 97.13000059127808
