<a href="https://colab.research.google.com/github/Divyam-Deep/Deep-Learning-on-MNIST-dataset/blob/main/MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
# My action plan
# 1- Prepare and preprocess the data (train, validation, and test sets)
# 2- Outline the model and choose the activation functions
# 3- Set an advanced optimizer and the loss function
# 4- Train the model
# 5- Test the accuracy of the model

In [46]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [47]:
# Load MNIST through TensorFlow Datasets.
#   as_supervised=True -> each element is an (image, label) tuple instead of a feature dict
#   with_info=True     -> also return the dataset metadata object (used later for split sizes)
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [48]:
# Pull the two predefined splits out of the loaded dataset dictionary.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Reserve 10% of the training split for validation. The product is a float,
# so it is cast to an int64 tensor before being used as an element count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples, tf.int64
)

# Size of the test split, likewise as an int64 tensor (used as a batch size later).
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)


def scale(image, label):
    """Cast an image to float32 and divide its values by 255.

    Args:
        image: Image tensor; pixel values are assumed to lie in 0-255 so the
            result falls in 0-1.
        label: Target associated with the image; returned unchanged.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Apply the 0-1 pixel scaling to the train+validation pool and to the test set.
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

# Shuffle before splitting.
# BUFFER_SIZE is the size of the shuffle buffer: the dataset keeps this many elements
# in memory and draws the next element uniformly at random from that buffer.
BUFFER_SIZE = 10000

# reshuffle_each_iteration=False is essential here. With the default (True) the dataset
# is re-permuted every time it is iterated, so take()/skip() would select a *different*
# validation/training partition on every epoch and validation examples would leak into
# the training stream. Freezing the order keeps the two subsets disjoint.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Mini-batch gradient descent: batch size 1 would be pure SGD, batch size equal to the
# number of samples would be full-batch GD; 100 sits in between.
BATCH_SIZE = 100
# Reshuffle only the training subset on every epoch (safe now that the split is fixed).
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
# Validation and test are only evaluated, so each is delivered as one single batch.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# iter() creates an iterator over the dataset and next() pulls its first (and only) batch,
# giving the whole validation set as a pair of tensors.
validation_inputs, validation_targets = next(iter(validation_data))


In [49]:
# Outline the model: a fully connected classifier for 28x28x1 images.
input_size = 784   # 28 * 28 * 1 values per image once flattened (previously mistyped as 728)
output_size = 10   # one output unit per class
hidden_layer_size = 128

# Sequential stacks the layers in the order given.
model = tf.keras.Sequential([
    # Declare the input shape with an explicit Input object (the form Keras recommends
    # for Sequential models) and flatten each image into a vector of `input_size` values.
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Flatten(),

    # First dense layer followed by batch normalization and dropout.
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    # Dropout randomly zeroes 30% of the incoming activations during training
    # to reduce overfitting; it is inactive at inference time.
    tf.keras.layers.Dropout(0.3),

    # Second dense layer with batch normalization and dropout.
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    # Third dense layer with batch normalization and dropout.
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    # Output layer: softmax turns the `output_size` scores into class probabilities.
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

In [50]:
# Choose the optimizer and the loss function.
# The "sparse" cross-entropy variant takes integer class labels rather than one-hot vectors;
# accuracy is tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [51]:
# Early stopping to prevent overfitting: halt training once the validation loss has not
# improved for 3 consecutive epochs and roll the model back to its best weights.
# The callback only takes effect when it is handed to `model.fit(..., callbacks=[...])`.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [52]:
# Training
NUM_EPOCHS = 30  # upper bound; early stopping may end training sooner

# The early_stopping callback must be passed to fit() explicitly, otherwise it is never
# invoked and all NUM_EPOCHS epochs run without restoring the best weights.
# The returned History object is kept so the per-epoch metrics can be inspected later.
history = model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    callbacks=[early_stopping],
    verbose=2,
)


Epoch 1/30
540/540 - 14s - 26ms/step - accuracy: 0.8476 - loss: 0.4976 - val_accuracy: 0.9520 - val_loss: 0.1666
Epoch 2/30
540/540 - 7s - 14ms/step - accuracy: 0.9290 - loss: 0.2384 - val_accuracy: 0.9613 - val_loss: 0.1196
Epoch 3/30
540/540 - 7s - 14ms/step - accuracy: 0.9449 - loss: 0.1870 - val_accuracy: 0.9650 - val_loss: 0.1093
Epoch 4/30
540/540 - 11s - 20ms/step - accuracy: 0.9518 - loss: 0.1624 - val_accuracy: 0.9743 - val_loss: 0.0863
Epoch 5/30
540/540 - 6s - 12ms/step - accuracy: 0.9558 - loss: 0.1497 - val_accuracy: 0.9770 - val_loss: 0.0752
Epoch 6/30
540/540 - 10s - 19ms/step - accuracy: 0.9599 - loss: 0.1316 - val_accuracy: 0.9758 - val_loss: 0.0773
Epoch 7/30
540/540 - 12s - 22ms/step - accuracy: 0.9623 - loss: 0.1242 - val_accuracy: 0.9793 - val_loss: 0.0664
Epoch 8/30
540/540 - 10s - 19ms/step - accuracy: 0.9655 - loss: 0.1116 - val_accuracy: 0.9782 - val_loss: 0.0706
Epoch 9/30
540/540 - 10s - 19ms/step - accuracy: 0.9671 - loss: 0.1083 - val_accuracy: 0.9825 - val

<keras.src.callbacks.history.History at 0x7c2cf3017490>

In [53]:
# Final check on the held-out test set, which was batched into a single batch
# containing every test example. Returns the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(x=test_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.9823 - loss: 0.0628
