<a href="https://colab.research.google.com/github/SakshiKasture/MNIST_digit_classification/blob/main/MNIST_digit_classification_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D
from tensorflow.keras.utils import to_categorical

In [33]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Preprocess data: Reshape to 28x28 images with 1 channel (grayscale)
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

In [34]:
# Normalize pixel values to range 0-1
x_train, x_test = x_train / 255.0, x_test / 255.0
# One-hot encode labels
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [35]:
model = tf.keras.models.Sequential()

In [37]:
# Convolutional layer with 32 filters, kernel size 3x3, activation function ReLU
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))

In [38]:
# Pooling layer, takes max value from 2x2 grid and helps to downsample
model.add(layers.MaxPooling2D((2, 2)))

In [39]:
# Convolutional layer with 64 filters
'''multiple convolution layers are needed as they help detect diff features. Adding more convolution layers
allows the model to detect more complex features at different levels. For example, the first layer detects basic features
 like edges, while deeper layers may detect more complex structures like shapes or objects.
 Pooling layers reduce the spatial size of the image after each convolution, helping the network become
 more computationally efficient, reduce overfitting, and retain important features.
  With multiple pooling layers, we gradually downsample the image, allowing the network to focus on high-level features while keeping the computation manageable.'''
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# Pooling layer
model.add(layers.MaxPooling2D((2, 2)))

In [40]:
# Flatten the 2D data to 1D
''' To connect the learned features to a fully connected layer (Dense layer), the 2D data must be flattened into a 1D vector.in CNNs,
the output of convolutional and pooling layers needs flattening to connect to the fully connected (dense) layers, whereas in FNNs,
the input needs flattening to fit into the first layer.'''
model.add(layers.Flatten())

In [41]:
# Fully connected layer
'''This adds a Dense (fully connected) layer with 64 neurons. It uses the ReLU activation function, which helps the model learn non-linear
relationships and introduces sparsity by activating only some neurons.'''
model.add(layers.Dense(64, activation='relu'))
'''This adds another Dense layer with 10 neurons (one for each class, i.e., the digits 0-9). The softmax activation function
converts the output into a probability distribution, where each neuron represents the probability of the input image belonging to a particular class.
The first line is a fully connected layer to process learned features.
The second line is the output layer, giving probabilities for each class.'''
# Output layer with 10 units (one for each digit)
model.add(layers.Dense(10, activation='softmax'))

In [42]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 143ms/step - accuracy: 0.8797 - loss: 0.3981 - val_accuracy: 0.9842 - val_loss: 0.0494
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 143ms/step - accuracy: 0.9844 - loss: 0.0513 - val_accuracy: 0.9862 - val_loss: 0.0446
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 145ms/step - accuracy: 0.9903 - loss: 0.0311 - val_accuracy: 0.9907 - val_loss: 0.0291
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 144ms/step - accuracy: 0.9929 - loss: 0.0231 - val_accuracy: 0.9897 - val_loss: 0.0328
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 142ms/step - accuracy: 0.9945 - loss: 0.0180 - val_accuracy: 0.9894 - val_loss: 0.0356


<keras.src.callbacks.history.History at 0x7e865e794df0>

In [31]:
# Evaluate the model on test data
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.9860 - loss: 0.0442
Test accuracy: 0.989799976348877
