## CNN Basics: MNIST dataset

[Detailed Solution for MNIST dataset image classification](https://www.kaggle.com/code/kanncaa1/convolutional-neural-network-cnn-tutorial/notebook)

In [1]:
from keras import layers, models
from keras.datasets import mnist
from keras.utils import to_categorical

Define the CNN model

In [3]:
# Assemble the whole network in one go: Sequential accepts an ordered list of
# layers, each one taking a single input tensor and producing a single output tensor.
model = models.Sequential([
    # Convolution stage 1: 32 filters of size 5x5 with ReLU activation.
    # input_shape=(28, 28, 1) declares 28x28 images with one (grayscale) channel.
    layers.Conv2D(32, (5, 5), activation='relu', input_shape=(28, 28, 1)),
    # Max pooling over 2x2 windows.
    layers.MaxPooling2D((2, 2)),

    # Convolution stage 2: 64 filters of size 5x5 with ReLU activation,
    # again followed by 2x2 max pooling.
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Collapse the 2D feature maps into a 1D vector per sample.
    layers.Flatten(),

    # Classifier head: 10 units (one per MNIST digit class) with softmax activation.
    layers.Dense(10, activation='softmax'),
])

# Show the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 24, 24, 32)        832       
                                                                 
 max_pooling2d (MaxPooling2  (None, 12, 12, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 8, 8, 64)          51264     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 4, 4, 64)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 1024)              0         
                                                                 
 dense (Dense)               (None, 10)                10250     

In [4]:
# Load MNIST, which Keras returns already split into training and test sets.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Preprocess the images: add the trailing channel dimension the network's
# (28, 28, 1) input expects, then normalize the pixel values to be between 0 and 1.
# The leading -1 lets reshape infer the number of samples, so the cell does not
# depend on the hard-coded 60000 / 10000 split sizes.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# Convert the labels to one-hot encoded format. num_classes is pinned to 10 so the
# encoding width always matches the Dense(10) output layer instead of being
# inferred from the largest label that happens to be present.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Train the model
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(train_images, train_labels, batch_size=100, epochs=5, verbose=1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x182ab3fe0d0>

In [5]:
# Measure performance on the test images and labels; evaluate() is unpacked here
# into the loss and the accuracy metric requested in compile().
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy: {test_acc}')

Test accuracy: 0.9708999991416931
