<a href="https://colab.research.google.com/github/Siddhant-Thendral-Arasu/Siddhant-Thendral-Arasu/blob/CNN_signdigits_classification/CNN_signdigits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Uses the MNIST database to classify sign digits (0-9) with a convolutional network: CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> DENSE.
Includes some basic NumPy padding, convolution and pooling functions, followed by a full implementation using the functional Keras API.

In [57]:
import numpy as np
import h5py
from tensorflow.keras.datasets import mnist

In [58]:
import math
import scipy
import h5py
from PIL import Image
import pandas as pd
import tensorflow as tf
from tensorflow.python.framework import ops

In [59]:
def load_dataset(path='mnist_sign_digits.h5'):
    """Read the train/test images and labels from an HDF5 file.

    Args:
        path: Location of the HDF5 file to open read-only. Defaults to
            'mnist_sign_digits.h5' (relative to the working directory), the
            file name this function previously had hard-coded. The file must
            contain the datasets 'x_train', 'y_train', 'x_test' and 'y_test'.

    Returns:
        Tuple (train_images, train_labels, test_images, test_labels), each
        holding the full contents of the corresponding dataset.
    """
    with h5py.File(path, 'r') as h5f:
        # `[:]` reads every dataset completely while the file is still open,
        # so the returned values remain usable after the `with` block closes it.
        train_images = h5f['x_train'][:]
        train_labels = h5f['y_train'][:]
        test_images = h5f['x_test'][:]
        test_labels = h5f['y_test'][:]
    # Print shapes to verify the data
    print("Train Images shape:", train_images.shape)
    print("Train Labels shape:", train_labels.shape)
    print("Test Images shape:", test_images.shape)
    print("Test Labels shape:", test_labels.shape)
    return train_images, train_labels, test_images, test_labels

In [60]:
def zero_padding(X, pad_values):
    """Zero-pad axes 1 and 2 of a 4-D array, leaving axes 0 and 3 untouched.

    Args:
        X: 4-D array to pad.
        pad_values: Number of zeros added before and after along axes 1 and 2.

    Returns:
        A new array whose axes 1 and 2 are each longer by 2 * pad_values.
    """
    untouched = (0, 0)
    both_sides = (pad_values, pad_values)
    pad_width = (untouched, both_sides, both_sides, untouched)
    return np.pad(X, pad_width, mode='constant', constant_values=(0, 0))

In [61]:
def conv_1_layer(A_prev, W_filter, b):
    """Apply one filter to one input slice: element-wise product, sum, plus bias.

    Args:
        A_prev: Slice of the input; multiplied element-wise with `W_filter`.
        W_filter: Filter weights, broadcast-compatible with `A_prev`.
        b: Bias holding exactly one value (a Python number or a size-1 array
            of any rank, e.g. shape (1, 1, 1)).

    Returns:
        Scalar: sum of `A_prev * W_filter` over all elements, plus the bias.

    Raises:
        ValueError: If `b` contains more than one element.
    """
    products = np.multiply(A_prev, W_filter)
    # Calling float() directly on an array with ndim > 0 is deprecated since
    # NumPy 1.25; .item() extracts the single element whatever the rank.
    bias = float(np.asarray(b).item())
    return np.sum(products) + bias

In [62]:
def fwd_conv(A_prev, W, b, hparameters):
    """Forward pass of a convolution layer over a batch (NumPy reference version).

    Args:
        A_prev: Input activations, shape (m, n_H_prev, n_W_prev, n_C_prev).
        W: Square filters, shape (f, f, n_C_prev, n_C).
        b: Biases, indexed as b[0][0][0][c], i.e. shape (1, 1, 1, n_C).
        hparameters: Dict with the integer entries "stride" and "pad".

    Returns:
        Z: Convolution output, shape (m, n_H, n_W, n_C), where
            n_H = (n_H_prev + 2 * pad - f) // stride + 1 and likewise for n_W.
        cache: Tuple (A_prev, W, b, hparameters), returned unchanged.

    Raises:
        ValueError: If the channel count of `A_prev` differs from the input
            channel count of `W` (otherwise the product could silently
            broadcast a single channel and produce wrong numbers).
    """
    m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
    f, n_C = W.shape[1], W.shape[3]
    if W.shape[2] != n_C_prev:
        raise ValueError(
            "channel mismatch: A_prev has %d channels but W expects %d"
            % (n_C_prev, W.shape[2])
        )

    stride = hparameters["stride"]
    pad = hparameters["pad"]

    # Floor division gives the number of complete windows that fit.
    n_H = (n_H_prev + 2 * pad - f) // stride + 1
    n_W = (n_W_prev + 2 * pad - f) // stride + 1

    Z = np.zeros((m, n_H, n_W, n_C))
    A_prev_pad = zero_padding(A_prev, pad)
    # One bias per output channel.
    bias = np.asarray(b)[0, 0, 0, :n_C]

    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        for h in range(n_H):
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):
                horiz_start = w * stride
                horiz_end = horiz_start + f

                # The window is the same for every output channel, so slice it
                # once and evaluate all n_C filters in a single reduction.
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                Z[i, h, w, :] = np.sum(a_slice_prev[..., np.newaxis] * W, axis=(0, 1, 2)) + bias

    cache = (A_prev, W, b, hparameters)

    return Z, cache

In [64]:
def fwd_pool(A_prev, hparameters, mode="max"):
    """Forward pass of a pooling layer over a batch (NumPy reference version).

    Args:
        A_prev: Input activations, shape (m, n_H_prev, n_W_prev, n_C_prev).
        hparameters: Dict with the integer entries "f" (window size) and
            "stride".
        mode: "max" for max pooling or "average" for mean pooling.

    Returns:
        A: Pooled output, shape (m, n_H, n_W, n_C_prev), where
            n_H = (n_H_prev - f) // stride + 1 and likewise for n_W.
        cache: Tuple (A_prev, hparameters), returned unchanged.

    Raises:
        ValueError: If `mode` is neither "max" nor "average". (Previously an
            unknown mode silently produced an all-zero output.)
    """
    if mode == "max":
        reduce_window = np.max
    elif mode == "average":
        reduce_window = np.mean
    else:
        raise ValueError('mode must be "max" or "average", got %r' % (mode,))

    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    f = hparameters["f"]
    stride = hparameters["stride"]

    # Floor division gives the number of complete windows that fit.
    n_H = (n_H_prev - f) // stride + 1
    n_W = (n_W_prev - f) // stride + 1
    n_C = n_C_prev

    A = np.zeros((m, n_H, n_W, n_C))
    for h in range(n_H):
        vert_start = h * stride
        vert_end = vert_start + f
        for w in range(n_W):
            horiz_start = w * stride
            horiz_end = horiz_start + f

            # Pooling treats every example and channel independently, so reduce
            # only the two spatial axes of the window for the whole batch at once.
            window = A_prev[:, vert_start:vert_end, horiz_start:horiz_end, :]
            A[:, h, w, :] = reduce_window(window, axis=(1, 2))

    cache = (A_prev, hparameters)
    return A, cache

In [65]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Activation
from tensorflow.keras.models import Model

In [66]:
def conv_model(input_shape):
    """
    Assemble the digit classifier with the Keras functional API.

    Layer stack: two CONV2D -> RELU -> MAXPOOL stages (32 then 64 filters),
    followed by FLATTEN -> DENSE(128, relu) -> DENSE(10, softmax).

    Args:
    - input_shape: tuple, the shape of the input images (height, width, channels)

    Returns:
    - model: A Keras functional Model. It is not compiled here; the caller is
      expected to call `compile()` before training.
    """
    inputs = Input(shape=input_shape)

    # Two identical convolutional stages that differ only in the filter count:
    # 3x3 'same' convolution, ReLU, then 2x2 max pooling with stride 2.
    features = inputs
    for n_filters in (32, 64):
        features = Conv2D(filters=n_filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(features)
        features = Activation('relu')(features)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(features)

    # Classifier head: flatten, one hidden dense layer, 10-way softmax output.
    flat = Flatten()(features)
    hidden = Dense(units=128, activation='relu')(flat)
    outputs = Dense(units=10, activation='softmax')(hidden)

    return Model(inputs=inputs, outputs=outputs)

In [67]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across Restart & Run All. (Bit-exact results on a
# GPU may additionally require tf.config.experimental.enable_op_determinism().)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocess the data: add an explicit single-channel axis, convert to float32
# and divide by 255.0; one-hot encode the labels into 10 classes.
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Build the model
model = conv_model(input_shape=(28, 28, 1))

# Compile the model (categorical cross-entropy matches the one-hot labels above)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model, holding out 10% of the training data for validation
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_split=0.1)

# Evaluate the model on the untouched test split
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

Epoch 1/10
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 85ms/step - accuracy: 0.8929 - loss: 0.3547 - val_accuracy: 0.9843 - val_loss: 0.0496
Epoch 2/10
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 81ms/step - accuracy: 0.9849 - loss: 0.0471 - val_accuracy: 0.9892 - val_loss: 0.0350
Epoch 3/10
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 88ms/step - accuracy: 0.9911 - loss: 0.0286 - val_accuracy: 0.9882 - val_loss: 0.0397
Epoch 4/10
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 90ms/step - accuracy: 0.9933 - loss: 0.0206 - val_accuracy: 0.9898 - val_loss: 0.0348
Epoch 5/10
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 83ms/step - accuracy: 0.9949 - loss: 0.0159 - val_accuracy: 0.9902 - val_loss: 0.0360
Epoch 6/10
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 84ms/step - accuracy: 0.9961 - loss: 0.0107 - val_accuracy: 0.9915 - val_loss: 0.0351
Epoch 7/10
[1m8

In [68]:
# Show the layer-by-layer summary of the `model` built and trained above.
model.summary()