In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
class ImageLoader:
    """Fetch MNIST once and expose a leading slice of it as 28x28 images.

    Attributes:
        num_images: Number of samples held after the last ``load_data`` call.
        images: Array of shape ``(num_images, 28, 28)`` or ``None`` before loading.
        labels: 1-D integer array of length ``num_images`` or ``None`` before loading.
        mnist: The object returned by ``fetch_openml('mnist_784', version=1)``.
    """

    def __init__(self):
        self.num_images = 0
        self.images = None
        self.labels = None
        # Fetched eagerly so that repeated load_data() calls reuse one download.
        self.mnist = fetch_openml('mnist_784', version=1)

    def load_data(self, num_images=5000):
        """Populate ``images`` and ``labels`` with the first ``num_images`` samples.

        Args:
            num_images: Requested number of samples. Values larger than the
                dataset are clamped to the dataset size and negative values
                are treated as 0, so ``self.num_images`` always matches the
                length of ``self.images`` / ``self.labels``.
        """
        mnist = self.mnist
        # np.asarray accepts both a pandas DataFrame and a plain ndarray, so the
        # loader works regardless of how the dataset container stores its data.
        pixels = np.asarray(mnist['data'])
        targets = np.asarray(mnist['target'].astype(int))

        available = pixels.shape[0]
        self.num_images = max(0, min(int(num_images), available))

        # Each row holds 784 pixel values; view them as 28x28 images.
        self.images = pixels[:self.num_images].reshape(self.num_images, 28, 28)
        # Stored as an ndarray so positional indexing (labels[i]) is unambiguous.
        self.labels = targets[:self.num_images]

In [165]:
class ConvLayer:
    """Naive convolution layer with 2-D filters, max pooling and manual gradients.

    Every filter is a single ``filter_size x filter_size`` kernel that is
    broadcast over all input channels; the per-channel products are summed
    into one output map per filter.

    Attributes:
        filters: Array of shape ``(num_filters, filter_size, filter_size)``.
        filter_grads: Gradient accumulator with the same shape as ``filters``.
        output_height / output_width: Spatial size of the most recent
            ``conv2d`` *or* ``max_pooling`` result (both methods overwrite them).
    """

    def __init__(self, num_filters, filter_size=3):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.filters = np.random.randn(num_filters, filter_size, filter_size) * 0.01
        self.filter_grads = np.zeros_like(self.filters)  # To store gradients
        self.output_height = None
        self.output_width = None

    def conv2d(self, input_data):
        """Apply every filter with stride 1 and no padding.

        Args:
            input_data: Array of shape ``(channels, height, width)``.

        Returns:
            Float array of shape ``(num_filters, height - k + 1, width - k + 1)``
            where ``k`` is ``filter_size`` (dimensions are clamped at 0).
        """
        filter_size = self.filter_size
        _, height, width = input_data.shape
        self.output_height = max(0, height - filter_size + 1)  # Save output dimensions
        self.output_width = max(0, width - filter_size + 1)
        output = np.zeros((self.num_filters, self.output_height, self.output_width))

        for f in range(self.num_filters):
            kernel = self.filters[f]
            for i in range(self.output_height):
                for j in range(self.output_width):
                    region = input_data[:, i:i+filter_size, j:j+filter_size]
                    # The 2-D kernel broadcasts over the channel axis.
                    output[f, i, j] = np.sum(region * kernel)

        return output

    def max_pooling(self, feature_maps, pool_size=2, stride=2):
        """Max-pool each feature map independently.

        Args:
            feature_maps: Array of shape ``(num_maps, height, width)``.
            pool_size: Side length of the square pooling window.
            stride: Step between consecutive windows.

        Returns:
            Float array of shape ``(num_maps, out_height, out_width)``.

        Note:
            This overwrites ``output_height`` / ``output_width`` with the pooled
            size, so after a conv + pool sequence those attributes describe the
            pooled maps, not the convolution output.
        """
        num_maps, height, width = feature_maps.shape
        # Clamp at 0 (as conv2d does) so a window larger than the map yields an
        # empty result instead of a negative dimension error.
        self.output_height = max(0, (height - pool_size) // stride + 1)  # Save output dimensions
        self.output_width = max(0, (width - pool_size) // stride + 1)
        output = np.zeros((num_maps, self.output_height, self.output_width))

        for i in range(self.output_height):
            top = i * stride
            for j in range(self.output_width):
                left = j * stride
                window = feature_maps[:, top:top+pool_size, left:left+pool_size]
                output[:, i, j] = window.max(axis=(1, 2))

        return output

    def update_filters(self, learning_rate=0.01):
        """Take one gradient-descent step on the filters and clear the gradients."""
        self.filters -= learning_rate * self.filter_grads
        self.filter_grads = np.zeros_like(self.filters)  # Reset gradients

    def compute_gradients(self, input_data, grad_output):
        """Accumulate the loss gradient with respect to each filter.

        Args:
            input_data: Array of shape ``(channels, height, width)`` that was
                fed to the layer.
            grad_output: Array of shape ``(num_filters, gh, gw)``; entry
                ``[f, i, j]`` weights the input window whose top-left corner
                is ``(i, j)``.

        Returns:
            ``self.filter_grads`` (freshly recomputed, not accumulated across calls).
        """
        self.filter_grads = np.zeros_like(self.filters)
        filter_size = self.filters.shape[1]  # Get filter size from the current layer
        # Iterate over the gradient actually supplied rather than over
        # output_height/output_width, which may have been overwritten by a
        # later conv2d/max_pooling call.
        _, grad_height, grad_width = grad_output.shape

        for f in range(self.num_filters):
            for i in range(grad_height):
                for j in range(grad_width):
                    region = input_data[:, i:i+filter_size, j:j+filter_size]
                    # Sum over channels because one 2-D kernel serves all of them.
                    self.filter_grads[f] += np.sum(region * grad_output[f, i, j], axis=0)

        return self.filter_grads

    def backward(self, input_data, grad_output):
        """Compute filter gradients and the gradient with respect to the input.

        Args:
            input_data: Array of shape ``(channels, height, width)``.
            grad_output: Array of shape ``(num_filters, gh, gw)``.

        Returns:
            Array shaped like ``input_data`` holding the input gradient. The
            result keeps a floating input dtype and is float64 for any other
            dtype, so integer images no longer trigger a casting error.

        Side effects:
            ``self.filter_grads`` is recomputed (same values as
            ``compute_gradients``).
        """
        filter_size = self.filter_size
        self.compute_gradients(input_data, grad_output)

        if np.issubdtype(input_data.dtype, np.floating):
            grad_dtype = input_data.dtype
        else:
            grad_dtype = np.float64
        d_input = np.zeros(input_data.shape, dtype=grad_dtype)  # Gradient with respect to input data

        _, grad_height, grad_width = grad_output.shape
        for f in range(self.num_filters):
            kernel = self.filters[f]
            for i in range(grad_height):
                for j in range(grad_width):
                    # Scatter the kernel back onto every channel of the window.
                    d_input[:, i:i+filter_size, j:j+filter_size] += kernel * grad_output[f, i, j]

        return d_input

In [4]:
import matplotlib.pyplot as plt

def display_images(original_image, first_layer_output, second_layer_output):
    """Plot the input image and every feature map of two layers in a 3-column grid.

    Layout: the original image occupies the first cell of row 1; the first
    layer's maps start on the next row, and the second layer's maps start on
    the row after the first layer's last row.

    Args:
        original_image: Image accepted by ``plt.imshow``.
        first_layer_output: Indexable along axis 0; ``first_layer_output[i]``
            is shown as one image.
        second_layer_output: Same convention as ``first_layer_output``.
    """
    num_cols = 3
    num_first = first_layer_output.shape[0]
    num_second = second_layer_output.shape[0]

    # Ceiling division: rows needed to place n images in num_cols columns.
    num_rows_first = -(-num_first // num_cols)
    num_rows_second = -(-num_second // num_cols)
    total_rows = 1 + num_rows_first + num_rows_second

    # Create a figure with enough subplots
    plt.figure(figsize=(15, 5 + 3 * (num_rows_first + num_rows_second)))

    # Original Image
    plt.subplot(total_rows, num_cols, 1)
    plt.imshow(original_image, cmap='gray')
    plt.title('Original Image')
    plt.axis('off')

    # First Layer Outputs (skip the row reserved for the original image)
    first_offset = num_cols
    for i in range(num_first):
        plt.subplot(total_rows, num_cols, first_offset + i + 1)
        plt.imshow(first_layer_output[i], cmap='gray')
        plt.title(f'First Layer Filter {i+1}')
        plt.axis('off')

    # Second Layer Outputs (begin on a fresh row below the first layer)
    second_offset = num_cols * (1 + num_rows_first)
    for i in range(num_second):
        plt.subplot(total_rows, num_cols, second_offset + i + 1)
        plt.imshow(second_layer_output[i], cmap='gray')
        plt.title(f'Second Layer Filter {i+1}')
        plt.axis('off')

    plt.tight_layout()  # Adjust layout to prevent overlapping
    plt.show()

In [5]:
# Build the shared loader. ImageLoader.__init__ calls
# fetch_openml('mnist_784', version=1), so the dataset is fetched here.
loader = ImageLoader()

  warn(


In [124]:
class SimpleNN:
    """Two-layer fully connected classifier (ReLU hidden layer, softmax output).

    Attributes:
        W1, b1: Hidden-layer weights ``(input_size, hidden_size)`` and bias ``(1, hidden_size)``.
        W2, b2: Output-layer weights ``(hidden_size, output_size)`` and bias ``(1, output_size)``.
        z1, a1, z2, a2: Activations cached by the most recent ``forward`` call.
    """

    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def relu(self, x):
        """Element-wise ``max(0, x)``."""
        return np.maximum(0, x)

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted before exponentiating."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, x):
        """Run the network on ``x`` of shape ``(batch, input_size)``.

        Returns:
            Class probabilities of shape ``(batch, output_size)``. Intermediate
            values are cached on the instance for use by ``backward``.
        """
        self.z1 = np.dot(x, self.W1) + self.b1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def compute_loss(self, predictions, targets):
        """Mean cross-entropy of ``predictions`` against integer ``targets``.

        Args:
            predictions: Probabilities of shape ``(batch, output_size)``.
            targets: Integer class indices of shape ``(batch,)``.
        """
        m = targets.shape[0]
        picked = predictions[np.arange(m), targets]
        # Clip so a probability of exactly 0 gives a large finite loss, not inf.
        log_likelihood = -np.log(np.clip(picked, 1e-12, None))
        loss = np.sum(log_likelihood) / m
        return loss

    def backward(self, x, predictions, targets, learning_rate=0.01):
        """Back-propagate, apply one SGD step, and return the input gradient.

        Must be called after ``forward`` on the same ``x`` because it reads the
        cached ``self.a1``.

        Args:
            x: Inputs of shape ``(batch, input_size)``.
            predictions: Output of ``forward``; it is NOT modified.
            targets: Integer class indices of shape ``(batch,)``.
            learning_rate: Step size for the weight update (default matches the
                previously hard-coded 0.01).

        Returns:
            Gradient of the loss with respect to ``x``, shape ``(batch, input_size)``,
            computed with the weights that produced ``predictions``.
        """
        m = x.shape[0]  # Number of samples

        # Work on a copy: predictions is usually self.a2, and the caller may
        # still need the probabilities after this call.
        d_z2 = np.array(predictions, dtype=float)
        d_z2[np.arange(m), targets] -= 1
        d_z2 /= m

        # Gradients for weights and biases of the final layer
        d_W2 = np.dot(self.a1.T, d_z2)
        d_b2 = np.sum(d_z2, axis=0, keepdims=True)

        # Gradient with respect to activations of the first layer
        d_a1 = np.dot(d_z2, self.W2.T)
        d_z1 = d_a1 * (self.a1 > 0)  # ReLU derivative

        # Gradients for weights and biases of the first layer
        d_W1 = np.dot(x.T, d_z1)
        d_b1 = np.sum(d_z1, axis=0, keepdims=True)

        # Input gradient must use W1 as it was during the forward pass,
        # so compute it before the parameters are updated.
        d_x = np.dot(d_z1, self.W1.T)

        # Update weights and biases
        self.W1 -= learning_rate * d_W1
        self.b1 -= learning_rate * d_b1
        self.W2 -= learning_rate * d_W2
        self.b2 -= learning_rate * d_b2

        return d_x



In [7]:
def flatten_feature_maps(feature_maps):
    """Collapse a ``(maps, rows, cols)`` array into a 1-D vector.

    Unpacking the shape raises ``ValueError`` when the input is not 3-D.
    """
    depth, rows, cols = feature_maps.shape
    flat_length = depth * rows * cols
    return feature_maps.reshape(flat_length)


def predict(cnn, X):
    """Return the predicted class index for every sample in ``X``.

    Args:
        cnn: Model exposing ``forward(row)`` where ``row`` has shape
            ``(1, n_features)`` and the result has shape ``(1, n_classes)``.
            The model passed here is the one used (no module-level fallback).
        X: Array whose first axis indexes samples. Each sample is flattened to
            a single row, so already-flat rows and multi-dimensional feature
            maps are both accepted.

    Returns:
        1-D integer array of length ``len(X)``; empty when ``X`` is empty.
    """
    samples = np.asarray(X)
    if samples.shape[0] == 0:
        # np.vstack rejects an empty sequence, so short-circuit here.
        return np.array([], dtype=int)

    predictions = [cnn.forward(sample.reshape(1, -1)) for sample in samples]
    return np.argmax(np.vstack(predictions), axis=1)

def accuracy(y_true, y_pred):
    """Fraction of positions where ``y_true`` equals ``y_pred``.

    Both arguments are converted to arrays first so that plain Python lists are
    compared element-wise rather than as whole objects.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred)

In [174]:
# Request the first 10000 samples; load_data reshapes them to 28x28 images
# and stores them with their integer labels on `loader`.
loader.load_data(num_images=10000)


In [175]:
# Two conv layers with freshly (randomly) initialised 3x3 filters.
conv1 = ConvLayer(num_filters=4)
conv2 = ConvLayer(num_filters=9)

# Prepare training data: run every image through conv -> pool -> conv -> pool
# once, using the filters as they are right now, and keep the resulting maps.
X_train = []
y_train = []

# NOTE(review): `original_image` and `first_layer_output` are module-level names.
# After the loop they still hold the values from the LAST image, and
# train_nn_with_conv reads them as globals.
for i in range(loader.num_images):
    original_image = loader.images[i]
    label = loader.labels[i]

    # Add a leading channel axis: (28, 28) -> (1, 28, 28).
    first_layer_output = conv1.conv2d(original_image[np.newaxis, :, :])
    first_layer_output = conv1.max_pooling(first_layer_output)

    second_layer_output = conv2.conv2d(first_layer_output)
    second_layer_output = conv2.max_pooling(second_layer_output)

    X_train.append(second_layer_output)
    y_train.append(label)

# Stack into arrays; X_train gains a leading sample axis.
X_train = np.array(X_train)
y_train = np.array(y_train)

In [167]:
# 225 = 9 filters x 5 x 5: a 28x28 image becomes 26 -> 13 after conv1 + pooling
# and 11 -> 5 after conv2 + pooling (3x3 filters, 2x2 pooling with stride 2).
# NOTE(review): hard-coded; must be updated if the conv configuration changes.
input_size = 225
hidden_size = 128
# Number of output classes; hard-coded rather than derived from the labels.
output_size = 10

nn = SimpleNN(input_size, hidden_size, output_size)

In [168]:
def train_nn_with_conv(nn, X_train, y_train, epochs=10, learning_rate=0.01):
    """Train ``nn`` sample by sample on pre-computed conv features.

    For each sample the feature maps are flattened, pushed through ``nn``, and
    ``nn.backward`` performs the dense-layer update. The returned input
    gradient is then propagated through ``conv2`` and ``conv1``.

    Args:
        nn: Model exposing ``forward``, ``compute_loss`` and ``backward``.
        X_train: Sequence of feature-map arrays, one per sample.
        y_train: Sequence of integer labels aligned with ``X_train``.
        epochs: Number of passes over the data.
        learning_rate: Step size passed to the conv layers' ``update_filters``.
            ``nn.backward`` is called without it and therefore uses whatever
            step size it applies internally.

    Raises:
        ValueError: If ``X_train`` is empty.

    NOTE(review): this function reads the module-level names ``conv1``,
    ``conv2``, ``first_layer_output`` and ``original_image``. The last two hold
    whatever sample was processed most recently outside this function, not the
    sample currently being trained on, and ``X_train`` is not recomputed after
    the filters change. Confirm whether that is intended.
    """
    num_samples = len(X_train)
    if num_samples == 0:
        raise ValueError("X_train must contain at least one sample")

    for epoch in range(epochs):
        total_loss = 0
        for i in range(num_samples):
            features = flatten_feature_maps(X_train[i])[np.newaxis, :]
            target = np.array([y_train[i]])

            # Forward pass
            predictions = nn.forward(features)

            # Loss is taken before backward, which may modify predictions in place.
            loss = nn.compute_loss(predictions, target)
            d_flattened_output = nn.backward(features, predictions, target)

            second_layer_output_shape = (conv2.num_filters, conv2.output_height, conv2.output_width)
            second_layer_grad = d_flattened_output.reshape(second_layer_output_shape)

            # Back-propagate through conv2 BEFORE updating it so the gradient
            # handed to conv1 is based on the filters used in the forward pass.
            # backward() also recomputes conv2.filter_grads, so no separate
            # compute_gradients call is needed.
            first_layer_grad = conv2.backward(first_layer_output, second_layer_grad)
            conv2.update_filters(learning_rate)

            # Gradients and update for the first ConvLayer
            conv1.compute_gradients(original_image[np.newaxis, :, :], first_layer_grad)
            conv1.update_filters(learning_rate)

            total_loss += loss

        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / num_samples}')

In [176]:
# Run 10 epochs of per-sample training; the mean loss of each epoch is printed.
train_nn_with_conv(nn, X_train, y_train, epochs=10, learning_rate=0.01)

Epoch 1/10, Loss: 0.5768630866078364
Epoch 2/10, Loss: 0.36347822219080583
Epoch 3/10, Loss: 0.30012252933896816
Epoch 4/10, Loss: 0.25791541576778015
Epoch 5/10, Loss: 0.22789794834524604
Epoch 6/10, Loss: 0.20403406026544246
Epoch 7/10, Loss: 0.18537302412447373
Epoch 8/10, Loss: 0.16971274886756946
Epoch 9/10, Loss: 0.15695023172019046
Epoch 10/10, Loss: 0.14595960439535066


In [177]:
def test_nn_with_random_subset(nn, X_train, y_train, num_samples=1000):
    # Randomly select indices from the training data
    indices = np.random.choice(len(X_train), num_samples, replace=False)

    # Create a random subset of the training data
    X_subset = X_train[indices]
    y_subset = y_train[indices]

    correct_predictions = 0
    total_samples = num_samples

    for i in range(total_samples):
        # Get the input and target label
        x = X_subset[i]
        y_true = y_subset[i]

        x = flatten_feature_maps(x)
        # Forward pass
        predictions = nn.forward(x[np.newaxis, :])

        # Convert predictions to class labels
        predicted_class = np.argmax(predictions)

        # Compare with true label
        if predicted_class == y_true:
            correct_predictions += 1

    accuracy = correct_predictions / total_samples
    print(f"Test Accuracy on Random Subset: {accuracy * 100:.2f}%")

# `nn` is the trained network; X_train / y_train are the same features and labels
# used for training, so this reports accuracy on training data, not held-out data.
test_nn_with_random_subset(nn, X_train, y_train)

Test Accuracy on Random Subset: 95.70%


In [179]:
# Dump the filter arrays of both conv layers after training to inspect their magnitudes.
print(conv1.filters, conv2.filters)

[[[-0.00533629  0.00276472 -0.00598206]
  [ 0.00620088 -0.00499089  0.00648803]
  [-0.01431477  0.00910912  0.01205135]]

 [[ 0.00836681 -0.00377282  0.03214547]
  [-0.00257807  0.00652423  0.00011743]
  [ 0.00237463 -0.00516728  0.00316406]]

 [[-0.00776258  0.00686992  0.00097155]
  [-0.00316623 -0.01503088  0.02035842]
  [ 0.00184129  0.00127207 -0.00165518]]

 [[ 0.007705    0.00160803  0.00147585]
  [ 0.01189663 -0.01955609  0.0017215 ]
  [-0.00420174 -0.00106368 -0.00150472]]] [[[-4.09691254e+01 -7.98003319e+01 -8.54250997e+01]
  [-8.28698396e+01 -1.13366964e+02 -1.28935174e+02]
  [-9.31884154e+01 -1.30916108e+02 -1.37050016e+02]]

 [[ 6.75392619e+00  4.61338684e+00  4.94504003e+00]
  [ 8.78022529e+00  1.88547137e+00  4.99226905e+00]
  [-4.41378276e+00 -2.12129851e+01 -3.04094476e+01]]

 [[ 7.78117647e+00  1.95113430e+01  4.53247116e+01]
  [ 2.21243633e+01  4.16464688e+01  8.21300215e+01]
  [ 2.54165714e+01  4.62836207e+01  7.53026283e+01]]

 [[ 1.13115607e+00  9.11946841e+00  3.