In [5]:
#import lib
import numpy as np
import time
from keras.datasets import cifar10

In [6]:
#data set preparation
# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

#  cifar10.load_data() - function loads the CIFAR-10 dataset
#  60,000 32x32 color images
#  categorized into 10 classes (airplane, car, bird, etc.)
#  80-20 Split (Common Practice)
# 80% for training (50,000 images)-80% ensures the model has sufficient examples to learn effectively.
# 20% for testing (10,000 images)-20% ensures reliable evaluation of the model without overloading the testing set.

# Normalize the dataset
x_train = x_train / 255.0
x_test = x_test / 255.0
#Each pixel value in the images is initially an integer between 0 and 255 . Dividing by 255.0 normalizes these values to the range [0, 1]
#Dividing by 255.0 normalizes these values to the range [0, 1], which improves numerical stability--- and ensures faster training.testing dataset.
#Values are scaled to [0, 1]

# Convert labels to one-hot encoding
def one_hot_encode(labels, num_classes=10):
    one_hot = np.zeros((labels.size, num_classes))    #Creates a 2D array filled with zeros.
    one_hot[np.arange(labels.size), labels.flatten()] = 1
    return one_hot

# This function converts the integer labels into a binary matrix format known as one-hot encoding.
# Example: If num_classes=10 and the label is 2, the one-hot encoded vector is [0, 0, 1, 0, 0, 0, 0, 0, 0, 0].
# labels:Input array containing the class labels.
# Total number of classes, default is 10 for CIFAR-10.
# Returns a numpy array where each row is the one-hot encoded vector for the corresponding label.


y_train = one_hot_encode(y_train)
y_test = one_hot_encode(y_test)

# Converts the integer labels in y_train and y_test into one-hot encoded matrices using the one_hot_encode function.



Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [7]:
class BPN:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights for input to hidden layer with small random values
        self.w1 = np.random.randn(input_size, hidden_size) * 0.01
        # Initialize biases for hidden layer to zero
        self.b1 = np.zeros((1, hidden_size))
        # Initialize weights for hidden to output layer with small random values
        self.w2 = np.random.randn(hidden_size, output_size) * 0.01
        # Initialize biases for output layer to zero
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        # Apply sigmoid activation function element-wise
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        # Calculate derivative of sigmoid activation function
        return x * (1 - x)

    def forward(self, X):
        # Store input for use in backward pass
        self.X = X
        # Calculate pre-activation values for the hidden layer
        self.z1 = np.dot(X, self.w1) + self.b1
        # Apply sigmoid activation to hidden layer
        self.a1 = self.sigmoid(self.z1)
        # Calculate pre-activation values for the output layer
        self.z2 = np.dot(self.a1, self.w2) + self.b2
        # Apply sigmoid activation to output layer
        self.output = self.sigmoid(self.z2)
        # Return the final output of the network
        return self.output

    def backward(self, y):
        # Calculate the error at the output layer
        output_error = y - self.output
        # Calculate the gradient of the error with respect to output activations
        output_delta = output_error * self.sigmoid_derivative(self.output)
        # Calculate the error propagated to the hidden layer
        a1_error = np.dot(output_delta, self.w2.T)
        # Calculate the gradient of the error with respect to hidden activations
        a1_delta = a1_error * self.sigmoid_derivative(self.a1)

        # Update weights between hidden and output layer using gradients
        self.w2 += np.dot(self.a1.T, output_delta) * 0.01
        # Update biases for the output layer using gradients
        self.b2 += np.sum(output_delta, axis=0, keepdims=True) * 0.01
        # Update weights between input and hidden layer using gradients
        self.w1 += np.dot(self.X.T, a1_delta) * 0.01
        # Update biases for the hidden layer using gradients
        self.b1 += np.sum(a1_delta, axis=0, keepdims=True) * 0.01

    def train(self, X, y, epochs):
        for epoch in range(epochs):  # Iterate through the specified number of epochs
            self.forward(X)          # Perform forward propagation for the input data
            self.backward(y)         # Perform backward propagation to update weights


In [3]:
import numpy as np

class CNN:
    def __init__(self, input_shape, num_classes, kernel_size=3, num_kernels=8, learning_rate=0.01):
        # Initialize CNN parameters
        self.input_shape = input_shape  # Shape of input images (height, width)
        self.num_classes = num_classes  # Number of output classes
        self.kernel_size = kernel_size  # Size of convolutional filters (e.g., 3x3)
        self.num_kernels = num_kernels  # Number of convolutional filters
        self.learning_rate = learning_rate  # Learning rate for gradient updates

        # Initialize convolution kernels with random values
        self.kernels = np.random.rand(num_kernels, kernel_size, kernel_size) - 0.5
        # Calculate the size of the flattened convolution output
        flattened_size = num_kernels * ((input_shape[0] - kernel_size + 1) *
                                        (input_shape[1] - kernel_size + 1))
        # Initialize weights for fully connected layer
        self.weights = np.random.rand(flattened_size, num_classes) - 0.5
        # Initialize biases for the fully connected layer
        self.bias = np.zeros((1, num_classes))

    def relu(self, x):
        # Apply ReLU activation function
        return np.maximum(0, x)

    def relu_derivative(self, x):
        # Compute derivative of ReLU for backpropagation
        return (x > 0).astype(float)

    def softmax(self, x):
        # Apply softmax activation function for output probabilities
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # Stabilize with max subtraction
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)  # Normalize to probabilities

    def convolve(self, x, kernel):
        # Perform valid convolution on the input image with a kernel
        output_dim = x.shape[0] - kernel.shape[0] + 1  # Calculate output size
        convolved = np.zeros((output_dim, output_dim))  # Initialize output matrix
        for i in range(output_dim):  # Slide kernel over rows
            for j in range(output_dim):  # Slide kernel over columns
                convolved[i, j] = np.sum(x[i:i+kernel.shape[0], j:j+kernel.shape[1]] * kernel)
                # Compute the sum of element-wise multiplication
        return convolved

    def forward(self, x):
        # Forward pass through the network
        # Apply convolution with all kernels
        self.convolved_outputs = np.array([self.convolve(x[0], kernel) for kernel in self.kernels])
        self.feature_map_shape = self.convolved_outputs.shape  # Save shape for backpropagation

        # Apply ReLU activation
        self.convolved_outputs = self.relu(self.convolved_outputs)

        # Flatten feature maps for the fully connected layer
        fc_input = self.convolved_outputs.flatten().reshape(1, -1)

        # Compute class scores and probabilities using softmax
        self.output = self.softmax(np.dot(fc_input, self.weights) + self.bias)

        return self.output  # Return predicted probabilities

    def backward(self, x, y):
        # Compute gradients and update weights
        m = y.shape[0]  # Number of samples in the batch

        # Compute error at the output layer
        output_error = self.output - y

        # Compute gradients for the fully connected layer weights and biases
        fc_input = self.convolved_outputs.flatten().reshape(1, -1)
        self.fc_weights_gradient = np.dot(fc_input.T, output_error) / m  # Gradient for weights
        self.fc_bias_gradient = np.sum(output_error, axis=0, keepdims=True) / m  # Gradient for biases

        # Propagate error back to the feature maps
        flattened_error = np.dot(output_error, self.weights.T)  # Backpropagate to the flattened layer
        feature_map_error = flattened_error.reshape(self.feature_map_shape)  # Reshape to feature map dimensions

        # Apply ReLU derivative to feature map error
        relu_error = feature_map_error * self.relu_derivative(self.convolved_outputs)

        # Compute gradients for convolutional kernels
        self.conv_weights_gradient = np.zeros_like(self.kernels)
        input_image = x[0]  # Assuming a single input image
        for i, kernel in enumerate(self.kernels):  # Iterate over each kernel
            for j in range(relu_error.shape[1]):  # Iterate over rows of error map
                for k in range(relu_error.shape[2]):  # Iterate over columns of error map
                    # Update gradient for the current kernel
                    self.conv_weights_gradient[i] += (
                        relu_error[i, j, k] * input_image[j:j+self.kernel_size, k:k+self.kernel_size]
                    )

        # Normalize convolution kernel gradients
        self.conv_weights_gradient /= m

        # Update weights and biases with gradients
        self.weights -= self.learning_rate * self.fc_weights_gradient
        self.bias -= self.learning_rate * self.fc_bias_gradient
        self.kernels -= self.learning_rate * self.conv_weights_gradient

    def train(self, x, y, epochs=10):
        # Train the CNN for the specified number of epochs
        for epoch in range(epochs):  # Iterate over epochs
            output = self.forward(x)  # Perform forward pass
            loss = -np.sum(y * np.log(output)) / y.shape[0]  # Compute cross-entropy loss
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")  # Print loss for monitoring
            self.backward(x, y)  # Perform backward pass and update weights


In [8]:
# **Prepare data for BPN**
x_train_flat = x_train.reshape(x_train.shape[0], -1)
 # Flatten the training images into 1D arrays
x_test_flat = x_test.reshape(x_test.shape[0], -1)
 # Flatten the testing images into 1D arrays

# **Train BPN**
bpn = BPN(input_size=x_train_flat.shape[1], hidden_size=64, output_size=10)
 # Initialize BPN with input size, hidden size, and output size
start_time = time.time()
 # Record start time to measure training duration
bpn.train(x_train_flat[:1000], y_train[:1000], epochs=10)
# Train the BPN using the first 1000 training samples for 10 epochs
bpn_time = time.time() - start_time
 # Calculate the total time taken for BPN training

# **Train CNN**
cnn = CNN(input_shape=x_train[0].shape, num_classes=10)
 # Initialize CNN with input image shape and number of classes
start_time = time.time()
# Record start time to measure training duration
for i in range(1000):
 # Loop through the first 1000 training samples
    cnn.forward(x_train[i:i+1])
     # Perform forward pass for each sample (training step can be extended)
cnn_time = time.time() - start_time
 # Calculate the total time taken for CNN forward passes


In [10]:
print(f"BPN Training Time: {bpn_time:.2f} seconds")
print(f"CNN Training Time: {cnn_time:.2f} seconds")

BPN Training Time: 0.38 seconds
CNN Training Time: 96.84 seconds


In [9]:
# **Accuracy Calculation**
def calculate_accuracy(predictions, labels):
  # Function to calculate accuracy
    predicted_classes = np.argmax(predictions, axis=1)
    # Get class with the highest probability from predictions
    true_classes = np.argmax(labels, axis=1)
     # Get the actual class from labels
    accuracy = np.mean(predicted_classes == true_classes) * 100
     # Calculate the percentage of correct predictions
    return accuracy
     # Return the computed accuracy

# **BPN Accuracy**
bpn_predictions = bpn.forward(x_test_flat[:1000])
# Get BPN predictions for the first 1000 test samples
bpn_accuracy = calculate_accuracy(bpn_predictions, y_test[:1000])
 # Calculate accuracy for BPN predictions
print(f"BPN Accuracy: {bpn_accuracy:.2f}%")
 # Print the BPN accuracy percentage

# **CNN Accuracy**
cnn_predictions = []
 # Initialize a list to store CNN predictions
for i in range(1000):
   # Loop through the first 1000 test samples
    cnn_predictions.append(cnn.forward(x_test[i:i+1]))
     # Get CNN predictions for each test sample
cnn_predictions = np.vstack(cnn_predictions)
 # Combine all predictions into a single array
cnn_accuracy = calculate_accuracy(cnn_predictions, y_test[:1000])
 # Calculate accuracy for CNN predictions
print(f"CNN Accuracy: {cnn_accuracy:.2f}%")
# Print the CNN accuracy percentage


BPN Accuracy: 10.90%
CNN Accuracy: 12.70%
