In [1]:
import numpy as np

# Activation functions
## Sigmoid function
The sigmoid function is used as an activation function because it squashes its input to a value between 0 and 1, which is useful when the output represents a probability or a binary decision; hence, it is commonly used in binary classification models. Its non-linearity also allows the network to learn more complex decision boundaries. The formula for the sigmoid function is $$ \sigma(x) = \frac{1}{1 + e^{-x}}. $$
## Derivative of sigmoid
Backpropagation is essential for calculating the gradient of the loss function with respect to the weights and biases in a neural network. It allows the network to learn effectively from its errors by propagating them backwards through each activation function and updating the weights accordingly. The backward pass for sigmoid is the derivative of the sigmoid function, which can be mathematically expressed as $$ \sigma'(x) = \sigma(x) \cdot \bigl(1 - \sigma(x)\bigr) $$

In [3]:
class Sigmoid:
    """Logistic sigmoid activation and its derivative."""

    @staticmethod
    def forward(x):
        """Return ``1 / (1 + exp(-x))`` element-wise.

        The result is evaluated in a numerically stable form: only
        ``exp(-|x|)`` is ever computed, so large-magnitude inputs cannot
        overflow.

        Args:
            x: Scalar or array-like of real numbers.

        Returns:
            Values in ``[0, 1]`` with the same shape as ``x``.
        """
        x = np.asarray(x)
        if not np.issubdtype(x.dtype, np.floating):
            # Integer input would otherwise truncate the intermediate results.
            x = x.astype(float)

        # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
        decay = np.exp(-np.abs(x))
        result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

        # Indexing with () turns a 0-d array back into a scalar and leaves
        # real arrays unchanged.
        return result[()]

    @staticmethod
    def backward(x):
        """Return the derivative ``sigmoid(x) * (1 - sigmoid(x))`` element-wise.

        Args:
            x: The pre-activation input that was given to ``forward``.

        Returns:
            The derivative evaluated at ``x``, same shape as ``x``.
        """
        # Evaluate the forward pass once and reuse it.
        activated = Sigmoid.forward(x)
        return activated * (1.0 - activated)

## Tanh function
The output of the tanh function is symmetric around the origin, which can help learning algorithms converge. This function often outperforms the sigmoid function in multi-layer neural networks. The formula for the tanh function can be expressed as $$ \tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} $$
## Derivative of tanh
Similarly to the backward pass of the sigmoid function, the backward pass of the tanh function is its derivative, which can be expressed as $$ \tanh'(x) = 1 - \tanh^{2}(x) $$

In [5]:
class Tanh:
    """Hyperbolic tangent activation and its derivative."""

    @staticmethod
    def forward(x):
        """Return ``tanh(x)`` element-wise.

        ``np.tanh`` is used instead of the explicit ratio of exponentials
        because that ratio evaluates to ``inf / inf = nan`` once ``|x|`` is
        large enough for ``exp`` to overflow.

        Args:
            x: Scalar or array-like of real numbers.

        Returns:
            Values in ``[-1, 1]`` with the same shape as ``x``.
        """
        return np.tanh(x)

    @staticmethod
    def backward(x):
        """Return the derivative ``1 - tanh(x) ** 2`` element-wise.

        Args:
            x: The pre-activation input that was given to ``forward``.

        Returns:
            The derivative evaluated at ``x``, same shape as ``x``.
        """
        activated = Tanh.forward(x)
        return 1 - activated ** 2

## ReLU function
The ReLU (Rectified Linear Unit) function is non-linear, which helps the model learn more complex relationships in the data and make accurate predictions, and it is computationally efficient because it only requires a comparison with zero. The ReLU function can be expressed as 
$$
\text{ReLU}(x) = 
\begin{cases} 
x, & \text{if } x \geq 0 \\ 
0, & \text{if } x < 0 
\end{cases}
$$
## Derivative of ReLU
The backward pass for the ReLU function can be expressed as 
$$
\text{ReLU}'(x) = 
\begin{cases} 
1, & \text{if } x > 0 \\ 
0, & \text{if } x \leq 0 
\end{cases}
$$

In [7]:
class Relu:
    """Rectified linear unit activation and its derivative."""

    @staticmethod
    def forward(x):
        """Replace every negative entry of ``x`` with zero, element-wise."""
        rectified = np.maximum(0, x)
        return rectified

    @staticmethod
    def backward(x):
        """Return 1.0 where ``x`` is strictly positive and 0.0 everywhere else."""
        is_positive = x > 0
        return is_positive.astype(np.float64)

# Loss function
## Mean squared error

In [9]:
# loss function and its derivative
class MSE:
    """Mean squared error loss and its gradient with respect to the prediction."""

    @staticmethod
    def forward(y_true, y_pred):
        """Return the mean of ``(y_true - y_pred) ** 2`` over all elements.

        Args:
            y_true: Target values (array-like).
            y_pred: Predicted values (array-like), broadcastable against
                ``y_true``.

        Returns:
            The scalar mean squared error of the broadcast difference.
        """
        residual = np.asarray(y_true) - np.asarray(y_pred)
        return np.mean(np.power(residual, 2))

    @staticmethod
    def backward(y_true, y_pred):
        """Return the gradient of ``forward`` with respect to ``y_pred``.

        The normalising count is the number of elements ``forward`` averages
        over (the size of the broadcast difference), so the two methods stay
        consistent even when ``y_true`` and ``y_pred`` have different shapes.

        Args:
            y_true: Target values (array-like).
            y_pred: Predicted values (array-like), broadcastable against
                ``y_true``.

        Returns:
            ``2 * (y_pred - y_true) / N`` with ``N`` the number of elements
            in the broadcast difference.
        """
        residual = np.asarray(y_pred) - np.asarray(y_true)
        return 2 * residual / residual.size

## Softmax function
Unlike the sigmoid function, the softmax function is used in multiclass classification tasks: the function converts the outputs into probabilities, where each probability represents the likelihood of the input belonging to the corresponding class. The softmax function can mathematically be expressed as $$\text{softmax}(z_i) = \frac{e^{z_i}}{\sum_{j=1}^n e^{z_j}}$$
## Derivative of Softmax
$$
\frac{\partial\, \text{softmax}(z_i)}{\partial z_k} = \text{softmax}(z_i) \cdot (\delta_{ik} - \text{softmax}(z_k))
$$

$$
\text{where } \delta_{ik} = 
\begin{cases} 
1, & \text{if } i = k \\ 
0, & \text{if } i \neq k
\end{cases}
$$


In [11]:
class Softmax:
    """Softmax over a whole score vector, plus the ``softmax - target`` gradient."""

    @staticmethod
    def forward(vector):
        """Return ``exp(vector) / sum(exp(vector))``.

        The maximum entry is subtracted before exponentiating. This leaves
        the result mathematically unchanged but keeps ``exp`` from
        overflowing (and the ratio from becoming ``nan``) on large scores.

        Args:
            vector: Array-like of scores. The normalisation runs over every
                element of the input, i.e. the input is treated as one
                score vector.

        Returns:
            Array of the same shape as ``vector`` whose entries sum to 1.
        """
        vector = np.asarray(vector)
        if vector.size == 0:
            # Nothing to normalise; np.max would raise on an empty array.
            return np.exp(vector)

        e = np.exp(vector - np.max(vector))
        return e / np.sum(e)

    @staticmethod
    def backward(vector, y):
        """Return ``softmax(vector) - y``.

        For a target distribution ``y`` (for example a one-hot label) this
        difference is the gradient of the cross-entropy loss with respect to
        the scores in ``vector``.

        Args:
            vector: Array-like of scores, as passed to ``forward``.
            y: Target values, broadcastable against ``vector``.

        Returns:
            The element-wise difference between the softmax output and ``y``.
        """
        # Previously called an undefined ``softmax_forward`` helper.
        p = Softmax.forward(vector)
        return p - y

# Dropout function
Dropout regularises the network and reduces overfitting by randomly "dropping" (zeroing) neuron activations during training; the surviving activations are scaled up so that their expected value stays unchanged.

In [13]:
def dropout(X, dropout_rate, training=True):
    """Apply inverted dropout to ``X``.

    During training each element is kept with probability
    ``1 - dropout_rate`` and the kept elements are divided by that same
    probability. Outside training ``X`` is returned untouched.

    Args:
        X: NumPy array of activations.
        dropout_rate: Probability of zeroing an element, in ``[0, 1]``.
        training: When false the function is a no-op.

    Returns:
        ``X`` itself when ``training`` is false, otherwise a new array with
        the same shape as ``X``.

    Raises:
        ValueError: If ``training`` is true and ``dropout_rate`` is outside
            ``[0, 1]``.
    """
    if not training:
        return X

    if not 0.0 <= dropout_rate <= 1.0:
        raise ValueError(
            f"dropout_rate must be in [0, 1], got {dropout_rate!r}"
        )

    keep_prob = 1.0 - dropout_rate
    if keep_prob == 0.0:
        # Everything is dropped; dividing by keep_prob would give 0/0 = nan.
        return np.zeros_like(X, dtype=np.float64)

    mask = np.random.rand(*X.shape) < keep_prob
    return X * mask / keep_prob

# Implemented Neural network

In [15]:
class Layer:
    """Fully connected layer computing ``input @ weights + bias``."""

    def __init__(self, input_size, output_size):
        """Create the layer parameters.

        Args:
            input_size: Number of features per input row.
            output_size: Number of units produced per input row.
        """
        # Weights are drawn uniformly from [-0.5, 0.5); biases start at zero.
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.zeros((1, output_size))

    def forward_propagation(self, input_data):
        """Compute the layer output and cache the input for the backward pass.

        Args:
            input_data: Array of shape ``(rows, input_size)``. A 1-D vector
                of length ``input_size`` is treated as a single row.

        Returns:
            Array of shape ``(rows, output_size)``.
        """
        # Storing a 2-D view keeps ``self.input.T`` meaningful in the
        # backward pass even when the caller hands in a flat vector.
        self.input = np.atleast_2d(input_data)
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_error: Gradient of the loss with respect to this layer's
                output, shape ``(rows, output_size)``.
            learning_rate: Step size for the parameter update.

        Returns:
            Gradient of the loss with respect to the layer input, shape
            ``(rows, input_size)``.
        """
        output_error = np.atleast_2d(output_error)

        # Computed before the update so it reflects the weights that
        # produced the forward output.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)

        # The bias is shared by every row, so its gradient is the column sum.
        # For a single row this equals ``output_error`` itself.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [16]:
class ActivationLayer:
    """Layer that applies a named element-wise activation function.

    Supported names are ``"sigmoid"``, ``"tanh"`` and ``"relu"``; each is
    dispatched to the ``Sigmoid``, ``Tanh`` or ``Relu`` class respectively.
    """

    # Only names are stored here so that the check does not depend on the
    # activation classes themselves.
    _SUPPORTED = ("sigmoid", "tanh", "relu")

    def __init__(self, activation):
        """Store the activation name.

        Args:
            activation: One of ``"sigmoid"``, ``"tanh"`` or ``"relu"``.

        Raises:
            ValueError: If ``activation`` is not a supported name.
        """
        self._check_name(activation)
        self.activation = activation

    @classmethod
    def _check_name(cls, name):
        """Raise ``ValueError`` unless ``name`` is a supported activation."""
        if name not in cls._SUPPORTED:
            raise ValueError(
                f"Unknown activation {name!r}; expected one of {cls._SUPPORTED}"
            )

    def _function(self):
        """Return the class implementing the currently selected activation."""
        # Re-checked here because ``self.activation`` is a public attribute
        # that may have been reassigned after construction. Without this an
        # unknown name made the backward pass return None silently.
        self._check_name(self.activation)
        if self.activation == "sigmoid":
            return Sigmoid
        if self.activation == "tanh":
            return Tanh
        return Relu

    def forward_propagation(self, input_data):
        """Apply the activation and cache input and output.

        Args:
            input_data: Array of pre-activation values.

        Returns:
            The activated values, same shape as ``input_data``.
        """
        self.input = input_data
        self.output = self._function().forward(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return the gradient with respect to the layer input.

        Args:
            output_error: Gradient of the loss with respect to this layer's
                output.
            learning_rate: Unused; the layer has no trainable parameters.
                Accepted so all layers share one call signature.

        Returns:
            ``activation'(input) * output_error``.
        """
        return self._function().backward(self.input) * output_error

In [17]:
import numpy as np

class ConvLayer:
    """2-D convolution over a single-channel image with several square filters.

    The layer takes one 2-D array ``(height, width)`` and produces one
    feature map per filter, ``(num_filters, out_height, out_width)``.
    """

    def __init__(self, num_filters, filter_size, stride=1, padding=0):
        """Create the filters and biases.

        Args:
            num_filters: Number of filters (output feature maps).
            filter_size: Side length of each square filter.
            stride: Step between neighbouring filter positions.
            padding: Number of zero rows/columns added on every side.
        """
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.filters = np.random.randn(num_filters, filter_size, filter_size) * 0.1  # Initialize filters
        self.biases = np.zeros(num_filters)  # Initialize biases

    def forward_propagation(self, input_data):
        """Slide every filter over the (zero-padded) input.

        Args:
            input_data: 2-D array of shape ``(height, width)``.

        Returns:
            Array of shape ``(num_filters, out_height, out_width)``.
        """
        # Add padding if needed
        if self.padding > 0:
            input_data = np.pad(input_data, [(self.padding, self.padding), (self.padding, self.padding)], mode='constant')

        # The padded input is cached: the backward pass needs the exact
        # regions each filter saw.
        self.input = input_data
        input_height, input_width = input_data.shape
        filter_height, filter_width = self.filters.shape[1], self.filters.shape[2]

        # Calculate output dimensions
        output_height = (input_height - filter_height) // self.stride + 1
        output_width = (input_width - filter_width) // self.stride + 1

        self.output = np.zeros((self.num_filters, output_height, output_width))

        for f in range(self.num_filters):
            for i in range(output_height):
                top = i * self.stride
                for j in range(output_width):
                    left = j * self.stride
                    region = input_data[top:top + filter_height, left:left + filter_width]
                    self.output[f, i, j] = np.sum(region * self.filters[f]) + self.biases[f]

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Update filters and biases and return the input gradient.

        Args:
            output_error: Gradient of the loss with respect to the layer
                output, shape ``(num_filters, out_height, out_width)``.
            learning_rate: Step size for the parameter update.

        Returns:
            Gradient with respect to the input given to
            ``forward_propagation``, with that input's unpadded shape.
        """
        filter_height, filter_width = self.filters.shape[1], self.filters.shape[2]
        output_height, output_width = output_error.shape[1], output_error.shape[2]

        # Initialize gradients (the input gradient is built on the padded grid)
        filter_gradient = np.zeros(self.filters.shape)
        bias_gradient = np.zeros(self.biases.shape)
        input_gradient = np.zeros(self.input.shape)

        for f in range(self.num_filters):
            for i in range(output_height):
                top = i * self.stride
                for j in range(output_width):
                    left = j * self.stride
                    error = output_error[f, i, j]

                    # Compute the gradient for the filter and bias
                    region = self.input[top:top + filter_height, left:left + filter_width]
                    filter_gradient[f] += region * error
                    bias_gradient[f] += error

                    # Compute the gradient for the input
                    input_gradient[top:top + filter_height, left:left + filter_width] += self.filters[f] * error

        # Update filters and biases
        self.filters -= learning_rate * filter_gradient
        self.biases -= learning_rate * bias_gradient

        # Drop the border that forward_propagation added, so the gradient
        # lines up with the array the caller actually passed in.
        if self.padding > 0:
            pad = self.padding
            input_gradient = input_gradient[pad:-pad, pad:-pad]

        return input_gradient


In [18]:
class MaxPoolLayer:
    """Max pooling over square windows of a ``(batch, channels, height, width)`` array."""

    def __init__(self, pool_size=2, stride=2):
        """Store the window side length and the step between windows."""
        self.pool_size = pool_size
        self.stride = stride

    def forward_propagation(self, input_data):
        """Take the maximum of every pooling window.

        Besides the pooled output, the flat position of each maximum inside
        its window is stored in ``self.indices`` for the backward pass.

        Args:
            input_data: Array of shape ``(batch_size, channels, height, width)``.

        Returns:
            Array of shape ``(batch_size, channels, out_height, out_width)``.
        """
        self.input = input_data
        n_batch, n_channels, in_h, in_w = input_data.shape
        size, step = self.pool_size, self.stride

        # Number of window positions that fit along each spatial axis.
        out_h = (in_h - size) // step + 1
        out_w = (in_w - size) // step + 1

        self.output = np.zeros((n_batch, n_channels, out_h, out_w))
        self.indices = np.zeros_like(self.output, dtype=int)

        for b, c, row, col in np.ndindex(n_batch, n_channels, out_h, out_w):
            top, left = row * step, col * step
            window = input_data[b, c, top:top + size, left:left + size]
            self.output[b, c, row, col] = np.max(window)
            self.indices[b, c, row, col] = np.argmax(window)

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Route each output gradient to the input element that was the maximum.

        Args:
            output_error: Gradient with respect to the pooled output, shape
                ``(batch_size, channels, out_height, out_width)``.
            learning_rate: Unused; the layer has no trainable parameters.

        Returns:
            Gradient with respect to the input, same shape as the cached input.
        """
        n_batch, n_channels, out_h, out_w = output_error.shape
        size, step = self.pool_size, self.stride
        input_gradient = np.zeros(self.input.shape)

        for b, c, row, col in np.ndindex(n_batch, n_channels, out_h, out_w):
            # Turn the flat in-window position back into (row, column) offsets.
            d_row, d_col = divmod(self.indices[b, c, row, col], size)
            input_gradient[b, c, row * step + d_row, col * step + d_col] += output_error[b, c, row, col]

        return input_gradient


In [19]:
class FlattenLayer:
    """Reshapes an array into a 1-D vector and restores the shape on the way back."""

    def forward_propagation(self, input_data):
        """Return a flattened copy of ``input_data`` and remember its shape."""
        self.input_shape = input_data.shape
        # reshape(-1) followed by copy() yields a fresh row-major 1-D array.
        flat = input_data.reshape(-1).copy()
        self.output = flat
        return flat

    def backward_propagation(self, output_error, learning_rate):
        """Reshape ``output_error`` to the shape seen in the forward pass.

        ``learning_rate`` is unused; the layer has no trainable parameters.
        """
        original_shape = self.input_shape
        return output_error.reshape(original_shape)

In [20]:
class NeuralNetwork:
    """Sequential container that trains its layers one sample at a time."""

    def __init__(self):
        """Start with no layers."""
        self.layers = []
        self.loss = None

    def add(self, layer):
        """Append a layer; layers run in the order they were added."""
        self.layers.append(layer)

    def _forward(self, sample):
        """Run one sample through every layer.

        The sample is reshaped to a single row ``(1, n_features)`` first, so
        training and prediction feed the layers identically shaped input.
        """
        output = np.asarray(sample).reshape(1, -1)
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def fit(self, X, y, learning_rate, epochs):
        """Train with per-sample gradient descent on the MSE loss.

        Args:
            X: Indexable collection of samples; each sample is flattened to
                one row before the forward pass.
            y: Targets, indexed in parallel with ``X``.
            learning_rate: Step size handed to every layer's backward pass.
            epochs: Number of full passes over ``X``.
        """
        dims = len(X)
        for i in range(epochs):
            err = 0
            for j in range(dims):
                output = self._forward(X[j])

                err += MSE.forward(y[j], output)

                # Backward pass
                error = MSE.backward(y[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

                # Coarse progress indicator
                if j % 200 == 0:
                    print(j)

            # Print the error at the end of each epoch
            err /= dims
            print(f"Epoch {i+1}/{epochs} Error: {err:.6f}")

    def predict(self, test):
        """Return the network output for every sample in ``test``.

        Each sample is reshaped to one row exactly as in ``fit``. No
        per-sample output is printed.

        Args:
            test: Indexable collection of samples.

        Returns:
            List with one network output per sample.
        """
        return [self._forward(test[i]) for i in range(len(test))]



In [21]:
from keras.datasets import cifar10

In [22]:
# Load the CIFAR-10 train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [23]:
# Class names in label order: position in the tuple is the integer label.
labels = dict(enumerate((
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)))

In [24]:
# Divide both image sets by 255.0.
# NOTE(review): the names are rebound, so running this cell twice divides twice.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [25]:
# Create a network instance
net = NeuralNetwork()

# Fully connected classifier: 3072 inputs per sample -> 1024 -> 256 -> 64 -> 10
# outputs, with a ReLU after every dense layer.
net.add(Layer(3072, 1024))
net.add(ActivationLayer("relu"))
net.add(Layer(1024, 256))
net.add(ActivationLayer("relu"))
net.add(Layer(256, 64))
net.add(ActivationLayer("relu"))
net.add(Layer(64, 10))
net.add(ActivationLayer("relu"))

# The network emits one value per class, so the targets must be one-hot rows.
# Passing the raw class ids makes the MSE loss broadcast a single id against
# all ten outputs.
# Assumes y_train holds integer class ids in [0, len(labels)) - TODO confirm.
y_train_onehot = np.eye(len(labels))[y_train.ravel()]

# Training the network (fit)
net.fit(x_train, y_train_onehot, epochs=24, learning_rate=0.01)


0


  return np.mean(np.power(y_true-y_pred, 2));


KeyboardInterrupt: 

In [None]:
# Keep one row per test image. ``x_test.flatten()`` would merge the whole set
# into a single 1-D array, and predict() would then iterate over single values
# instead of over images.
x_test_flat = x_test.reshape(len(x_test), -1)
y_pred = net.predict(x_test_flat)

# Show only the first few predictions rather than the entire list.
y_pred[:5]