In [90]:
import numpy as np

# Activation functions
## Sigmoid function
The sigmoid function is used as an activation function because it squashes the output to a probability value between 0 and 1, which is useful when the output is a probability or binary; hence, it is commonly used in binary classification models. The function also allows the network to learn more complex decision bondaries. The formula for the sigmoid function is $$ σ(x) = \frac{1}{1 + e^{-x}}. $$
## Derivative of sigmoid
Back propagation is essential to calculate the grandient of the loss function with respect to the weights and biases in a neural network. It allows the netowrk to effectively learn from its errors and adapt its weights based on the activating functions to update. The backward pass for sigmoid is the deravative of the sigmoid function, which can be mathematically expressed as $$ σ'(x) = σ(x) \cdot \bigl(1 - σ(x)\bigr) $$

In [3]:
class Sigmoid:
    def forward(x):
        return 1 / (1 + np.exp(-x))
    def backward(x):
        return sigmoid_forward(x) * (1 - sigmoid_forward(x))

## Tanh function
The output for the tanh function is symmetric around the origin, which can help learning alorithms converge. This function outperforms the sigmoid function in multi-layer neural networks. The formula for the tanh function can be expressed as $$ tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} $$
## Derivative of tanh
Similarly to the backward pass of the sigmoid function, the backward pass of the tanh function is the derivate of it, which can be expressed as $$tanh'(x){dx} = 1 - tanh(x)^{2} $$

In [5]:
class Tanh:
    def forward(x):
        return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
    
    def backward(x):
        return 1 - (forward_tanh(x) ** 2)

## ReLU function
The ReLU (Rectified Linear Unit) function helps the model learn more complex relationships in data and makes accurate predictions, and it's computationally efficient, due to its non-linearity. The ReLU function can be expressed as 
$$
\text{ReLU}(x) = 
\begin{cases} 
x, & \text{if } x \geq 0 \\ 
0, & \text{if } x < 0 
\end{cases}
$$
## Derivative of ReLU
The backward pass for the relu function can be expressed as 
$$
\text{ReLU}'(x) = 
\begin{cases} 
1, & \text{if } x > 0 \\ 
0, & \text{if } x \leq 0 
\end{cases}
$$

In [7]:
class Relu:
    def forward(x):
        return np.maximum(0, x)
        
    def backward(x):
        return 1 if x > 0 else 0

# Loss function
## mean Squared error

In [9]:
# loss function and its derivative
class MSE:
    def forward(self, y_true, y_pred):
        return np.mean(np.power(y_true-y_pred, 2));
    
    def backward(self, y_true, y_pred):
        return 2*(y_pred-y_true)/y_true.size;

## Softmax function
Unlike the sigmoid function, the softmax function is used in multiclass classification tasks: the function converts the output into probabilities, where the probability represents the likelihood of the input being in each class.The softmax function can mathematically be expressed as $$\text{softmax}(z_i) = \frac{e^{z_i}}{\sum_{j=1}^n e^{z_j}}$$
## Derivative of Softmax
$$
softmax'(z_i) = \text{softmax}(z_i) \cdot (\delta_{ik} - \text{softmax}(z_k))
$$

$$
\text{where } \delta_{ik} = 
\begin{cases} 
1, & \text{if } i = k \\ 
0, & \text{if } i \neq k
\end{cases}
$$


In [11]:
class Softmax:
    def softmax_forward(vector):
        e = np.exp(vector)
        return e / np.sum(e)
    def softmax_backward(vector, y):
        p = softmax_forward(vector)
        return p - y

# Dropout function
Dropout prevents overfitting and regularises by randomly "dropping" connections between neurons in successive layers when training.

In [13]:
def dropout(X, dropout_rate, training=True):
    if training:
        mask = np.random.rand(*X.shape) < (1 - dropout_rate)
        X = X * mask / (1 - dropout_rate)
    return X

# Implemented Neural network

In [154]:
class Layer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

In [25]:
class ActivationLayer:
    def __init__(self, activation):
        self.activation = activation

    def forward_propagation(self, input_data):
        self.input = input_data
        if self.activation == "sigmoid":
            self.output = Sigmoid.forward(self.input)
        elif self.activation == "tanh":
            self.output = Tanh.forward(self.input)
        elif self.activation == "relu":
            self.output = Relu.forward(self.input)

    def backward_propagation(self, output_error, learning_rate):
        if self.activation == "sigmoid":
            return Sigmoid.backward(self.input) * output_error
        elif self.activation == "tanh":
            return Tanh.backward(self.input) * output_error
        elif self.activation == "relu":
            return Relu.backward(self.input) * output_error

In [196]:
class Conv2DLayer:
    def __init__(self, num_filters, kernel_size, input_shape):
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.filters = np.random.randn(num_filters, kernel_size, kernel_size, input_shape[2]) / kernel_size**2
        self.biases = np.zeros((num_filters, 1))

    def forward_propagation(self, input_data):
        self.input = input_data
        print(input_data)
        h, w, c = input_data.shape
        out_h = h - self.kernel_size + 1
        out_w = w - self.kernel_size + 1
        self.output = np.zeros((out_h, out_w, self.num_filters))

        for f in range(self.num_filters):
            for i in range(out_h):
                for j in range(out_w):
                    region = input_data[i:i+self.kernel_size, j:j+self.kernel_size, :]
                    self.output[i, j, f] = np.sum(region * self.filters[f]) + self.biases[f]
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        d_filters = np.zeros_like(self.filters)
        d_biases = np.zeros_like(self.biases)
        d_input = np.zeros_like(self.input)

        out_h, out_w, _ = output_error.shape
        for f in range(self.num_filters):
            for i in range(out_h):
                for j in range(out_w):
                    region = self.input[i:i+self.kernel_size, j:j+self.kernel_size, :]
                    d_filters[f] += output_error[i, j, f] * region
                    d_input[i:i+self.kernel_size, j:j+self.kernel_size, :] += output_error[i, j, f] * self.filters[f]
            d_biases[f] = np.sum(output_error[:, :, f])

        self.filters -= learning_rate * d_filters
        self.biases -= learning_rate * d_biases
        return d_input


In [188]:
class MaxPoolingLayer:
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def forward_propagation(self, input_data):
        self.input = input_data
        print(input_data)
        h, w = input_data.shape
        out_h = h // self.pool_size
        out_w = w // self.pool_size
        self.output = np.zeros((out_h, out_w))

        for i in range(out_h):
            for j in range(out_w):
                region = input_data[i*self.pool_size:(i+1)*self.pool_size,
                                    j*self.pool_size:(j+1)*self.pool_size]
                self.output[i, j] = np.max(region)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        d_input = np.zeros_like(self.input)
        out_h, out_w = output_error.shape
        for i in range(out_h):
            for j in range(out_w):
                region = self.input[i*self.pool_size:(i+1)*self.pool_size,
                                    j*self.pool_size:(j+1)*self.pool_size]
                max_val = np.max(region)
                for x in range(self.pool_size):
                    for y in range(self.pool_size):
                        if region[x, y] == max_val:
                            d_input[i*self.pool_size + x, j*self.pool_size + y] = output_error[i, j]
        return d_input


In [160]:
class FlattenLayer:
    def forward_propagation(self, input_data):
        self.input_shape = input_data.shape
        return input_data.flatten()

    def backward_propagation(self, output_error, learning_rate):
        return output_error.reshape(self.input_shape)


In [122]:
class NeuralNetwork:
    def __init__(self):
        self.layers = []
        self.loss = None

    def add(self, layer):
        self.layers.append(layer)
    

    def fit(self, X, y, learning_rate, epochs):
        dims = len(X)

        for i in range(epochs):
            err = 0
            for j in range(dims):
                output = X[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                err += MSE.forward(y[j], output)

                error += MSE.backward(y[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

                err /= dims
                print('epoch %d/%d   error=%f' % (i+1, epochs, err))

    def predict(self, test):
        dims = len(test)
        results = []

        for i in range(dims):
            output = test[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            results.append(output)

        return results



In [200]:
from keras.datasets import mnist

In [202]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [32]:
labels = {
    0:"airplane",
    1:"automobile",
    2:"bird",
    3:"cat",
    4:"deer",
    5:"dog",
    6:"frog",
    7:"horse",
    8:"ship",
    9:"truck"
}

In [204]:
x_train = x_train / 255.0
x_test = x_test /255.0

In [212]:
x = np.array([i.flatten() for i in x_train])

In [216]:
x.shape

(60000, 784)

In [198]:
net = NeuralNetwork()

net.add(Layer(784, 512))
net.add(ActivationLayer("relu"))
net.add(Layer(512, 256))
net.add(ActivationLayer("relu"))
net.add(Layer(256, 128))
net.add(ActivationLayer("relu"))
net.add(Layer(128, 64))
net.add(ActivationLayer("relu"))
net.add(Layer(64, 32))
net.add(ActivationLayer("relu"))
net.add(Layer(32, 16))
net.add(ActivationLayer("relu"))
net.add(Layer(16, 10))

net.fit(x, y_train, epochs=100, learning_rate=0.01)

ValueError: too many values to unpack (expected 2)

In [104]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
model = Sequential()

model.add(Conv2D(16, (3, 3), activation="relu", input_shape=(32, 32, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))


model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(10, activation="softmax"))

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, epochs=15)

Epoch 1/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.2918 - loss: 1.8766
Epoch 2/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.5134 - loss: 1.3344
Epoch 3/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.5785 - loss: 1.1750
Epoch 4/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.6241 - loss: 1.0550
Epoch 5/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.6487 - loss: 0.9889
Epoch 6/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.6782 - loss: 0.9150
Epoch 7/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.6982 - loss: 0.8546
Epoch 8/15
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.7119 - loss: 0.8181
Epoch 9/15
[1m1563/156

<keras.src.callbacks.history.History at 0x11de1e04530>