In [1]:
import numpy as np

# Activation functions
## Sigmoid function
The sigmoid function is used as an activation function because it squashes the output to a probability value between 0 and 1, which is useful when the output is a probability or binary; hence, it is commonly used in binary classification models. The function also allows the network to learn more complex decision bondaries. The formula for the sigmoid function is $$ σ(x) = \frac{1}{1 + e^{-x}}. $$
## Derivative of sigmoid
Back propagation is essential to calculate the grandient of the loss function with respect to the weights and biases in a neural network. It allows the netowrk to effectively learn from its errors and adapt its weights based on the activating functions to update. The backward pass for sigmoid is the deravative of the sigmoid function, which can be mathematically expressed as $$ σ'(x) = σ(x) \cdot \bigl(1 - σ(x)\bigr) $$

In [3]:
class Sigmoid:
    @staticmethod
    def forward(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def backward(x):
        return sigmoid_forward(x) * (1 - sigmoid_forward(x))

## Tanh function
The output for the tanh function is symmetric around the origin, which can help learning alorithms converge. This function outperforms the sigmoid function in multi-layer neural networks. The formula for the tanh function can be expressed as $$ tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} $$
## Derivative of tanh
Similarly to the backward pass of the sigmoid function, the backward pass of the tanh function is the derivate of it, which can be expressed as $$tanh'(x){dx} = 1 - tanh(x)^{2} $$

In [5]:
class Tanh:
    @staticmethod
    def forward(x):
        return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

    @staticmethod
    def backward(x):
        return 1 - (forward_tanh(x) ** 2)

## ReLU function
The ReLU (Rectified Linear Unit) function helps the model learn more complex relationships in data and makes accurate predictions, and it's computationally efficient, due to its non-linearity. The ReLU function can be expressed as 
$$
\text{ReLU}(x) = 
\begin{cases} 
x, & \text{if } x \geq 0 \\ 
0, & \text{if } x < 0 
\end{cases}
$$
## Derivative of ReLU
The backward pass for the relu function can be expressed as 
$$
\text{ReLU}'(x) = 
\begin{cases} 
1, & \text{if } x > 0 \\ 
0, & \text{if } x \leq 0 
\end{cases}
$$

In [7]:
class Relu:
    @staticmethod
    def forward(x):
        return np.maximum(0, x)
    @staticmethod
    def backward(x):
        return (x > 0).astype(float)

# Loss function
## mean Squared error

In [9]:
# loss function and its derivative
class MSE:
    @staticmethod
    def forward(y_true, y_pred):
        return np.mean(np.power(y_true-y_pred, 2));

    @staticmethod
    def backward(y_true, y_pred):
        return 2*(y_pred-y_true)/y_true.size;

## Softmax function
Unlike the sigmoid function, the softmax function is used in multiclass classification tasks: the function converts the output into probabilities, where the probability represents the likelihood of the input being in each class.The softmax function can mathematically be expressed as $$\text{softmax}(z_i) = \frac{e^{z_i}}{\sum_{j=1}^n e^{z_j}}$$
## Derivative of Softmax
$$
softmax'(z_i) = \text{softmax}(z_i) \cdot (\delta_{ik} - \text{softmax}(z_k))
$$

$$
\text{where } \delta_{ik} = 
\begin{cases} 
1, & \text{if } i = k \\ 
0, & \text{if } i \neq k
\end{cases}
$$


In [11]:
class Softmax:
    @staticmethod
    def softmax_forward(vector):
        e = np.exp(vector)
        return e / np.sum(e)

    @staticmethod
    def softmax_backward(vector, y):
        p = softmax_forward(vector)
        return p - y

# Dropout function
Dropout prevents overfitting and regularises by randomly "dropping" connections between neurons in successive layers when training.

In [13]:
def dropout(X, dropout_rate, training=True):
    if training:
        mask = np.random.rand(*X.shape) < (1 - dropout_rate)
        X = X * mask / (1 - dropout_rate)
    return X

# Implemented Neural network

In [15]:
class Layer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.zeros((1, output_size))

    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

In [16]:
class ActivationLayer:
    def __init__(self, activation):
        self.activation = activation

    def forward_propagation(self, input_data):
        self.input = input_data
        if self.activation == "sigmoid":
            self.output = Sigmoid.forward(self.input)
        elif self.activation == "tanh":
            self.output = Tanh.forward(self.input)
        elif self.activation == "relu":
            self.output = Relu.forward(self.input)

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        if self.activation == "sigmoid":
            return Sigmoid.backward(self.input) * output_error
        elif self.activation == "tanh":
            return Tanh.backward(self.input) * output_error
        elif self.activation == "relu":
            return Relu.backward(self.input) * output_error

In [33]:
class NeuralNetwork:
    def __init__(self):
        self.layers = []
        self.loss = None

    def add(self, layer):
        self.layers.append(layer)
    

    def fit(self, X, y, learning_rate, epochs):
        dims = len(X)
        for i in range(epochs):
            err = 0
            for j in range(dims):
                output = X[j].reshape(1, -1)
                for layer in self.layers:
                    output = layer.forward_propagation(output)
                
                err += MSE.forward(y[j], output)
    
                # Backward pass
                error = MSE.backward(y[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

                if j % 200 == 0: print(j)
    
            # Print the error at the end of each epoch
            err /= dims
            print(f"Epoch {i+1}/{epochs} Error: {err:.6f}")




    def predict(self, test):
        dims = len(test)
        results = []

        for i in range(dims):
            output = test[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            results.append(output)

        return results



In [18]:
from keras.datasets import mnist

In [19]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [20]:
labels = {
    0:"airplane",
    1:"automobile",
    2:"bird",
    3:"cat",
    4:"deer",
    5:"dog",
    6:"frog",
    7:"horse",
    8:"ship",
    9:"truck"
}

In [21]:
x_train = x_train / 255.0
x_test = x_test /255.0
y_train_onehot = np.eye(10)[y_train]

In [22]:
x = x_train.reshape(x_train.shape[0], -1)

In [23]:
x.shape

(60000, 784)

In [35]:
net = NeuralNetwork()

net.add(Layer(784, 512))
net.add(ActivationLayer("relu"))
net.add(Layer(512, 128))
net.add(ActivationLayer("relu"))
net.add(Layer(128, 16))
net.add(ActivationLayer("relu"))
net.add(Layer(16, 10))

net.fit(x, y_train_onehot, epochs=5, learning_rate=0.01)

0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400
14600
14800
15000
15200
15400
15600
15800
16000
16200
16400
16600
16800
17000
17200
17400
17600
17800
18000
18200
18400
18600
18800
19000
19200
19400
19600
19800
20000
20200
20400
20600
20800
21000
21200
21400
21600
21800
22000
22200
22400
22600
22800
23000
23200
23400
23600
23800
24000
24200
24400
24600
24800
25000
25200
25400
25600
25800
26000
26200
26400
26600
26800
27000
27200
27400
27600
27800
28000
28200
28400
28600
28800
29000
29200
29400
29600
29800
30000
30200
30400
30600
30800
31000
31200
31400
31600
31800
32000
32200
32400
32600
32800
33000
33200
33400
33600
33800
34000
34200
34400
34600
34800
35000
3

In [36]:
test = x_test.reshape(x_test.shape[0], -1)

In [49]:
y_pred = net.predict(test)

In [57]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.0958

In [55]:
y_pred = [np.argmax(i[0]) for i in y_pred]

IndexError: invalid index to scalar variable.

In [53]:
y_pred

[6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
