<a href="https://colab.research.google.com/github/ImmortalizedInHearts/NeuralNetwork/blob/main/123.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [206]:
import numpy as np
import scipy.io
import matplotlib.pyplot as plt

from tqdm import tqdm

In [314]:
# Load the MATLAB dataset. Every split below is read from the struct field whose
# name matches the variable it is assigned to ('training' / 'validation' / 'test').
mat = scipy.io.loadmat('data.mat')

# The [0][0][0][0] chain unwraps the nested arrays loadmat returns for the struct.
# Inside each split, element 0 is taken as the inputs and element 1 as the targets.
# NOTE(review): field order (inputs, targets) is assumed -- confirm against data.mat.
trainInputs = np.array(mat['data']['training'][0][0][0][0][0])
trainTargets = np.array(mat['data']['training'][0][0][0][0][1])

testInputs = np.array(mat['data']['test'][0][0][0][0][0])
testTargets = np.array(mat['data']['test'][0][0][0][0][1])

# Same index convention as the two splits above (0 = inputs, 1 = targets).
validationInputs = np.array(mat['data']['validation'][0][0][0][0][0])
validationTargets = np.array(mat['data']['validation'][0][0][0][0][1])

# Short aliases consumed by the training call.
x_train = trainInputs
y_train = trainTargets

# Short aliases consumed by the accuracy evaluation in the final cell.
x_test = testInputs
y_test = testTargets

In [320]:
def forward(nn, inputs):
    """Feed ``inputs`` through each layer of ``nn`` in order.

    Args:
        nn: iterable of layer objects exposing ``forward(x)``.
        inputs: value handed to the first layer.

    Returns:
        The output of the last layer, or ``inputs`` itself when ``nn`` is empty.
    """
    activation = inputs
    for layer in nn:
        # Each layer consumes the previous layer's output.
        activation = layer.forward(activation)
    return activation

def backward(nn, loss, prediction, target, learning_rate):
    """Evaluate the loss and push its gradient back through the network.

    Args:
        nn: sequence of layers exposing ``backward(grad, learning_rate)``.
        loss: object exposing ``forward(prediction, target)`` and ``backward()``.
        prediction: network output for the current batch.
        target: expected output for the current batch.
        learning_rate: step size forwarded to every layer.

    Returns:
        Tuple ``(nn, error)`` where ``error`` is the value of ``loss.forward``.
    """
    # loss.forward must run before loss.backward so the loss can prepare its gradient.
    error = loss.forward(prediction, target)
    grad = loss.backward()

    # Walk the layers last-to-first; each layer returns the gradient for the one before it.
    for layer in reversed(nn):
        grad = layer.backward(grad, learning_rate)

    return nn, error

def train(nn, loss, x, y, learning_rate, num_iter):
    """Run ``num_iter`` full-batch training steps and record the loss of each step.

    Args:
        nn: sequence of layers, passed to ``forward`` and ``backward``.
        loss: loss object passed to ``backward``.
        x: training inputs, fed to the network unchanged on every iteration.
        y: training targets.
        learning_rate: step size passed to ``backward``.
        num_iter: number of iterations to run.

    Returns:
        Tuple ``(nn, losses)`` with one loss value per iteration.
    """
    history = []

    for _ in tqdm(range(num_iter)):
        prediction = forward(nn, x)
        nn, step_error = backward(nn, loss, prediction, y, learning_rate)
        history.append(step_error)

    return nn, history

def accuracy(nn, x, y):
    """Return the percentage of columns whose predicted class matches the target.

    The class of a column is the index of its largest entry along axis 0, for both
    the network output and ``y``; the count of matches is divided by the number of
    columns of the network output.
    """
    p = forward(nn, x)
    predicted = np.argmax(p, axis=0)
    expected = np.argmax(y, axis=0)
    hits = np.sum(predicted == expected)
    return hits / p.shape[1] * 100

In [321]:
class CrossEntropy:
    """Cross-entropy loss summed over every element of the batch.

    ``forward`` returns ``-sum(targets * log(prediction))`` and caches the gradient
    with respect to ``prediction`` (``-targets / prediction``) for ``backward``.

    Predictions are clamped from below at ``eps`` before the log and the division.
    Without the clamp, a prediction of exactly 0 yields ``0 * -inf = nan`` in the
    loss and ``0 / 0 = nan`` (or ``inf``) in the gradient, which then poisons every
    weight. Predictions that are already >= ``eps`` are unaffected.
    """

    def __init__(self, eps=1e-12):
        """Args:
            eps: smallest prediction value used inside the log / division.
        """
        self._eps = eps

    def forward(self, prediction, targets):
        """Compute the loss and cache its gradient.

        Args:
            prediction: array of predicted probabilities.
            targets: array of target probabilities, same shape as ``prediction``.

        Returns:
            The summed cross-entropy as a scalar.
        """
        safe_prediction = np.maximum(prediction, self._eps)
        loss = -np.sum(targets * np.log(safe_prediction))
        self._derivative = -targets / safe_prediction
        return loss

    def backward(self):
        """Return the gradient cached by the most recent ``forward`` call."""
        return self._derivative

In [322]:
class BaseActivation:
    """Identity activation; also the base class for the other activations."""

    def forward(self, x):
        """Return ``x`` unchanged."""
        return x

    def backward(self, grad):
        """Return the incoming gradient unchanged (the identity has slope 1)."""
        return grad


class Sigmoid(BaseActivation):
    """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

    def forward(self, x):
        """Apply the sigmoid and remember the output for ``backward``."""
        output = 1. / (1. + np.exp(-x))
        self._y = output
        return output

    def backward(self, grad):
        """Scale ``grad`` by the sigmoid slope ``y * (1 - y)`` of the cached output."""
        return self._y * (1 - self._y) * grad


class Softmax(BaseActivation):
    """Softmax over the class axis (axis 0), applied independently to each column.

    The rest of the notebook uses a (features, samples) layout: ``Dense`` computes
    ``w @ x + b`` with ``w`` of shape (output_size, input_size), and ``accuracy``
    takes ``argmax`` over axis 0. The normalisation therefore has to run over
    axis 0 so that every column (one sample) sums to 1. Normalising over axis 1
    would mix different samples together.
    """

    def forward(self, x):
        """Return column-wise softmax probabilities and cache them for ``backward``.

        Args:
            x: array of shape (classes, samples).

        Returns:
            Array of the same shape whose columns each sum to 1.
        """
        # Subtracting the per-column maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing on large logits.
        shifted = x - np.max(x, axis=0, keepdims=True)
        exps = np.exp(shifted)
        self._y = exps / np.sum(exps, axis=0, keepdims=True)
        return self._y

    def backward(self, grad):
        """Multiply ``grad`` by the softmax Jacobian of each column.

        For one column with output ``y`` this is ``y * (grad - sum(grad * y))``,
        where the sum runs over the class axis.
        """
        weighted = np.sum(grad * self._y, axis=0, keepdims=True)
        return self._y * (grad - weighted)


class Dense:
    """Fully connected layer computing ``activation(w @ x + b)``.

    Inputs are laid out as (input_size, samples); outputs as (output_size, samples).
    ``backward`` both returns the gradient for the previous layer and applies one
    gradient-descent step to this layer's parameters.
    """

    def __init__(self, input_size, output_size, activation=None):
        """Create the layer with randomly initialised parameters.

        Args:
            input_size: number of input features.
            output_size: number of output units.
            activation: object exposing ``forward(z)`` and ``backward(grad)``.
                When omitted, a fresh ``BaseActivation`` (identity) is used.
        """
        # Build the default here rather than in the signature: a call placed in a
        # default argument is evaluated once, so every layer would silently share
        # the same activation object.
        if activation is None:
            activation = BaseActivation()

        self._activation = activation
        self._input_size = input_size
        self._output_size = output_size
        # Weights: standard normal scaled by sqrt(2 / input_size).
        self._w = np.random.randn(output_size, input_size) * np.sqrt(2/input_size)
        # Bias: standard normal, one value per output unit.
        self._b = np.random.randn(output_size, 1)

    def forward(self, x):
        """Return ``activation(w @ x + b)`` and cache ``x`` for ``backward``."""
        self._x = x
        z = self._w @ x + self._b
        return self._activation.forward(z)

    def backward(self, grad, learning_rate):
        """Backpropagate ``grad`` and update the weights and bias in place.

        Args:
            grad: gradient of the loss with respect to this layer's output.
            learning_rate: step size of the parameter update.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        # z = w*x + b
        # dz/dx = w
        # dz/dw = x
        # dz/db = 1

        dy = self._activation.backward(grad)

        # dx has to be computed with the weights used in the forward pass,
        # i.e. before the update below.
        dx = self._w.T @ dy
        dw = dy @ self._x.T
        db = np.sum(dy, axis=1).reshape((self._output_size, 1))

        # Gradients are sums over the batch; dividing by the number of columns
        # of the cached input turns them into a per-sample average.
        batch_size = self._x.shape[1]
        self._w -= learning_rate * dw / batch_size
        self._b -= learning_rate * db / batch_size

        return dx

In [323]:
# Seed NumPy's global generator before the layers are built: Dense draws its
# initial weights and biases from np.random.randn, so without a fixed seed every
# run of this cell trains a different network.
np.random.seed(42)

# 256 inputs -> 64 sigmoid units -> 10 softmax outputs.
nn = [Dense(256, 64, Sigmoid()),
      Dense(64, 10,  Softmax())]

loss = CrossEntropy()

learning_rate = 0.75
num_iter = 2000

# Full-batch training: the whole training set is passed on every iteration.
nn, losses = train(nn, loss, x_train, y_train, learning_rate, num_iter)









  0%|          | 0/2000 [00:00<?, ?it/s][A[A[A[A[A[A





  0%|          | 1/2000 [00:00<03:55,  8.50it/s][A[A[A[A[A[A





  0%|          | 3/2000 [00:00<03:34,  9.31it/s][A[A[A[A[A[A





  0%|          | 4/2000 [00:00<03:31,  9.46it/s][A[A[A[A[A[A





  0%|          | 6/2000 [00:00<03:17, 10.11it/s][A[A[A[A[A[A





  0%|          | 8/2000 [00:00<03:09, 10.51it/s][A[A[A[A[A[A





  0%|          | 10/2000 [00:00<03:01, 10.93it/s][A[A[A[A[A[A





  1%|          | 12/2000 [00:01<02:55, 11.31it/s][A[A[A[A[A[A





  1%|          | 14/2000 [00:01<02:53, 11.47it/s][A[A[A[A[A[A





  1%|          | 16/2000 [00:01<02:51, 11.57it/s][A[A[A[A[A[A





  1%|          | 18/2000 [00:01<02:50, 11.64it/s][A[A[A[A[A[A





  1%|          | 20/2000 [00:01<02:50, 11.63it/s][A[A[A[A[A[A





  1%|          | 22/2000 [00:01<02:49, 11.67it/s][A[A[A[A[A[A





  1%|          | 24/2000 [00:02<02:49, 11.69it/s][A[A[A

KeyboardInterrupt: ignored

In [303]:
# The first iterations are dropped so the large initial losses do not flatten
# the rest of the curve. Plot against the real iteration index so the x axis
# still reads as "iteration number" after the slice.
skip = 10
fig, ax = plt.subplots()
ax.plot(range(skip, len(losses)), losses[skip:])
ax.set(title="Training loss", xlabel="Iteration", ylabel="Cross-entropy loss (summed over batch)")
plt.show()

print("accuracy = ", accuracy(nn, x_test, y_test))