In [1]:
from sklearn.datasets import load_digits #import the dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


digits = load_digits()

data = digits.data 
old_labels = digits.target  

labels = np.zeros((data.shape[0], 10))
labels[np.arange(data.shape[0]), old_labels] = 1


train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=41)

scale = StandardScaler()
scale.fit_transform(train_data)
scale.transform(test_data)


def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_prime(x):
    sig = sigmoid(x)
    return sig * (1-sig)


def relu(x):
    return np.maximum(0, x)

def relu_prime(x):
    return np.where(x > 0, 1, 0)


def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return  1 - np.tanh(x)**2





In [2]:
class net():
    def __init__(self, act, act_prime, alpha) -> None:
        self.act  = act
        self.dact = act_prime
        self.w_1 = np.random.randn(64,30) * np.sqrt(2/64)
        self.w_2 = np.random.randn(30,10) * np.sqrt(2/30)

        self.b_1, self.b_2 = np.zeros((1,30)), np.zeros((1,10))
        self.alpha = alpha


    def set_input(self, x):
        self.A_0 = x

    def forward_1(self):
        self.Z_1 = (self.A_0 @ self.w_1) + self.b_1
        self.A_1 = self.act(self.Z_1)
        self.Z_2 = (self.A_1 @ self.w_2) + self.b_2
        self.A_2 = self.act(self.Z_2)

    def loss(self, label):
        return (self.A_2 - label)**2

    def loss_prime(self, label):
        return 2*(self.A_2 - label)

    def grad(self, label):
        dE_dA2 = self.loss_prime(label)
        dA2_dZ2 = self.dact(self.Z_2)
        dZ2_dW2 = self.A_1.T
        self.dE_dW2 = dZ2_dW2 @ (dE_dA2 * dA2_dZ2)
        self.dE_dB2 = np.sum(dE_dA2 * dA2_dZ2, axis=0) 

        dZ2_A1 = self.w_2
        dA1_Z1 = self.dact(self.Z_1)
        dZ1_dW1 = self.A_0

        self.dE_dW1 = dZ1_dW1.T @ (((dE_dA2 * dA2_dZ2) @ dZ2_A1.T) * dA1_Z1)
        self.dE_dB1 = np.sum((((dE_dA2 * dA2_dZ2) @ dZ2_A1.T) * dA1_Z1), axis=0)

    def update(self, label):
        scalar = label.shape[0]
        self.w_2 = self.w_2 - (self.alpha * self.dE_dW2/scalar)
        self.w_1 = self.w_1 - (self.alpha * self.dE_dW1/scalar)

        self.b_2 = self.b_2 - (self.alpha * self.dE_dB2/scalar)
        self.b_1 = self.b_1 - (self.alpha * self.dE_dB1/scalar)
        

#    def train(self, x, y):
##        indices = np.random.permutation(x.shape[0])
##        x = x[indices]
##        y = y[indices]
##

    def train(self, x, y):
        self.set_input(x)
        self.forward_1()
        self.grad(y)
        self.update(y)

    def test(self, x, y):
        self.set_input(x)
        self.forward_1()
        predicted_labels = np.argmax(self.A_2, axis=1)
        true_labels = np.argmax(y, axis=1)
        accuracy = accuracy_score(predicted_labels, true_labels)
        return accuracy,np.sum(self.loss(y))



network = net(tanh,tanh_prime,0.0001)
#for epoch in range(100):
#    for i in range(30):
#        network.train(train_data, train_labels)
#    network.test(test_data, test_labels)

    



In [11]:
network = net(sigmoid,sigmoid_prime,0.001)
for epoch in range(100):
    for i in range(1000):
        network.train(train_data, train_labels)
        
    acc, loss = network.test(test_data, test_labels)
    if (epoch % 10) == 0:
        print("Epoch:", epoch)
        print("    Accuracy:" , acc*100)
        print("    Test loss: ", loss)
        

Epoch: 0
    Accuracy: 11.11111111111111
    Test loss:  339.3347044250885
Epoch: 10
    Accuracy: 47.5
    Test loss:  278.7822843307516
Epoch: 20
    Accuracy: 70.83333333333334
    Test loss:  227.11885301485887
Epoch: 30
    Accuracy: 81.66666666666667
    Test loss:  182.75193861037113
Epoch: 40
    Accuracy: 89.16666666666667
    Test loss:  145.1408939270961
Epoch: 50
    Accuracy: 92.5
    Test loss:  118.00254536085657
Epoch: 60
    Accuracy: 93.33333333333333
    Test loss:  99.19928607938768
Epoch: 70
    Accuracy: 94.16666666666667
    Test loss:  85.70591232754226
Epoch: 80
    Accuracy: 94.16666666666667
    Test loss:  75.6481006858996
Epoch: 90
    Accuracy: 94.44444444444444
    Test loss:  67.93435533019945


In [None]:
network = net(relu,relu_prime,0.00001)
for epoch in range(1000):
    for i in range(1000):
        network.train(train_data, train_labels)
        
    acc, loss = network.test(test_data, test_labels)
    if (epoch % 10) == 0:
        print("Epoch:", epoch)
        print("    Accuracy:" , acc*100)
        print("    Test loss: ", loss)
    if acc > 85:
        break
        

Epoch: 0
    Accuracy: 10.555555555555555
    Test loss:  464.5288966466875
Epoch: 10
    Accuracy: 10.555555555555555
    Test loss:  362.5962603802849


In [34]:
network = net(tanh,tanh_prime,0.0001)
for epoch in range(5000):
    for i in range(1000):
        network.train(train_data, train_labels)
        
    acc, loss = network.test(test_data, test_labels)
    if (epoch % 10) == 0:
        print("Epoch:", epoch)
        print("    Accuracy:" , acc*100)
        print("    Test loss: ", loss)
    if acc > 85:
        break
        

Epoch: 0
    Accuracy: 9.444444444444445
    Test loss:  1386.7519494203993
Epoch: 10
    Accuracy: 42.5
    Test loss:  337.4819823622838
Epoch: 20
    Accuracy: 49.44444444444444
    Test loss:  272.3192676820024
Epoch: 30
    Accuracy: 55.00000000000001
    Test loss:  247.6054819100758
Epoch: 40
    Accuracy: 59.166666666666664
    Test loss:  231.02634755147244
Epoch: 50
    Accuracy: 58.611111111111114
    Test loss:  220.09287351774185
Epoch: 60
    Accuracy: 60.27777777777777
    Test loss:  209.7743922439231
Epoch: 70
    Accuracy: 60.83333333333333
    Test loss:  201.56638445137474
Epoch: 80
    Accuracy: 63.33333333333333
    Test loss:  190.44421713957027
Epoch: 90
    Accuracy: 66.11111111111111
    Test loss:  179.76150859740494
Epoch: 100
    Accuracy: 66.66666666666666
    Test loss:  172.31939417009391
Epoch: 110
    Accuracy: 68.33333333333333
    Test loss:  164.76345148631032
Epoch: 120
    Accuracy: 73.05555555555556
    Test loss:  152.69841366462555
Epoch: 130
 

KeyboardInterrupt: 