<a href="https://colab.research.google.com/github/MLandML/MLandML/blob/main/neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nnfs

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nnfs
  Downloading nnfs-0.5.1-py3-none-any.whl (9.1 kB)
Installing collected packages: nnfs
Successfully installed nnfs-0.5.1


In [None]:
#@title scihou NN using only numpy

import numpy as np
import matplotlib.pyplot as plt
#import pandas as pd
#import tensorflow as tf

import nnfs

from nnfs.datasets import spiral_data

nnfs.init()

np.random.seed(0)

class Layer_Dense:
    """Fully connected layer: ``output = inputs . weights + biases``."""

    def __init__(self, n_inputs, n_outputs):
        """Draw small random weights and start the biases at zero.

        Weights have shape ``(n_inputs, n_outputs)`` (standard normal samples
        scaled by 0.01); biases are a ``(1, n_outputs)`` row of zeros.
        """
        init_scale = 0.01
        self.weights = init_scale * np.random.randn(n_inputs, n_outputs)
        self.biases = np.zeros((1, n_outputs))

    def forward(self, inputs):
        """Remember ``inputs`` and store the affine result in ``self.output``."""
        # Inputs are kept because backward() needs them for the weight gradient.
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Turn the upstream gradient ``dvalues`` into parameter/input gradients.

        Sets ``self.dweights``, ``self.dbiases`` and ``self.dinputs``.
        """
        # Gradient flowing back to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Biases are shared by every sample, so their gradient sums over rows.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(self.inputs.T, dvalues)

class Activation_ReLu:
    """Rectified linear unit: passes positive values, zeroes the rest."""

    def forward(self, inputs):
        """Store ``inputs`` and write ``max(0, inputs)`` to ``self.output``."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Write the gradient w.r.t. the inputs to ``self.dinputs``.

        The upstream gradient is copied first so the caller's array is not
        modified, then zeroed wherever the forward input was not positive.
        """
        inactive = self.inputs <= 0
        gradient = dvalues.copy()
        gradient[inactive] = 0
        self.dinputs = gradient

class Softmax_Activation:
    """Row-wise softmax for a 2-D batch of scores (one sample per row)."""

    def forward(self, inputs):
        """Store the softmax probabilities of ``inputs`` in ``self.output``."""
        # Subtracting each row's maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Store the gradient w.r.t. the softmax inputs in ``self.dinputs``.

        ``forward`` must have been called first. For one sample with output
        ``s`` the Jacobian is ``diag(s) - s s^T``; multiplying it by the
        upstream gradient ``d`` gives ``s * (d - sum(s * d))``, which is
        evaluated here for every row at once instead of building a Jacobian
        per sample.
        """
        weighted = np.sum(dvalues * self.output, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - weighted)

class Loss:
    """Base class for losses; subclasses provide ``forward(outputs, y)``."""

    def calculate(self, outputs, y):
        """Return the mean of the per-sample losses given by ``self.forward``."""
        sample_loss = self.forward(outputs, y)
        return np.mean(sample_loss)

class Categorical_cross_entropy(Loss):
    """Categorical cross-entropy for sparse or one-hot class labels."""

    def forward(self, y_pred, y_true):
        """Return the per-sample negative log-likelihood.

        Args:
            y_pred: predicted probabilities, one row per sample.
            y_true: array of class indices (1-D) or one-hot rows (2-D).

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        samples = len(y_pred)
        # Clip away exact 0 and 1 so the log stays finite.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        label_dims = len(y_true.shape)
        if label_dims == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif label_dims == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                f"y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {label_dims} dimensions"
            )

        return -np.log(correct_confidences)

    def backward(self, y_pred, y_true):
        """Store the gradient of the mean loss w.r.t. ``y_pred`` in ``self.dinputs``.

        Args:
            y_pred: predicted probabilities, one row per sample.
            y_true: array of class indices (1-D) or one-hot rows (2-D).
        """
        samples = len(y_pred)
        # Number of classes comes from the width of a prediction row.
        labels = len(y_pred[0])

        if len(y_true.shape) == 1:
            # Expand class indices to one-hot rows.
            y_true = np.eye(labels)[y_true]

        # d(-log p)/dp = -1/p on the true class; divide by the sample count
        # so the gradient matches the mean taken in calculate().
        self.dinputs = -y_true / y_pred / samples

"""class Accuracy:
    def calculate(self,y_pred,y_true):
        predictions=np.argmax(y_pred,axis=1)

        if len(y_true.shape)==1:
            self.accuracy=np.mean(predictions==y_true)
        
        if len(y_true.shape)==2:
            class_targets=np.argmax(y_true,axis=1)
            self.accuracy=np.mean(predictions==class_targets)"""
        
class Activation_Softmax_Loss_Categorical_Crossentropy:
    """Softmax activation paired with categorical cross-entropy loss.

    Handling both together lets ``backward`` use the combined gradient
    ``(probabilities - one_hot(labels)) / samples`` directly.
    """

    def __init__(self):
        self.loss = Categorical_cross_entropy()
        self.activation = Softmax_Activation()

    def forward(self, inputs, y_true):
        """Run softmax on ``inputs``, keep it in ``self.output``, return the mean loss."""
        softmax = self.activation
        softmax.forward(inputs)
        self.output = softmax.output
        return self.loss.calculate(self.output, y_true)

    def backward(self, dvalues, y_true):
        """Store the gradient w.r.t. the softmax inputs in ``self.dinputs``.

        Args:
            dvalues: array with one row per sample; the training loop in this
                file passes the softmax output here.
            y_true: class indices (1-D) or one-hot rows (2-D).
        """
        n_samples = len(dvalues)

        targets = y_true
        if len(targets.shape) == 2:
            # One-hot rows -> class indices.
            targets = np.argmax(targets, axis=1)

        # Work on a copy so the caller's array is left untouched.
        gradient = dvalues.copy()
        gradient[range(n_samples), targets] -= 1
        self.dinputs = gradient / n_samples

class Optimizer_SGD:
    """Stochastic gradient descent with optional lr decay and momentum."""

    def __init__(self, learning_rate=1., decay=0., momentum=0.):
        """Store hyper-parameters.

        Args:
            learning_rate: initial step size.
            decay: inverse-time decay factor; 0 disables decay.
            momentum: fraction of the previous update carried over; 0 disables it.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.momentum = momentum
        self.iterations = 0

    def pre_update_params(self):
        """Refresh ``current_learning_rate``; call once before each round of updates."""
        if self.decay:
            self.current_learning_rate = self.learning_rate * (
                1 / (1. + self.decay * self.iterations)
            )

    def update_params(self, layer):
        """Apply one update to ``layer.weights`` / ``layer.biases`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``. With momentum enabled,
        the previous updates are kept on the layer as ``weight_momentum`` and
        ``bias_momentum``.
        """
        if self.momentum:
            if not hasattr(layer, 'weight_momentum'):
                layer.weight_momentum = np.zeros_like(layer.weights)
                layer.bias_momentum = np.zeros_like(layer.biases)

            weight_updates = (self.momentum * layer.weight_momentum
                              - self.current_learning_rate * layer.dweights)
            layer.weight_momentum = weight_updates
            bias_updates = (self.momentum * layer.bias_momentum
                            - self.current_learning_rate * layer.dbiases)
            layer.bias_momentum = bias_updates
        else:
            # Use the decayed rate here too; the base learning_rate would
            # silently ignore `decay` whenever momentum is off.
            weight_updates = -self.current_learning_rate * layer.dweights
            bias_updates = -self.current_learning_rate * layer.dbiases

        layer.weights += weight_updates
        layer.biases += bias_updates

    def post_update_params(self):
        """Advance the iteration counter; call once after each round of updates."""
        self.iterations += 1

class Optimizer_AdaGrad:
    """AdaGrad: per-parameter steps scaled by accumulated squared gradients."""

    def __init__(self, learning_rate=1., decay=0., epsilon=1e-7):
        """Store hyper-parameters.

        Args:
            learning_rate: initial step size.
            decay: inverse-time decay factor; 0 disables decay.
            epsilon: added to the denominator to avoid division by zero.
        """
        self.iterations = 0
        self.epsilon = epsilon
        self.decay = decay
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate

    def pre_update_params(self):
        """Refresh ``current_learning_rate`` when decay is enabled."""
        if not self.decay:
            return
        shrink = 1. / (1. + self.decay * self.iterations)
        self.current_learning_rate = self.learning_rate * shrink

    def update_params(self, layer):
        """Update ``layer.weights`` / ``layer.biases`` in place.

        The running sums of squared gradients live on the layer as
        ``weight_cache`` and ``bias_cache`` and are created on first use.
        """
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)

        step = self.current_learning_rate
        eps = self.epsilon

        layer.weight_cache += np.square(layer.dweights)
        layer.weights -= step * layer.dweights / (np.sqrt(layer.weight_cache) + eps)

        layer.bias_cache += np.square(layer.dbiases)
        layer.biases -= step * layer.dbiases / (np.sqrt(layer.bias_cache) + eps)

    def post_update_params(self):
        """Advance the iteration counter."""
        self.iterations += 1

class Optimizer_RMSProp:
    """RMSProp: steps scaled by a moving average of squared gradients."""

    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, rho=0.9):
        """Store hyper-parameters.

        Args:
            learning_rate: initial step size.
            decay: inverse-time decay factor; 0 disables decay.
            epsilon: added to the denominator to avoid division by zero.
            rho: weight given to the previous cache in the moving average.
        """
        self.iterations = 0
        self.rho = rho
        self.epsilon = epsilon
        self.decay = decay
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate

    def pre_update_params(self):
        """Refresh ``current_learning_rate`` when decay is enabled."""
        if not self.decay:
            return
        shrink = 1. / (1. + self.decay * self.iterations)
        self.current_learning_rate = self.learning_rate * shrink

    def update_params(self, layer):
        """Update ``layer.weights`` / ``layer.biases`` in place.

        The moving averages live on the layer as ``weight_cache`` and
        ``bias_cache`` and are created (as zeros) on first use.
        """
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)

        keep = self.rho
        fresh = 1 - self.rho
        step = self.current_learning_rate
        eps = self.epsilon

        layer.weight_cache = keep * layer.weight_cache + fresh * np.square(layer.dweights)
        layer.weights -= step * layer.dweights / (np.sqrt(layer.weight_cache) + eps)

        layer.bias_cache = keep * layer.bias_cache + fresh * np.square(layer.dbiases)
        layer.biases -= step * layer.dbiases / (np.sqrt(layer.bias_cache) + eps)

    def post_update_params(self):
        """Advance the iteration counter."""
        self.iterations += 1

class Optimizer_Adam:
    """Adam: momentum plus RMS-scaled steps, with bias-corrected estimates."""

    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, bias_1=0.9, bias_2=0.999):
        """Store hyper-parameters.

        Args:
            learning_rate: initial step size.
            decay: inverse-time decay factor; 0 disables decay.
            epsilon: added to the denominator to avoid division by zero.
            bias_1: decay rate of the gradient moving average (momentum).
            bias_2: decay rate of the squared-gradient moving average (cache).
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.epsilon = epsilon
        self.bias_1 = bias_1
        self.bias_2 = bias_2
        self.iterations = 0

    def pre_update_params(self):
        """Refresh ``current_learning_rate`` when decay is enabled."""
        if self.decay:
            self.current_learning_rate = self.learning_rate * (
                1. / (1. + self.decay * self.iterations)
            )

    def update_params(self, layer):
        """Apply one Adam step to ``layer.weights`` / ``layer.biases`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``. The moving averages are
        kept on the layer as ``weight_momentum``, ``weight_cache``,
        ``bias_momentum`` and ``bias_cache``.
        """
        # Create any missing state buffer individually so a layer that only
        # carries some of these attributes still gets a complete set.
        for state_name, param in (
            ('weight_momentum', layer.weights),
            ('weight_cache', layer.weights),
            ('bias_momentum', layer.biases),
            ('bias_cache', layer.biases),
        ):
            if not hasattr(layer, state_name):
                setattr(layer, state_name, np.zeros_like(param))

        beta_1 = self.bias_1
        beta_2 = self.bias_2

        # First moment: moving average of the gradients.
        layer.weight_momentum = beta_1 * layer.weight_momentum + (1 - beta_1) * layer.dweights
        layer.bias_momentum = beta_1 * layer.bias_momentum + (1 - beta_1) * layer.dbiases

        # Second moment: moving average of the squared gradients.
        layer.weight_cache = beta_2 * layer.weight_cache + (1 - beta_2) * layer.dweights ** 2
        layer.bias_cache = beta_2 * layer.bias_cache + (1 - beta_2) * layer.dbiases ** 2

        # Both averages start at zero and are therefore biased toward zero
        # early on; dividing by (1 - beta^t) compensates. `iterations` starts
        # at 0, so the step number t is iterations + 1.
        step = self.iterations + 1
        momentum_scale = 1 - beta_1 ** step
        cache_scale = 1 - beta_2 ** step

        weight_momentum_hat = layer.weight_momentum / momentum_scale
        bias_momentum_hat = layer.bias_momentum / momentum_scale
        weight_cache_hat = layer.weight_cache / cache_scale
        bias_cache_hat = layer.bias_cache / cache_scale

        layer.weights += (-self.current_learning_rate * weight_momentum_hat
                          / (np.sqrt(weight_cache_hat) + self.epsilon))
        layer.biases += (-self.current_learning_rate * bias_momentum_hat
                         / (np.sqrt(bias_cache_hat) + self.epsilon))

    def post_update_params(self):
        """Advance the iteration counter."""
        self.iterations += 1


# Training script: fit a 2-64-3 dense network to the three-class spiral data.
X, y = spiral_data(samples=100, classes=3)

# Accuracy compares against class indices, so collapse one-hot labels once
# here instead of re-checking on every epoch.
if len(y.shape) == 2:
    y = np.argmax(y, axis=1)

layer1 = Layer_Dense(2, 64)
activation1 = Activation_ReLu()
layer2 = Layer_Dense(64, 3)

final_activation_loss = Activation_Softmax_Loss_Categorical_Crossentropy()

# Candidate optimizers; only `active_optimizer` is used for training.
optimizer = Optimizer_SGD(learning_rate=1., decay=1e-3, momentum=0.9)
optimizer2 = Optimizer_AdaGrad(decay=1e-4)
optimizer3 = Optimizer_RMSProp(learning_rate=0.02, decay=1e-4, rho=0.999)

# Single switch point, so the logged learning rate always belongs to the
# optimizer that is actually updating the layers.
active_optimizer = optimizer3

for epoch in range(10001):

    # Forward pass.
    layer1.forward(X)
    activation1.forward(layer1.output)
    layer2.forward(activation1.output)
    loss = final_activation_loss.forward(layer2.output, y)

    predictions = np.argmax(final_activation_loss.output, axis=1)
    accuracy = np.mean(predictions == y)

    if not epoch % 100:
        print(f'epoch: {epoch}, '+
              f'acc: {accuracy:.3f}, '+
              f'loss: {loss:.3f}, '+
              f'lr: {active_optimizer.current_learning_rate}')

    # Backward pass, output layer first.
    final_activation_loss.backward(final_activation_loss.output, y)
    layer2.backward(final_activation_loss.dinputs)
    activation1.backward(layer2.dinputs)
    layer1.backward(activation1.dinputs)

    # Parameter update.
    active_optimizer.pre_update_params()
    active_optimizer.update_params(layer2)
    active_optimizer.update_params(layer1)
    active_optimizer.post_update_params()

# For debugging, inspect layer1.dweights, layer1.dbiases, layer2.dweights
# and layer2.dbiases here.

epoch: 0, acc: 0.360, loss: 1.099, lr: 0.02
epoch: 100, acc: 0.463, loss: 1.015, lr: 0.019803940984255867
epoch: 200, acc: 0.520, loss: 0.963, lr: 0.019609765663300322
epoch: 300, acc: 0.533, loss: 0.908, lr: 0.01941936110301971
epoch: 400, acc: 0.577, loss: 0.857, lr: 0.019232618521011637
epoch: 500, acc: 0.590, loss: 0.825, lr: 0.01904943327935994
epoch: 600, acc: 0.640, loss: 0.769, lr: 0.018869704689121615
epoch: 700, acc: 0.640, loss: 0.749, lr: 0.018693335825778108
epoch: 800, acc: 0.680, loss: 0.704, lr: 0.01852023335494027
epoch: 900, acc: 0.717, loss: 0.662, lr: 0.018350307367648408
epoch: 1000, acc: 0.733, loss: 0.644, lr: 0.018183471224656786
epoch: 1100, acc: 0.743, loss: 0.591, lr: 0.018019641409135957
epoch: 1200, acc: 0.780, loss: 0.588, lr: 0.01785873738726672
epoch: 1300, acc: 0.763, loss: 0.575, lr: 0.017700681476236834
epoch: 1400, acc: 0.750, loss: 0.572, lr: 0.017545398719185895
epoch: 1500, acc: 0.770, loss: 0.536, lr: 0.017392816766675364
epoch: 1600, acc: 0.793,

'print(layer1.dweights)\nprint(layer1.dbiases)\nprint(layer2.dweights)\nprint(layer2.dbiases)'