In [None]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()

class dense_layer:
    """Fully connected layer computing ``input . weight + biases``."""

    def __init__(self,n_input,neuron):
        """Create the layer parameters.

        n_input -- number of features in each input row
        neuron  -- number of neurons, i.e. columns of the output
        """
        # Standard-normal draws scaled by 0.1, one column per neuron.
        gaussian_draws = np.random.randn(n_input,neuron)
        self.weight = 0.1*gaussian_draws
        # Biases start at zero and are kept as a single row so they
        # broadcast across every row of the batch.
        self.biases = np.zeros((1,neuron))

    def forward(self,input):
        """Return the layer output for ``input``; nothing is stored."""
        weighted_sum = np.dot(input,self.weight)
        return weighted_sum+self.biases
    
class Relu:
    """Rectified linear unit activation."""

    def forward(self,input):
        """Return ``input`` with every negative entry replaced by 0."""
        rectified = np.maximum(0,input)
        return rectified

class Softmax:
    """Row-wise softmax activation."""

    def forward(self,input):
        """Convert each row of a 2-D score array into probabilities."""
        # Shifting by the per-row maximum makes the largest exponent 0,
        # which keeps np.exp from overflowing without changing the result.
        row_peak = np.max(input, axis=1,keepdims=True)
        unnormalised = np.exp(input - row_peak)
        row_total = np.sum(unnormalised,axis=1,keepdims=True)
        return unnormalised/row_total

# Smoke test: a single forward pass of an untrained 2 -> 3 -> 3 network.
# NOTE(review): `samples` is presumably the count per class (the recorded
# output below shows 300 rows for 3 classes) -- confirm against nnfs.
x,y = spiral_data(classes=3,samples=100)

# First dense layer: 2 input features feeding 3 neurons.
first_layer = dense_layer(2,3)
output1 = first_layer.forward(x)

# ReLU zeroes out the negative pre-activations.
activation = Relu()
activation_result = activation.forward(output1)

# Second dense layer: 3 inputs to 3 outputs, read as per-class scores.
second_layer = dense_layer(3,3)
output2 = second_layer.forward(activation_result)

# Softmax turns each row of scores into probabilities.
SM = Softmax()
final_result = SM.forward(output2)

# Show the first five probability rows.
print(final_result[:5])
# Number of dimensions of the result array.
# NOTE(review): the recorded cell output is 300, which matches
# len(final_result) rather than len(final_result.shape) -- the saved output
# looks stale; confirm which expression was intended.
len(final_result.shape)


[[0.33333334 0.33333334 0.33333334]
 [0.33331734 0.3333183  0.33336434]
 [0.3332888  0.33329153 0.33341965]
 [0.33325943 0.33326396 0.33347666]
 [0.33323312 0.33323926 0.33352762]]


300

In [None]:
class CROSS_ENTROPY:
    """Categorical cross-entropy loss, computed per sample."""

    def forward(self,target,input):
        """Return ``-log(p)`` of the correct class for every sample.

        target -- ground-truth labels, either a 1-D sequence of integer
                  class indices or a 2-D one-hot array with one row per sample
        input  -- 2-D array of predicted probabilities, one row per sample

        Returns an array of shape ``(samples,)`` for index labels and
        ``(samples, 1)`` for one-hot labels (the one-hot shape is kept as it
        always was so existing callers are unaffected).
        """
        target = np.asarray(target)
        samples = len(input)
        # Clip on both sides so log(0) never happens and the mean is not
        # biased towards either extreme.
        clipping = np.clip(input,1e-7,1 - 1e-7)

        # The label format decides the branch. The predictions are 2-D in
        # both cases, so testing their shape (as before) always took the
        # one-hot branch and mis-broadcast integer labels.
        if target.ndim == 1:
            # Pick, for each row, the probability at the labelled class.
            confidence = clipping[range(samples),target]
        else:
            # One-hot mask keeps only the correct-class probability.
            confidence_matrix = clipping * target
            confidence = np.sum(confidence_matrix,keepdims=True,axis=1)

        loss = -np.log(confidence)

        return loss
            

In [None]:
import numpy as np 

class dense_layer:
    """Fully connected layer with forward and backward passes."""

    def __init__(self,n_input,neurons):
        """Create the parameters for ``n_input`` features and ``neurons`` outputs."""
        # Standard-normal draws scaled by 0.01.
        gaussian_draws = np.random.randn(n_input,neurons)
        self.weights = 0.01*gaussian_draws
        # A single row of zeros that broadcasts across the batch.
        self.bias = np.zeros((1,neurons))

    def forward(self,inputs):
        """Store ``inputs`` and write the affine output to ``self.output``."""
        # The inputs are remembered because backward() needs them.
        self.inputs = inputs
        weighted_sum = np.dot(inputs,self.weights)
        self.output = weighted_sum+self.bias

    def backward(self,dvalues):
        """Compute gradients from ``dvalues``, the gradient w.r.t. this layer's output.

        Sets ``self.dweights``, ``self.dbiases`` and ``self.dinputs``.
        """
        # Gradient passed on to whatever produced this layer's inputs.
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Parameter gradients, summed over the batch dimension.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(self.inputs.T, dvalues)
       
class Relu :
    """Rectified linear unit activation with a backward pass."""

    def forward(self,input):
        """Store ``input`` and write ``max(0, input)`` to ``self.output``."""
        self.input = input
        self.output = np.maximum(0,input)

    def backward(self,dvalue):
        """Write the gradient w.r.t. the input to ``self.dinput``.

        The incoming gradient passes through unchanged where the stored
        input was positive and is zeroed everywhere else.
        """
        blocked = self.input<=0
        # Work on a copy so the caller's gradient array is left untouched.
        gradient = dvalue.copy()
        gradient[blocked] = 0
        self.dinput = gradient
        
        
class Softmax:
    """Row-wise softmax activation."""

    def forward(self,input):
        """Return the per-row probabilities for a 2-D array of scores."""
        # Subtract each row's maximum first so np.exp cannot overflow.
        shifted = input - np.max(input,axis=1,keepdims=True)
        exponentials = np.exp(shifted)
        normaliser = np.sum(exponentials,axis = 1,keepdims=True)
        return exponentials/normaliser
    
class Loss:
    """Base class for losses; ``forward`` is expected from a subclass."""

    def calculate(self,y_pred,y_true):
        """Return the mean of the per-sample losses given by ``self.forward``."""
        per_sample = self.forward(y_pred,y_true)
        return np.mean(per_sample)

class Categorical_CE(Loss):
    """Categorical cross-entropy returning one loss value per sample."""

    def forward(self,y_pred,y_true):
        """Return ``-log`` of the probability predicted for the true class.

        y_pred -- 2-D array of predicted probabilities, one row per sample
        y_true -- 1-D integer class indices or a 2-D one-hot array
        """
        # Keep probabilities away from 0 and 1 so the log stays finite.
        y_clip = np.clip(y_pred,1e-7,1-1e-7)
        label_rank = len(y_true.shape)

        if label_rank == 2:
            # One-hot labels: the mask leaves only the true-class probability.
            masked = y_clip * y_true
            correct_confidence = np.sum(masked,axis = 1)

        if label_rank == 1:
            # Index labels: pick one probability out of every row.
            row_ids = range(len(y_pred))
            correct_confidence = y_clip[row_ids,y_true ]

        return -np.log(correct_confidence)
    
class Softmax_and_Loss_activation:
    """Softmax activation fused with categorical cross-entropy loss."""

    def __init__(self):
        self.loss = Categorical_CE()
        self.softmax = Softmax()

    def forward(self,input,y):
        """Store the labels and softmax output, and return the mean loss.

        input -- 2-D array of raw scores, one row per sample
        y     -- ground-truth labels; kept on ``self.y_true`` for backward()
        """
        self.y_true = y
        self.output = self.softmax.forward(input)
        mean_loss = self.loss.calculate(self.output,self.y_true)
        return mean_loss

    def backward(self,dvalues):
        """Write the gradient w.r.t. the raw scores to ``self.dinput``.

        dvalues -- the softmax probabilities (one row per sample)

        One-hot labels stored by forward() are replaced on ``self.y_true``
        by their class indices as a side effect.
        """
        batch_size = len(dvalues)
        labels_are_one_hot = len(self.y_true.shape)==2
        if labels_are_one_hot:
            self.y_true = np.argmax(self.y_true,axis = 1)
        # Gradient is (probabilities - one_hot) averaged over the batch;
        # the copy keeps the caller's array intact.
        gradient = dvalues.copy()
        gradient[range(batch_size),self.y_true]-=1
        self.dinput = gradient/batch_size
        
class Optimizer_SGD:
    """Stochastic gradient descent with optional decay and momentum."""

    def __init__(self,learning_rate=1.,decay=0.,momentum = 0,epsilon=1e-7):
        """Store the hyper-parameters.

        learning_rate -- initial step size
        decay         -- learning-rate decay per iteration (0 disables it)
        momentum      -- fraction of the previous update that is re-applied
        epsilon       -- stored on the instance; not read by this class
        """
        self.lr = learning_rate
        self.current_learning = learning_rate
        self.decay = decay
        self.momentum = momentum
        self.epsilon = epsilon
        self.itteration = 0

    def preupdate(self):
        """Refresh the current learning rate; call once before the updates."""
        if not self.decay:
            return
        self.current_learning = self.lr / (1 + (self.decay * self.itteration))

    def update(self,layer):
        """Apply one step to ``layer.weights`` and ``layer.bias`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``. With momentum enabled
        the previous updates are kept on the layer as ``weight_momentums``
        and ``bias_momentums``.
        """
        step_size = self.current_learning

        if not self.momentum:
            # Plain SGD: step against the gradient.
            layer.weights += -step_size * layer.dweights
            layer.bias += -step_size * layer.dbiases
            return

        # Create the momentum buffers the first time this layer is seen.
        if not hasattr(layer, 'weight_momentums'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.bias)

        # New update = gradient step + retained share of the previous update.
        retained_weight_step = layer.weight_momentums * self.momentum
        retained_bias_step = layer.bias_momentums * self.momentum
        weight_step = -(step_size * layer.dweights) + retained_weight_step
        bias_step = -(step_size * layer.dbiases) + retained_bias_step

        # Remember this update for the next call.
        layer.weight_momentums = weight_step
        layer.bias_momentums = bias_step

        layer.weights += weight_step
        layer.bias += bias_step

    def postupdate(self):
        """Advance the iteration counter; call once after the updates."""
        self.itteration += 1

        
class Optimizer_ADAGRAD:
    """AdaGrad optimizer: per-parameter steps scaled by accumulated squared gradients."""

    def __init__(self,layer=None,epsilon=1e-7,decay=0.,lr=1):
        """Store the hyper-parameters.

        layer   -- unused; kept (now optional) so existing positional calls
                   keep working
        epsilon -- small constant added to the denominator to avoid dividing by zero
        decay   -- learning-rate decay per iteration (0 disables it)
        lr      -- initial learning rate
        """
        self.lr = lr
        self.epsilon = epsilon
        self.decay = decay
        self.current_lr = lr
        self.itteration = 0

    def preupdate(self):
        """Refresh the current learning rate; call once before the updates."""
        self.current_lr = self.lr / (1 + (self.decay * self.itteration))

    def update(self,layer):
        """Apply one AdaGrad step to ``layer.weights`` and ``layer.bias`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``; the running sums of
        squared gradients are kept on the layer as ``weight_cache`` and
        ``bias_cache``.
        """
        # Only the cache creation belongs under this guard. Previously the
        # accumulation and the parameter update were nested here too, so a
        # layer was updated exactly once and never again.
        if not hasattr(layer,'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.bias)

        # Accumulate the squared gradients on every call.
        layer.weight_cache += layer.dweights**2
        layer.bias_cache += layer.dbiases**2

        # The decayed rate lives in `current_lr`; the old code read a
        # non-existent `current_learning` attribute and raised AttributeError.
        layer.weights += -self.current_lr * layer.dweights / (np.sqrt(layer.weight_cache) + self.epsilon)
        layer.bias += -self.current_lr * layer.dbiases / (np.sqrt(layer.bias_cache) + self.epsilon)

    def postupdate(self):
        """Advance the iteration counter; call once after the updates."""
        self.itteration+=1
            


In [26]:
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()

# Train a 2 -> 64 -> 3 network on the spiral dataset with SGD.
x,y = spiral_data(classes=3,samples=300)

# Network: dense(2,64) -> ReLU -> dense(64,3) -> fused softmax + cross-entropy.
layer1 = dense_layer(2,64)
activation1 = Relu()
layer2 = dense_layer(64,3)
Softmaxandloss = Softmax_and_Loss_activation()
# Default learning rate (1.0) and no momentum; only decay is enabled.
optimizer = Optimizer_SGD(decay=1e-3)

for epoch in range(10001):
    # Forward pass; each layer stores its result on `.output`.
    layer1.forward(x)

    activation1.forward(layer1.output)

    layer2.forward(activation1.output)

    # Returns the mean loss and leaves the probabilities on `.output`.
    loss = Softmaxandloss.forward(layer2.output,y)

    # Accuracy: share of samples whose most probable class equals the label.
    prediction = np.argmax(Softmaxandloss.output,axis=1)
    # One-hot labels are collapsed to class indices; this rebinds the global
    # `y`, so the branch can only be taken on the first iteration.
    if len(y.shape)==2:
        y = np.argmax(y,axis = 1)
    accuracy = np.mean(prediction==y)

    # Report progress every 100 epochs.
    if not epoch % 100:
        print(f'epoch: {epoch}, '+
            f'acc: {accuracy:.3f} '+
            f'loss : {loss:.3f}')


    # Backward pass in reverse layer order. The fused softmax/loss step takes
    # the probabilities themselves as its input.
    Softmaxandloss.backward(Softmaxandloss.output)
    layer2.backward(Softmaxandloss.dinput)
    # This cell's dense layers expose `dinputs`, its ReLU exposes `dinput`.
    activation1.backward(layer2.dinputs)
    layer1.backward(activation1.dinput)
    
    # Decay the learning rate, update both layers, then count the iteration.
    optimizer.preupdate()
    optimizer.update(layer1)
    optimizer.update(layer2)
    optimizer.postupdate()


epoch: 0, acc: 0.333 loss : 1.099
epoch: 100, acc: 0.418 loss : 1.083
epoch: 200, acc: 0.418 loss : 1.072
epoch: 300, acc: 0.423 loss : 1.071
epoch: 400, acc: 0.421 loss : 1.071
epoch: 500, acc: 0.419 loss : 1.070
epoch: 600, acc: 0.417 loss : 1.070
epoch: 700, acc: 0.420 loss : 1.070
epoch: 800, acc: 0.420 loss : 1.070
epoch: 900, acc: 0.422 loss : 1.069
epoch: 1000, acc: 0.427 loss : 1.068
epoch: 1100, acc: 0.428 loss : 1.067
epoch: 1200, acc: 0.429 loss : 1.066
epoch: 1300, acc: 0.432 loss : 1.064
epoch: 1400, acc: 0.432 loss : 1.061
epoch: 1500, acc: 0.433 loss : 1.058
epoch: 1600, acc: 0.436 loss : 1.055
epoch: 1700, acc: 0.436 loss : 1.050
epoch: 1800, acc: 0.438 loss : 1.046
epoch: 1900, acc: 0.434 loss : 1.040
epoch: 2000, acc: 0.453 loss : 1.035
epoch: 2100, acc: 0.470 loss : 1.029
epoch: 2200, acc: 0.467 loss : 1.023
epoch: 2300, acc: 0.480 loss : 1.016
epoch: 2400, acc: 0.474 loss : 1.010
epoch: 2500, acc: 0.459 loss : 1.003
epoch: 2600, acc: 0.462 loss : 0.997
epoch: 2700, 

In [None]:
import numpy as np
class Dense_layer:
    """Fully connected layer with forward and backward passes."""

    def __init__(self,n_input,neuron):
        """Create the parameters for ``n_input`` features and ``neuron`` outputs."""
        # Standard-normal draws scaled by 0.01.
        gaussian_draws = np.random.randn(n_input,neuron)
        self.weights = 0.01*gaussian_draws
        # A single row of zeros that broadcasts across the batch.
        self.biases = np.zeros((1,neuron))

    def forward(self,input):
        """Store ``input`` and write the affine output to ``self.output``."""
        # The input is remembered because backward() needs it.
        self.input = input
        weighted_sum = np.dot(input,self.weights)
        self.output = weighted_sum+self.biases

    def backward(self,dvalue):
        """Compute gradients from ``dvalue``, the gradient w.r.t. this layer's output.

        Sets ``self.dweights``, ``self.dbiases`` and ``self.dinput``.
        """
        # Gradient passed on to whatever produced this layer's input.
        self.dinput = np.dot(dvalue,self.weights.T)
        # Parameter gradients, summed over the batch dimension.
        self.dbiases = np.sum(dvalue, axis=0, keepdims=True)
        self.dweights = np.dot(self.input.T,dvalue)
    
class Relu:
    """Rectified linear unit activation with a backward pass."""

    def forward(self,input):
        """Store ``input`` and write ``max(0, input)`` to ``self.output``."""
        self.input = input
        self.output = np.maximum(0,input)

    def backward(self,dvalues):
        """Write the gradient w.r.t. the input to ``self.dinputs``.

        The incoming gradient passes through unchanged where the stored
        input was positive and is zeroed everywhere else.
        """
        blocked = self.input <= 0
        # Work on a copy so the caller's gradient array is left untouched.
        gradient = dvalues.copy()
        gradient[blocked] = 0
        self.dinputs = gradient

class Softmax:
    """Row-wise softmax activation."""

    def forward(self,input):
        """Return the per-row probabilities for a 2-D array of scores."""
        # Subtract each row's maximum first so np.exp cannot overflow.
        row_peak = np.max(input,axis=1,keepdims=True)
        exponentials = np.exp(input - row_peak)
        return exponentials/np.sum(exponentials,axis=1,keepdims=True)
        
class Loss:
    """Base class for losses; ``forward`` is expected from a subclass."""

    def calculate(self,y_pred,y_true):
        """Average the per-sample losses produced by ``self.forward``."""
        sample_losses = self.forward(y_pred,y_true)
        return np.mean(sample_losses)
    
    
class CrossCategorical(Loss) :
    """Categorical cross-entropy returning one loss value per sample."""

    def forward(self,y_pred,y_true):
        """Return ``-log`` of the probability predicted for the true class.

        y_pred -- 2-D array of predicted probabilities, one row per sample
        y_true -- 1-D integer class indices or a 2-D one-hot array
        """
        # Keep probabilities away from 0 and 1 so the log stays finite.
        y_pred_clip = np.clip(y_pred,1e-7,1-1e-7)
        label_rank = len(y_true.shape)

        if label_rank == 2:
            # One-hot labels: the mask leaves only the true-class probability.
            masked = y_true * y_pred_clip
            correct_confidence = np.sum(masked,axis=1)
        elif label_rank == 1:
            # Index labels: pick one probability out of every row.
            row_ids = range(len(y_pred))
            correct_confidence = y_pred_clip[row_ids,y_true]

        return -np.log(correct_confidence)
    
class Softmax_and_Loss_CE:
    """Softmax activation fused with categorical cross-entropy loss."""

    def __init__(self):
        self.loss = CrossCategorical()
        self.softmax = Softmax()

    def forward(self,input,y_true):
        """Store the softmax output on ``self.output`` and return the mean loss."""
        probabilities = self.softmax.forward(input)
        self.output = probabilities
        return self.loss.calculate(probabilities,y_true)

    def backward(self,dvalues,y_true):
        """Write the gradient w.r.t. the raw scores to ``self.dinput``.

        dvalues -- the softmax probabilities (one row per sample)
        y_true  -- 1-D class indices or a 2-D one-hot array; the caller's
                   array is not modified
        """
        batch_size = len(dvalues)
        # Collapse one-hot labels to class indices (local rebinding only).
        if len(y_true.shape)==2:
            y_true = np.argmax(y_true,axis=1)
        # Gradient is (probabilities - one_hot) averaged over the batch;
        # the copy keeps the caller's array intact.
        gradient = dvalues.copy()
        gradient[range(batch_size),y_true] -= 1
        self.dinput = gradient/batch_size
        
class Adam_Optimizer:
    """Adam optimizer: bias-corrected running means of the gradient and its square."""

    def __init__(self,epsilon= 1e-7,lr = 1,decay = 0, beta1 = 0.9,beta2=0.999):
        """Store the hyper-parameters.

        epsilon -- small constant added to the denominator to avoid dividing by zero
        lr      -- initial learning rate
        decay   -- learning-rate decay per iteration (0 disables it)
        beta1   -- decay factor of the running gradient mean (momentum)
        beta2   -- decay factor of the running squared-gradient mean (cache)
        """
        self.lr = lr
        self.current_lr = lr
        self.decay = decay
        self.epsilon = epsilon
        self.beta1 = beta1
        self.beta2 = beta2
        self.itteration = 0

    def preupdate(self):
        """Refresh the current learning rate; call once before the updates."""
        if not self.decay:
            return
        self.current_lr = self.lr / (1 + (self.decay * self.itteration))

    def update(self,layer):
        """Apply one Adam step to ``layer.weights`` and ``layer.biases`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``. The running statistics
        are stored on the layer as ``weights_momentum``, ``biases_momentum``,
        ``weights_cache`` and ``biases_cache``.
        """
        # Create the running statistics the first time this layer is seen.
        if not hasattr(layer,'weights_cache'):
            layer.weights_momentum = np.zeros_like(layer.weights)
            layer.biases_momentum = np.zeros_like(layer.biases)
            layer.weights_cache = np.zeros_like(layer.weights)
            layer.biases_cache = np.zeros_like(layer.biases)

        # The running means start at zero; dividing by these factors removes
        # that start-up bias. The step count is 1-based.
        step = self.itteration+1
        momentum_correction = 1-self.beta1**step
        cache_correction = 1-self.beta2**step

        # --- weights ---
        layer.weights_momentum = (self.beta1 * layer.weights_momentum) + ((1-self.beta1) * layer.dweights)
        layer.weights_cache = (self.beta2 * layer.weights_cache) + ((1-self.beta2) * (layer.dweights**2))
        weights_mean = layer.weights_momentum / momentum_correction
        weights_sq_mean = layer.weights_cache / cache_correction
        layer.weights -= self.current_lr * weights_mean/(np.sqrt(weights_sq_mean)+self.epsilon)

        # --- biases ---
        layer.biases_momentum = (self.beta1 * layer.biases_momentum) + ((1-self.beta1) * layer.dbiases)
        layer.biases_cache = (self.beta2 * layer.biases_cache) + ((1-self.beta2) * (layer.dbiases**2))
        biases_mean = layer.biases_momentum / momentum_correction
        biases_sq_mean = layer.biases_cache / cache_correction
        layer.biases -= self.current_lr * biases_mean/(np.sqrt(biases_sq_mean)+self.epsilon)

    def postupdate(self):
        """Advance the iteration counter; call once after the updates."""
        self.itteration += 1
        

In [30]:
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()

# Train a 2 -> 64 -> 3 network on the spiral dataset with Adam.
x,y = spiral_data(classes=3,samples=300)

# Network: dense(2,64) -> ReLU -> dense(64,3) -> fused softmax + cross-entropy.
layer1 = Dense_layer(2,64)
layer2 = Dense_layer(64,3)
activation1 = Relu()
activation2 = Softmax_and_Loss_CE()
optimizer = Adam_Optimizer(lr=0.01, decay=1e-3)

# The labels never change during training, so the class-index form used for
# the accuracy is derived once instead of on every epoch.
y_true = np.argmax(y,axis=1) if len(y.shape) == 2 else y

for epoch in range (10000):
    # Forward pass; each layer stores its result on `.output`.
    layer1.forward(x)
    activation1.forward(layer1.output)
    layer2.forward(activation1.output)
    # Returns the mean loss and leaves the probabilities on `.output`.
    loss = activation2.forward(layer2.output,y)
    probs = activation2.output

    # Accuracy: share of samples whose most probable class equals the label.
    prediction = np.argmax(probs,axis=1)
    accuracy = np.mean(y_true == prediction)

    # Report progress every 100 epochs. `epoch` is an integer, so it is
    # printed as one (the previous `.3f` format produced "100.000").
    if epoch % 100 == 0 :
        print(f'epoch : {epoch} Accuracy :{accuracy:.3f} Loss : {loss:.3f}')

    # Backward pass in reverse layer order. The fused softmax/loss step takes
    # the probabilities themselves as its input.
    activation2.backward(probs,y)
    layer2.backward(activation2.dinput)
    # This cell's dense layers expose `dinput`, its ReLU exposes `dinputs`.
    activation1.backward(layer2.dinput)
    layer1.backward(activation1.dinputs)

    # Decay the learning rate, update both layers, then count the iteration.
    optimizer.preupdate()
    optimizer.update(layer1)
    optimizer.update(layer2)
    optimizer.postupdate()



epoch : 0.000 Accuracy :0.360 Loss : 1.099
epoch : 100.000 Accuracy :0.463 Loss : 0.995
epoch : 200.000 Accuracy :0.639 Loss : 0.863
epoch : 300.000 Accuracy :0.704 Loss : 0.761
epoch : 400.000 Accuracy :0.717 Loss : 0.702
epoch : 500.000 Accuracy :0.732 Loss : 0.658
epoch : 600.000 Accuracy :0.751 Loss : 0.608
epoch : 700.000 Accuracy :0.769 Loss : 0.574
epoch : 800.000 Accuracy :0.777 Loss : 0.545
epoch : 900.000 Accuracy :0.780 Loss : 0.515
epoch : 1000.000 Accuracy :0.794 Loss : 0.489
epoch : 1100.000 Accuracy :0.806 Loss : 0.466
epoch : 1200.000 Accuracy :0.820 Loss : 0.447
epoch : 1300.000 Accuracy :0.828 Loss : 0.431
epoch : 1400.000 Accuracy :0.836 Loss : 0.416
epoch : 1500.000 Accuracy :0.839 Loss : 0.402
epoch : 1600.000 Accuracy :0.838 Loss : 0.389
epoch : 1700.000 Accuracy :0.847 Loss : 0.379
epoch : 1800.000 Accuracy :0.849 Loss : 0.369
epoch : 1900.000 Accuracy :0.858 Loss : 0.361
epoch : 2000.000 Accuracy :0.863 Loss : 0.353
epoch : 2100.000 Accuracy :0.862 Loss : 0.346
