In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data,vertical_data

# Initialise the nnfs helper package before any data or weights are created.
# NOTE(review): presumably this seeds NumPy's RNG and switches the default
# dtype to float32 (the x preview in the next cell is float32) - confirm
# against the nnfs documentation.
nnfs.init()

In [2]:
# Generate the toy classification dataset used by the later cells:
# x holds the feature rows (two columns in the preview below) and y is passed
# to the loss and accuracy code as the targets.
# NOTE(review): `samples` looks like a per-class count (accuracies printed
# later are multiples of 1/300) - confirm against the nnfs docs.
x,y = vertical_data(samples=100, classes=3)
# Preview the first ten feature rows.
x[:10]

array([[ 0.17640524,  0.6883151 ],
       [ 0.04001572,  0.3652241 ],
       [ 0.0978738 ,  0.3729515 ],
       [ 0.22408931,  0.5969397 ],
       [ 0.1867558 ,  0.38268766],
       [-0.09772779,  0.6943621 ],
       [ 0.09500884,  0.4586381 ],
       [-0.01513572,  0.42525452],
       [-0.01032189,  0.69229424],
       [ 0.04105985,  0.6480515 ]], dtype=float32)

In [3]:
class Dense_Layer():
    """Fully connected layer: ``output = inputs . weights + biases``.

    Attributes:
        weights: (n_input, n_neurons) array drawn from ``np.random.randn``.
        biases: (1, n_neurons) array of zeros.
        inputs, output: values cached by ``forward``.
        dweights, dbiases, dinputs: gradients stored by ``backward``.
    """

    def __init__(self, n_input, n_neurons):
        """Create parameters mapping ``n_input`` features to ``n_neurons`` outputs."""
        weight_shape = (n_input, n_neurons)
        bias_shape = (1, n_neurons)
        self.weights = np.random.randn(*weight_shape)
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Remember ``inputs`` (needed by ``backward``) and compute ``self.output``."""
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Store parameter and input gradients given ``dvalues``.

        ``dvalues`` is the gradient of the loss with respect to this layer's
        output; ``forward`` must have been called first so ``self.inputs`` exists.
        """
        inputs_t = self.inputs.T
        weights_t = self.weights.T
        # Gradient w.r.t. the weights: inputs^T . dvalues
        self.dweights = np.dot(inputs_t, dvalues)
        # Each bias feeds every sample, so its gradient is the column-wise sum.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Gradient handed back to the previous layer: dvalues . weights^T
        self.dinputs = np.dot(dvalues, weights_t)

In [4]:
class ReLU():
    """Rectified linear activation: passes positive values, zeroes the rest."""

    def forward(self, inputs):
        """Cache ``inputs`` and store ``max(0, inputs)`` element-wise in ``self.output``."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Store in ``self.dinputs`` the upstream gradient with blocked positions zeroed.

        Positions where the cached input was <= 0 contributed nothing to the
        output, so their gradient is set to 0. ``dvalues`` itself is not modified.
        """
        blocked = self.inputs <= 0
        gradient = dvalues.copy()  # work on a copy so the caller's array is untouched
        gradient[blocked] = 0
        self.dinputs = gradient

In [5]:
class Softmax():
    """Row-wise softmax activation with a per-sample Jacobian backward pass."""

    def forward(self, inputs):
        """Store row-wise softmax probabilities of ``inputs`` in ``self.output``."""
        # Subtracting each row's maximum keeps exp() from overflowing and
        # does not change the resulting probabilities.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        row_totals = np.sum(exponentials, axis=1, keepdims=True)
        self.output = exponentials / row_totals

    def backward(self, dvalues):
        """Store in ``self.dinputs`` the gradient w.r.t. the softmax inputs.

        ``dvalues`` is the upstream gradient, one row per sample; ``forward``
        must have been called first so ``self.output`` is available.
        """
        gradients = np.empty_like(dvalues)  # every row is filled in below
        for row, (probabilities, upstream) in enumerate(zip(self.output, dvalues)):
            # Column vector s of this sample's probabilities.
            column = probabilities.reshape(-1, 1)
            # Softmax Jacobian for one sample: diag(s) - s . s^T
            jacobian = np.diagflat(column) - np.dot(column, column.T)
            # Chain rule: Jacobian applied to this sample's upstream gradient.
            gradients[row] = np.dot(jacobian, upstream)
        self.dinputs = gradients

In [6]:
class Categorical_Cross_Entropy():
    """Categorical cross-entropy loss for sparse or one-hot class targets."""

    def forward(self, inputs, targets):
        """Store the mean cross-entropy of ``inputs`` against ``targets`` in ``self.output``.

        Args:
            inputs: predicted probabilities, one row per sample.
            targets: either a 1-D array of class indices or a 2-D one-hot
                array with the same shape as ``inputs``.

        Raises:
            ValueError: if ``targets`` is neither 1-D nor 2-D.
        """
        # Clip so a predicted probability of exactly 0 cannot produce log(0) = inf.
        clipped_input = np.clip(inputs, 1e-7, (1 - 1e-7))
        target_dims = len(targets.shape)

        if target_dims == 1:
            # Sparse labels: pick the predicted probability of the true class.
            confidence = clipped_input[range(len(clipped_input)), targets]
        elif target_dims == 2:
            # One-hot labels: mask and sum per sample (axis=1) so that each
            # sample keeps its own confidence instead of one batch-wide scalar.
            confidence = np.sum(clipped_input * targets, axis=1)
        else:
            raise ValueError(
                "targets must be 1-D class indices or 2-D one-hot rows, "
                f"got {target_dims} dimensions"
            )

        loss = -np.log(confidence)
        self.output = np.mean(loss)

    def backward(self, dvalues, y_true):
        """Store the gradient of the loss w.r.t. the predictions in ``self.dinputs``.

        Args:
            dvalues: predicted probabilities, one row per sample.
            y_true: 1-D class indices or 2-D one-hot targets.
        """
        samples = len(dvalues)
        labels = len(dvalues[0])
        # Expand sparse labels to one-hot rows so the division below lines up.
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # NOTE(review): dvalues is not clipped here, so a predicted probability
        # of exactly 0 yields inf/nan gradients - confirm whether that matters.
        self.dinputs = -y_true / dvalues
        # Normalise by batch size to match the mean taken in forward().
        self.dinputs = self.dinputs / samples

In [7]:
def Accuracy_Calculate(inputs,targets):
    """Return the fraction of rows whose highest-scoring class equals the target.

    ``inputs`` holds one row of class scores per sample. ``targets`` may be a
    1-D array of class indices or a 2-D one-hot array, which is reduced to
    indices before comparing.
    """
    predicted_classes = np.argmax(inputs, axis=1)
    is_one_hot = len(targets.shape) == 2
    true_classes = np.argmax(targets, axis=1) if is_one_hot else targets
    return np.mean(predicted_classes == true_classes)

In [15]:
# Build an untrained 2 -> 3 -> 3 network, pairing each dense layer with the
# activation applied to its output.
dense1, activation1 = Dense_Layer(2, 3), ReLU()
dense2, activation2 = Dense_Layer(3, 3), Softmax()
loss = Categorical_Cross_Entropy()

# Single forward pass over the whole dataset.
dense1.forward(x)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Class probabilities predicted for the first five samples.
print(activation2.output[:5])

# Loss and accuracy of the randomly initialised network.
loss.forward(activation2.output, y)
accuracy = Accuracy_Calculate(activation2.output, y)

print(loss.output, accuracy, sep="\n")

[[0.33333334 0.33333334 0.33333334]
 [0.33695617 0.32418767 0.33885616]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]]
1.0820343
0.08333333333333333


In [16]:
# Re-create the 2 -> 3 -> 3 network with fresh random weights so the search
# in the next cell starts from an untrained model.
dense1, activation1 = Dense_Layer(2, 3), ReLU()
dense2, activation2 = Dense_Layer(3, 3), Softmax()
loss = Categorical_Cross_Entropy()

In [18]:
# Random local search: nudge every parameter with small Gaussian noise, keep
# the nudge when the loss improves, otherwise roll back to the best parameters
# seen so far.
N_TRIALS = 20000   # number of candidate perturbations to try
STEP_SIZE = 0.05   # scale applied to each standard-normal perturbation

best_loss = float("inf")  # any finite loss beats the starting value
best_weight_1 = dense1.weights.copy()
best_weight_2 = dense2.weights.copy()
best_bias_1 = dense1.biases.copy()
best_bias_2 = dense2.biases.copy()

for i in range(N_TRIALS):
    # Perturbation shapes are read from the parameters themselves so this cell
    # keeps working if the layer sizes are changed where the layers are built.
    dense1.weights += STEP_SIZE * np.random.randn(*dense1.weights.shape)
    dense2.weights += STEP_SIZE * np.random.randn(*dense2.weights.shape)
    dense1.biases += STEP_SIZE * np.random.randn(*dense1.biases.shape)
    dense2.biases += STEP_SIZE * np.random.randn(*dense2.biases.shape)

    # Evaluate the perturbed network on the full dataset.
    dense1.forward(x)
    activation1.forward(dense1.output)

    dense2.forward(activation1.output)
    activation2.forward(dense2.output)

    loss.forward(activation2.output,y)
    accuracy = Accuracy_Calculate(activation2.output,y)

    if loss.output < best_loss:
        # Improvement: report it and snapshot the parameters as the new best.
        print(f"trial - {i}, Loss : {loss.output} , Accuracy : {accuracy}")
        best_loss = loss.output
        best_weight_1 = dense1.weights.copy()
        best_weight_2 = dense2.weights.copy()
        best_bias_1 = dense1.biases.copy()
        best_bias_2 = dense2.biases.copy()

    else:
        # No improvement: restore copies of the best parameters so the next
        # in-place perturbation cannot modify the saved snapshot.
        dense1.weights = best_weight_1.copy()
        dense2.weights = best_weight_2.copy()
        dense1.biases = best_bias_1.copy()
        dense2.biases = best_bias_2.copy()


trial - 0, Loss : 0.17628316581249237 , Accuracy : 0.93
trial - 18, Loss : 0.17472998797893524 , Accuracy : 0.93
trial - 21, Loss : 0.1740097850561142 , Accuracy : 0.93
trial - 23, Loss : 0.1739235669374466 , Accuracy : 0.9333333333333333
trial - 31, Loss : 0.17391355335712433 , Accuracy : 0.9266666666666666
trial - 57, Loss : 0.17365212738513947 , Accuracy : 0.93
trial - 103, Loss : 0.1736154854297638 , Accuracy : 0.9266666666666666
trial - 124, Loss : 0.17357933521270752 , Accuracy : 0.9333333333333333
trial - 265, Loss : 0.17352183163166046 , Accuracy : 0.9366666666666666
trial - 427, Loss : 0.17347225546836853 , Accuracy : 0.9333333333333333
trial - 504, Loss : 0.1734367161989212 , Accuracy : 0.9366666666666666
trial - 964, Loss : 0.17342451214790344 , Accuracy : 0.9333333333333333
trial - 1094, Loss : 0.17342229187488556 , Accuracy : 0.9333333333333333
trial - 1248, Loss : 0.1733967512845993 , Accuracy : 0.9366666666666666
trial - 1338, Loss : 0.1733880639076233 , Accuracy : 0.936