Coding an artificial Neural Network from Scratch

In [3]:
import sys
import numpy as np
import matplotlib

How does a Neuron work

In [4]:
# A single neuron: one weight per input and exactly one bias.
inputs = [1, 2, 3]
weights = [0.2, 0.8, -0.5]
bias = 2.0

# Weighted sum of the inputs, shifted by the bias.
ooutput = sum(x * w for x, w in zip(inputs, weights)) + bias
ooutput

2.3

In [5]:
# Same neuron, now fed four inputs (so it needs four weights, still one bias).
inputs = [1, 2, 3, 2.5]
weights = [0.2, 0.8, -0.5, 1.0]
bias = 2.0

# Pair every input with its weight, add up the products, then add the bias.
ooutput = sum(x * w for x, w in zip(inputs, weights)) + bias
ooutput

4.8

4 Inputs into 3 Neurons

In [6]:
inputs = [1, 2, 3, 2.5]

# Every neuron sees the same four inputs but owns its own weights and bias.
weights1 = [0.2, 0.8, -0.5, 1.0]
weights2 = [0.5, -0.91, 0.26, -0.5]
weights3 = [-0.26, -0.27, 0.17, 0.87]
bias1 = 2
bias2 = 3
bias3 = 0.5

# One output per neuron: weighted sum of the inputs plus that neuron's bias.
output = [
    sum(x * w for x, w in zip(inputs, neuron_weights)) + neuron_bias
    for neuron_weights, neuron_bias in (
        (weights1, bias1),
        (weights2, bias2),
        (weights3, bias3),
    )
]
output

[4.8, 1.21, 2.385]

Doing the real thing using Matrices and Vectors

In [7]:
inputs = [1, 2, 3, 2.5]
# One row of weights per neuron; biases[i] belongs to the neuron in row i.
weights = [
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]
biases = [2, 3, 0.5]

# zip bundles each neuron's weight row with its bias; the inner zip pairs
# every input with the matching weight of that neuron.
layer_outputs = [
    sum(n_input * weight for n_input, weight in zip(inputs, neuron_weights)) + neuron_bias
    for neuron_weights, neuron_bias in zip(weights, biases)
]

layer_outputs

[4.8, 1.21, 2.385]

numpy implementation

In [8]:
# Order matters: the input vector goes first and the weight matrix is transposed,
# so each column of the transposed matrix holds one neuron's weights and the
# result has one value per neuron. The biases are then added element-wise.
output = np.dot(inputs, np.transpose(weights)) + np.asarray(biases)
output

array([4.8  , 1.21 , 2.385])

Doing things in batches / Parallel computing (using GPU)

In [9]:
# A batch of three samples with four features each; both operands of the dot
# product are now matrices.
inputs = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# Layer 1: three neurons, four weights each (one row per neuron).
weights = [
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]
biases = [2, 3, 0.5]

# Layer 2: three neurons, each fed by the three outputs of layer 1.
weights2 = [
    [0.1, -0.14, 0.5],
    [-0.5, 0.12, -0.33],
    [-0.44, 0.73, -0.13],
]
biases2 = [-1, 2, -0.5]

# Transposing the weights lines up the shapes for the matrix product; the bias
# vector is then added to every row (sample) of the result.
layer1_output = np.dot(inputs, np.transpose(weights)) + biases
layer2_outputs = np.dot(layer1_output, np.transpose(weights2)) + biases2

layer2_outputs

array([[ 0.5031 , -1.04185, -2.03875],
       [ 0.2434 , -2.7332 , -5.7633 ],
       [-0.99314,  1.41254, -0.35655]])

Writing every layer out by hand takes a lot of code, so instead we define a layer object (class).

In [10]:
# Sample batch: three rows (samples) with four features each.
X = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# Fix the RNG seed so the randomly initialised weights are reproducible.
np.random.seed(0)

# Weights are initialised to small random values (see Layer_Dense) so the
# numbers stay small while calculating.
# Biases start at zero; start them at a non-zero number instead if every
# neuron ends up outputting zero.

class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create small random weights and zero biases.

        Args:
            n_inputs: Number of values each sample feeds into the layer.
            n_neurons: Number of neurons (values produced per sample).
        """
        # Shape is (n_inputs, n_neurons) rather than (n_neurons, n_inputs) so
        # that forward() can multiply without transposing.
        weight_shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*weight_shape) * 0.10
        # The shape is passed as a tuple: a single row with one bias per neuron.
        bias_shape = (1, n_neurons)
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and store it in ``self.output``."""
        weighted_inputs = np.dot(inputs, self.weights)
        self.output = weighted_inputs + self.biases

# Chain three dense layers. The second argument (n_neurons) of one layer must
# equal the first argument (n_inputs) of the next, otherwise the shapes do not
# line up in the dot product.
layer1 = Layer_Dense(4,5) # 4 values per sample in, 5 neurons out
layer2 = Layer_Dense(5,2) # takes layer1's 5 outputs, 2 neurons out
layer3 = Layer_Dense(2,10) # takes layer2's 2 outputs, 10 neurons out

X_np = np.array(X)
layer1.forward(X_np) # output has one row per row of X
print(layer1.output)
print()
# Each following layer is fed the stored output of the layer before it.
layer2.forward(layer1.output)
print(layer2.output)
print()
layer3.forward(layer2.output)
print(layer3.output)

[[ 0.10758131  1.03983522  0.24462411  0.31821498  0.18851053]
 [-0.08349796  0.70846411  0.00293357  0.44701525  0.36360538]
 [-0.50763245  0.55688422  0.07987797 -0.34889573  0.04553042]]

[[ 0.148296   -0.08397602]
 [ 0.14100315 -0.01340469]
 [ 0.20124979 -0.07290616]]

[[ 0.01110314  0.01753274  0.00116307 -0.04575625 -0.00087954  0.00599737
   0.0287652   0.01130176  0.00780897 -0.00269652]
 [ 0.00359036  0.0072357  -0.01023086 -0.03054481 -0.0042225   0.00279179
   0.01902682  0.01591173 -0.00329805 -0.00397739]
 [ 0.01076291  0.01796332 -0.00542691 -0.05408584 -0.00328605  0.00634035
   0.03389322  0.01852949  0.00397137 -0.00453283]]


Activation Functions / They come into play after the sum of inputs*weights.T + bias

Code for ReLU activation function

In [11]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

# ReLU keeps positive values and replaces everything else with 0.
# The long-hand version of max(0, i) would be:
#     if i > 0:
#         outputs.append(i)
#     elif i <= 0:
#         outputs.append(0)
outputs = [max(0, i) for i in inputs]
outputs

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]

Creating Object for ReLU

In [12]:
class Activation_ReLU:
    """Rectified linear unit activation: values below zero become zero."""

    def forward(self, inputs):
        """Apply ReLU element-wise and store the result in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

In [13]:
# Install once if the package is missing: pip install nnfs
# The package is used so that this notebook repeats the results shown in the video.

import nnfs

# NOTE(review): init() presumably fixes the random seed and numpy defaults so
# that results are repeatable; confirm against the nnfs documentation.
nnfs.init()

from nnfs.datasets import spiral_data
# Positional arguments are samples=100, classes=3 (the keyword names used by a
# later call in this notebook). Note that this rebinds X, replacing the small
# hand-written batch defined earlier.
X, y = spiral_data(100,3)

In [14]:

# def create_data(points, classes):
#     X = np.zeros((points*classes, 2))
#     y = np.zeros(points*classes, dtype='uint8')
#     for class_number in range(classes):
#         ix = range(points*class_number, points*(class_number+1))
#         r = np.linspace(0.0, 1, points)  # radius
#         t = np.linspace(class_number*4, (class_number+1)*4, points) + np.random.randn(points)*0.2
#         X[ix] = np.c_[r*np.sin(t*2.5), r*np.cos(t*2.5)]
#         y[ix] = class_number
#     return X, y

# import matplotlib.pyplot as plt
# print("here")
# X, y = create_data(100, 3)

# plt.scatter(X[:,0], X[:,1])
# plt.show()

# plt.scatter(X[:,0], X[:,1], c=y, cmap='brg')
# plt.show()


In [15]:
# One dense layer followed by ReLU, applied to the spiral dataset.
# NOTE(review): n_inputs=2 assumes each spiral sample has 2 features; confirm
# against spiral_data.
layer1 = Layer_Dense(2,5)
activation1 = Activation_ReLU()

# forward() stores its result on the object; ReLU is fed the dense layer's output.
layer1.forward(X)
activation1.forward(layer1.output)

# Show only rows 96-99 of the activated output instead of the whole array.
print(activation1.output[96:100])

[[0.07766829 0.08039562 0.12873529 0.         0.        ]
 [0.13565907 0.         0.05062564 0.04318418 0.        ]
 [0.10540763 0.02938177 0.12049235 0.         0.        ]
 [0.12440884 0.         0.104376   0.         0.        ]]


SoftMax activation function especially for the output layer

In [16]:
import math

# Starting fresh: raw outputs of one layer for a single sample.
layer_outputs = [4.8, 1.21, 2.385]

# Alternative input to try, with scores that are much closer together:
# layer_outputs = [4.8, 4.79, 4.25]

# Step 1 - exponentiation in raw Python: every value becomes positive.
E = math.e
exp_values = [E**output for output in layer_outputs]
print(exp_values)

# Step 2 - normalization: divide by the total so the values sum to 1.
norm_base = sum(exp_values)
norm_values = [value/norm_base for value in exp_values]

print(norm_values)
print(sum(norm_values))

[121.51041751873483, 3.353484652549023, 10.859062664920513]
[0.8952826639572619, 0.024708306782099374, 0.0800090292606387]
0.9999999999999999


In [17]:
#Next thing we're gonna do is to convert this into numpy
# A batch of three samples, three raw layer outputs each.
layer_outputs = [
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026],
]

# np.exp exponentiates every value in layer_outputs.
exp_values = np.exp(layer_outputs)

# axis=1 sums along each row (axis=0 would sum each column); keepdims=True
# keeps the sums as a column so every row is divided by its own total.
norm_values = exp_values / exp_values.sum(axis=1, keepdims=True)

# Uncomment to inspect the intermediate values:
# print(exp_values)
# print(np.sum(layer_outputs, axis = 1, keepdims = True))
print(norm_values)

[[8.95282664e-01 2.47083068e-02 8.00090293e-02]
 [9.99811129e-01 2.23163963e-05 1.66554348e-04]
 [5.13097164e-01 3.58333899e-01 1.28568936e-01]]


Combination of Exponentiation and Normalization is Softmax

Exponentiation overflows easily: positive inputs as large as 1000 already produce values too big for a float. To avoid this, we subtract each row's maximum from every element of that row, so the largest value in the row becomes 0, whose exponential is 1. Every exponentiated value therefore lies between 0 and 1, and because the same constant is subtracted from the whole row, the normalized probabilities are unchanged.

In [18]:
#creating a class for softmax activation
class Activation_Softmax:
    """Softmax activation: turns each row of scores into values that sum to 1."""

    def forward(self, inputs):
        """Apply softmax along axis 1 and store the result in ``self.output``."""
        # Subtracting each row's maximum makes the largest exponent 0, which
        # prevents overflow in np.exp.
        row_max = np.max(inputs, axis = 1, keepdims = True)
        exp_values = np.exp(inputs - row_max)
        # Normalize every row by its own total.
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

# Regenerate the spiral dataset; y is used as the target labels by the loss later on.
X, y = spiral_data(samples=100, classes=3)

# Hidden layer: 2 values per sample in, 3 neurons out, followed by ReLU.
dense1 = Layer_Dense(2,3)
activation1 = Activation_ReLU()

# Output layer: takes dense1's 3 outputs, 3 neurons out, followed by softmax.
dense2 = Layer_Dense(3,3)
activation2 = Activation_Softmax()

# Forward pass: every step consumes the stored .output of the step before it.
dense1.forward(X)
activation1.forward(dense1.output)

dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Last expression of the cell, so the notebook displays the full softmax output.
(activation2.output)

array([[0.33333334, 0.33333334, 0.33333334],
       [0.33333507, 0.33333328, 0.3333316 ],
       [0.33327106, 0.3333301 , 0.33339888],
       [0.33329675, 0.33330458, 0.33339873],
       [0.33331198, 0.33331373, 0.33337426],
       [0.33334184, 0.33333313, 0.33332503],
       [0.33334365, 0.33333308, 0.3333232 ],
       [0.3333453 , 0.33333308, 0.33332166],
       [0.33334795, 0.333333  , 0.33331898],
       [0.33335003, 0.33333293, 0.333317  ],
       [0.33335102, 0.33333296, 0.33331606],
       [0.33335316, 0.3333329 , 0.333314  ],
       [0.3333543 , 0.33333287, 0.33331287],
       [0.33335748, 0.3333328 , 0.3333097 ],
       [0.33335924, 0.33333275, 0.333308  ],
       [0.3333594 , 0.33333275, 0.33330783],
       [0.33335876, 0.3333328 , 0.3333085 ],
       [0.33333334, 0.33333334, 0.33333334],
       [0.33336312, 0.33333266, 0.33330417],
       [0.33333904, 0.3333332 , 0.33332774],
       [0.3333392 , 0.33333316, 0.33332756],
       [0.3333669 , 0.33333257, 0.3333005 ],
       [0.

Loss Function

In [22]:
class Loss:
    """Base class for losses.

    Subclasses implement ``forward(output, y)`` and return one loss value per
    sample; ``calculate`` reduces those values to a single number.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: Model predictions, passed through to ``forward``.
            y: Target values, passed through to ``forward``.

        Returns:
            The mean of whatever ``forward`` returns (via ``np.mean``).
        """
        sample_loss = self.forward(output, y)
        data_loss = np.mean(sample_loss)
        return data_loss

class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy: negative log of the confidence given to the true class."""

    def forward(self, y_pred, y_true):
        """Compute the per-sample negative log likelihood.

        Args:
            y_pred: 2-D array-like of predicted confidences, one row per sample.
            y_true: Either a 1-D sequence of class indices, e.g. ``[1, 0]``, or
                a 2-D one-hot encoded array-like, e.g. ``[[0, 1], [1, 0]]``.
                Plain Python lists are accepted as well as numpy arrays.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        # Coerce once so list targets work and the rank can be inspected.
        y_true = np.asarray(y_true)
        samples = len(y_pred)
        # Clip away exact 0 and 1 so the log below never returns inf; clipping
        # both ends by the same amount avoids biasing the result towards 1.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)

        if y_true.ndim == 1: # scalar class indices
            # For each row pick the confidence at that row's true class index.
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2: # one-hot encoded vectors
            # The one-hot mask zeroes every confidence except the true class.
            correct_confidences = np.sum(y_pred_clipped*y_true, axis = 1)
        else:
            # Previously this case fell through and failed later with an
            # unrelated UnboundLocalError.
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot vectors, "
                "got %d dimension(s)" % y_true.ndim
            )

        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

# Mean categorical cross-entropy of the softmax output against the labels y.
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.calculate(activation2.output, y)

# The softmax output shown earlier is close to 1/3 for every class, so a loss
# near -ln(1/3) ~= 1.0986 is what an untrained 3-class model would give.
print("Loss : ", loss)

Loss :  1.09855


In [None]:
# Accuracy
N