# Building Neural Network
### Notebook by Phankawee Chulakasian
 This Notebook follows 'Neural Networks from Scratch' tutorial by sentdex on Youtube

## import dependencies

In [2]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data
import math
import matplotlib.pyplot as plt

## Build a single neuron from scratch
Neuron taking arbitary numbers as inputs each input has its own weight
Then output weighted sum plus bias

In [3]:
inputs = [1.2, 5.1, 2.1] #arbitary inputs
weights = [3.1, 1.3, 6.5] #arbitary weight
bias = 4.2 #arbitary bias

output = inputs[0]*weights[0] + inputs[1]*weights[1] + inputs[2]*weights[2] + bias
print(output)


28.2


In [4]:
inputs = [1.2, 5.1, 2.1] #arbitary inputs
weights = [3.1, 1.3, 6.5] #arbitary weight
bias = 4.2
output = 0

for i in range(0,len(inputs)) :
    output += inputs[i]*weights[i]
output += bias

print(output)

28.2


## BUilding Layer 
Building Layer with 3 neurons from 4 inputs

In [5]:
inputs = [1, 2, 3, 2.5]

weights1 = [0.2, 0.8, -0.5, 1.0]
weights2 = [0.5, -0.91, 0.26, -0.5]
weights3 = [-0.26, -0.27, 0.17, 0.87]

bias1 = 2
bias2 = 3
bias3 = 2.5

output =    [inputs[0]*weights1[0] + inputs[1]*weights1[1] + inputs[2]*weights1[2] + inputs[3]*weights1[3] + bias1,
             inputs[0]*weights2[0] + inputs[1]*weights2[1] + inputs[2]*weights2[2] + inputs[3]*weights2[3] + bias2,
             inputs[0]*weights3[0] + inputs[1]*weights3[1] + inputs[2]*weights3[2] + inputs[3]*weights3[3] + bias3]

print(output)

[4.8, 1.21, 4.385]


## Building layer using dot product
dot(A,B) = A[0]*B[0] + A[1]*B[1] + ...

In [6]:
inputs = [1, 2, 3, 2.5]

weights = [[0.2, 0.8, -0.5, 1.0],
            [0.5, -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17, 0.87]]

biases = [2,3,0.5]
output = np.dot(weights,inputs) + biases

print(output)


[4.8   1.21  2.385]


## Batching input to improve learning rate

In [7]:
inputs =   [[1, 2, 3, 2.5],
            [2.0, 5.0, -1.0, 2.0],
            [-1.5, 2.7, 3.3, -0.8]]

weights =  [[0.2, 0.8, -0.5, 1.0],
            [0.5, -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17, 0.87]]

biases = [2,3,0.5]

output = np.dot(inputs, np.array(weights).T) + biases

print(output)


[[ 4.8    1.21   2.385]
 [ 8.9   -1.81   0.2  ]
 [ 1.41   1.051  0.026]]


## Creating layer object
Using Object Oriented python to create a layer of neuron with a customizable number of inputs per neuron and number pof neuron per layer

In [8]:
np.random.seed(0)

X = [[1.0, 2.0, 3.0, 2.5],
     [2.0, 5.0, -1.0, 2.0],
     [-1.5, 2.7, 3.3, -0.8]]

class layer_dense :
    def __init__(self, n_inputs, n_neurons) :
        self.weights = 0.1*np.random.randn(n_inputs,n_neurons)
        self.biases = np.zeros((1, n_neurons))
    def forward(self,inputs) :
        self.output = np.dot(inputs, self.weights) + self.biases

layer1 = layer_dense(4,5)
layer2 = layer_dense(5,2)

layer1.forward(X)
print(layer1.output)
layer2.forward(layer1.output)
print(layer2.output)

[[ 0.10758131  1.03983522  0.24462411  0.31821498  0.18851053]
 [-0.08349796  0.70846411  0.00293357  0.44701525  0.36360538]
 [-0.50763245  0.55688422  0.07987797 -0.34889573  0.04553042]]
[[ 0.148296   -0.08397602]
 [ 0.14100315 -0.01340469]
 [ 0.20124979 -0.07290616]]


## Activation function
each neuron will have one activation funtion so the output of a function will be 0 or 1 we use non linear function so that we can fit non linear data into the neural network

Activation function can be 
  step function 
  sigmoid function
  rectified linear (ReLU) function

In [9]:
class activation_ReLU :
    def forward(self, inputs) :
        self.output = np.maximum(0,inputs)

### Testing the activation function with a dataset

In [10]:
nnfs.init()

X, y = spiral_data(100,3)

layer1 = layer_dense(2,5)
activation1 = activation_ReLU()

layer1.forward(X)
activation1.forward(layer1.output)

#print(layer1.output)
print(activation1.output)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 4.65504505e-04
  4.56846210e-05]
 [0.00000000e+00 5.93469958e-05 0.00000000e+00 2.03573116e-04
  6.10024377e-04]
 ...
 [1.13291524e-01 0.00000000e+00 0.00000000e+00 8.11079666e-02
  0.00000000e+00]
 [1.34588361e-01 0.00000000e+00 3.09493970e-02 5.66337556e-02
  0.00000000e+00]
 [1.07817926e-01 0.00000000e+00 0.00000000e+00 8.72561932e-02
  0.00000000e+00]]


### Softmax Activation
Use softmax activation to measure how right or worng the model prediction is by using exponential function

In [11]:
E = math.e

exp_values = []

for output in layer2.output :
    exp_values.append(E**output)
print(layer2.output)
print(exp_values)

[[ 0.148296   -0.08397602]
 [ 0.14100315 -0.01340469]
 [ 0.20124979 -0.07290616]]
[array([1.15985616, 0.91945331]), array([1.15142827, 0.98668476]), array([1.22293021, 0.92968807])]


#### Normalization

In [12]:
norm_base = sum(exp_values)
norm_val = []

for val in exp_values:
    norm_val.append(val / norm_base)

print(norm_val)
print(sum(norm_val))

[array([0.32817932, 0.32422767]), array([0.32579466, 0.34793556]), array([0.34602601, 0.32783677])]
[1. 1.]


### Softmax using numpy

In [13]:
exp_values = np.exp(layer2.output)
norm_val = exp_values / np.sum(exp_values, axis=1, keepdims=True)
print(norm_val)

[[0.55780834 0.44219166]
 [0.53852545 0.46147455]
 [0.5681129  0.4318871 ]]


overflow prevention

In [14]:
class Activation_softmax :
    def forward(self, inputs) :
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        prob = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = prob

In [37]:
X, y = spiral_data(samples=100, classes=3)

dense1 = layer_dense(2,4)
activation1 = activation_ReLU()

dense2 = layer_dense(4,3)
activation2 = Activation_softmax()

dense1.forward(X)
activation1.forward(dense1.output)

dense2.forward(activation1.output)
activation2.forward(dense2.output)

print(activation2.output)

[[0.33333334 0.33333334 0.33333334]
 [0.33335462 0.33332726 0.33331808]
 [0.33337826 0.33333772 0.33328405]
 [0.33338264 0.33328643 0.33333096]
 [0.33341601 0.33329672 0.3332873 ]
 [0.33344433 0.33343863 0.33311707]
 [0.3334231  0.33354467 0.33303225]
 [0.33347476 0.3335443  0.332981  ]
 [0.33350173 0.33355016 0.33294815]
 [0.33353198 0.33332238 0.33314565]
 [0.33355126 0.33356908 0.33287966]
 [0.33342123 0.33367047 0.3329083 ]
 [0.33327818 0.33354437 0.3331774 ]
 [0.33327246 0.3335583  0.3331692 ]
 [0.3333889  0.33371934 0.33289176]
 [0.3331794  0.33329058 0.33352998]
 [0.33307502 0.33269185 0.33423316]
 [0.3334395  0.3338324  0.3327281 ]
 [0.3334351  0.3338536  0.33271128]
 [0.33302626 0.33271387 0.33425987]
 [0.33312243 0.3332531  0.3336245 ]
 [0.33310062 0.3332022  0.3336972 ]
 [0.3330262  0.33290333 0.33407047]
 [0.33313808 0.33342645 0.33343545]
 [0.33316275 0.33355123 0.33328605]
 [0.33307013 0.3332348  0.33369508]
 [0.3329541  0.3323925  0.33465338]
 [0.33309886 0.33342278 0.33

## Loss function
Use to find confidence score of the model 
Example mean absolute error
Categorical Crosss-Entropy 


### One-hot encoding 
Vector with N class long filled with 0 except at the target label have a 1

In [18]:
softmax_output = [0.7, 0.1, 0.2]
target_output = [1, 0, 0]

loss = -(math.log(softmax_output[0])*target_output[0] +
        (math.log(softmax_output[1]))*target_output[1] +
        (math.log(softmax_output[2]))*target_output[2])

print(loss)

loss = -(math.log(softmax_output[0]))
print(loss)

softmax_output = [0.5, 0.1, 0.4]

loss = -(math.log(softmax_output[0]))
print(loss)

0.35667494393873245
0.35667494393873245
0.6931471805599453


In [20]:
softmax_output = np.array([[0.7,0.1,0.2],
                          [0.1,0.5,0.4],
                          [0.02,0.9,0.08]])

class_target = [0,1,1]

print(-np.log(softmax_output[[0,1,2],[class_target]]))

[[0.35667494 0.69314718 0.10536052]]


In [21]:
class Loss :
    def calculate(self, output, y) :
        sample_losses = self.forward(output,y)
        data_loss = np.mean(sample_losses)
        return data_loss
class Loss_CategoricalCrossEntropy(Loss) :
    def forward(self, y_pred, y_true) :
        samples = len(y_pred)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        if len(y_true.shape) == 1 :
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2 :
            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)
        negative_log_likelihood = -np.log(correct_confidences)
        return negative_log_likelihood

In [35]:
loss_function = Loss_CategoricalCrossEntropy()
loss =loss_function.calculate(activation2.output, y)

print("Loss : ",loss)

Loss :  1.0995139
