# Importing libraries and checking versions

In [1]:
import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import nnfs
import math

In [2]:
from nnfs.datasets import spiral_data
from nnfs.datasets import vertical_data

In [3]:
nnfs.init()

In [4]:
np.random.seed(0)

In [5]:
print("Python: ", sys.version)
print("Numpy: ", np.__version__)
print("Matplotlib: ", matplotlib.__version__)

Python:  3.9.7 (default, Sep 16 2021, 08:50:36) 
[Clang 10.0.0 ]
Numpy:  1.21.5
Matplotlib:  3.5.2


# Dataset

In [6]:
X, y = spiral_data(100, 3)

# Basic Neural Network

## Input layer

### Input values
the weight values will change as the neural network gets trained

In [7]:
# don't change as these are usually from your original data or outputs of other neurons 
inputs = [[1, 2, 3, 2.5],
          [5, 7, 3.4, -2],
          [1, -2, 3, 6.5]]

# usually change when training using back propagation
weights0 = [[0.2, 0.9, 0.5, -0.3],
           [-0.1, 0.8, -0.3, 0.6],
           [0.3, -0.3, 0.6, -0.5]]

weights1 = [[0.1, 0.4, -0.3],
           [0.7, -0.4, 0.5],
           [0.9, 0.4, 0.7]]

# can change
biases0 = [2, 4, 3]
biases1 = [6, 2, -0.5]

### Output values

In [8]:
# layer1_outputs = np.dot(inputs, np.array(weights0).T) + biases0
# print(layer1_outputs)

In [9]:
# layer2_outputs = np.dot(layer1_outputs, np.array(weights1).T) + biases1
# print(layer2_outputs)

# Real Neural Network

In [10]:
# # input data
# X = [[1, 2, 3, 2.5],
#      [5, 7, 3.4, -2],
#      [1, -2, 3, 6.5]]

In [11]:
# activation function
inputs = [0, 2, -1, 3.3, -2.7, 2.2, -100]
output = []

for i in inputs:
    output.append(max(0, i))

print(output)

[0, 2, 0, 3.3, 0, 2.2, 0]


In [12]:
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.1 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))
    def forward(self, inputs):
        self.output = np.dot(inputs, self.weights) + self.biases

In [13]:
# ReLU = Rectified Linear Unit
class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)

In [14]:
class Activation_Softmax:
    def forward(self, inputs):
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities

In [15]:
class Loss:
    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss

In [16]:
class Loss_CategoricalCrossentropy(Loss):
    def forward(self, y_pred, y_true):
        samples = len(y_pred)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif  len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)
            
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

In [17]:
# first variable is the number of inputs, second can be whatever you want
#layer_1 = Layer_Dense(2, 5)
# layer_2 = Layer_Dense(5, 3)

In [18]:
#activation1 = Activation_ReLU()

In [19]:
#layer_1.forward(X)
#print(layer_1.output)

In [20]:
#activation1.forward(layer_1.output)
#print(activation1.output)

In [21]:
# layer_2.forward(layer_1.output)
# print(layer_2.output)

# Soft max Activation Function
used for the output layer

In [22]:
X, y = spiral_data(samples=100, classes=3)

In [23]:
dense1 = Layer_Dense(2,3)
activation1 = Activation_ReLU()

In [24]:
dense2 = Layer_Dense(3,3)
activation2 = Activation_Softmax()

In [25]:
dense1.forward(X)
activation1.forward(dense1.output)

In [26]:
dense2.forward(activation1.output)
activation2.forward(dense2.output)

In [27]:
print(activation2.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333376 0.33333224 0.333334  ]
 [0.33334652 0.33330023 0.33335322]
 [0.33333334 0.33333334 0.33333334]]


In [28]:
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.calculate(activation2.output, y)
print('Loss: ', loss)

Loss:  1.0985749


In [29]:
layer_outputs = [[4.8, 1.21, 2.385],
                 [8.9, -1.81, 0.2],
                 [1.41, 1.051, 0.026]]

In [30]:
#E = math.e
exp_values = np.exp(layer_outputs)

default axis is 'None' and all {9} numbers of layer outputs would be added together
if its set to 0, the columns will be added
if its set to 1, rows are added

keepdims keeps the matrix in the same shape -> the rows are added across here, so it keeps them in a column
'keep dimensions True' aka the same

In [31]:
norm_values = exp_values / np.sum(exp_values, axis=1, keepdims=True)

In [32]:
print(norm_values)

[[8.95282664e-01 2.47083068e-02 8.00090293e-02]
 [9.99811129e-01 2.23163963e-05 1.66554348e-04]
 [5.13097164e-01 3.58333899e-01 1.28568936e-01]]


# Calculating Loss with Categorical Cross-Entropy
one-hot vectors

solving for x in:
e^x = b

In [33]:
b = 5.2

In [34]:
x = np.log(b)

print(x)
print((math.e ** x), 'which is approximately equal to ', b)

1.6486586255873816
5.199999999999999 which is approximately equal to  5.2


### Actually calculating loss

In [35]:
softmax_output = [0.7, 0.1, 0.2]

In [36]:
target_output = [1, 0, 0]

In [37]:
# use math.log instead of np.log as numpy is not raw python
loss = -(math.log(softmax_output[0])*target_output[0] + 
         math.log(softmax_output[1])*target_output[1] +
         math.log(softmax_output[2])*target_output[2])
print(loss)

# same as saying -math.log(softmax_output[0]) as target_output 1 and 2 = 0

print(-math.log(0.7))

0.35667494393873245
0.35667494393873245


# Implementing Loss

In [38]:
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

In [39]:
class_targets = [0, 1, 1]

In [40]:
neg_loss = -np.log(softmax_outputs[
               range(len(softmax_outputs)), class_targets
           ])

In [41]:
average_loss = np.mean(neg_loss)
print(average_loss)

# can run into problems with 0 as log0 = infinity and this causes issues with finding the average

0.38506088005216804


# Optimization and derivatives

In [42]:
# X, y = vertical_data(samples=100, classes=3)

In [51]:
X, y = spiral_data(samples=100, classes=3)

In [52]:
dense1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()

In [53]:
dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()

In [54]:
loss_function = Loss_CategoricalCrossentropy()

In [55]:
lowest_loss = 9999999
best_dense1_weights = dense1.weights.copy()
best_dense1_biases = dense1.biases.copy()
best_dense2_weights = dense2.weights.copy()
best_dense2_biases = dense2.biases.copy()

In [56]:
for iteration in range(100000):
    
    dense1.weights += 0.05 * np.random.randn(2, 3)
    dense1.biases += 0.05 * np.random.randn(1, 3)
    dense2.weights += 0.05 * np.random.randn(3, 3)
    dense2.biases += 0.05 * np.random.randn(1, 3)
    
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)
    
    loss = loss_function.calculate(activation2.output, y)
    
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions==y)
    
    if loss < lowest_loss:
        print('New set of weights found, iteration: ', iteration,
             'loss: ', loss, 'acc:', accuracy)
        best_dense1_weights = dense1.weights.copy()
        best_dense1_biases = dense1.biases.copy()
        best_dense2_weights = dense1.weights.copy()
        best_dense2_biases = dense1.biases.copy()
        lowest_loss = loss
        
    else:
        dense1.weights = best_dense1_weights.copy()
        dense1.biases = best_dense1_biases.copy()
        dense2.weights = best_dense2_weights.copy()
        dense2.biases = best_dense2_biases.copy()

New set of weights found, iteration:  0 loss:  1.0985026 acc: 0.3333333333333333
New set of weights found, iteration:  1 loss:  1.0980016 acc: 0.3333333333333333
New set of weights found, iteration:  2 loss:  1.0977929 acc: 0.3333333333333333


ValueError: operands could not be broadcast together with shapes (2,3) (3,3) (2,3) 