In [829]:
import numpy as np
import nnfs
nnfs.init()
from nnfs.datasets import spiral_data
import math

# Activation Functions

## ReLU Function

In [830]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

output = []

for i in inputs:
    output.append(max(0, i))
    
print(output)

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]


In [831]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

output = np.maximum(0, inputs)

print(output)

[0.  2.  0.  3.3 0.  1.1 2.2 0. ]


In [832]:
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        self.output = np.dot(inputs, self.weights) + self.biases
        self.output = np.maximum(0, inputs)

X, y = spiral_data(samples=100, classes=3)

dense1 = Layer_Dense(2, 3)

dense1.forward(X)

print(dense1.output[:5]) 

[[0.         0.        ]
 [0.00299556 0.00964661]
 [0.01288097 0.01556285]
 [0.02997479 0.0044481 ]
 [0.03931246 0.00932828]]


## The Softmax Activation Function

In [833]:
layer_outputs = [4.8, 1.21, 2.385]

E = math.e

exp_values = []
for output in layer_outputs:
    exp_values.append(E ** output)
print('exponentiated values:')
print(exp_values)

exponentiated values:
[121.51041751873483, 3.353484652549023, 10.859062664920513]


In [834]:
# Now normalize values
norm_base = sum(exp_values) # We sum all values
norm_values = []
for value in exp_values:
    norm_values.append(value / norm_base)
print('Normalized exponentiated values:')
print(norm_values)
print('Sum of normalized values:', sum(norm_values))

Normalized exponentiated values:
[0.8952826639572619, 0.024708306782099374, 0.0800090292606387]
Sum of normalized values: 0.9999999999999999


In [835]:
# Values from the earlier previous when we described
# what a neural network is

layer_outputs = [4.8, 1.21, 2.385]

# For each value in a vector, calculate the exponential value
exp_values = np.exp(layer_outputs)
print('exponentiated values:')
print(exp_values)

# Now normalize values
norm_values = exp_values / np.sum(exp_values)
print('normalized exponentiated values:')
print(norm_values)
print('sum of normalized values:', np.sum(norm_values))


exponentiated values:
[121.51041752   3.35348465  10.85906266]
normalized exponentiated values:
[0.89528266 0.02470831 0.08000903]
sum of normalized values: 0.9999999999999999


In [836]:
np.sum(np.array([
    [1, 2, 3], 
    [4, 5, 6]
]))

np.sum(np.array([
    [1, 2, 3], 
    [4, 5, 6]
]), axis=0) # 1 x 3

np.sum(np.array([
    [1, 2, 3], 
    [4, 5, 6]
]), axis=1) # 2 x 1

array([ 6, 15])

In [837]:
layer_outputs = np.array(
    [[4.8, 1.21, 2.385],
     [8.9, -1.81, 0.2],
     [1.41, 1.051, 0.026]]
)

print('Sum without axis')
print(np.sum(layer_outputs))

print('Another way to think of it w/ a matrix == axis 0: columns:')
print(np.sum(layer_outputs, axis=0))

Sum without axis
18.172
Another way to think of it w/ a matrix == axis 0: columns:
[15.11   0.451  2.611]


In [838]:
print('But we want to sum the rows instead, like this w/ raw py:')
for i in layer_outputs:
    print(sum(i))

print('So we can sum axis 1, but note the current shape:')
print(np.sum(layer_outputs, axis=1))

print('Sum axis 1, but keep the same dimensions as input:')
print(np.sum(layer_outputs, axis=1, keepdims=True))

But we want to sum the rows instead, like this w/ raw py:
8.395
7.29
2.4869999999999997
So we can sum axis 1, but note the current shape:
[8.395 7.29  2.487]
Sum axis 1, but keep the same dimensions as input:
[[8.395]
 [7.29 ]
 [2.487]]


In [839]:
X = [
    [1, 2, 3],
    [4, 5, 6]
]

X_exp_values = np.exp(X)
X_exp_sum = np.sum(X_exp_values, axis=1, keepdims=True)
output = X_exp_values / X_exp_sum

row_sums = np.sum(output, axis=1, keepdims=True)
row_sums

array([[1.],
       [1.]])

In [840]:
class Activation_Softmax:

    # Forward pass
    def forward(self, inputs):

        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        
        self.output = probabilities

## Full Code

In [841]:
# A neural network dense layer (each neuron is connected to all the other neurons in the next layer)
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        # Define weights for this layer
        #  - Scale by 0.01 to be proportional for small training adjustments later
        #  - The dimensions of a weight matrix is W = [n^[l-1] x n^[l]]
        #    - Each row represents a weight set for each feature/neuron in the previous layer
        #    - Each nth column represents the weights that connect to the nth neuron
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)

        # Define biases for this layer
        #  - Initialize as 0
        #  - The dimensions of a bias matrix is b = [1 x n^[l]]
        #  - Each column represents the bias for each neuron
        self.biases = np.zeros((1, n_neurons))
    
    def forward(self, inputs):
        # Inputs is a matrix [#of samples x #of inputs]
        # Weights is [#of inputs x #of neurons]
        #  - Row and column of inputs and weights match, therefore matrix multiplication is possible
        # Biases is automatically broadcasted to fix multiple samples
        return inputs @ self.weights + self.biases


class Activation_ReLU:
    def forward(self, inputs):
        # Inputs is a matrix [#of samples x #of input/neurons]
        # ReLU is max(0, inputs)
        return np.maximum(0, inputs)


# Softmax takes non-normalized data and produces a probability distrubtion
class Activation_Softmax:
    def forward(self, inputs):
        # Inputs is a matrix [#of samples x #of input/neurons]

        # Take the exponential of the inputs matrix subtracted the vector column
        # containing the max value of each row
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        # Take the sum of all rows of the exp matrix
        exp_sum = np.sum(exp_values, axis=1, keepdims=True)

        return exp_values / exp_sum


layer1 = Layer_Dense(2, 3)
layer2 = Layer_Dense(3, 3)
ReLU = Activation_ReLU()
SoftMax = Activation_Softmax()

X, y = spiral_data(samples=100, classes=3)

z1 = layer1.forward(X)
a1 = ReLU.forward(z1)

z2 = layer2.forward(a1)
y_hat = SoftMax.forward(z2)
y_hat # Our model is completely random

# The next step is to quantify how wrong the model is

array([[0.33333334, 0.33333334, 0.33333334],
       [0.33333343, 0.33333343, 0.33333316],
       [0.33333343, 0.33333343, 0.3333331 ],
       [0.33333352, 0.33333352, 0.333333  ],
       [0.3333337 , 0.3333336 , 0.33333266],
       [0.33333385, 0.33333373, 0.33333242],
       [0.33333358, 0.33333352, 0.33333287],
       [0.33333403, 0.33333388, 0.3333321 ],
       [0.33333394, 0.33333382, 0.3333322 ],
       [0.33333403, 0.33333388, 0.3333321 ],
       [0.33333433, 0.33333415, 0.33333153],
       [0.33333403, 0.33333388, 0.33333212],
       [0.33333457, 0.3333343 , 0.33333117],
       [0.33333442, 0.3333342 , 0.33333135],
       [0.3333339 , 0.3333338 , 0.3333323 ],
       [0.33333483, 0.33333454, 0.3333306 ],
       [0.33333382, 0.3333337 , 0.33333248],
       [0.33333454, 0.33333427, 0.3333312 ],
       [0.33333334, 0.33333334, 0.33333334],
       [0.33333334, 0.33333334, 0.33333334],
       [0.33333403, 0.33333388, 0.3333321 ],
       [0.3333341 , 0.33333397, 0.33333194],
       [0.