In [8]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt

%matplotlib inline
nnfs.init()

Activation Functions:

- Step Function:
y = {1 if x > 0 | 0 if x <= 0}
Rarely used anymore because very different inputs yield the same output (e.g. x = 0.1 and x = 100 both give 1).

- Linear Function
y = x
Not useful unless we're building a linear regression model, because we usually want a
nonlinear function.

- Sigmoid Function
y = 1 / (1+e^(-x))
Pretty good because its output is granular, but usually not the best choice because it is more expensive to compute.

- Rectified Linear Function (ReLU)
y = {x if x > 0 | 0 if x <= 0}
The usual choice and really good: it is nonlinear due to the bend at 0,
and easy to compute because it is linear above 0 and a constant 0 otherwise.

Why use Activation Functions?

Not all problems are linear; in machine learning especially, they are usually <i>nonlinear</i>.


In [9]:
# ReLU by hand: keep positive inputs, replace everything else with 0
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

# The conditional expression mirrors the piecewise definition of ReLU
output = [value if value > 0 else 0 for value in inputs]

print(type(output), output, sep="\n")

<class 'list'>
[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]


In [10]:
# ReLU with numpy: an element-wise maximum against 0 replaces the Python loop
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]
output = np.maximum(0, inputs)
print(type(output), output, sep="\n")

<class 'numpy.ndarray'>
[0.  2.  0.  3.3 0.  1.1 2.2 0. ]


In [11]:
# Rectified linear unit (ReLU) activation
class Activation_ReLU:
    """Element-wise ReLU: positive values pass through, everything else becomes 0."""

    def forward(self, inputs):
        """Apply ReLU to ``inputs`` and keep the result on ``self.output``.

        ``inputs`` can be anything ``np.maximum`` accepts (a nested list or an
        array, for example). Nothing is returned; read ``self.output``.
        """
        rectified = np.maximum(0, inputs)
        self.output = rectified

In [15]:
class Layer_Dense:
    """Fully connected layer computing ``np.dot(inputs, weights) + biases``."""

    def __init__(self, n_inputs, n_neurons, weight_scale=0.01):
        """Create the layer's parameters.

        Args:
            n_inputs: Number of values in each input sample.
            n_neurons: Number of neurons, i.e. values in each output sample.
            weight_scale: Multiplier applied to the standard-normal draws used
                for the initial weights. The default of 0.01 reproduces the
                previously hard-coded scale.
        """
        # Weights are stored as (n_inputs, n_neurons) so forward() needs no transpose
        self.weights = weight_scale * np.random.randn(n_inputs, n_neurons)
        # A single row of zeros that broadcasts across every sample in a batch
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and store it on ``self.output``.

        Args:
            inputs: Batch whose last dimension has ``n_inputs`` values.

        Nothing is returned; read ``self.output``.
        """
        self.output = np.dot(inputs, self.weights) + self.biases

In [16]:
# Create the dataset via nnfs' spiral_data helper
# (presumably 2 features per sample, matching the 2-input layer below — confirm)
X, y = spiral_data(samples=100, classes=3)

# Create a Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create the ReLU activation that will be applied to the Dense layer's output
activation1 = Activation_ReLU()

# Forward pass of the training data through the Dense layer;
# the result is stored on dense1.output
dense1.forward(X)

# Forward pass through the activation function.
# It takes the previous layer's output and stores its own on activation1.output
activation1.forward(dense1.output)

# Show only the first five samples instead of dumping the whole array
print(activation1.output[:5])

[[0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [1.3520580e-04 1.8173116e-05 0.0000000e+00]
 [2.3245417e-04 0.0000000e+00 0.0000000e+00]
 [3.8226307e-04 0.0000000e+00 0.0000000e+00]
 [5.7436468e-04 0.0000000e+00 0.0000000e+00]]


Softmax Activation Function:

Softmax Function is meant for classification and it is used on the output of the last layer.



In [17]:
# Softmax, step 1: exponentiate the raw layer outputs

# Example outputs carried over from the earlier
# "what is a neural network" walkthrough
layer_outputs = [4.8, 1.21, 2.385]

# Euler's number, upper-cased to follow the convention for constants
E = 2.71828182846

# Raise E to the power (**) of every value in the vector
exp_values = [E ** raw for raw in layer_outputs]
print('exponentiated values:', exp_values, sep='\n')

exponentiated values:
[121.51041751893969, 3.3534846525504487, 10.85906266492961]


In [19]:
# Softmax, step 2: divide every exponentiated value by their total
norm_base = sum(exp_values)  # normalization base = sum of all values
norm_values = [value / norm_base for value in exp_values]
print('Normalized exponentiated values:', norm_values, sep='\n')

print('Sum of normalized values:', sum(norm_values))

Normalized exponentiated values:
[0.8952826639573506, 0.024708306782070668, 0.08000902926057876]
Sum of normalized values: 1.0


In [20]:
# Softmax with numpy: the same two steps, vectorized

layer_outputs = [4.8, 1.21, 2.385]

# Step 1: element-wise exponential of every value in the vector
exp_values = np.exp(layer_outputs)
print('exponentiated values:', exp_values, sep='\n')

# Step 2: normalize by the total so the values sum to 1
# (up to floating-point rounding)
exp_total = np.sum(exp_values)
norm_values = exp_values / exp_total
print('Normalized exponentiated values:', norm_values, sep='\n')

print('Sum of normalized values:', np.sum(norm_values))

exponentiated values:
[121.51041752   3.35348465  10.85906266]
Normalized exponentiated values:
[0.89528266 0.02470831 0.08000903]
Sum of normalized values: 0.9999999999999999


In [31]:
class Activation_Softmax:
    """Softmax activation: turns raw scores into probabilities that sum to 1."""

    def forward(self, inputs):
        """Compute softmax over ``inputs`` and store it on ``self.output``.

        Args:
            inputs: Either a batch of score rows (2-D; each row is normalized
                independently along axis 1) or a single 1-D score vector
                (normalized as a whole). Lists and arrays are both accepted.

        Nothing is returned; read ``self.output``, which has the same shape
        as ``inputs``.
        """
        inputs = np.asarray(inputs)
        # A lone sample vector has no axis 1; normalize over its only axis instead.
        axis = 1 if inputs.ndim > 1 else 0

        # Subtracting the maximum makes every exponent <= 0, so np.exp cannot
        # overflow; the shift cancels out in the division below.
        shifted = inputs - np.max(inputs, axis=axis, keepdims=True)
        exp_values = np.exp(shifted)

        # keepdims=True keeps the totals broadcastable against exp_values
        probabilities = exp_values / np.sum(exp_values, axis=axis,
                                            keepdims=True)
        self.output = probabilities

In [32]:
# e**1 is Euler's number itself
print(np.exp(1))

2.718281828459045


In [33]:
# The exponential grows quickly: an input of 10 already gives a five-digit result
print(np.exp(10))

22026.465794806718


In [34]:
# An input of 100 gives a result on the order of 1e43
print(np.exp(100))

2.6881171418161356e+43


In [35]:
# An input of 1000 overflows and prints inf. Subtracting the row maximum first,
# as Activation_Softmax.forward does, keeps every exponent <= 0 and avoids this.
print(np.exp(1000))

inf


  print(np.exp(1000))


In [36]:
# The two extremes for a non-positive exponent: exp(-inf) is 0 and exp(0) is 1
print(np.exp(-np.inf), np.exp(0))

0.0 1.0


In [37]:
# Sanity check: softmax on a batch holding a single row of three scores
softmax = Activation_Softmax()

softmax.forward([[1,2,3]])
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]


In [38]:
# The [[1,2,3]] scores shifted down by 3: the printed probabilities are
# identical, because a constant shift does not change softmax
softmax.forward([[-2,-1,0]])
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]


In [39]:
# The [[1,2,3]] scores halved (scaled, not shifted): this does change the
# result, and the printed distribution is flatter
softmax.forward([[0.5,1,1.5]])
print(softmax.output)

[[0.18632372 0.30719589 0.50648039]]


In [40]:
# Full forward pass: Dense -> ReLU -> Dense -> Softmax

# Fresh dataset from nnfs' spiral_data helper
X, y = spiral_data(samples=100, classes=3)

# First dense layer: 2 inputs -> 3 neurons
dense1 = Layer_Dense(2, 3)

# ReLU activation applied to the first layer's output
activation1 = Activation_ReLU()

# Second dense layer: takes the 3 values from the first layer, outputs 3 values
dense2 = Layer_Dense(3, 3)

# Softmax activation applied to the second (output) layer
activation2 = Activation_Softmax()

# Each forward() stores its result on .output, which feeds the next stage
dense1.forward(X)

activation1.forward(dense1.output)

dense2.forward(activation1.output)

activation2.forward(dense2.output)

# First five rows only; with the small random initial weights the printed
# probabilities are nearly uniform (about 0.33 each)
print(activation2.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.33333322 0.33333343 0.33333334]
 [0.33333296 0.33333388 0.3333332 ]
 [0.3333323  0.33333504 0.33333266]
 [0.33333194 0.33333552 0.33333248]]
