Part 6 - Softmax Activation  
https://www.youtube.com/watch?v=omz_NdFgWyU&list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3&index=6

In [1]:
import numpy as np
import math
import nnfs
from nnfs.datasets import spiral_data

In [2]:
nnfs.init()

In [3]:
layer_outputs = [4.8, 1.21, 2.385]

In [4]:
# Euler's const
E = 2.71828182846

In [5]:
math.e
# Returns a normalization sum of 0.99999999...

2.718281828459045

In [6]:
exp_vals = []

# Exponential function
for output in layer_outputs:
    exp_vals.append(E**output)

In [7]:
exp_vals

[121.51041751893969, 3.3534846525504487, 10.85906266492961]

In [8]:
# Normalization

norm_base = sum(exp_vals)
norm_vals = []

for value in exp_vals:
    norm_vals.append(value/norm_base)

In [9]:
print(sum(norm_vals))
# Should add up to 1

1.0


Alternate method with numpy

In [10]:
exp_vals = np.exp(layer_outputs)

In [11]:
norm_vals = exp_vals/np.sum(exp_vals)
norm_vals

array([0.89528266, 0.02470831, 0.08000903])

In [12]:
norm_vals.sum()

0.9999999999999999

Batch Operation

In [13]:
layer_outputs = [[4.8, 1.21, 2.35],
                 [8.9, -1.81, 0.2], 
                 [1.41, 1.051, 0.026]]

In [14]:
exp_vals = np.exp(layer_outputs)
exp_vals

array([[1.21510418e+02, 3.35348465e+00, 1.04855697e+01],
       [7.33197354e+03, 1.63654137e-01, 1.22140276e+00],
       [4.09595540e+00, 2.86051020e+00, 1.02634095e+00]])

In [15]:
# axis 1 = rows
# keepdims = keep same shape as before
np.sum(layer_outputs, axis=1, keepdims=True)

array([[8.36 ],
       [7.29 ],
       [2.487]])

In [16]:
norm_vals = exp_vals/np.sum(exp_vals, axis=1, keepdims=True)
norm_vals

array([[8.97753170e-01, 2.47764886e-02, 7.74703409e-02],
       [9.99811129e-01, 2.23163963e-05, 1.66554348e-04],
       [5.13097164e-01, 3.58333899e-01, 1.28568936e-01]])

In [17]:
class LayerDense:    
    
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.1 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros(1, dtype=float, order='C')
    
    
    def forward(self, inputs):
        self.output = np.dot(inputs, self.weights) + self.biases 
        
        
class Activation_ReLU:    
    
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)

        
class Activation_Softmax:
    
    def forward(self, inputs):
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        probabilities = exp_vals/np.sum(exp_vals, axis=1, keepdims=True)
        self.output = probabilities

In [18]:
X, y = spiral_data(samples=100, classes=3)

In [19]:
dense1 = LayerDense(2, 3)
activ1 = Activation_ReLU()

In [20]:
dense2 = LayerDense(3, 3)
activ2 = Activation_Softmax()

In [21]:
dense1.forward(X)
activ1.forward(dense1.output)

In [22]:
dense2.forward(activ1.output)
activ2.forward(dense2.output)

In [23]:
activ2.output

array([[8.97753170e-01, 2.47764886e-02, 7.74703409e-02],
       [9.99811129e-01, 2.23163963e-05, 1.66554348e-04],
       [5.13097164e-01, 3.58333899e-01, 1.28568936e-01]])

In [24]:
# code to match https://www.youtube.com/watch?v=omz_NdFgWyU&list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3&index=6
import numpy as np 
import nnfs
from nnfs.datasets import spiral_data


nnfs.init()

class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.1 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))
    def forward(self, inputs):
        self.output = np.dot(inputs, self.weights) + self.biases


class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)

class Activation_Softmax:
    def forward(self, inputs):
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities


X, y = spiral_data(samples=100, classes=3)

dense1 = Layer_Dense(2,3)
activation1 = Activation_ReLU()

dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()

dense1.forward(X)
activation1.forward(dense1.output)

dense2.forward(activation1.output)
activation2.forward(dense2.output)

print(activation2.output[:5])

[[0.33333334 0.33333334 0.33333334]
 [0.33331734 0.33331832 0.33336434]
 [0.3332888  0.33329153 0.33341965]
 [0.33325943 0.33326396 0.33347666]
 [0.33323312 0.33323926 0.33352762]]
