In [2]:
import numpy as np
import nnfs # package by the book's author to generate random data
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt

# Softmax activation function

In [18]:
# Softmax is the activation used for classifiers: it turns a layer's raw outputs
# into one confidence score per class

layer_outputs = [4.8, 1.21, 2.385]

# Euler's number is the base of the exponentiation step
E = np.e # 2.718281828459045

# Step 1: exponentiate every output, which makes each value positive
exp_values = [E ** output for output in layer_outputs]

print(exp_values)

# Step 2: normalize - divide each exponentiated value by their total so the
# scores add up to 1 (up to floating-point rounding)
norm_base = sum(exp_values)
norm_values = [exp_value / norm_base for exp_value in exp_values]

print(f'Normalized values :\n{norm_values}')
print(f'Sum of normalized values :\n{sum(norm_values)}')


[121.51041751873483, 3.353484652549023, 10.859062664920513]
Normalized values :
[0.8952826639572619, 0.024708306782099374, 0.0800090292606387]
Sum of normalized values :
0.9999999999999999


In [19]:
# The same two steps vectorized with numpy: exponentiate all outputs at once,
# then divide by their total
exp_values = np.exp(layer_outputs)
norm_values_np = exp_values / exp_values.sum()
print(f'Normalized values using numpy:\n{norm_values_np}')

Normalized values using numpy:
[0.89528266 0.02470831 0.08000903]


In [22]:
# Softmax over a batch: every row of `inputs` is one sample
inputs = [
    [1.0, 2.0, 3.0],
    [2.0, 5.0, -1.0],
]

# exponentiate the whole batch element-wise
exp_values = np.exp(inputs)

# axis = 1 totals each row on its own; keepdims = True keeps that total as a
# column so the division broadcasts row by row
norm_values_np = exp_values / exp_values.sum(axis = 1, keepdims = True)
print(f'Normalized values using numpy with batches:\n{norm_values_np}')

Normalized values using numpy with batches:
[[0.09003057 0.24472847 0.66524096]
 [0.04731416 0.95033021 0.00235563]]


In [26]:
# Without an axis, np.sum adds up every element of the batch into one scalar
print(f'Sum without axis:\n{np.sum(inputs)}')

# axis = 1 sums each row (one sample) separately; keepdims = True keeps the
# result as a column, one total per row
print(f'Sum with axis:\n{np.sum(inputs, axis = 1, keepdims = True)}')

Sum without axis:
12.0
Sum with axis:
[[6.]
 [6.]]


# Preventing overflow in the exponential function

In [31]:
# A useful property of softmax: subtracting the same constant from every input of a sample does not change the result, so we can subtract the sample's maximum to keep the exponential from overflowing

class Activation_Softmax():
  """Softmax activation: turns raw scores into probabilities that sum to 1.

  The result of the most recent forward pass is stored in `self.output`.
  """

  def forward(self, inputs):
    """Compute softmax probabilities and store them in `self.output`.

    Args:
      inputs: array-like of scores. Input with two or more dimensions is
        normalized along axis 1 (per row for a 2-D batch). A 1-D vector is
        treated as a single sample and normalized over its only axis.

    Returns:
      None. The probabilities, with the same shape as `inputs`, are
      available afterwards as `self.output`.
    """
    # a lone 1-D sample has no axis 1, so normalize over axis 0 in that case
    axis = 1 if np.ndim(inputs) > 1 else 0

    # get unnormalized probabilities; subtracting the per-sample maximum makes
    # the largest exponent 0, so np.exp cannot overflow, and it does not
    # change the normalized result
    exp_values = np.exp(inputs - np.max(inputs, axis = axis, keepdims = True))

    # normalize them for each sample
    probabilities = exp_values / np.sum(exp_values, axis = axis, keepdims = True)

    self.output = probabilities

In [34]:
# Instantiate the softmax activation defined above
softmax = Activation_Softmax()

# Forward a batch containing a single sample; the probabilities land in softmax.output
softmax.forward([[1,2,3]])
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]


In [38]:
# Subtract the same constant from every input (4 here, so [1, 2, 3] becomes [-3, -2, -1]);
# the printed probabilities match those of the unshifted input
softmax.forward([[-3, -2, -1]])
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]
