In [1]:
# The output of the ReLU activation function is unbounded and not normalized. "Not normalized" means the values can be almost anything,
# e.g. [100, 10, 0] or [0, 2000, 200] (ReLU never outputs negative values), which on their own don't give much context to the output.

In [None]:
# A Softmax activation function takes unbounded, non-normalized inputs and produces a bounded, normalized output distribution.
# The distribution returned by the softmax activation function represents confidence scores for each class, and these scores add up to 1.
# The predicted class is the one associated with the output neuron that returned the largest confidence score.

In [None]:
# Still, we can also note the other confidence scores in our overarching algorithm/program that uses this network. 
# For example, if our network has a confidence distribution for two classes: [0.45, 0.55] , the prediction is the 2nd class, but the confidence in this
# prediction isn’t very high. Maybe our program would not act in this case since it’s not very confident.

In [None]:
# Here's the function for the Softmax:
# S_{i,j} = e^{z_{i,j}} / \sum_{l=1}^{L} e^{z_{i,l}} ; e is the constant math.e = 2.71828182846 (base of the exponential)
# z_{i,j} is the j-th output value of the layer for sample i (the value being normalized).
# z_{i,l} is the l-th output value of the same sample i; in the denominator, l runs over all L outputs of the layer.
# Both the numerator and the denominator of the Softmax function contain e raised to the power of z, where z,
# given indices, means a singular output value — the index i means the current sample and the index j means the current output in this sample.
# The numerator exponentiates the current output value and the denominator takes a sum of all of the exponentiated outputs for a given sample.

In [None]:
# Why was e used?
# e^x is a monotonically increasing function that is always positive: for x < 0 it gives a value between 0 and 1 that
# approaches 0 as x becomes more negative. Exponentiating therefore removes all negative numbers while preserving their order, and
# dividing by the sum of all those exponentiated numbers results in values between 0 and 1 -- much like probabilities, where
# each probability lies between 0 and 1. Adding all of these values gives 1.0 (up to floating-point rounding).

In [10]:
import math

def softmax_activation_func(layer_outputs):
    """Apply the softmax activation to one sample's layer outputs (pure Python).

    Every output z is exponentiated (e ** z) and then divided by the sum of
    all exponentiated outputs, which yields confidence scores that add up to
    (very nearly) 1.0.

    The intermediate and final values are printed for demonstration purposes,
    and the normalized scores are also returned so callers can reuse them.

    Note: this is the naive form -- no max-subtraction is applied, so very
    large inputs can raise OverflowError during exponentiation.

    Args:
        layer_outputs: iterable of numbers, the raw outputs of a layer for a
            single sample.

    Returns:
        list: the normalized confidence scores, in the same order as the input.
    """
    E = math.e

    # (e^z_ij) for each neuron output
    numerator_exp_vals = [E ** output for output in layer_outputs]

    print(numerator_exp_vals)

    # Denominator used for normalization: Summation(e^z_il)
    norm_base = sum(numerator_exp_vals)

    # Normalized outputs after softmax activation
    norm_outputs = [value / norm_base for value in numerator_exp_vals]

    print ( 'Normalized exponentiated values:' )
    print (norm_outputs)
    print ( 'Sum of normalized values:' , sum (norm_outputs)) # It will give a value very near to 1.0

    return norm_outputs
  

In [11]:
# Raw outputs of one sample, fed to the pure-Python softmax demo
layer_outputs = [4.8, 1.21, 2.385]
softmax_activation_func(layer_outputs)

[121.51041751873483, 3.353484652549023, 10.859062664920513]
Normalized exponentiated values:
[0.8952826639572619, 0.024708306782099374, 0.0800090292606387]
Sum of normalized values: 0.9999999999999999


In [12]:
# Softmax Activation Function using numpy
import numpy as np

def softmax_activation_func_numpy(layer_outputs):
    """Apply the softmax activation to one sample's layer outputs using NumPy.

    The outputs are exponentiated element-wise and divided by the sum of all
    exponentiated values (the sum is taken over the whole input, so this is
    meant for a single sample rather than a batch).

    The intermediate and final values are printed for demonstration purposes,
    and the normalized scores are also returned so callers can reuse them.

    Note: this is the naive form -- no max-subtraction is applied, so very
    large inputs overflow in np.exp.

    Args:
        layer_outputs: array-like of numbers, the raw outputs of a layer for
            a single sample.

    Returns:
        numpy.ndarray: the normalized confidence scores.
    """
    # (e^z_ij) for each neuron output
    numerator_exp_vals = np.exp(layer_outputs)

    print(numerator_exp_vals)

    # Denominator used for normalization: Summation(e^z_il)
    norm_base = np.sum(numerator_exp_vals)

    # Normalized outputs after softmax activation
    norm_outputs = numerator_exp_vals / norm_base

    print ( 'Normalized exponentiated values:' )
    print (norm_outputs)
    print ( 'Sum of normalized values:' , np.sum(norm_outputs)) # It will give a value very near to 1.0

    return norm_outputs

In [13]:
# Same single-sample outputs, this time fed to the NumPy softmax demo
layer_outputs = [4.8, 1.21, 2.385]
softmax_activation_func_numpy(layer_outputs)

[121.51041752   3.35348465  10.85906266]
Normalized exponentiated values:
[0.89528266 0.02470831 0.08000903]
Sum of normalized values: 0.9999999999999999


In [16]:
# Softmax Activation Function using numpy with input batches

import numpy as np

def softmax_activation_numpy_batch(layer_outputs_batch):
    """Apply the softmax activation row by row to a batch of layer outputs.

    Every value is exponentiated, then each row is divided by that row's sum
    of exponentiated values (axis=1, keepdims=True so the division broadcasts
    per row). Each row of the result therefore sums to (very nearly) 1.0.

    The intermediate and final values are printed for demonstration purposes,
    and the probabilities are also returned so callers can reuse them.

    Note: this is the naive form -- no max-subtraction is applied, so very
    large inputs overflow in np.exp.

    Args:
        layer_outputs_batch: 2-D array-like; each row is treated as the raw
            layer outputs of one sample.

    Returns:
        numpy.ndarray: per-row normalized confidence scores, same shape as
        the input.
    """
    # (e^z_ij) for each neuron output
    exp_vals_batch = np.exp(layer_outputs_batch)

    print(exp_vals_batch)

    # Per-sample denominator: Summation(e^z_il) along each row
    row_sums = np.sum(exp_vals_batch, axis=1, keepdims=True)

    # Normalized outputs after softmax activation
    probabilities = exp_vals_batch / row_sums

    print ( 'Normalized exponentiated values:' )
    print (probabilities)
    print ( 'Sum of normalized values:' , np.sum(probabilities, axis=1, keepdims=False)) # It will give a value very near to 1.0

    return probabilities

In [17]:
# Batch of three samples (one per row) fed to the batched NumPy softmax demo
layer_outputs_batch = np.array([
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026],
])
softmax_activation_numpy_batch(layer_outputs_batch)

[[1.21510418e+02 3.35348465e+00 1.08590627e+01]
 [7.33197354e+03 1.63654137e-01 1.22140276e+00]
 [4.09595540e+00 2.86051020e+00 1.02634095e+00]]
Normalized exponentiated values:
[[8.95282664e-01 2.47083068e-02 8.00090293e-02]
 [9.99811129e-01 2.23163963e-05 1.66554348e-04]
 [5.13097164e-01 3.58333899e-01 1.28568936e-01]]
Sum of normalized values: [1. 1. 1.]


In [18]:
import numpy as np
class Activation_Softmax:
    """Softmax activation that normalizes each row of a batch into confidence scores."""

    def forward(self, inputs):
        """Run the forward pass and store the result in ``self.output``.

        Args:
            inputs: 2-D batch of raw layer outputs; normalization runs along
                axis 1, i.e. independently for each row.
        """
        # Shift every row by its own maximum before exponentiating. The
        # largest exponent then becomes 0 and all others are negative, so
        # np.exp cannot overflow even for inputs in the order of 1000; the
        # normalized result is unchanged by this shift.
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted = inputs - row_max
        exp_values = np.exp(shifted)

        # Divide each row by its own sum so that every sample sums to 1
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

In [20]:
# Batch of three samples (one per row) run through the Activation_Softmax class
layer_outputs_batch = np.array([
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026],
])
activation_softmax1 = Activation_Softmax()
activation_softmax1.forward(layer_outputs_batch)
print(activation_softmax1.output)

[[8.95282664e-01 2.47083068e-02 8.00090293e-02]
 [9.99811129e-01 2.23163963e-05 1.66554348e-04]
 [5.13097164e-01 3.58333899e-01 1.28568936e-01]]
