# Softmax Activation Function

In [1]:
import math
import numpy as np

In [2]:
# Example raw layer outputs used throughout the notebook; the batch cell below
# treats each row as one sample.
layer_output = [
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026],
]
# Without using numpy (this version assumes layer_output is a single flat list of values, not the 2-D batch above):
# E = math.e

# exp_values = []

# for output in layer_output:
#     exp_values.append(E**output)
    
# print(exp_values)

# norm_base = sum(exp_values)
# norm_values = []

# for value in exp_values:
#     norm_values.append(value/norm_base)

# print(norm_values)

# using numpy:

In [3]:
# for a single sample (one vector of layer outputs):
# exp_values = np.exp(layer_output)
# norm_values = exp_values/np.sum(exp_values)
# print(exp_values)
# print()
# print(norm_values)

In [4]:
# Softmax for a batch of inputs: every row is normalised independently.

# Exponentiate every entry of the batch.
exp_values = np.exp(layer_output)
# print(exp_values)

# Demonstration of axis/keepdims on the raw inputs:
#   axis=1 sums across each row;
#   keepdims=True keeps the result as a column, [[8.395], [7.29], [2.487]],
#   so it broadcasts against the rows of the batch.
# Without keepdims the result would be the flat vector [8.395, 7.29, 2.487].
print(np.asarray(layer_output).sum(axis=1, keepdims=True))

# Divide each row of exponentials by that row's own total.
norm_values = exp_values / exp_values.sum(axis=1, keepdims=True)
print(norm_values)

[[8.395]
 [7.29 ]
 [2.487]]
[[8.95282664e-01 2.47083068e-02 8.00090293e-02]
 [9.99811129e-01 2.23163963e-05 1.66554348e-04]
 [5.13097164e-01 3.58333899e-01 1.28568936e-01]]


## Putting together everything we have done so far:


In [5]:
class Layer_Dense:
    """Fully connected layer that computes ``dot(inputs, weights) + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Args:
            n_inputs: Number of values in each input sample.
            n_neurons: Number of neurons (outputs) in this layer.
        """
        # Standard-normal draws scaled by 0.10; stored as (n_inputs, n_neurons)
        # so forward() can multiply without a transpose.
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.10
        # One zero bias per neuron, kept as a row so it broadcasts over a batch.
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Run the forward pass and store the result in ``self.output``.

        Args:
            inputs: Batch of samples whose last dimension matches ``n_inputs``.

        Nothing is returned; read the result from ``self.output``.
        """
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

In [7]:
class Activation_ReLU:
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Clamp negative entries of ``inputs`` to zero.

        The result is stored in ``self.output``; nothing is returned.
        """
        rectified = np.maximum(inputs, 0)
        self.output = rectified

# ReLU without using numpy (for a flat list of inputs):
# for i in inputs:
#     output.append(max(0, i))
# print(output)

# Apply ReLU to the sample batch: negative entries are replaced by 0 and
# every other value passes through unchanged.
layer_1 = Activation_ReLU()
layer_1.forward(layer_output)
print(layer_1.output)

[[4.8   1.21  2.385]
 [8.9   0.    0.2  ]
 [1.41  1.051 0.026]]


In [8]:
class Activation_Softmax:
    """Row-wise softmax: turns each row of scores into values that sum to 1."""

    def forward(self, inputs):
        """Compute softmax along axis 1 and store it in ``self.output``.

        Args:
            inputs: 2-D batch of scores, one row per sample.

        Nothing is returned; read the result from ``self.output``.
        """
        # Subtract each row's maximum before exponentiating so the largest
        # exponent is 0; this avoids overflow and leaves the result unchanged.
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted_exp = np.exp(inputs - row_max)
        # Normalise every row by its own total.
        row_total = shifted_exp.sum(axis=1, keepdims=True)
        self.output = shifted_exp / row_total

In [9]:
# We will use the spiral dataset.
def create_data(samples, classes):
    """Generate a 2-D spiral dataset with one spiral arm per class.

    Args:
        samples: Number of points generated for each class.
        classes: Number of classes (spiral arms).

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(samples*classes, 2)`` and holds
        the point coordinates; ``y`` has shape ``(samples*classes,)`` with
        dtype uint8 and holds the class index of each point.
    """
    total_points = samples * classes
    X = np.zeros((total_points, 2))
    y = np.zeros(total_points, dtype='uint8')

    for label in range(classes):
        # Rows [start, stop) belong to this class.
        start = samples * label
        stop = start + samples

        # Radius grows linearly from 0 to 1 along the arm.
        radius = np.linspace(0.0, 1, samples)
        # Each class sweeps its own angular range, jittered with Gaussian noise.
        noise = np.random.randn(samples) * 0.2
        angle = (np.linspace(label * 4, (label + 1) * 4, samples) + noise) * 2.5

        X[start:stop] = np.column_stack((radius * np.sin(angle), radius * np.cos(angle)))
        y[start:stop] = label

    return X, y

In [10]:
# Spiral dataset: 100 points for each of 3 classes, 2 coordinates per point.
X, y = create_data(samples=100, classes=3)
# First dense layer maps the 2 input coordinates to 3 neurons, followed by ReLU.
dense1 = Layer_Dense(2,3)
activation1 = Activation_ReLU()

# Second dense layer maps those 3 values to 3 outputs (one per class),
# which softmax turns into per-row values that sum to 1.
dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()

# Forward pass through layer 1 and its activation.
dense1.forward(X)
activation1.forward(dense1.output)

# Forward pass through layer 2 and softmax.
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Show the softmax output for the first five samples. The first sample sits at
# the origin (radius 0) and the biases are zero, so its row is exactly uniform.
# No random seed is set, so the other values differ from run to run.
print(activation2.output[:5])

[[0.33333333 0.33333333 0.33333333]
 [0.33324608 0.33335531 0.33339861]
 [0.33315841 0.33337755 0.33346404]
 [0.33313155 0.33337857 0.33348988]
 [0.33300008 0.33341673 0.33358319]]
