# ReLU Activation Function Code

In [2]:
# Sample values mixing zero, positive and negative numbers
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

# ReLU: strictly positive values pass through unchanged, everything else becomes 0
output = [value if value > 0 else 0 for value in inputs]
output

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]

# Simple Code

In [3]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

# max(0, x) expresses ReLU in a single call: whichever of 0 and x is larger
output = [max(0, value) for value in inputs]
output

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]

# Simpler (NumPy) Code

In [4]:
import numpy as np
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]
# np.maximum compares 0 against every element at once, so no Python loop is
# needed; the result comes back as a float NumPy array rather than a list
output = np.maximum(0,inputs)
output

array([0. , 2. , 0. , 3.3, 0. , 1.1, 2.2, 0. ])

# ReLU activation function Class

In [5]:
class Activation_ReLU:
    """Rectified linear activation: values below zero become 0, the rest pass through."""

    def forward(self, inputs):
        """Apply ReLU element-wise and store the result in ``self.output``.

        ``inputs`` can be anything ``np.maximum`` accepts (scalar, nested
        list, array). Nothing is returned; read ``self.output`` afterwards.
        """
        rectified = np.maximum(0, inputs)
        self.output = rectified

# Dense Layer Class

In [6]:
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    # Layer initialization
    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        ``weights`` has shape (n_inputs, n_neurons) and holds standard-normal
        draws scaled by 0.01; ``biases`` has shape (1, n_neurons) and is all zeros.
        """
        weight_shape = (n_inputs, n_neurons)
        bias_shape = (1, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)
        self.biases = np.zeros(bias_shape)

    # Forward pass
    def forward(self, inputs):
        """Store ``np.dot(inputs, weights) + biases`` in ``self.output``.

        The single row of biases is broadcast across every row of the product.
        Nothing is returned.
        """
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

# Dataset

In [7]:
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()

# Dense Layer with ReLU activation

In [8]:
# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Dense layer taking 2 input features and producing 3 output values
dense1 = Layer_Dense(n_inputs=2, n_neurons=3)

# ReLU activation that will consume the dense layer's output
activation1 = Activation_ReLU()

# Forward pass: training data -> dense layer
dense1.forward(X)

# Forward pass: dense layer output -> activation function
activation1.forward(dense1.output)

# Show the activated output for the first few samples only
activation1.output[:5]

array([[0.        , 0.        , 0.        ],
       [0.        , 0.00011395, 0.        ],
       [0.        , 0.00031729, 0.        ],
       [0.        , 0.00052666, 0.        ],
       [0.        , 0.00071401, 0.        ]], dtype=float32)

# The Softmax Activation Function

In [9]:
# Values from the previous output when we described
# what a neural network is
layer_outputs = [4.8, 1.21, 2.385]

# e - mathematical constant; written as uppercase E to follow the common
# convention that constants are uppercased
E = 2.71828182846  # you can also use math.e

# Exponentiate every value in the vector (** is Python's power operator)
exp_values = [E ** value for value in layer_outputs]
print('exponentiated values:')
print(exp_values)

exponentiated values:
[121.51041751893969, 3.3534846525504487, 10.85906266492961]


### Now normalize values

In [10]:
# Denominator: the sum of all exponentiated values
norm_base = sum(exp_values)

# Divide each exponentiated value by that sum so the results add up to 1
norm_values = [value / norm_base for value in exp_values]
print('Normalized exponentiated values:')
print(norm_values)
print('Sum of normalized values:', sum(norm_values))

Normalized exponentiated values:
[0.8952826639573506, 0.024708306782070668, 0.08000902926057876]
Sum of normalized values: 1.0


### With Numpy

In [11]:
# Values from the earlier example where we described
# what a neural network is
layer_outputs = [4.8, 1.21, 2.385]

# Exponentiate the whole vector in one vectorized call
exp_values = np.exp(layer_outputs)
print('exponentiated values:')
print(exp_values)

# Now normalize values: divide each one by the total of all of them
exp_total = np.sum(exp_values)
norm_values = exp_values / exp_total

print('normalized exponentiated values:')
print(norm_values)
print('sum of normalized values:', np.sum(norm_values))

exponentiated values:
[121.51041752   3.35348465  10.85906266]
normalized exponentiated values:
[0.89528266 0.02470831 0.08000903]
sum of normalized values: 0.9999999999999999


### Using Batches Of Data
To train in batches, we need to convert this functionality to accept layer 
outputs in batches.

In [12]:
# A batch of three samples, four values each
inputs = [
    [1.0, 2.0, 3.0, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# Get unnormalized probabilities
exp_values = np.exp(inputs)

# One total per sample; keepdims=True keeps it as a column so the
# division below lines up row by row
row_totals = np.sum(exp_values, axis=1, keepdims=True)

# Normalize them for each sample
probabilities = exp_values / row_totals

### What is Axis
#### Axis None

In [13]:
layer_outputs = np.array([
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026],
])

# With no axis given, np.sum adds up every element into a single number
print('Sum without axis')
print(np.sum(layer_outputs))

# Spelling out axis=None is the same call, because None is the default
print('This will be identical to the above since default is None:')
print(np.sum(layer_outputs, axis=None))

Sum without axis
18.172
This will be identical to the above since default is None:
18.172


#### Axis 0

In [14]:
# axis=0 collapses the rows, leaving one sum per column
print('Another way to think of it w/ a matrix == axis 0: columns:')

print(np.sum(layer_outputs, axis=0))

Another way to think of it w/ a matrix == axis 0: columns:
[15.11   0.451  2.611]


#### Axis 1

In [15]:
print('But we want to sum the rows instead, like this w/ raw py:')
# Iterating a 2-D array yields one row at a time, so each sum is a per-row total
for row in layer_outputs:
    print(sum(row))

But we want to sum the rows instead, like this w/ raw py:
8.395
7.29
2.4869999999999997


In [16]:
# axis=1 sums across each row; the result is a flat 1-D array with one
# entry per row, so the original 2-D layout is lost
print('So we can sum axis 1, but note the current shape:')
print(np.sum(layer_outputs, axis=1))

So we can sum axis 1, but note the current shape:
[8.395 7.29  2.487]


#### Make Column Vector of this

In [17]:
# keepdims=True keeps the summed axis with length 1, so the row totals
# come back as a column with one row per input row
print('Sum axis 1, but keep the same dimensions as input:')
print(np.sum(layer_outputs, axis=1, keepdims=True))

Sum axis 1, but keep the same dimensions as input:
[[8.395]
 [7.29 ]
 [2.487]]


## Softmax Activation Function Class

In [18]:
class Activation_Softmax:
    """Softmax activation: turns raw scores into values that sum to 1 per sample."""

    # Forward Pass
    def forward(self, inputs):
        """Compute softmax and store it in ``self.output``.

        ``inputs`` is normally a batch with one sample per row (classes on
        axis 1). A single 1-D sample is also accepted and is normalized over
        its only axis, giving a 1-D result. Nothing is returned.
        """
        inputs = np.asarray(inputs)

        # Batched input keeps its classes on axis 1; a lone 1-D vector only
        # has axis 0, where a hard-coded axis=1 would raise.
        class_axis = 1 if inputs.ndim > 1 else 0

        # Get unnormalized probabilities. Subtracting each sample's maximum
        # makes the largest exponent 0, so np.exp cannot overflow; the shift
        # cancels out in the division below and does not change the result.
        shifted = inputs - np.max(inputs, axis=class_axis, keepdims=True)
        exp_values = np.exp(shifted)

        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=class_axis, keepdims=True)

        self.output = probabilities
        

#### Exponential Function Explodes Values

In [19]:
# exp(1) is simply e itself
print(np.exp(1))

2.718281828459045


In [20]:
# At an input of 10 the result is already in the tens of thousands
print(np.exp(10))

22026.465794806718


In [21]:
# At an input of 100 the result is on the order of 1e43
print(np.exp(100))

2.6881171418161356e+43


In [22]:
# At an input of 1000 the result no longer fits in a float: NumPy prints inf
print(np.exp(1000))

inf


RuntimeWarning: overflow encountered in exp
  print(np.exp(1000))


In [23]:
# The two ends of the safe range: exp(-inf) is 0 and exp(0) is 1, so
# exponentiating non-positive inputs always stays between 0 and 1
print('Exponention of Negative Infinity:\n',np.exp(-np.inf),'\nExpnonetion of Zero:\n',np.exp(0))

Exponention of Negative Infinity:
 0.0 
Expnonetion of Zero:
 1.0


*We can use this property to prevent the exponential function from overflowing. Suppose we 
subtract the maximum value from a list of input values. We would then change the output values 
to always be in a range from some negative value up to 0, as the largest number subtracted by 
itself returns 0, and any smaller number subtracted by it will result in a negative number — 
exactly the range discussed above. With Softmax, thanks to the normalization, we can subtract any 
value from all of the inputs, and it will not change the output:*

In [24]:
softmax = Activation_Softmax()

# Baseline: softmax of the raw values [1, 2, 3], for comparison with the
# shifted and scaled variants that follow
softmax.forward([[1, 2, 3]])
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]


In [25]:
softmax.forward([[-2, -1, 0]])  # [1, 2, 3] with its max (3) subtracted from every value
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]


What happens if we divide the layer’s output 
data, [1, 2, 3], for example, by 2?

In [27]:
softmax.forward([[0.5, 1, 1.5]]) # [1, 2, 3] divided by 2 (scaled, not shifted), so the output changes
print(softmax.output)

[[0.18632372 0.30719589 0.50648039]]


#### Full code up to this point

In [45]:
#!pip install nnfs
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()


# Dense layer
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    # Layer initialization
    def __init__(self, n_inputs, n_neurons):
        """Initialize weights and biases.

        ``weights`` has shape (n_inputs, n_neurons) and holds standard-normal
        draws scaled by 0.01; ``biases`` has shape (1, n_neurons) and is all zeros.
        """
        weight_shape = (n_inputs, n_neurons)
        bias_shape = (1, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)
        self.biases = np.zeros(bias_shape)

    # Forward pass
    def forward(self, inputs):
        """Calculate output values from inputs, weights and biases.

        The result is stored in ``self.output``; nothing is returned.
        """
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases
    
# ReLU activation
class Activation_ReLU:
    """Rectified linear activation: values below zero become 0, the rest pass through."""

    # Forward pass
    def forward(self, inputs):
        """Calculate output values from inputs and store them in ``self.output``.

        Nothing is returned; read ``self.output`` afterwards.
        """
        rectified = np.maximum(0, inputs)
        self.output = rectified

# Softmax activation
class Activation_Softmax:
    """Softmax activation: turns raw scores into values that sum to 1 per sample."""

    # Forward pass
    def forward(self, inputs):
        """Compute softmax and store it in ``self.output``.

        ``inputs`` is normally a batch with one sample per row (classes on
        axis 1). A single 1-D sample is also accepted and is normalized over
        its only axis, giving a 1-D result. Nothing is returned.
        """
        inputs = np.asarray(inputs)

        # Batched input keeps its classes on axis 1; a lone 1-D vector only
        # has axis 0, where a hard-coded axis=1 would raise.
        class_axis = 1 if inputs.ndim > 1 else 0

        # Get unnormalized probabilities. Subtracting each sample's maximum
        # makes the largest exponent 0, so np.exp cannot overflow; the shift
        # cancels out in the division below and does not change the result.
        shifted = inputs - np.max(inputs, axis=class_axis, keepdims=True)
        exp_values = np.exp(shifted)

        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=class_axis, keepdims=True)
        self.output = probabilities

#### Create Dataset

In [46]:
# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(n_inputs=2, n_neurons=3)

# Create ReLU activation (to be used with dense1)
activation1 = Activation_ReLU()

# Create second Dense layer with 3 input features (it takes the output
# of the previous layer) and 3 output values
dense2 = Layer_Dense(n_inputs=3, n_neurons=3)

# Create Softmax activation (to be used with dense2)
activation2 = Activation_Softmax()

# Forward pass of the training data through the first dense layer
dense1.forward(X)

# Forward pass through the ReLU activation, which takes the output of dense1
activation1.forward(dense1.output)

# Forward pass through dense2, which takes the output of activation1 as its input
dense2.forward(activation1.output)

# Forward pass through the Softmax activation, which takes the output of dense2
activation2.forward(dense2.output)

# Let's see output of the first few samples
first_rows = activation2.output[:5]
print(first_rows)

[[0.33333334 0.33333334 0.33333334]
 [0.3333332  0.3333332  0.33333364]
 [0.3333329  0.33333293 0.3333342 ]
 [0.3333326  0.33333263 0.33333477]
 [0.33333233 0.3333324  0.33333528]]
