In [13]:
# Activation Functions


Types of Activation Function

    - Linear or Identity Activation Function
    - Non-linear Activation Functions
    

'''
    1. linear
    2. sigmoid
    3. Relu        : Rectified Linear Activation Function 
    4. Softmax
    5. Softplus
    6. Softsign
    7. Tanh        : Hyperbolic tangent activation function
    8. selu        : Scaled Exponential Linear Unit 
    9. elu         : Exponential Linear Unit
    10.gelu        : Gaussian error linear unit (GELU)
    11.hard_sigmoid 
    12.Exponential

'''

import tensorflow as tf
import numpy as np

# Eager execution is what lets the cells below call `.numpy()` on tensors directly.
# `tf.enable_eager_execution` exists only in TensorFlow 1.x; the `tf.compat.v1`
# alias is available in recent 1.x releases and in 2.x (where eager mode is
# already the default), so this line works on both.
tf.compat.v1.enable_eager_execution()


In [None]:
# Consuming or using Activation Functions in Model Building

# Applies an activation function to an output.

# Imported here so this cell runs on a fresh kernel.
from tensorflow.keras import Sequential, activations, layers

# Empty sequential model that the three examples below add layers to.
model = Sequential()

#01 Pass the activation function object directly to the layer
model.add(layers.Dense(64, activation=activations.relu))

#02 Add the activation as its own layer, stacked after a Dense layer without activation
model.add(layers.Dense(64))
model.add(layers.Activation(activations.relu))

#03 Built-in activations may also be passed via their string identifier
model.add(layers.Dense(64, activation='relu'))

In [None]:
# Different Activation Functions

# Binary Step :

# If the input to the activation function is greater than a threshold, then the neuron is activated, else it is deactivated

# f(x) = 1, x >= 0
#      = 0, x <  0

In [14]:
# Linear (identity) : f(x) = x
# range : (-infinity, infinity)



In [15]:
# Sigmoid : f(x) = 1 / (1 + exp(-x))
# range : (0, 1)
# The function saturates for large |x|: the outputs for -20 and 20 print as 0 and 1.

# Sample inputs spanning the saturated and the near-linear regions.
a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)

# Apply the activation element-wise and show the result as a NumPy array.
tf.keras.activations.sigmoid(a).numpy()

array([0.        , 0.26894143, 0.5       , 0.7310586 , 1.        ],
      dtype=float32)

In [28]:
# Tanh : Hyperbolic tangent activation function
# range : (-1, 1)
# The tanh function is very similar to the sigmoid function;
# the main difference is that it is symmetric around the origin.
# Compared to sigmoid, it solves just one problem: its output is zero-centred.
# NOTE(review): this cell repeats the Tanh cell further down (In [25]) and its
# execution count (28) is out of order - consider keeping only one of them.

a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32)
tf.keras.activations.tanh(a).numpy()


array([-0.9950547, -0.7615942,  0.       ,  0.7615942,  0.9950547],
      dtype=float32)

In [16]:
# ReLU function : by default every input x < 0 is replaced by 0
# The main advantage of using the ReLU function over other activation functions is that it does not activate all the neurons at the same time.
# This means that the neurons will only be deactivated if the output of the linear transformation is less than 0.

# f(x) = max(0, x)

# `foo` is reused by the alpha / max_value / threshold cells that follow.
foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32) 
# Default signature : tf.keras.activations.relu(x, alpha=0.0, max_value=None, threshold=0)

tf.keras.activations.relu(foo).numpy()

array([ 0.,  0.,  0.,  5., 10.], dtype=float32)

In [17]:
# ReLU function : alpha - slope applied to negative inputs ("leaky" ReLU).
# Inputs below zero are multiplied by alpha (here -10 * 0.5 = -5) instead of being set to 0.

tf.keras.activations.relu(foo, alpha=0.5).numpy()

array([-5. , -2.5,  0. ,  5. , 10. ], dtype=float32)

In [18]:
# ReLU function : max_value - upper cap on the output (here the input 10 is clipped to 5)
tf.keras.activations.relu(foo, max_value=5).numpy()

array([0., 0., 0., 5., 5.], dtype=float32)

In [19]:
# ReLU function : threshold - only inputs strictly above the threshold pass through;
# everything else becomes 0 (here the input 5 itself maps to 0, only 10 survives)
tf.keras.activations.relu(foo, threshold=5).numpy()

array([-0., -0.,  0.,  0., 10.], dtype=float32)

In [20]:
# ReLU function : used as the activation of a Dense layer
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from tensorflow.keras import layers

# Dense layer with 4 units whose output passes through ReLU.
layer = layers.Dense(4, activation='relu')
# Two samples with four features each.
inputs = tf.convert_to_tensor([[-1,4,5,3],[1,6,0.3,1]])
outputs = layer(inputs)

# NOTE(review): no initializer or seed is set, so the layer weights are presumably
# initialized randomly and the values shown below will differ between runs - confirm.
outputs

<tf.Tensor: id=86, shape=(2, 4), dtype=float32, numpy=
array([[0.       , 0.       , 3.8707087, 0.       ],
       [0.       , 0.       , 4.2228813, 4.6672626]], dtype=float32)>

In [None]:
# ELU :  Exponential Linear Unit 

# ELU is a variant of the Rectified Linear Unit (ReLU) that modifies the slope of the negative part of the function.

# Unlike the leaky ReLU and parametric ReLU functions, ELU uses an exponential curve instead of a straight line for negative inputs.

# The ELU is an excellent alternative to the ReLU – it decreases bias shifts by pushing mean activation 
# towards zero during the training process.




# f(x) = x for x >= 0
# f(x) =  alpha * (exp(x) - 1.) for x < 0

In [21]:
# Softmax :
# z = np.exp(x)
# z_ = z / z.sum()


# 1. The softmax is a more generalised form of the sigmoid
# 2. Softmax transforms the values into the range 0 to 1 so that they sum to 1
# 3. It is used in multi-class classification problems

# Three scores; the largest (2.) receives the largest share of the probability mass.
inp = np.asarray([1., 2., 1.])
# Here softmax is used as a standalone Keras layer rather than via tf.keras.activations.
layer = tf.keras.layers.Softmax()
layer(inp).numpy()

array([0.21194156, 0.57611688, 0.21194156])

In [22]:
# Softplus : f(x) = log(exp(x) + 1)
# Behaves like a smooth ReLU: close to 0 for very negative x and close to x for large x
# (see the outputs for -20 and 20 below).
a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32)
tf.keras.activations.softplus(a).numpy()

array([2.0611537e-09, 3.1326166e-01, 6.9314718e-01, 1.3132616e+00,
       2.0000000e+01], dtype=float32)

In [24]:
# Softsign : f(x) = x / (abs(x) + 1)

# Similar to the tanh function.
# Tanh approaches its asymptotes exponentially, whereas softsign approaches them polynomially.

a = tf.constant([-1.0, 0.0, 1.0], dtype = tf.float32)
tf.keras.activations.softsign(a).numpy()

array([-0.5,  0. ,  0.5], dtype=float32)

In [25]:
# Tanh : Hyperbolic tangent activation function
# tanh(x) = sinh(x) / cosh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
# tanh(x) = 2 * sigmoid(2x) - 1

# The tanh function is very similar to the sigmoid function;
# the main difference is that it is symmetric around the origin.
# The range of values in this case is from -1 to 1.

# 1. Compared to sigmoid, it solves just one problem: its output is zero-centred.

a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32)
tf.keras.activations.tanh(a).numpy()


array([-0.9950547, -0.7615942,  0.       ,  0.7615942,  0.9950547],
      dtype=float32)

In [None]:
# selu : Scaled Exponential Linear Unit 
# if x > 0: return scale * x
# if x < 0: return scale * alpha * (exp(x) - 1)
# constants (alpha=1.67326324 and scale=1.05070098)

In [None]:
# gelu : Gaussian error linear unit (GELU)

In [26]:
# Exponential : f(x) = exp(x)
# The output is always positive and has no upper bound.

a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32)

tf.keras.activations.exponential(a).numpy()

array([ 0.04978707,  0.36787945,  1.        ,  2.7182817 , 20.085537  ],
      dtype=float32)

In [27]:
# Swish:  f(x) = x*sigmoid(x)