ACTIVATION FUNCTIONS :

1. **Heaviside Step Function**

The Heaviside step function outputs 0 for negative inputs and 1 for positive inputs.

Range: 0,1

Advantages:
*   Simple and easy to implement.

Disadvantages:
*   Not differentiable at 0.
*   Provides no information about the magnitude of input values.

2. **Sigmoid** **Function**

The sigmoid function maps any input value to a value between 0 and 1.

𝜎(𝑥) = 1 / 1 + 𝑒^(−𝑥)

Range:[0,1]

Advantages:

*   Smooth gradient, preventing jumps in output values.
*   Outputs can be interpreted as probabilities.

Disadvantages:

*  Prone to vanishing gradient problem, especially with deep networks.
*  Outputs are not zero-centered.

Use Case:
Used in binary classification problems and as the activation function for the output layer in such tasks.

Layer:
Output layer in binary classification problems.


In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import sigmoid

# Example dataset
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create a sample dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build a neural network with Sigmoid activation function
model_sigmoid = Sequential([
    Dense(64, input_shape=(20,), activation=sigmoid),
    Dense(1, activation='sigmoid')
])

# Compile the model
model_sigmoid.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history_sigmoid = model_sigmoid.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model_sigmoid.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.6619 - loss: 0.6520 - val_accuracy: 0.7875 - val_loss: 0.6175
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7781 - loss: 0.6081 - val_accuracy: 0.8375 - val_loss: 0.5779
Epoch 3/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8290 - loss: 0.5682 - val_accuracy: 0.8562 - val_loss: 0.5437
Epoch 4/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8560 - loss: 0.5353 - val_accuracy: 0.8500 - val_loss: 0.5127
Epoch 5/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8445 - loss: 0.5120 - val_accuracy: 0.8500 - val_loss: 0.4845
Epoch 6/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8599 - loss: 0.4846 - val_accuracy: 0.8750 - val_loss: 0.4585
Epoch 7/10
[1m20/20[0m [32m━━━━━━


3. **ReLU (Rectified Linear Unit)**

ReLU outputs the input directly if it is positive; otherwise, it outputs zero.

ReLU(𝑥) = max⁡(0,𝑥)

Range:[0,x]

Advantages:


*   Efficient computation.
*   Helps mitigate the vanishing gradient problem.

Disadvantages:


*   Outputs are not zero-centered.
*   Can cause dead neurons if many inputs are negative.

Use Case:
Widely used in hidden layers of deep neural networks.


In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import MeanSquaredError
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create a sample dataset
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
y = y.reshape(-1, 1)  # Reshape y to be a 2D array

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])

# Compile the model with RMSprop optimizer and Mean Squared Error loss
model.compile(optimizer=RMSprop(learning_rate=0.001), loss=MeanSquaredError())

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions
predictions = model.predict(X_test)
print(f"Predictions: {predictions[:5]}")
print(f"Actual values: {y_test[:5]}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 35951.1953 - val_loss: 35343.8945
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 39286.9883 - val_loss: 34998.7305
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 40124.1992 - val_loss: 34405.5195
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 36699.3359 - val_loss: 33505.9492
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 35980.9805 - val_loss: 32285.5879
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 33242.5586 - val_loss: 30785.0820
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 32561.5566 - val_loss: 28989.0781
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 27602.7227 - val_loss: 26843.2070
Ep


4. **Leaky ReLU**

Leaky ReLU allows a small, non-zero gradient when the input is negative.

Range:
(−∞,∞)

Advantages:


*   Prevents dead neurons.
*   Maintains some gradient for negative inputs.

Disadvantages:

Choosing the α parameter can be non-trivial.

Use Case:

Used in hidden layers where ReLU might lead to dead neurons.


In [3]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.losses import MeanAbsoluteError
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create a sample dataset
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
y = y.reshape(-1, 1)  # Reshape y to be a 2D array

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the model
model = Sequential([
    Dense(64, input_shape=(X_train.shape[1],)),
    LeakyReLU(alpha=0.01),
    Dense(64),
    LeakyReLU(alpha=0.01),
    Dense(1)
])

# Compile the model with AdaGrad optimizer and Mean Absolute Error loss
model.compile(optimizer=Adagrad(learning_rate=0.01), loss=MeanAbsoluteError())

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions
predictions = model.predict(X_test)
print(f"Predictions: {predictions[:5]}")
print(f"Actual values: {y_test[:5]}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 155.9699 - val_loss: 152.1735
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 160.1796 - val_loss: 151.8476
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 156.4822 - val_loss: 151.2384
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 158.5468 - val_loss: 150.1054
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 153.5572 - val_loss: 148.0940
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 149.2942 - val_loss: 144.7629
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 151.5709 - val_loss: 139.6322
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 138.2898 - val_loss: 132.2353
Epoch 9/50
[1m20/20[0m [32m━━━━


5. **ELU (Exponential Linear Unit)**

ELU outputs the input if it is positive; otherwise, it outputs an exponential function minus one.

Range:(−𝛼,∞)

Advantages:


*   Helps mitigate vanishing gradient problem.
*   Outputs are closer to zero mean, which can help speed up learning.

Disadvantages:

More computationally intensive than ReLU and Leaky ReLU.

Use Case:
Used in hidden layers to improve learning speed and performance.


In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, ELU
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import MeanSquaredError
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create a sample dataset
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
y = y.reshape(-1, 1)  # Reshape y to be a 2D array

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the model
model = Sequential([
    Dense(64, input_shape=(X_train.shape[1],)),
    ELU(alpha=1.0),
    Dense(64),
    ELU(alpha=1.0),
    Dense(1)
])

# Compile the model with RMSProp optimizer and Mean Squared Error loss
model.compile(optimizer=RMSprop(learning_rate=0.001), loss=MeanSquaredError())

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions
predictions = model.predict(X_test)
print(f"Predictions: {predictions[:5]}")
print(f"Actual values: {y_test[:5]}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 38720.9844 - val_loss: 34747.2070
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 36084.0117 - val_loss: 33654.5430
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 34757.7891 - val_loss: 32173.7305
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 33416.5469 - val_loss: 30318.2148
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 30501.5273 - val_loss: 28109.1523
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 30860.9648 - val_loss: 25640.9688
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 26974.9336 - val_loss: 22775.2129
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 24153.2578 - val_loss: 19781.3125
Ep



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Predictions: [[-320.68353]
 [-372.34805]
 [ 346.6453 ]
 [-251.3139 ]
 [ 211.27145]]
Actual values: [[-322.97345848]
 [-377.23213875]
 [ 369.21515032]
 [-273.14190959]
 [ 196.47631904]]



6. **Softmax**

Range:
[0,1]

Advantages:

*   Converts logits to probabilities.
*   Useful for multi-class classification.

Disadvantages:

*   Computationally expensive for many classes.

Use Case:
Used in multi-class classification problems.

Layer:
Output layer in multi-class classification problems.


In [7]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import MeanSquaredError
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create a sample dataset for classification
X, y = make_classification(n_samples=1000, n_features=5, n_classes=3, n_clusters_per_class=1, random_state=42)
y = tf.keras.utils.to_categorical(y, num_classes=3)  # One-hot encode the labels

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),       # Hidden layer with ReLU activation
    Dense(64, activation='relu'),       # Another hidden layer with ReLU activation
    Dense(3, activation='softmax')      # Output layer with Softmax activation
])

# Compile the model with Mini Batch SGD optimizer and MSE loss
model.compile(optimizer=SGD(learning_rate=0.01), loss=MeanSquaredError(), metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Make predictions
predictions = model.predict(X_test)
print(f"Predictions (sample): {predictions[:5]}")
print(f"Actual values (sample): {y_test[:5]}")


Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - accuracy: 0.1726 - loss: 0.2463 - val_accuracy: 0.2875 - val_loss: 0.2313
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.1820 - loss: 0.2378 - val_accuracy: 0.2688 - val_loss: 0.2254
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.1908 - loss: 0.2302 - val_accuracy: 0.2937 - val_loss: 0.2197
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2355 - loss: 0.2211 - val_accuracy: 0.3750 - val_loss: 0.2143
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3761 - loss: 0.2147 - val_accuracy: 0.4563 - val_loss: 0.2092
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4755 - loss: 0.2062 - val_accuracy: 0.4812 - val_loss: 0.2045
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━


7. **TanH (Hyperbolic Tangent)**

TanH maps input values to the range between -1 and 1.

Range:[−1,1]

Advantages:

*   Outputs are zero-centered.
*   Steeper gradients compared to sigmoid.

Disadvantages:

*   Still suffers from the vanishing gradient problem.

Use Case:
Used in hidden layers for zero-centered outputs.

Layer:
Hidden layers.


In [5]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import MeanAbsoluteError
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create a sample dataset
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
y = y.reshape(-1, 1)  # Reshape y to be a 2D array

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='tanh'),
    Dense(64, activation='tanh'),
    Dense(1)
])

# Compile the model with SGD optimizer and Mean Absolute Error loss
model.compile(optimizer=SGD(learning_rate=0.01), loss=MeanAbsoluteError())

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Make predictions
predictions = model.predict(X_test)
print(f"Predictions: {predictions[:5]}")
print(f"Actual values: {y_test[:5]}")


Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 160.0341 - val_loss: 151.9365
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 155.7324 - val_loss: 151.4988
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 159.9149 - val_loss: 150.8675
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 157.6353 - val_loss: 149.8846
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 156.0962 - val_loss: 148.3143
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 146.8740 - val_loss: 145.8511
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 152.7928 - val_loss: 142.1735
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 153.4803 - val_loss: 137.1693
Epoch 9/50
[1m20/20[0m



Test Loss: 41.488243103027344
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Predictions: [[-212.73947]
 [-221.71869]
 [ 220.92137]
 [-192.04257]
 [ 203.70181]]
Actual values: [[-322.97345848]
 [-377.23213875]
 [ 369.21515032]
 [-273.14190959]
 [ 196.47631904]]
