## Imports

In [1]:
# Step 1: Imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, PReLU
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Precision, Recall, AUC, F1Score
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import random
import os

# Reproducibility: apply the same seed (42) to every RNG this notebook touches.
tf.random.set_seed(42)    # TensorFlow global seed
np.random.seed(42)        # NumPy legacy global RNG
random.seed(42)           # Python's built-in `random` module

# Optional extra: export the hash seed through the environment as well.
# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only affects processes spawned afterwards -- confirm.
os.environ['PYTHONHASHSEED'] = '42'

import datetime


In [2]:
%pwd

'/Users/rahulshelke/Documents/Data-Science/Hands-on DL/activation-wars/notebooks'

In [3]:
# Move from notebooks/ up to the project root so the relative paths used later
# (logs/, models/) resolve there. The guard keeps the cell idempotent:
# re-running it no longer climbs one more directory level each time.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
%pwd

'/Users/rahulshelke/Documents/Data-Science/Hands-on DL/activation-wars'

## Functions

In [5]:
def create_model(act_config, input_shape=(28, 28, 1)):
    """Build and compile a small CNN classifier around one activation choice.

    Layer stack: Conv2D(32, 3x3) -> act -> MaxPool(2) -> Conv2D(64, 3x3) -> act
    -> MaxPool(2) -> Flatten -> Dense(64) -> act -> Dense(10, softmax).

    Args:
        act_config: Activation specification. Either a value accepted by
            `tf.keras.layers.Activation` (a name such as 'relu', or a
            callable), or a Keras layer instance (e.g. `LeakyReLU()`,
            `PReLU()`). A layer instance is treated as a *template*: every
            activation position receives its own copy built from the
            template's config, so layers with weights (PReLU) can be used
            at positions with different input shapes.
        input_shape: Shape of a single input sample, (height, width, channels).

    Returns:
        A `tf.keras.Sequential` model compiled with the Adam optimizer,
        `sparse_categorical_crossentropy` loss and the accuracy metric.
    """
    keras_layers = tf.keras.layers

    def _new_activation():
        """Return the activation layer for one position in the stack."""
        if not isinstance(act_config, keras_layers.Layer):
            # Name or callable: wrap it in a fresh Activation layer.
            return keras_layers.Activation(act_config)
        if isinstance(act_config, keras_layers.Lambda):
            # A Lambda wrapping arbitrary Python code may not survive a
            # config round-trip; Lambda layers are normally stateless, so
            # the instance is shared, as before.
            return act_config
        # Clone from config so each position owns its own (unbuilt) layer.
        # The name is dropped because Sequential requires unique layer names.
        config = act_config.get_config()
        config.pop("name", None)
        return type(act_config).from_config(config)

    model = tf.keras.Sequential([
        # Explicit Input object instead of the deprecated `input_shape=`
        # keyword on the first layer.
        tf.keras.Input(shape=input_shape),
        keras_layers.Conv2D(32, (3, 3)),
        _new_activation(),
        keras_layers.MaxPooling2D(2, 2),
        keras_layers.Conv2D(64, (3, 3)),
        _new_activation(),
        keras_layers.MaxPooling2D(2, 2),
        keras_layers.Flatten(),
        keras_layers.Dense(64),
        _new_activation(),
        # Output layer - always softmax for classification
        keras_layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

## Load Datasets

In [6]:
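# Step 2 (superseded, kept for reference): load and preprocess MNIST by hand.
# The training loops further down load each dataset themselves. Note that the
# one-hot labels produced here would not match the
# `sparse_categorical_crossentropy` loss used by create_model, which expects
# integer class labels.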
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
# x_train = x_train.astype("float32") / 255.0
# x_test = x_test.astype("float32") / 255.0
# y_train = to_categorical(y_train, 10)
# y_test = to_categorical(y_test, 10)
# y_train_cat = to_categorical(y_train, 10)
# y_test_cat = to_categorical(y_test, 10)

In [7]:
# Dataset name -> Keras dataset module; each module is used through its
# load_data() function in the training loops.
datasets = {
    dataset_key: getattr(tf.keras.datasets, dataset_key)
    for dataset_key in ('mnist', 'fashion_mnist', 'cifar10')
}

## Activation Functions

In [8]:
# Catalogue of activation specs understood by create_model: either a value
# accepted by tf.keras.layers.Activation (name or callable) or a layer instance.
activation_configs = {
    'sigmoid': 'sigmoid',
    'tanh': 'tanh',
    'relu': 'relu',
    'leaky_relu': tf.keras.layers.LeakyReLU(),
    # The original author flagged a problem with this entry; the notebook
    # trains PReLU through create_prelu_model (section 6) instead.
    'prelu': tf.keras.layers.PReLU(),
    'elu': 'elu',
    # 'swish' used to be listed twice. In a dict literal the last duplicate
    # silently wins, so only that effective (Lambda) entry is kept.
    'swish': tf.keras.layers.Lambda(lambda x: tf.keras.activations.swish(x), name="swish_activation"),
}

## 1. Sigmoid

In [9]:
# Restrict the next training run to the sigmoid activation only.
activation_configs = dict(sigmoid='sigmoid')

### Training: Sigmoid

In [10]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with sigmoid


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-05-16 16:00:47.635824: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-05-16 16:00:47.635847: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-05-16 16:00:47.635854: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-05-16 16:00:47.635869: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-16 16:00:47.635882: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2025-05-16 16:00:48.167551: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


938/938 - 14s - 15ms/step - accuracy: 0.7175 - loss: 0.8704 - val_accuracy: 0.9450 - val_loss: 0.2043
Epoch 2/10
938/938 - 13s - 14ms/step - accuracy: 0.9574 - loss: 0.1539 - val_accuracy: 0.9679 - val_loss: 0.1089
Epoch 3/10
938/938 - 14s - 15ms/step - accuracy: 0.9718 - loss: 0.0966 - val_accuracy: 0.9747 - val_loss: 0.0808
Epoch 4/10
938/938 - 14s - 15ms/step - accuracy: 0.9786 - loss: 0.0724 - val_accuracy: 0.9792 - val_loss: 0.0642
Epoch 5/10
938/938 - 14s - 15ms/step - accuracy: 0.9829 - loss: 0.0579 - val_accuracy: 0.9816 - val_loss: 0.0562
Epoch 6/10
938/938 - 14s - 15ms/step - accuracy: 0.9863 - loss: 0.0478 - val_accuracy: 0.9836 - val_loss: 0.0524
Epoch 7/10
938/938 - 14s - 15ms/step - accuracy: 0.9885 - loss: 0.0403 - val_accuracy: 0.9840 - val_loss: 0.0509
Epoch 8/10
938/938 - 14s - 15ms/step - accuracy: 0.9902 - loss: 0.0344 - val_accuracy: 0.9832 - val_loss: 0.0506
Epoch 9/10
938/938 - 13s - 14ms/step - accuracy: 0.9918 - loss: 0.0294 - val_accuracy: 0.9831 - val_loss: 0

## 2. Tanh

In [12]:
# Restrict the next training run to the tanh activation only.
activation_configs = dict(tanh='tanh')

### Training: Tanh

In [13]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with tanh
Epoch 1/10
938/938 - 16s - 17ms/step - accuracy: 0.9522 - loss: 0.1657 - val_accuracy: 0.9802 - val_loss: 0.0659
Epoch 2/10
938/938 - 13s - 14ms/step - accuracy: 0.9852 - loss: 0.0512 - val_accuracy: 0.9826 - val_loss: 0.0570
Epoch 3/10
938/938 - 13s - 14ms/step - accuracy: 0.9900 - loss: 0.0348 - val_accuracy: 0.9837 - val_loss: 0.0534
Epoch 4/10
938/938 - 13s - 14ms/step - accuracy: 0.9938 - loss: 0.0237 - val_accuracy: 0.9859 - val_loss: 0.0443
Epoch 5/10
938/938 - 13s - 14ms/step - accuracy: 0.9955 - loss: 0.0169 - val_accuracy: 0.9863 - val_loss: 0.0409
Epoch 6/10
938/938 - 13s - 14ms/step - accuracy: 0.9972 - loss: 0.0122 - val_accuracy: 0.9871 - val_loss: 0.0456
Epoch 7/10
938/938 - 13s - 14ms/step - accuracy: 0.9970 - loss: 0.0113 - val_accuracy: 0.9890 - val_loss: 0.0394
Epoch 8/10
938/938 - 13s - 14ms/step - accuracy: 0.9967 - loss: 0.0111 - val_accuracy: 0.9889 - val_loss: 0.0402
Epoch 9/10
938/938 - 13s - 14ms/step - accuracy: 0.9975 - loss: 0.00

## 3. ReLU

In [15]:
# Restrict the next training run to the ReLU activation only.
activation_configs = dict(relu='relu')

### Training: ReLU

In [16]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with relu
Epoch 1/10
938/938 - 20s - 21ms/step - accuracy: 0.9463 - loss: 0.1795 - val_accuracy: 0.9763 - val_loss: 0.0727
Epoch 2/10
938/938 - 14s - 15ms/step - accuracy: 0.9810 - loss: 0.0646 - val_accuracy: 0.9788 - val_loss: 0.0702
Epoch 3/10
938/938 - 13s - 14ms/step - accuracy: 0.9837 - loss: 0.0570 - val_accuracy: 0.9778 - val_loss: 0.0808
Epoch 4/10
938/938 - 13s - 14ms/step - accuracy: 0.9853 - loss: 0.0583 - val_accuracy: 0.9800 - val_loss: 0.0869
Epoch 5/10
938/938 - 13s - 14ms/step - accuracy: 0.9847 - loss: 0.0743 - val_accuracy: 0.9818 - val_loss: 0.1173
Epoch 6/10
938/938 - 13s - 14ms/step - accuracy: 0.9840 - loss: 0.0995 - val_accuracy: 0.9752 - val_loss: 0.2276
Epoch 7/10
938/938 - 13s - 14ms/step - accuracy: 0.9832 - loss: 0.1659 - val_accuracy: 0.9772 - val_loss: 0.3196
Epoch 8/10
938/938 - 13s - 14ms/step - accuracy: 0.9851 - loss: 0.2068 - val_accuracy: 0.9825 - val_loss: 0.3494
Epoch 9/10
938/938 - 13s - 14ms/step - accuracy: 0.9836 - loss: 0.34

## 4. SoftPlus

In [None]:
# Restrict the next training run to the softplus activation only.
# The key 'soft_plus' becomes part of the log and checkpoint paths.
activation_configs = dict(soft_plus="softplus")

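### Training: SoftPlus

> **Note:** the output recorded below reads "Training on mnist with leaky_relu", and the config cell above has no execution count (`In [None]`). These numbers therefore appear to come from a LeakyReLU run, not SoftPlus. Re-run the SoftPlus config cell and then this cell to obtain SoftPlus results.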

In [19]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with leaky_relu


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
938/938 - 18s - 19ms/step - accuracy: 0.9512 - loss: 0.1662 - val_accuracy: 0.9790 - val_loss: 0.0639
Epoch 2/10
938/938 - 15s - 16ms/step - accuracy: 0.9839 - loss: 0.0512 - val_accuracy: 0.9808 - val_loss: 0.0597
Epoch 3/10
938/938 - 14s - 15ms/step - accuracy: 0.9887 - loss: 0.0367 - val_accuracy: 0.9834 - val_loss: 0.0535
Epoch 4/10
938/938 - 13s - 14ms/step - accuracy: 0.9916 - loss: 0.0279 - val_accuracy: 0.9849 - val_loss: 0.0474
Epoch 5/10
938/938 - 13s - 14ms/step - accuracy: 0.9935 - loss: 0.0215 - val_accuracy: 0.9870 - val_loss: 0.0413
Epoch 6/10
938/938 - 13s - 14ms/step - accuracy: 0.9948 - loss: 0.0167 - val_accuracy: 0.9888 - val_loss: 0.0396
Epoch 7/10
938/938 - 15s - 16ms/step - accuracy: 0.9960 - loss: 0.0134 - val_accuracy: 0.9873 - val_loss: 0.0490
Epoch 8/10
938/938 - 14s - 15ms/step - accuracy: 0.9960 - loss: 0.0127 - val_accuracy: 0.9904 - val_loss: 0.0410
Epoch 9/10
938/938 - 14s - 15ms/step - accuracy: 0.9968 - loss: 0.0100 - val_accuracy: 0.9896 - 

## 5. Leaky ReLU

In [17]:
# Restrict the next training run to LeakyReLU only; unlike the string-named
# activations, this one is supplied as a layer instance.
activation_configs = dict(leaky_relu=tf.keras.layers.LeakyReLU())

### Training: Leaky ReLU

In [18]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with leaky_relu
Epoch 1/10
938/938 - 15s - 16ms/step - accuracy: 0.9556 - loss: 0.1513 - val_accuracy: 0.9838 - val_loss: 0.0494
Epoch 2/10
938/938 - 13s - 14ms/step - accuracy: 0.9847 - loss: 0.0500 - val_accuracy: 0.9845 - val_loss: 0.0484
Epoch 3/10
938/938 - 12s - 13ms/step - accuracy: 0.9896 - loss: 0.0358 - val_accuracy: 0.9827 - val_loss: 0.0504
Epoch 4/10
938/938 - 12s - 13ms/step - accuracy: 0.9920 - loss: 0.0268 - val_accuracy: 0.9848 - val_loss: 0.0457
Epoch 5/10
938/938 - 13s - 13ms/step - accuracy: 0.9938 - loss: 0.0207 - val_accuracy: 0.9870 - val_loss: 0.0444
Epoch 6/10
938/938 - 13s - 14ms/step - accuracy: 0.9948 - loss: 0.0168 - val_accuracy: 0.9882 - val_loss: 0.0394
Epoch 7/10
938/938 - 13s - 14ms/step - accuracy: 0.9957 - loss: 0.0141 - val_accuracy: 0.9875 - val_loss: 0.0442
Epoch 8/10
938/938 - 13s - 13ms/step - accuracy: 0.9967 - loss: 0.0109 - val_accuracy: 0.9899 - val_loss: 0.0394
Epoch 9/10
938/938 - 13s - 14ms/step - accuracy: 0.9962 - loss

## 6. PReLU

In [None]:
def create_prelu_model(act_config, input_shape=(28, 28, 1)):
    """Build and compile the notebook's CNN with a separate PReLU per position.

    Same layer stack as create_model, but every activation position gets its
    own freshly constructed `tf.keras.layers.PReLU()` instead of a layer
    derived from `act_config`.

    Args:
        act_config: Ignored. Accepted only so the call signature matches
            create_model.
        input_shape: Shape of a single input sample, (height, width, channels).

    Returns:
        A `tf.keras.Sequential` model compiled with the Adam optimizer,
        `sparse_categorical_crossentropy` loss and the accuracy metric.
    """
    keras_layers = tf.keras.layers

    model = tf.keras.Sequential([
        # Explicit Input object instead of the deprecated `input_shape=`
        # keyword on the first layer.
        tf.keras.Input(shape=input_shape),
        keras_layers.Conv2D(32, (3, 3)),
        keras_layers.PReLU(),  # directly using prelu
        keras_layers.MaxPooling2D(2, 2),
        keras_layers.Conv2D(64, (3, 3)),
        keras_layers.PReLU(),  # directly using prelu
        keras_layers.MaxPooling2D(2, 2),
        keras_layers.Flatten(),
        keras_layers.Dense(64),
        keras_layers.PReLU(),  # directly using prelu
        # Output layer - always softmax for classification
        keras_layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [32]:
# Restrict the next training run to PReLU only. The value is a standalone
# layer object rather than an activation name.
activation_configs = dict(prelu=tf.keras.layers.PReLU())

### Training: PReLU

In [33]:
# Train one PReLU model per dataset with TensorBoard logging and best-epoch
# checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        # create_prelu_model ignores act_config and builds its own PReLU layers.
        model = create_prelu_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with prelu


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
938/938 - 31s - 33ms/step - accuracy: 0.9438 - loss: 0.1881 - val_accuracy: 0.9762 - val_loss: 0.0726
Epoch 2/10
938/938 - 21s - 22ms/step - accuracy: 0.9829 - loss: 0.0547 - val_accuracy: 0.9859 - val_loss: 0.0438
Epoch 3/10
938/938 - 20s - 22ms/step - accuracy: 0.9885 - loss: 0.0371 - val_accuracy: 0.9853 - val_loss: 0.0432
Epoch 4/10
938/938 - 20s - 21ms/step - accuracy: 0.9917 - loss: 0.0274 - val_accuracy: 0.9850 - val_loss: 0.0450
Epoch 5/10
938/938 - 20s - 21ms/step - accuracy: 0.9940 - loss: 0.0202 - val_accuracy: 0.9884 - val_loss: 0.0396
Epoch 6/10
938/938 - 20s - 21ms/step - accuracy: 0.9947 - loss: 0.0162 - val_accuracy: 0.9896 - val_loss: 0.0347
Epoch 7/10
938/938 - 20s - 21ms/step - accuracy: 0.9961 - loss: 0.0126 - val_accuracy: 0.9905 - val_loss: 0.0351
Epoch 8/10
938/938 - 20s - 21ms/step - accuracy: 0.9965 - loss: 0.0108 - val_accuracy: 0.9890 - val_loss: 0.0456
Epoch 9/10
938/938 - 20s - 21ms/step - accuracy: 0.9965 - loss: 0.0100 - val_accuracy: 0.9905 - 

## 7. ELU

In [20]:
# Restrict the next training run to the ELU activation only.
activation_configs = dict(elu="elu")

### Training: ELU

In [21]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with elu
Epoch 1/10
938/938 - 26s - 28ms/step - accuracy: 0.9550 - loss: 0.1531 - val_accuracy: 0.9776 - val_loss: 0.0686
Epoch 2/10
938/938 - 24s - 25ms/step - accuracy: 0.9840 - loss: 0.0510 - val_accuracy: 0.9818 - val_loss: 0.0584
Epoch 3/10
938/938 - 23s - 24ms/step - accuracy: 0.9898 - loss: 0.0344 - val_accuracy: 0.9842 - val_loss: 0.0533
Epoch 4/10
938/938 - 24s - 25ms/step - accuracy: 0.9932 - loss: 0.0238 - val_accuracy: 0.9854 - val_loss: 0.0491
Epoch 5/10
938/938 - 23s - 25ms/step - accuracy: 0.9948 - loss: 0.0180 - val_accuracy: 0.9868 - val_loss: 0.0492
Epoch 6/10
938/938 - 23s - 24ms/step - accuracy: 0.9958 - loss: 0.0140 - val_accuracy: 0.9891 - val_loss: 0.0397
Epoch 7/10
938/938 - 21s - 23ms/step - accuracy: 0.9959 - loss: 0.0126 - val_accuracy: 0.9896 - val_loss: 0.0392
Epoch 8/10
938/938 - 22s - 24ms/step - accuracy: 0.9964 - loss: 0.0112 - val_accuracy: 0.9903 - val_loss: 0.0424
Epoch 9/10
938/938 - 24s - 25ms/step - accuracy: 0.9964 - loss: 0.009

## 8. GELU (Gaussian Error Linear Unit)

In [22]:
# Restrict the next training run to GELU only, passed as the Keras
# activation function object rather than by name.
activation_configs = dict(gelu=tf.keras.activations.gelu)

### Training: GELU

In [23]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with gelu


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
938/938 - 21s - 23ms/step - accuracy: 0.9441 - loss: 0.1855 - val_accuracy: 0.9765 - val_loss: 0.0706
Epoch 2/10
938/938 - 19s - 20ms/step - accuracy: 0.9843 - loss: 0.0518 - val_accuracy: 0.9834 - val_loss: 0.0533
Epoch 3/10
938/938 - 18s - 19ms/step - accuracy: 0.9898 - loss: 0.0336 - val_accuracy: 0.9845 - val_loss: 0.0468
Epoch 4/10
938/938 - 18s - 20ms/step - accuracy: 0.9931 - loss: 0.0233 - val_accuracy: 0.9885 - val_loss: 0.0375
Epoch 5/10
938/938 - 18s - 19ms/step - accuracy: 0.9955 - loss: 0.0162 - val_accuracy: 0.9876 - val_loss: 0.0372
Epoch 6/10
938/938 - 20s - 21ms/step - accuracy: 0.9962 - loss: 0.0128 - val_accuracy: 0.9886 - val_loss: 0.0406
Epoch 7/10
938/938 - 18s - 19ms/step - accuracy: 0.9966 - loss: 0.0109 - val_accuracy: 0.9891 - val_loss: 0.0417
Epoch 8/10
938/938 - 18s - 19ms/step - accuracy: 0.9974 - loss: 0.0083 - val_accuracy: 0.9899 - val_loss: 0.0390
Epoch 9/10
938/938 - 18s - 20ms/step - accuracy: 0.9975 - loss: 0.0076 - val_accuracy: 0.9891 - 

## 9. Swish

In [24]:
# Restrict the next training run to Swish only, passed as the Keras
# activation function object rather than by name.
activation_configs = dict(swish=tf.keras.activations.swish)

### Training: Swish

In [25]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with swish
Epoch 1/10
938/938 - 27s - 29ms/step - accuracy: 0.9468 - loss: 0.1831 - val_accuracy: 0.9807 - val_loss: 0.0598
Epoch 2/10
938/938 - 17s - 18ms/step - accuracy: 0.9850 - loss: 0.0491 - val_accuracy: 0.9832 - val_loss: 0.0554
Epoch 3/10
938/938 - 16s - 17ms/step - accuracy: 0.9901 - loss: 0.0329 - val_accuracy: 0.9868 - val_loss: 0.0454
Epoch 4/10
938/938 - 16s - 18ms/step - accuracy: 0.9931 - loss: 0.0232 - val_accuracy: 0.9893 - val_loss: 0.0352
Epoch 5/10
938/938 - 16s - 18ms/step - accuracy: 0.9954 - loss: 0.0163 - val_accuracy: 0.9896 - val_loss: 0.0400
Epoch 6/10
938/938 - 17s - 18ms/step - accuracy: 0.9963 - loss: 0.0128 - val_accuracy: 0.9907 - val_loss: 0.0360
Epoch 7/10
938/938 - 17s - 18ms/step - accuracy: 0.9972 - loss: 0.0095 - val_accuracy: 0.9902 - val_loss: 0.0373
Epoch 8/10
938/938 - 17s - 18ms/step - accuracy: 0.9973 - loss: 0.0080 - val_accuracy: 0.9909 - val_loss: 0.0388
Epoch 9/10
938/938 - 17s - 18ms/step - accuracy: 0.9976 - loss: 0.0

## 10. Mish

In [27]:
# Restrict the next training run to Mish only, passed as the Keras
# activation function object rather than by name.
activation_configs = dict(mish=tf.keras.activations.mish)

### Training: Mish

In [28]:
# Train one model per (dataset, activation) pair with TensorBoard logging and
# best-epoch checkpointing.
for dataset_name, loader in datasets.items():
    (x_train, y_train), (x_test, y_test) = loader.load_data()

    # Scale pixels to [0, 1] in float32. Assuming the loaders return integer
    # pixel arrays (uint8 per the Keras docs), a bare `/ 255.0` would promote
    # them to float64 and double their memory footprint.
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0

    # Grayscale datasets load as (N, H, W); add the channel axis Conv2D expects.
    # RGB data such as CIFAR-10 is already (N, H, W, C) and is left untouched.
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
        x_test = x_test[..., np.newaxis]

    for act_name, act_config in activation_configs.items():
        print(f"Training on {dataset_name} with {act_name}")

        model = create_model(act_config, input_shape=x_train.shape[1:])
        log_dir = f"logs/{dataset_name}/{act_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(f"models/{dataset_name}_{act_name}.keras", save_best_only=True)

        # checkpoint_cb has to be registered here; it used to be constructed
        # but never passed to fit(), so no model file was ever written.
        # NOTE(review): the test split doubles as the validation set, so the
        # "best" checkpoint is selected on test data.
        model.fit(x_train, y_train, epochs=10, batch_size=64,
                  validation_data=(x_test, y_test),
                  callbacks=[tb_callback, checkpoint_cb], verbose=2)

Training on mnist with mish


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
938/938 - 28s - 29ms/step - accuracy: 0.9497 - loss: 0.1693 - val_accuracy: 0.9801 - val_loss: 0.0601
Epoch 2/10
938/938 - 29s - 31ms/step - accuracy: 0.9851 - loss: 0.0490 - val_accuracy: 0.9840 - val_loss: 0.0510
Epoch 3/10
938/938 - 24s - 26ms/step - accuracy: 0.9902 - loss: 0.0323 - val_accuracy: 0.9834 - val_loss: 0.0515
Epoch 4/10
938/938 - 24s - 26ms/step - accuracy: 0.9935 - loss: 0.0222 - val_accuracy: 0.9873 - val_loss: 0.0423
Epoch 5/10
938/938 - 23s - 25ms/step - accuracy: 0.9951 - loss: 0.0165 - val_accuracy: 0.9900 - val_loss: 0.0379
Epoch 6/10
938/938 - 22s - 24ms/step - accuracy: 0.9963 - loss: 0.0122 - val_accuracy: 0.9891 - val_loss: 0.0426
Epoch 7/10
938/938 - 23s - 24ms/step - accuracy: 0.9967 - loss: 0.0103 - val_accuracy: 0.9903 - val_loss: 0.0353
Epoch 8/10
938/938 - 26s - 28ms/step - accuracy: 0.9969 - loss: 0.0086 - val_accuracy: 0.9897 - val_loss: 0.0409
Epoch 9/10
938/938 - 24s - 26ms/step - accuracy: 0.9975 - loss: 0.0079 - val_accuracy: 0.9909 - 

## TensorBoard

In [11]:
# ! find logs/ | grep tfevents

In [11]:
# !tensorboard --inspect --logdir logs/

Processing event files... (this can take a few minutes)

Found event files in:
logs/mnist/leaky_relu/20250516-101650/train
logs/mnist/leaky_relu/20250516-101650/validation
logs/mnist/tanh/20250516-101232/train
logs/mnist/tanh/20250516-101232/validation
logs/mnist/relu/20250516-101442/train
logs/mnist/relu/20250516-101442/validation
logs/mnist/sigmoid/20250516-101022/train
logs/mnist/sigmoid/20250516-101022/validation
logs/mnist/elu/20250516-101901/train
logs/mnist/elu/20250516-101901/validation
logs/mnist/swish/20250516-102234/train
logs/mnist/swish/20250516-102234/validation
logs/fashion_mnist/leaky_relu/20250516-103216/train
logs/fashion_mnist/leaky_relu/20250516-103216/validation
logs/fashion_mnist/tanh/20250516-102722/train
logs/fashion_mnist/tanh/20250516-102722/validation
logs/fashion_mnist/relu/20250516-102933/train
logs/fashion_mnist/relu/20250516-102933/validation
logs/fashion_mnist/sigmoid/20250516-102509/train
logs/fashion_mnist/sigmoid/20250516-102509/validation
logs/fashio

In [34]:
%pwd

'/Users/rahulshelke/Documents/Data-Science/Hands-on DL/activation-wars'

In [35]:
! tensorboard --logdir=logs/cifar10

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.16.2 at http://localhost:6006/ (Press CTRL+C to quit)
^C
