In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np
import pickle

In [None]:
# Load CIFAR-100 as in-memory NumPy arrays. batch_size=-1 asks TFDS to return
# each split as a single batch, and as_supervised=True yields (image, label) pairs.
(x_train, y_train), (x_test, y_test) = tfds.as_numpy(tfds.load(
    'cifar100',
    split=['train', 'test'],
    batch_size=-1,
    as_supervised=True,
))

# Normalize the images to a 0-1 range.
# Cast to float32 before dividing: an integer image array divided by a Python
# float is promoted to float64, which doubles the memory held by the dataset.
# NOTE(review): TFDS documents the CIFAR images as uint8 -- confirm if the source changes.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode the labels (100 fine-grained classes)
y_train = tf.keras.utils.to_categorical(y_train, num_classes=100)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=100)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the original training data as a validation set.
# The fixed random_state keeps the partition identical across runs.
split_arrays = train_test_split(x_train, y_train, test_size=0.2, random_state=42)
x_train_split, x_val_split, y_train_split, y_val_split = split_arrays

In [None]:
# Report the shape of every array produced by the split, followed by the
# untouched test set, so the partition sizes can be verified at a glance.
for description, array in (
    ("Shape of new training images:", x_train_split),
    ("Shape of new training labels:", y_train_split),
    ("Shape of validation images:", x_val_split),
    ("Shape of validation labels:", y_val_split),
    ("Shape of test images:", x_test),
    ("Shape of test labels:", y_test),
):
    print(description, array.shape)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, \
Activation, Add, AveragePooling2D, Flatten, Dense
from tensorflow.keras.models import Model

def resnet_block(input_data, filters, conv_size, activation_func):
    """Apply a two-convolution residual block with an additive skip connection.

    The main path is Conv -> BatchNorm -> activation -> Conv -> BatchNorm; its
    output is added to the shortcut and passed through the activation again.

    Args:
        input_data: Feature-map tensor entering the block.
        filters: Number of output channels of both convolutions.
        conv_size: Kernel size handed to both ``Conv2D`` layers.
        activation_func: Callable wrapped in an ``Activation`` layer after the
            first batch normalization and after the residual addition.

    Returns:
        The activated sum of the main path and the shortcut.
    """
    shortcut = input_data
    # Add() requires both operands to have the same number of channels. When the
    # incoming tensor does not already have `filters` channels, project it with
    # a 1x1 convolution instead of failing. Inputs that already match take the
    # identity shortcut, exactly as before.
    if input_data.shape[-1] != filters:
        shortcut = Conv2D(filters, (1, 1), padding='same')(input_data)

    x = Conv2D(filters, conv_size, padding='same')(input_data)
    x = BatchNormalization()(x)
    x = Activation(activation_func)(x)

    x = Conv2D(filters, conv_size, padding='same')(x)
    x = BatchNormalization()(x)

    # Skip connection: add the (possibly projected) input to the block output
    x = Add()([x, shortcut])

    x = Activation(activation_func)(x)
    return x

def build_resnet20(input_shape, num_classes, activation_func):
    """Build the ResNet-style classifier used for the activation comparison.

    Architecture: a 16-filter stem, then three stages of three residual blocks
    with 16, 32 and 64 filters. Stages two and three are each preceded by a
    stride-2 transition convolution that halves the spatial size and sets the
    new channel count. A global average pool feeds a softmax classifier.

    Args:
        input_shape: Shape of one input image, e.g. ``(32, 32, 3)``.
        num_classes: Number of units in the softmax output layer.
        activation_func: Callable used for every hidden activation.

    Returns:
        An uncompiled Keras ``Model`` mapping images to class probabilities.
    """
    inputs = Input(shape=input_shape)

    # Initial Conv Layer
    x = Conv2D(16, (3, 3), padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation_func)(x)

    for stage_index, filters in enumerate((16, 32, 64)):
        if stage_index > 0:
            # Transition Layer: downsample by 2 and widen to the stage's filters
            x = Conv2D(filters, (3, 3), padding='same', strides=(2, 2))(x)
            x = BatchNormalization()(x)
            x = Activation(activation_func)(x)

        # ResNet Blocks
        for _ in range(3):
            x = resnet_block(x, filters, (3, 3), activation_func)

    # Global average pooling instead of a fixed 8x8 pool + Flatten: for 32x32
    # inputs the final feature map is 8x8, so the result is the same 64-vector,
    # but other input sizes no longer break or leave a larger spatial map.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Output Layer
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

# Placeholder activation hook
def custom_activation(x):
    """Return the activation of ``x``; currently a plain ReLU stand-in."""
    # Swap this expression for the custom activation logic when it is ready.
    activated = tf.nn.relu(x)
    return activated

# Build one reference model with the placeholder activation and show its layers.
# Both values below describe the dataset and must change if the dataset does.
input_shape = (32, 32, 3)
num_classes = 100

model = build_resnet20(
    input_shape=input_shape,
    num_classes=num_classes,
    activation_func=custom_activation,
)
model.summary()

In [None]:
def train_model(activation_func, x_train, y_train, x_val, y_val, batch_size, learning_rate, name, epochs=60):
    """Build, compile and train the ResNet classifier with one activation function.

    Args:
        activation_func: Callable used for every hidden activation in the model.
        x_train: Training images; ``x_train.shape[1:]`` is used as the input shape.
        y_train: One-hot training labels; ``y_train.shape[1]`` is the class count.
        x_val: Validation images.
        y_val: One-hot validation labels.
        batch_size: Mini-batch size passed to ``model.fit``.
        learning_rate: Learning rate of the Adam optimizer.
        name: Run label; the best model is checkpointed to ``f'{name}.keras'``.
        epochs: Number of training epochs. Defaults to 60, the value that was
            previously hard-coded.

    Returns:
        ``(history, model)``. ``model`` carries the weights of the final epoch;
        the lowest-``val_loss`` model is the one stored in the checkpoint file.
    """
    # Build the model
    model = build_resnet20(input_shape=x_train.shape[1:], num_classes=y_train.shape[1], activation_func=activation_func)

    # Compile the model with specified learning rate. The metric names are set
    # explicitly because they become the keys of `history.history`.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.AUC(name='auc'),
        ],
    )

    # Define the checkpoint callback
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        f'{name}.keras',  # Path where to save the model
        save_best_only=True,  # Only save a model if `val_loss` has improved
        save_weights_only=False,
        monitor='val_loss',  # Monitor the validation loss
        mode='min',  # The lower the validation loss, the better the model
        verbose=1,  # Log a message when a better model is found
    )

    # Train the model with specified batch size
    history = model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_data=(x_val, y_val), verbose=1, callbacks=[checkpoint_cb])

    return history, model



# Hyperparameters shared by every baseline run
batch_size = 32
learning_rate = 0.005

# Baseline activations under comparison and their run labels
activation_functions = [tf.nn.relu, tf.nn.sigmoid, tf.nn.tanh]
names = ["relu", "sigmoid", "tanh"]
histories = {}

# Positional arguments that are identical for all three runs
shared_args = (x_train_split, y_train_split, x_val_split, y_val_split, batch_size, learning_rate)

# ReLU baseline
print("Training with Relu activation function")
history_relu, model_relu = train_model(tf.nn.relu, *shared_args, "relu")
histories["relu"] = history_relu

# Sigmoid baseline
print("\n\n Training with Sigmoid activation function")
history_sigmoid, model_sigmoid = train_model(tf.nn.sigmoid, *shared_args, "sigmoid")
histories["sigmoid"] = history_sigmoid

# Tanh baseline
print("\n\n Training with Tanh activation function")
history_tanh, model_tanh = train_model(tf.nn.tanh, *shared_args, "tanh")
histories["tanh"] = history_tanh

In [None]:
import tensorflow as tf


class SmoothTransitionReLU(tf.keras.layers.Layer):
    """ReLU-like activation whose positive-side slope drifts during training.

    Positive inputs are multiplied by a slope that moves from ``initial_slope``
    towards ``final_slope`` along a sigmoid of an internal progress counter:

        slope = initial + (final - initial) / (1 + exp(-steepness * (progress - 0.5)))

    Negative inputs are passed through unchanged.

    The counter advances by ``progress_step`` on every training-mode call, so if
    one instance is applied at several points of a model it advances once per
    call site per training step.

    Args:
        initial_slope: Slope applied to positive inputs when progress is low.
        final_slope: Slope approached as progress grows.
        steepness: Sharpness of the sigmoid transition around progress 0.5.
        progress_step: Amount added to the progress counter per training-mode
            call. Defaults to 0.01, the value that was previously hard-coded.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, initial_slope, final_slope, steepness=10, progress_step=0.01, **kwargs):
        super().__init__(**kwargs)
        self.initial_slope = initial_slope
        self.final_slope = final_slope
        self.steepness = steepness
        self.progress_step = progress_step
        # Internal counter tracking the relative progress of training. It is
        # created through add_weight so the layer owns it as a non-trainable
        # weight: it is then listed in `weights` and saved/restored with the
        # model instead of silently restarting from 0 after a reload.
        self.progress = self.add_weight(
            name="progress",
            shape=(),
            initializer="zeros",
            dtype="float32",
            trainable=False,
        )

    def call(self, inputs, training=None):
        """Apply the activation; advance the progress counter when training."""
        if training:
            self.progress.assign_add(self.progress_step)

        # Current slope from the sigmoid schedule over the progress counter
        slope_range = self.final_slope - self.initial_slope
        gate = 1 + tf.exp(-self.steepness * (self.progress - 0.5))
        current_slope = self.initial_slope + slope_range / gate

        # Scale only the positive part; negative inputs pass through as-is
        positive_part = tf.maximum(0.0, inputs) * current_slope
        negative_part = tf.minimum(0.0, inputs)

        return positive_part + negative_part

    def get_config(self):
        """Return the constructor arguments needed to re-create the layer."""
        config = super().get_config()
        config.update({
            "initial_slope": self.initial_slope,
            "final_slope": self.final_slope,
            "steepness": self.steepness,
            "progress_step": self.progress_step,
        })
        return config

In [None]:
def train_model_with_custom_activation(x_train, y_train, x_val, y_val, batch_size, learning_rate,
                                       initial_slope, target_slope, total_epochs):
    """Train the ResNet classifier with a ``SmoothTransitionReLU`` activation.

    Args:
        x_train: Training images; ``x_train.shape[1:]`` is used as the input shape.
        y_train: One-hot training labels; ``y_train.shape[1]`` is the class count.
        x_val: Validation images.
        y_val: One-hot validation labels.
        batch_size: Mini-batch size passed to ``model.fit``.
        learning_rate: Learning rate of the Adam optimizer.
        initial_slope: Starting positive-side slope of the activation.
        target_slope: Slope the activation transitions towards.
        total_epochs: Number of training epochs.

    Returns:
        ``(history, model)``. ``model`` carries the weights of the final epoch;
        the lowest-``val_loss`` model is written to ``dynamic_relu_model.keras``.
    """
    # One activation instance is handed to build_resnet20, which wraps the same
    # object at every activation site, so all sites share its progress counter.
    custom_activation = SmoothTransitionReLU(initial_slope=initial_slope, final_slope=target_slope)

    # Build the model using the custom activation function
    model = build_resnet20(input_shape=x_train.shape[1:], num_classes=y_train.shape[1],
                           activation_func=custom_activation)

    # Define the checkpoint callback
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        'dynamic_relu_model.keras',
        save_best_only=True,
        monitor='val_loss',
        mode='min',
        verbose=1
    )

    # Compile the model. Use explicitly named metric objects, as train_model
    # does, rather than string aliases: not every Keras version resolves
    # 'precision'/'recall'/'auc', and the explicit names guarantee the
    # `history.history` keys that print_info reads.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.AUC(name='auc'),
        ],
    )

    # Train the model
    history = model.fit(x_train, y_train, epochs=total_epochs, batch_size=batch_size,
                        validation_data=(x_val, y_val), verbose=1,
                        callbacks=[checkpoint_cb])

    return history, model


# Settings for the dynamic-slope run
batch_size = 32
learning_rate = 0.005
initial_slope = 1.732
target_slope = 0.557
rate = 0.01
total_epochs = 60

# Train with the SmoothTransitionReLU activation and keep its history
# alongside the baseline runs.
history_custom, model_custom = train_model_with_custom_activation(
    x_train=x_train_split,
    y_train=y_train_split,
    x_val=x_val_split,
    y_val=y_val_split,
    batch_size=batch_size,
    learning_rate=learning_rate,
    initial_slope=initial_slope,
    target_slope=target_slope,
    total_epochs=total_epochs,
)

histories["custom"] = history_custom

In [None]:
# Function to calculate F1 scores from precision and recall
def calculate_f1_scores(precision, recall):
    """Return the element-wise F1 score ``2 * p * r / (p + r)``.

    Args:
        precision: Sequence (or array) of precision values.
        recall: Sequence (or array) of recall values, same length as ``precision``.

    Returns:
        NumPy array of F1 scores. Entries where precision and recall are both
        zero are reported as 0.0 rather than NaN, so the result can be sorted
        and compared safely.
    """
    precision = np.asarray(precision, dtype=float)
    recall = np.asarray(recall, dtype=float)
    numerator = 2 * precision * recall
    denominator = precision + recall
    # Divide only where the denominator is non-zero; other entries keep the 0.0
    # pre-filled in `out`, avoiding the 0/0 -> NaN (and its runtime warning).
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )

def print_info(history, model_name):
    """Print and record the three best values of every tracked metric.

    For each of loss, accuracy, AUC, precision, recall and the derived F1 score
    the three best training and validation values over all epochs are printed
    (lowest for losses, highest otherwise). The same values are also stored in
    the module-level dictionary ``top_3_dict`` under ``model_name``; the
    dictionary is created on first use if it does not exist yet.

    Args:
        history: Object with a ``.history`` dict holding the per-epoch lists
            ``loss``, ``accuracy``, ``auc``, ``precision``, ``recall`` and their
            ``val_``-prefixed counterparts.
        model_name: Label used in the printed header and as the ``top_3_dict`` key.
    """
    metrics = history.history

    banner = "*" * 50
    print(banner)
    print(f"\n{model_name} Results:")
    print(banner)
    print("\n")

    # Collect every per-epoch series under the names used in top_3_dict
    series = {}
    for metric in ("loss", "accuracy", "auc", "precision", "recall"):
        series[f"training_{metric}"] = metrics[metric]
        series[f"validation_{metric}"] = metrics[f"val_{metric}"]

    # F1 is not logged by Keras here; derive it from precision and recall
    series["training_f1"] = calculate_f1_scores(metrics["precision"], metrics["recall"])
    series["validation_f1"] = calculate_f1_scores(metrics["val_precision"], metrics["val_recall"])

    # Losses are ranked ascending (lower is better), everything else descending
    top_3 = {
        key: sorted(values, reverse=not key.endswith("_loss"))[:3]
        for key, values in series.items()
    }

    # `top_3_dict` was previously assumed to exist as a global and raised a
    # NameError when it did not; create it on demand in the module namespace.
    globals().setdefault("top_3_dict", {})[model_name] = top_3

    report_lines = (
        ("Top 3 Lowest Training Losses:", "training_loss"),
        ("Top 3 Lowest Validation Losses:", "validation_loss"),
        ("Top 3 Highest Training Accuracies:", "training_accuracy"),
        ("Top 3 Highest Validation Accuracies:", "validation_accuracy"),
        ("Top 3 Training AUCs:", "training_auc"),
        ("Top 3 Validation AUCs:", "validation_auc"),
        ("Top 3 Training F1 Scores:", "training_f1"),
        ("Top 3 Validation F1 Scores:", "validation_f1"),
        ("Top 3 Training Precision:", "training_precision"),
        ("Top 3 Validation Precision:", "validation_precision"),
        ("Top 3 Training Recall:", "training_recall"),
        ("Top 3 Validation Recall:", "validation_recall"),
    )
    for label, key in report_lines:
        print(label, top_3[key])

## The Mish Activation

In [None]:
def mish(x):
    """Mish activation: ``x * tanh(softplus(x))``."""
    gate = tf.math.tanh(tf.math.softplus(x))
    return x * gate

In [None]:
# Train the Mish variant with the same data and hyperparameters as the other runs
history_mish, model_mish = train_model(mish, x_train_split, y_train_split,
                          x_val_split, y_val_split, batch_size, learning_rate, "mish")
# Store this run's own history; the previous `history` name is not defined at
# notebook level and raised a NameError on a fresh kernel.
histories["mish"] = history_mish

In [None]:
# Summarize each run in turn: (training history, label shown in the report)
for run_history, run_label in (
    (history_relu, "RELU"),
    (history_sigmoid, "SIGMOID"),
    (history_tanh, "TANH"),
    (history_mish, "MISH"),
):
    print_info(run_history, run_label)

In [None]:
# Keep only the plain per-epoch metric dicts of each run, keyed by run name,
# so the result can be serialized without the Keras History objects.
history_Save = {
    run_name: histories[run_name].history
    for run_name in ("relu", "sigmoid", "tanh", "custom", "mish")
}
    

In [None]:
# Persist the collected training histories to disk as a pickle file
file_path = "history.pkl"
with open(file_path, mode='wb') as history_file:
    pickle.dump(history_Save, history_file)

# Show where the file was written
file_path

## Testing and evaluation

In [None]:
# Checkpoints written by the training runs above. Each baseline run saves to
# '<name>.keras'; the dynamic-slope run saves to 'dynamic_relu_model.keras',
# so that is the file listed here (the previous 'custom_relu.keras' entry is
# not produced by any training cell).
# NOTE(review): assumes the notebook's working directory is /kaggle/working -- confirm.
model_paths = [
    "/kaggle/working/relu.keras",
    "/kaggle/working/tanh.keras",
    "/kaggle/working/sigmoid.keras",
    "/kaggle/working/mish.keras",
    "/kaggle/working/dynamic_relu_model.keras"
]

# Single model loaded from an attached Kaggle input dataset
model_path = "/kaggle/input/models/relu.keras"