In [1]:
!pip install git+https://github.com/EmotionEngineer/actix.git -q

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for actix (pyproject.toml) ... [?25l[?25hdone


In [2]:
# --- 1. Import Necessary Libraries ---
import tensorflow as tf
from tensorflow.keras.layers import (Layer, Dense, BatchNormalization, Activation, Input, 
                                     Conv2D, MaxPooling2D, Flatten)
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd
import time
import sys

# Suppress warnings for a cleaner output
import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', category=FutureWarning, module='tensorflow')

2025-06-12 09:18:36.513338: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749719916.708991      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749719916.771881      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Define the List of Activation Functions to Test ---
best_of_last_run = [
    'ParametricLogish', 
    'gelu', 
    'swish', 
    'OptimA', 
    'relu', 
    'WeibullSoftplusActivation', 
    'A_ELuC', 
    'mish', 
    'AdaptiveErfSwish'
]

# New functions to add to the test
new_to_test = [
    'A_STReLU', 
    'ATanSigU', 
    'SwishLogTanh', 
    'ArcGaLU', 
    'ParametricHyperbolicQuadraticActivation', 
    'RootSoftplus',
    'AdaptiveSinusoidalSoftgate', 
    'ExpTanhGatedActivation', 
    'HybridSinExpUnit',
    'ParametricLogarithmicSwish', 
    'AdaptiveCubicSigmoid', 
    'SmoothedAbsoluteGatedUnit',
    'GaussianTanhHarmonicUnit', 
    'SymmetricParametricRationalSigmoid', 
    'AdaptivePolynomialSwish',
    'LogSigmoidGatedElu', 
    'AdaptiveBipolarExponentialUnit', 
    'ParametricHyperGaussianGate',
    'TanhGatedArcsinhLinearUnit', 
    'ParametricOddPowerSwish', 
    'AdaptiveLinearLogTanh'
]

# Combine the lists for the current experiment
activations_to_test = best_of_last_run + new_to_test

In [4]:
# --- 2. Import and Setup Activations from Actix ---
try:
    import actix
except ImportError:
    print("Error: 'actix' library is not installed.")
    print("Please install it using the command: pip install git+https://github.com/EmotionEngineer/actix.git")
    sys.exit(1)

# Dynamically create the activation map from the actix library
CUSTOM_ACTIVATIONS_MAP = {}
print("Loading activation functions from actix...")
for name in activations_to_test:
    try:
        activation_class = getattr(actix, name)
        CUSTOM_ACTIVATIONS_MAP[name] = activation_class
    except AttributeError:
        print(f"  - WARNING: Activation function '{name}' not found in the actix library and will be skipped.")
print("Loading complete.\n")

Loading activation functions from actix...
Loading complete.



In [5]:
# --- 3. Constants and Experiment Configuration ---
NUM_SEEDS = 3        # Reduced for faster execution, as CIFAR-10 training takes longer
EPOCHS = 150
PATIENCE = 20        # More aggressive early stopping
BATCH_SIZE = 64      # Increased for more stable training on images
LEARNING_RATE = 1e-3 # A standard learning rate for Adam

In [6]:
# --- 4. Layer Naming Utilities ---
_layer_name_counters = {}

def _get_unique_layer_name(base_name_key):
    if base_name_key not in _layer_name_counters:
        _layer_name_counters[base_name_key] = 0
    _layer_name_counters[base_name_key] += 1
    return f"{base_name_key}_{_layer_name_counters[base_name_key]}"

def _reset_layer_name_counters():
    global _layer_name_counters
    _layer_name_counters = {}

def _add_activation_layer(model, activation_name_str):
    unique_name = _get_unique_layer_name(activation_name_str.lower().replace(" ", "_").replace("-","_"))
    if activation_name_str in CUSTOM_ACTIVATIONS_MAP:
        # Use a custom activation from actix
        model.add(CUSTOM_ACTIVATIONS_MAP[activation_name_str](name=unique_name))
    else:
        # Use a standard TensorFlow activation
        model.add(Activation(activation_name_str, name=unique_name))

In [7]:
# --- 5. Model Definition for Classification Task (CNN) ---
def create_cifar10_cnn_model(input_shape, activation_name_str, num_classes=10):
    model_name_prefix = activation_name_str.replace(' ','_').replace("-","_").lower()
    model = Sequential(name=f"cifar10_cnn_model_{model_name_prefix}")

    model.add(Input(shape=input_shape, name=_get_unique_layer_name("input_layer")))

    # Block 1
    model.add(Conv2D(32, (3, 3), padding='same', name=_get_unique_layer_name("conv2d")))
    _add_activation_layer(model, activation_name_str)
    model.add(BatchNormalization(name=_get_unique_layer_name("bn")))
    model.add(MaxPooling2D(pool_size=(2, 2), name=_get_unique_layer_name("maxpool")))

    # Block 2
    model.add(Conv2D(64, (3, 3), padding='same', name=_get_unique_layer_name("conv2d")))
    _add_activation_layer(model, activation_name_str)
    model.add(BatchNormalization(name=_get_unique_layer_name("bn")))
    model.add(MaxPooling2D(pool_size=(2, 2), name=_get_unique_layer_name("maxpool")))
    
    # Block 3
    model.add(Conv2D(128, (3, 3), padding='same', name=_get_unique_layer_name("conv2d")))
    _add_activation_layer(model, activation_name_str)
    model.add(BatchNormalization(name=_get_unique_layer_name("bn")))

    # Classifier Head
    model.add(Flatten(name=_get_unique_layer_name("flatten")))
    model.add(Dense(128, name=_get_unique_layer_name("dense")))
    _add_activation_layer(model, activation_name_str)
    model.add(BatchNormalization(name=_get_unique_layer_name("bn")))
    
    model.add(Dense(num_classes, activation='softmax', name=_get_unique_layer_name("output_dense")))

    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy', # Loss function for multi-class classification
                  metrics=['accuracy'])            # Primary metric is accuracy
    return model

In [8]:
# --- 6. Data Loading and Preprocessing for CIFAR-10 ---
def load_and_preprocess_cifar10():
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Normalize images: scale pixel values to the [0, 1] range
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

    # Convert labels to one-hot encoding
    num_classes = 10
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return (x_train, y_train), (x_test, y_test)

In [9]:
# --- 7. Main Training and Evaluation Loop ---
if __name__ == '__main__':
    print(f"TensorFlow Version: {tf.__version__}")
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            print(f"Num GPUs Available: {len(gpus)}, Memory growth enabled.")
            print("TensorFlow is using GPU.")
        except RuntimeError as e:
            print(e)
    else:
        print("TensorFlow is using CPU.")

    dataset_name = "CIFAR-10"
    (x_train, y_train), (x_test, y_test) = load_and_preprocess_cifar10()
    input_shape = x_train.shape[1:]
    num_classes = y_train.shape[1]
    
    all_results_list = []

    print(f"\n\n--- Benchmarking on Dataset: {dataset_name} ---")
    print(f"--- Activations to test ({len(activations_to_test)} total): {activations_to_test} ---")

    for act_name in activations_to_test:
        print(f"\n  --- Testing Activation: {act_name} ---")
        losses_for_activation = []
        accuracies_for_activation = []
        
        # Pre-check model build
        try:
            tf.keras.backend.clear_session()
            _reset_layer_name_counters()
            _ = create_cifar10_cnn_model(input_shape, act_name, num_classes)
        except Exception as e:
            print(f"    ERROR: Could not build model with {act_name}. Error: {e}")
            result_entry = {
                'activation': act_name,
                'mean_loss': np.nan, 'std_loss': np.nan,
                'mean_accuracy': np.nan, 'std_accuracy': np.nan, 'best_accuracy': np.nan,
                'notes': f'Failed to build model: {e}'
            }
            all_results_list.append(result_entry)
            continue

        for i in range(NUM_SEEDS):
            seed = 42 + i
            print(f"    Seed {i+1}/{NUM_SEEDS} (Actual seed: {seed}) for {act_name}")
            tf.keras.utils.set_random_seed(seed)
            tf.keras.backend.clear_session()
            _reset_layer_name_counters()

            model = create_cifar10_cnn_model(input_shape, act_name, num_classes)
            
            callbacks = [
                EarlyStopping(monitor='val_accuracy', patience=PATIENCE, restore_best_weights=True, mode='max', verbose=0),
            ]
            
            try:
                history = model.fit(x_train, y_train,
                                    batch_size=BATCH_SIZE,
                                    epochs=EPOCHS,
                                    validation_data=(x_test, y_test),
                                    callbacks=callbacks,
                                    verbose=0) # verbose=0 for a cleaner log
                
                # evaluate returns [loss, accuracy] as per compiled metrics
                eval_results = model.evaluate(x_test, y_test, verbose=0)
                loss, accuracy = eval_results[0], eval_results[1]
                
                losses_for_activation.append(loss)
                accuracies_for_activation.append(accuracy)
                print(f"      Seed {i+1} Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

            except Exception as e:
                print(f"      ERROR during training/evaluation for {act_name} with seed {seed}: {e}")
                losses_for_activation.append(np.nan)
                accuracies_for_activation.append(np.nan)

        # Calculate final statistics across all seeds
        if any(not np.isnan(l) for l in losses_for_activation):
            mean_loss = np.nanmean(losses_for_activation)
            std_loss = np.nanstd(losses_for_activation)
            mean_accuracy = np.nanmean(accuracies_for_activation)
            std_accuracy = np.nanstd(accuracies_for_activation)
            best_accuracy = np.nanmax(accuracies_for_activation)

            result_entry = {
                'activation': act_name,
                'mean_loss': mean_loss, 'std_loss': std_loss,
                'mean_accuracy': mean_accuracy, 'std_accuracy': std_accuracy, 'best_accuracy': best_accuracy,
                'notes': ''
            }
            print(f"    Results for {act_name}: Mean Loss: {mean_loss:.4f}, Mean Accuracy: {mean_accuracy:.4f}")
        else:
            result_entry = {
                'activation': act_name,
                'mean_loss': np.nan, 'std_loss': np.nan,
                'mean_accuracy': np.nan, 'std_accuracy': np.nan, 'best_accuracy': np.nan,
                'notes': 'All runs failed'
            }
        all_results_list.append(result_entry)

    # --- 8. Output and Saving Results ---
    print("\n\n--- Final Comparison Results ---")
    results_df = pd.DataFrame(all_results_list)
    
    # Sort by mean accuracy (from best to worst)
    results_df = results_df.sort_values(by=['mean_accuracy'], ascending=False)
    
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)

    print(results_df[['activation', 'mean_accuracy', 'std_accuracy', 'best_accuracy', 'mean_loss', 'std_loss', 'notes']])

    # Save to CSV
    output_filename = "cifar10_activation_comparison_results.csv"
    results_df.to_csv(output_filename, index=False)
    print(f"\nResults saved to {output_filename}")

TensorFlow Version: 2.18.0
Num GPUs Available: 1, Memory growth enabled.
TensorFlow is using GPU.
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


--- Benchmarking on Dataset: CIFAR-10 ---
--- Activations to test (30 total): ['ParametricLogish', 'gelu', 'swish', 'OptimA', 'relu', 'WeibullSoftplusActivation', 'A_ELuC', 'mish', 'AdaptiveErfSwish', 'A_STReLU', 'ATanSigU', 'SwishLogTanh', 'ArcGaLU', 'ParametricHyperbolicQuadraticActivation', 'RootSoftplus', 'AdaptiveSinusoidalSoftgate', 'ExpTanhGatedActivation', 'HybridSinExpUnit', 'ParametricLogarithmicSwish', 'AdaptiveCubicSigmoid', 'SmoothedAbsoluteGatedUnit', 'GaussianTanhHarmonicUnit', 'SymmetricParametricRationalSigmoid', 'AdaptivePolynomialSwish', 'LogSigmoidGatedElu', 'AdaptiveBipolarExponentialUnit', 'ParametricHyperGaussianGate', 'TanhGatedArcsinhLinearUnit', 'ParametricOddPowerSwish', 'AdaptiveLinearLogTanh'] ---

I0000 00:00:1749719940.867173      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


    Seed 1/3 (Actual seed: 42) for ParametricLogish


I0000 00:00:1749719949.963132      95 service.cc:148] XLA service 0x7985fc018cb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1749719949.963829      95 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1749719950.498636      95 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1749719953.513899      95 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


      Seed 1 Loss: 2.0548, Accuracy: 0.7734
    Seed 2/3 (Actual seed: 43) for ParametricLogish
      Seed 2 Loss: 1.9503, Accuracy: 0.7709
    Seed 3/3 (Actual seed: 44) for ParametricLogish
      Seed 3 Loss: 2.2176, Accuracy: 0.7800
    Results for ParametricLogish: Mean Loss: 2.0742, Mean Accuracy: 0.7748

  --- Testing Activation: gelu ---
    Seed 1/3 (Actual seed: 42) for gelu
      Seed 1 Loss: 2.3705, Accuracy: 0.7718
    Seed 2/3 (Actual seed: 43) for gelu
      Seed 2 Loss: 1.7991, Accuracy: 0.7526
    Seed 3/3 (Actual seed: 44) for gelu
      Seed 3 Loss: 1.7475, Accuracy: 0.7576
    Results for gelu: Mean Loss: 1.9723, Mean Accuracy: 0.7607

  --- Testing Activation: swish ---
    Seed 1/3 (Actual seed: 42) for swish
      Seed 1 Loss: 2.3374, Accuracy: 0.7736
    Seed 2/3 (Actual seed: 43) for swish
      Seed 2 Loss: 2.1978, Accuracy: 0.7653
    Seed 3/3 (Actual seed: 44) for swish
      Seed 3 Loss: 2.2090, Accuracy: 0.7720
    Results for swish: Mean Loss: 2.2481, Mean