In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Input, Conv2D, MaxPooling2D, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess_input
from tensorflow.keras.datasets import cifar10, mnist
from tensorflow.keras.optimizers import Adam, SGD, AdamW
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd
import random
import time
import matplotlib.pyplot as plt
from scipy.ndimage import rotate
import os
import gc

# --- Configuration ---
RAND_SEED = 42
NUM_RUNS = 1
EPOCHS = 50
PATIENCE = 10
BATCH_SIZE = 16
ROTATION_ANGLE_MAX = 90
LEARNING_RATE = 1e-5 # For fine-tuning ResNet

# UTKFace specific
UTKFACE_DIR = '/kaggle/input/utkface-new/UTKFace'
IMAGE_SIZE_RESNET = (224, 224)

def set_seed(seed=42):
    """Sets random seeds for reproducibility."""
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    # For full determinism, uncomment below (can slow down execution)
    # tf.config.threading.set_inter_op_parallelism_threads(1)
    # tf.config.threading.set_intra_op_parallelism_threads(1)

set_seed(RAND_SEED)

2025-05-08 08:01:20.454667: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746691280.650969      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746691280.707946      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# --- Custom Activation Functions ---
class OptimA(Layer):
    """Custom Optimal Activation function."""
    def __init__(self, **kwargs):
        super(OptimA, self).__init__(**kwargs)

    def build(self, input_shape):
        self.alpha = self.add_weight(name='alpha', shape=(), initializer='ones', trainable=True)
        self.beta = self.add_weight(name='beta', shape=(), initializer=tf.keras.initializers.Constant(0.5), trainable=True)
        self.gamma = self.add_weight(name='gamma', shape=(), initializer='ones', trainable=True)
        self.delta = self.add_weight(name='delta', shape=(), initializer=tf.keras.initializers.Constant(0.5), trainable=True)
        self.lambda_ = self.add_weight(name='lambda', shape=(), initializer='ones', trainable=True)
        super(OptimA, self).build(input_shape)

    def call(self, x):
        term1 = self.alpha * tf.math.tanh(self.beta * x)
        term2 = self.gamma * tf.math.softplus(self.delta * x) * tf.math.sigmoid(self.lambda_ * x)
        return term1 + term2

    def get_config(self):
        config = super(OptimA, self).get_config()
        return config

class OptimALinear(Layer):
    """Custom Optimal Activation function (Linear Approximation)."""
    def __init__(self, epsilon=1e-5, **kwargs):
        super(OptimALinear, self).__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        self.alpha = self.add_weight(name='alpha', shape=(), initializer='ones', trainable=True)
        self.beta = self.add_weight(name='beta', shape=(), initializer=tf.keras.initializers.Constant(0.5), trainable=True)
        self.gamma = self.add_weight(name='gamma', shape=(), initializer='ones', trainable=True)
        self.delta = self.add_weight(name='delta', shape=(), initializer=tf.keras.initializers.Constant(0.5), trainable=True)
        self.lambda_ = self.add_weight(name='lambda', shape=(), initializer='ones', trainable=True)
        super(OptimALinear, self).build(input_shape)

    def call(self, x):
        term1 = self.alpha * tf.clip_by_value(self.beta * x, -1, 1)
        term2 = self.gamma * (tf.maximum(0.0, self.delta * x) + self.epsilon) * (0.5 + 0.25 * self.lambda_ * x)
        return term1 + term2

    def get_config(self):
        config = super(OptimALinear, self).get_config()
        config.update({'epsilon': self.epsilon})
        return config

# --- Data Loading and Preprocessing ---
def load_and_prepare_image_classification_data(load_func, dataset_name):
    """Loads and prepares standard image classification datasets (MNIST, CIFAR-10)."""
    (x_train, y_train), (x_test, y_test) = load_func()
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

    if dataset_name == "MNIST" or dataset_name == "RotatedMNIST":
        x_train = np.expand_dims(x_train, -1)
        x_test = np.expand_dims(x_test, -1)
    elif dataset_name == "CIFAR-10": # Ensure 3 channels for CIFAR-10
        if x_train.shape[-1] == 1:
            x_train = tf.image.grayscale_to_rgb(tf.convert_to_tensor(x_train)).numpy()
            x_test = tf.image.grayscale_to_rgb(tf.convert_to_tensor(x_test)).numpy()

    num_classes = len(np.unique(np.concatenate((y_train.flatten(), y_test.flatten()))))
    y_train = to_categorical(y_train, num_classes=num_classes)
    y_test = to_categorical(y_test, num_classes=num_classes)
    return x_train, y_train, x_test, y_test, num_classes

def load_and_prepare_rotated_mnist(max_angle=ROTATION_ANGLE_MAX, seed=None):
    """Loads MNIST, rotates images, and sets the angle as the regression target."""
    if seed is not None: np.random.seed(seed)
    (x_train, _), (x_test, _) = mnist.load_data()

    train_angles = np.random.uniform(-max_angle, max_angle, size=len(x_train))
    y_train_reg = train_angles / max_angle
    x_train_rot = np.array([rotate(img, angle, reshape=False, mode='nearest', order=1) for img, angle in zip(x_train, train_angles)])

    test_angles = np.random.uniform(-max_angle, max_angle, size=len(x_test))
    y_test_reg = test_angles / max_angle
    x_test_rot = np.array([rotate(img, angle, reshape=False, mode='nearest', order=1) for img, angle in zip(x_test, test_angles)])

    x_train_rot = x_train_rot.astype('float32') / 255.0
    x_test_rot = x_test_rot.astype('float32') / 255.0
    x_train_rot = np.expand_dims(x_train_rot, -1)
    x_test_rot = np.expand_dims(x_test_rot, -1)
    return x_train_rot, y_train_reg, x_test_rot, y_test_reg

def parse_utkface_metadata(data_dir, seed=None):
    """Parses UTKFace filenames to extract image paths and ages."""
    print(f"Parsing UTKFace metadata from: {data_dir}")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"UTKFace directory not found or is not a directory: {data_dir}")

    image_paths, ages = [], []
    corrupted_files = 0
    filenames = os.listdir(data_dir)
    if not filenames: raise ValueError(f"No files found in UTKFace directory: {data_dir}")

    for filename in filenames:
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            try:
                age = int(filename.split('_')[0])
                if 1 <= age <= 116: # UTKFace typical age range
                    image_paths.append(os.path.join(data_dir, filename))
                    ages.append(float(age))
                else: corrupted_files += 1
            except (IndexError, ValueError): corrupted_files += 1
    
    if not image_paths:
        raise ValueError(f"No valid image files parsed in {data_dir}. Found {len(filenames)} files, skipped {corrupted_files}.")
    print(f"Parsed {len(image_paths)} image paths. Skipped {corrupted_files} files.")

    if seed is not None: random.seed(seed) # Seed for shuffling
    combined = list(zip(image_paths, ages))
    random.shuffle(combined)
    if combined: image_paths[:], ages[:] = zip(*combined) # Unpack shuffled data
    else: image_paths, ages = [], [] # Handle empty case after filtering
    return image_paths, ages

def load_and_preprocess_image_tf(path, label, target_size):
    """Loads and preprocesses a single image for a tf.data.Dataset pipeline."""
    try:
        image_string = tf.io.read_file(path)
        image = tf.image.decode_image(image_string, channels=3, expand_animations=False) # Ensure 3 channels
        image = tf.image.resize(image, target_size)
        image.set_shape([target_size[0], target_size[1], 3]) # Explicitly set shape
        image = resnet_preprocess_input(image) # Apply ResNet specific preprocessing
        return image, label
    except Exception as e:
        # tf.print(f"Error processing image {path}: {e}. Returning zeros.") # Use tf.print for TF graph mode
        # Return a "dummy" image and label if processing fails
        return tf.zeros((*target_size, 3), dtype=tf.float32), tf.cast(label, dtype=tf.float32)

def create_tf_dataset(image_paths, labels, batch_size_local, target_size, is_training=True, buffer_size_shuffle=1000):
    """Creates a tf.data.Dataset from image paths and labels for efficient data loading."""
    if not image_paths: return None
    dataset = tf.data.Dataset.from_tensor_slices((list(image_paths), list(labels)))

    if is_training:
        dataset = dataset.shuffle(buffer_size=min(len(image_paths), buffer_size_shuffle), reshuffle_each_iteration=True)

    dataset = dataset.map(lambda p, l: load_and_preprocess_image_tf(p, l, target_size),
                          num_parallel_calls=tf.data.AUTOTUNE) # Parallelize image loading
    
    # Optional: filter out images that failed to load (if load_and_preprocess_image_tf returns a distinguishable dummy)
    # dataset = dataset.filter(lambda image, label: not tf.reduce_all(tf.equal(image, tf.zeros_like(image))))

    dataset = dataset.batch(batch_size_local)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE) # Prefetch batches for performance
    return dataset

# --- Helper to get activation layer ---
def get_activation(act_config):
    """Instantiates or returns an activation layer/name."""
    if isinstance(act_config, str): return tf.keras.layers.Activation(act_config)
    elif isinstance(act_config, Layer): # If it's an already instantiated custom layer
        if type(act_config) == OptimA: return OptimA() # Create new instance
        if type(act_config) == OptimALinear: return OptimALinear() # Create new instance
        return act_config # Fallback: reuse instance (less ideal)
    elif isinstance(act_config, type) and issubclass(act_config, Layer): return act_config() # Instantiate class type
    else: raise ValueError(f"Unsupported activation: {act_config}")

# --- Model Building ---
def build_cnn_model(input_shape, output_units, activation_func_config, task_type):
    """Builds a simple CNN model."""
    if isinstance(activation_func_config, str): activation_name = activation_func_config
    elif hasattr(activation_func_config, '__name__'): activation_name = activation_func_config.__name__
    else: activation_name = activation_func_config.__class__.__name__

    model = Sequential(name=f"SimpleCNN_{activation_name}_{task_type}")
    model.add(Input(shape=input_shape))
    model.add(Conv2D(32, (3,3), padding='same')); model.add(get_activation(activation_func_config)); model.add(MaxPooling2D((2,2))); model.add(Dropout(0.2))
    model.add(Conv2D(64, (3,3), padding='same')); model.add(get_activation(activation_func_config)); model.add(MaxPooling2D((2,2))); model.add(Dropout(0.2))
    model.add(Flatten()); model.add(Dense(128)); model.add(get_activation(activation_func_config)); model.add(Dropout(0.3))
    if task_type == "classification": model.add(Dense(output_units, activation='softmax'))
    elif task_type == "regression": model.add(Dense(output_units, activation='linear'))
    else: raise ValueError(f"Unsupported task: {task_type}")
    return model

def build_resnet50_regression_model(input_shape, activation_func_config):
    """Builds a ResNet-50 based model for regression with a custom activation in the head."""
    if isinstance(activation_func_config, str): activation_name = activation_func_config
    elif hasattr(activation_func_config, '__name__'): activation_name = activation_func_config.__name__
    else: activation_name = activation_func_config.__class__.__name__

    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    base_model.trainable = True # Fine-tune all layers of ResNet

    x = base_model.output
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    x = Dropout(0.5)(x)
    x = Dense(256, name='fc1')(x)
    x = get_activation(activation_func_config)(x) # Apply custom/standard activation
    x = Dropout(0.3)(x)
    output_tensor = Dense(1, activation='linear', name='age_output')(x) # Linear output for regression
    model = Model(inputs=base_model.input, outputs=output_tensor, name=f"ResNet50_{activation_name}_Reg")
    return model

# --- Experiment Execution ---
def run_experiment(datasets_config, activations_config, optimizers_config, num_runs, epochs_local, batch_size_local, patience_local):
    """Runs the benchmarking experiments across datasets, activations, and optimizers."""
    results = {}
    histories = {}

    for run_idx in range(num_runs):
        print(f"\n--- Starting Run {run_idx + 1}/{num_runs} ---")
        current_seed = RAND_SEED + run_idx
        set_seed(current_seed)

        for dataset_name, dataset_cfg in datasets_config.items():
            print(f"\nDataset: {dataset_name} (Task: {dataset_cfg['task']})")
            if dataset_name not in results:
                results[dataset_name], histories[dataset_name] = {}, {}

            task_type, model_arch = dataset_cfg['task'], dataset_cfg.get('model_type', 'cnn')
            # Initialize data-related variables
            train_src, val_src, test_eval_src = None, None, None
            steps_epoch, val_steps = None, None
            input_s, output_u = None, None
            loss_f, eval_m, primary_m_name, primary_m_idx = None, None, None, None

            # Load and prepare data based on dataset type and task
            if dataset_cfg["type"] == "image" and task_type == "classification":
                x_tr, y_tr, x_te, y_te, n_classes = load_and_prepare_image_classification_data(dataset_cfg["load_func"], dataset_name)
                input_s, output_u = x_tr.shape[1:], n_classes
                loss_f, eval_m, primary_m_name, primary_m_idx = 'categorical_crossentropy', ['accuracy'], 'accuracy', 1
                train_src, test_eval_src = (x_tr, y_tr), (x_te, y_te) # Use NumPy arrays directly
            elif dataset_cfg["type"] == "image" and task_type == "regression":
                if dataset_name == "UTKFaceAge":
                    paths, labels = parse_utkface_metadata(UTKFACE_DIR, current_seed)
                    if not paths: print(f"Skipping UTKFaceAge for run {run_idx+1}, no data parsed."); continue
                    
                    # Split data (e.g., 70% train, 15% val, 15% test)
                    n_total = len(paths); n_tr = int(n_total * 0.7); n_v = int(n_total * 0.15)
                    tr_p, tr_l = paths[:n_tr], labels[:n_tr]
                    v_p, v_l = paths[n_tr:n_tr+n_v], labels[n_tr:n_tr+n_v]
                    te_p, te_l = paths[n_tr+n_v:], labels[n_tr+n_v:]
                    print(f"UTKFace splits: Train: {len(tr_p)}, Validation: {len(v_p)}, Test: {len(te_p)}")
                    if not tr_p or not v_p or not te_p: print(f"UTKFace data split resulted in an empty set. Skipping."); continue

                    # Create tf.data.Dataset for efficient loading
                    train_src = create_tf_dataset(tr_p, tr_l, batch_size_local, IMAGE_SIZE_RESNET, is_training=True)
                    val_src = create_tf_dataset(v_p, v_l, batch_size_local, IMAGE_SIZE_RESNET, is_training=False)
                    test_eval_src = create_tf_dataset(te_p, te_l, batch_size_local, IMAGE_SIZE_RESNET, is_training=False)
                    if not train_src or not val_src or not test_eval_src: print("Failed to create UTKFace tf.data.Dataset. Skipping."); continue
                    
                    # Calculate steps for training and validation
                    steps_epoch = (len(tr_p) + batch_size_local -1) // batch_size_local # Ceiling division
                    val_steps = (len(v_p) + batch_size_local -1) // batch_size_local   # Ceiling division
                    input_s = (*IMAGE_SIZE_RESNET, 3)
                elif dataset_name == "RotatedMNIST":
                    x_tr_rot, y_tr_reg, x_te_rot, y_te_reg = dataset_cfg["load_func"](seed=current_seed)
                    if x_tr_rot.ndim == 3: x_tr_rot,x_te_rot = np.expand_dims(x_tr_rot,-1), np.expand_dims(x_te_rot,-1) # Add channel dim
                    input_s = x_tr_rot.shape[1:]
                    train_src, test_eval_src = (x_tr_rot, y_tr_reg), (x_te_rot, y_te_reg)
                
                output_u = 1 # Single output for regression
                loss_f, eval_m, primary_m_name, primary_m_idx = 'mse', ['mae'], 'mae', 1 # MSE loss, MAE metric
            else: raise ValueError(f"Unsupported dataset configuration: {dataset_name}")
            if input_s is None: print(f"Input shape not determined for {dataset_name}. Skipping this dataset iteration."); continue

            # Iterate over optimizers and activations
            for opt_name, opt_creator in optimizers_config.items():
                print(f"  Optimizer: {opt_name} (Run {run_idx + 1})")
                if opt_name not in results[dataset_name]: results[dataset_name][opt_name], histories[dataset_name][opt_name] = {}, {}
                for act_name, act_cfg_val in activations_config.items():
                    print(f"    Activation: {act_name}")
                    if act_name not in results[dataset_name][opt_name]:
                        results[dataset_name][opt_name][act_name] = {'loss': [], primary_m_name: [], 'time': [], 'params': []}
                        histories[dataset_name][opt_name][act_name] = []

                    tf.keras.backend.clear_session(); gc.collect(); set_seed(current_seed) # Reset state
                    
                    # Build model
                    model = build_resnet50_regression_model(input_s, act_cfg_val) if model_arch == 'resnet50' \
                            else build_cnn_model(input_s, output_u, act_cfg_val, task_type)
                    
                    opt_instance = opt_creator() # Create fresh optimizer instance
                    model.compile(optimizer=opt_instance, loss=loss_f, metrics=eval_m)
                    
                    # Define monitor metric for callbacks
                    mon_metric = 'val_mae' if primary_m_name == 'mae' else ('val_accuracy' if task_type == 'classification' else 'val_loss')
                    cbs = [EarlyStopping(monitor=mon_metric, patience=patience_local, restore_best_weights=True, verbose=1),
                           ReduceLROnPlateau(monitor=mon_metric, factor=0.3, patience=patience_local//2, min_lr=1e-7, verbose=1)]
                    
                    start_t = time.time()
                    # Prepare arguments for model.fit
                    fit_args = {"epochs": epochs_local, "callbacks": cbs, "verbose": 1}
                    if isinstance(train_src, tf.data.Dataset): # If using tf.data.Dataset
                        fit_args.update({"x": train_src, "validation_data": val_src})
                        if steps_epoch: fit_args["steps_per_epoch"] = steps_epoch
                        if val_steps: fit_args["validation_steps"] = val_steps
                    else: # If using NumPy arrays
                        fit_args.update({"x": train_src[0], "y": train_src[1], "batch_size": batch_size_local, "validation_split": 0.2})
                    
                    hist = model.fit(**fit_args) # Train the model
                    train_time = time.time() - start_t

                    print("Evaluating model...")
                    # Evaluate model
                    eval_res = model.evaluate(test_eval_src, verbose=0) if isinstance(test_eval_src, tf.data.Dataset) \
                               else model.evaluate(test_eval_src[0], test_eval_src[1], batch_size=batch_size_local, verbose=0)
                    
                    eval_l, eval_pm = eval_res[0], eval_res[primary_m_idx]
                    n_params = model.count_params()

                    # Store results
                    res_dict = results[dataset_name][opt_name][act_name]
                    res_dict['loss'].append(eval_l); res_dict[primary_m_name].append(eval_pm)
                    res_dict['time'].append(train_time); res_dict['params'].append(n_params)
                    histories[dataset_name][opt_name][act_name].append(hist.history)
                    print(f"      Loss: {eval_l:.4f}, {primary_m_name.capitalize()}: {eval_pm:.4f}, Time: {train_time:.2f}s, Params: {n_params}")

                    del model, opt_instance, hist; gc.collect() # Clean up
            
            # Clean up data after processing a dataset to free memory
            if dataset_cfg["type"] == "image" and task_type == "classification": del x_tr, y_tr, x_te, y_te
            elif dataset_name == "RotatedMNIST": del x_tr_rot, y_tr_reg, x_te_rot, y_te_reg
            del train_src, val_src, test_eval_src; gc.collect()
    return results, histories

# --- Aggregate and Display Results ---
def aggregate_and_display_results(final_results, datasets_cfg_local, all_histories_local, num_runs_executed):
    """Aggregates results from multiple runs and displays them in a table and plots."""
    aggregated = {}
    best_mae = {}  # To store best MAE for each regression dataset
    
    # First pass: aggregate all data and identify best MAE for regression tasks
    for ds_name, opt_data in final_results.items():
        aggregated[ds_name] = {}
        task = datasets_cfg_local[ds_name]['task']
        metric_k = 'accuracy' if task == 'classification' else 'mae'
        
        if task == 'regression':
            best_mae[ds_name] = {'value': float('inf'), 'optimizer': None, 'activation': None}
        
        for opt_n, act_data in opt_data.items():
            aggregated[ds_name][opt_n] = {}
            for act_n, metrics_vals in act_data.items():
                if not metrics_vals['loss']: # Handle case where no runs completed for this combo
                    agg_data = {'Avg Loss': np.nan, 'Std Loss': np.nan, f'Avg {metric_k.capitalize()}': np.nan,
                                f'Std {metric_k.capitalize()}': np.std(metrics_vals[metric_k]) if metrics_vals[metric_k] else np.nan,
                                'Avg Time': np.nan, 'Std Time': np.nan, 'Avg Params': np.nan}
                else:
                    avg_mae = np.mean(metrics_vals['mae']) if 'mae' in metrics_vals else np.nan
                    agg_data = {'Avg Loss': np.mean(metrics_vals['loss']), 'Std Loss': np.std(metrics_vals['loss']),
                                f'Avg {metric_k.capitalize()}': avg_mae if metric_k == 'mae' else np.mean(metrics_vals[metric_k]),
                                f'Std {metric_k.capitalize()}': np.std(metrics_vals[metric_k]),
                                'Avg Time': np.mean(metrics_vals['time']), 'Std Time': np.std(metrics_vals['time']),
                                'Avg Params': int(np.mean(metrics_vals['params'])) if metrics_vals['params'] else np.nan}
                    
                    # Track best MAE for regression tasks
                    if task == 'regression' and 'mae' in metrics_vals and avg_mae < best_mae[ds_name]['value']:
                        best_mae[ds_name]['value'] = avg_mae
                        best_mae[ds_name]['optimizer'] = opt_n
                        best_mae[ds_name]['activation'] = act_n
                
                aggregated[ds_name][opt_n][act_n] = agg_data
    
    # Create DataFrame for results
    all_rows = []
    for ds, o_data in aggregated.items():
        task = datasets_cfg_local[ds]['task']
        for o, a_data in o_data.items():
            for a, m_vals in a_data.items():
                row = {'Dataset': ds, 'Optimizer': o, 'Activation': a}
                row.update(m_vals)
                
                # Add Best MAE marker for regression tasks
                if task == 'regression':
                    is_best = (best_mae.get(ds, {}).get('optimizer') == o and 
                              best_mae.get(ds, {}).get('activation') == a)
                    row['Best MAE'] = '★' if is_best else ''
                
                all_rows.append(row)

    df_res = pd.DataFrame(all_rows)
    if not df_res.empty:
        df_res = df_res.set_index(['Dataset', 'Optimizer', 'Activation'])
        # Define column order for display
        cols_order = ['Best MAE', 'Avg Accuracy', 'Std Accuracy', 'Avg Mae', 'Std Mae', 
                     'Avg Loss', 'Std Loss', 'Avg Time', 'Std Time', 'Avg Params']
        # Only keep columns that exist in the dataframe
        cols_order = [col for col in cols_order if col in df_res.columns]
        df_res = df_res.reindex(columns=cols_order).sort_index()
    
    print("\n--- Aggregated Benchmark Results ---")
    if best_mae:
        print("\nBest MAE for Regression Tasks:")
        for ds, info in best_mae.items():
            print(f"{ds}: {info['optimizer']} with {info['activation']} (MAE: {info['value']:.4f})")
    
    pd.options.display.float_format = '{:.4f}'.format
    print(df_res.dropna(axis=1, how='all') if not df_res.empty else "No results to display.")

In [3]:
# --- Main Execution ---
if __name__ == "__main__":
    # Define datasets to benchmark
    datasets_run_cfg = {
        # Example: Uncomment to run MNIST classification
        # "MNIST": {"load_func": mnist.load_data, "type": "image", "task": "classification", "model_type": "cnn"},
        # Example: Uncomment to run CIFAR-10 classification
        # "CIFAR-10": {"load_func": cifar10.load_data, "type": "image", "task": "classification", "model_type": "cnn"},
        # Example: Uncomment to run Rotated MNIST regression
        # "RotatedMNIST": {"load_func": load_and_prepare_rotated_mnist, "type": "image", "task": "regression", "model_type": "cnn"},
    }
    
    # Add UTKFaceAge if directory exists and is not empty
    if os.path.isdir(UTKFACE_DIR) and os.listdir(UTKFACE_DIR):
         datasets_run_cfg["UTKFaceAge"] = {
             "load_func": parse_utkface_metadata, # Function to parse paths/labels
             "type": "image", "task": "regression", "model_type": "resnet50"
         }
    else:
        print(f"WARNING: UTKFace directory '{UTKFACE_DIR}' not found or is empty. Skipping UTKFaceAge dataset.")
    
    if not datasets_run_cfg: print("No datasets configured to run. Exiting."); exit()

    # Define activation functions to compare
    activations_run_cfg = {'OptimA': OptimA, 'OptimALinear': OptimALinear, 
                           'ReLU': 'relu', 'ELU': 'elu', 'Swish': 'swish', 'GeLU': 'gelu'}
    # Define optimizers to compare (using lambdas for fresh instances)
    optimizers_run_cfg = {
                          'AdamW': lambda: AdamW(learning_rate=LEARNING_RATE, beta_1=0.95, beta_2=0.999, amsgrad=True)
                         }

    print(f"Effective BATCH_SIZE: {BATCH_SIZE}, EPOCHS: {EPOCHS}, NUM_RUNS: {NUM_RUNS}")

    # Run the experiments
    final_data, all_hists = run_experiment(
        datasets_run_cfg, activations_run_cfg, optimizers_run_cfg,
        num_runs=NUM_RUNS, epochs_local=EPOCHS, batch_size_local=BATCH_SIZE, patience_local=PATIENCE
    )
    # Display aggregated results and plots
    aggregate_and_display_results(final_data, datasets_run_cfg, all_hists, NUM_RUNS)

Effective BATCH_SIZE: 16, EPOCHS: 50, NUM_RUNS: 1

--- Starting Run 1/1 ---

Dataset: UTKFaceAge (Task: regression)
Parsing UTKFace metadata from: /kaggle/input/utkface-new/UTKFace
Parsed 23708 image paths. Skipped 0 files.
UTKFace splits: Train: 16595, Validation: 3556, Test: 3557


I0000 00:00:1746691294.579221      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


  Optimizer: AdamW (Run 1)
    Activation: OptimA
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/50


I0000 00:00:1746691361.118301      59 service.cc:148] XLA service 0x7dd8b0003010 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746691361.119097      59 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746691365.949780      59 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   1/1038[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25:05:33[0m 87s/step - loss: 1433.4185 - mae: 30.7487

I0000 00:00:1746691386.404711      59 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 135ms/step - loss: 663.1389 - mae: 19.1174 - val_loss: 83.4814 - val_mae: 6.7262 - learning_rate: 1.0000e-05
Epoch 2/50
[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77us/step - loss: 0.0000e+00 - mae: 0.0000e+00 - learning_rate: 1.0000e-05
Epoch 3/50
[1m   1/1038[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:31[0m 146ms/step - loss: 113.6577 - mae: 8.8349

  self.gen.throw(typ, value, traceback)
  current = self.get_monitor_value(logs)
  callback.on_epoch_end(epoch, logs)


[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 109ms/step - loss: 98.5598 - mae: 7.4664 - val_loss: 72.9784 - val_mae: 6.1999 - learning_rate: 1.0000e-05
Epoch 4/50
[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34us/step - loss: 0.0000e+00 - mae: 0.0000e+00 - learning_rate: 1.0000e-05
Epoch 5/50
[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 110ms/step - loss: 74.5841 - mae: 6.4369 - val_loss: 70.9566 - val_mae: 6.0678 - learning_rate: 1.0000e-05
Epoch 6/50
[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38us/step - loss: 0.0000e+00 - mae: 0.0000e+00 - learning_rate: 1.0000e-05
Epoch 7/50
[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 110ms/step - loss: 56.2359 - mae: 5.6665 - val_loss: 69.2320 - val_mae: 5.9744 - learning_rate: 1.0000e-05
Epoch 8/50
[1m1038/1038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35us/step - loss: 0.0000e+00 - mae: 0.0000e+00 - learning_ra