In [None]:
import math
import random
import time
import numpy as np
from copy import deepcopy
from tqdm import trange
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Cuckoo Search Algorithm implementation
def cuckoo_search(objective_func, bounds, n_nests=15, pa=0.25, alpha=0.1,
                  n_iter=100, levy_lambda=1.5, minimize=True, verbose_prefix=""):
    """
    Cuckoo Search (CS) algorithm for optimization.

    objective_func: The function to be optimized. It is called with a dictionary
                    {'param_name': value} and must return a comparable score.
    bounds: A dictionary of parameter bounds: {'param_name': (min_val, max_val, 'type')}
           'type' can be 'float' or 'int'.
    n_nests: Number of host nests (and cuckoos). Must be at least 1.
    pa: Fraction of nests abandoned per iteration (discovery rate);
        int(pa * n_nests) nests are re-drawn at random each iteration.
    alpha: Step size scaling factor.
    n_iter: Total number of iterations (generations).
    levy_lambda: Exponent for Levy flight.
    minimize: If True, objective_func is minimized; otherwise, it's maximized.
    verbose_prefix: Prefix for the progress-bar description.

    Returns a tuple (best_nest, best_fitness): the best parameter dictionary
    evaluated during the search and its objective value.

    Raises ValueError if n_nests < 1 or a bound declares a type other than
    'float' or 'int'.
    """
    if n_nests < 1:
        raise ValueError("n_nests must be at least 1")

    param_names = list(bounds.keys())
    n_params = len(param_names)

    # Fail early on an unknown type instead of silently dropping the parameter
    for name in param_names:
        p_type = bounds[name][2]
        if p_type not in ('float', 'int'):
            raise ValueError(
                f"Unsupported type {p_type!r} for parameter {name!r}; "
                "expected 'float' or 'int'"
            )

    def random_nest():
        # Draw one solution uniformly at random inside the bounds
        nest = {}
        for name, (min_val, max_val, p_type) in bounds.items():
            if p_type == 'float':
                nest[name] = random.uniform(min_val, max_val)
            else:
                nest[name] = random.randint(int(min_val), int(max_val))
        return nest

    def is_better(candidate, incumbent):
        # Strict improvement in the requested optimization direction
        return candidate < incumbent if minimize else candidate > incumbent

    def as_vector(nest):
        # Fixed parameter order, independent of the dict's insertion order
        return np.array([nest[name] for name in param_names], dtype=float)

    # Initialize and evaluate nests (solutions)
    nests = [random_nest() for _ in range(n_nests)]
    fitness = [objective_func(nest) for nest in nests]

    # Find the initial best nest
    best_idx = int(np.argmin(fitness) if minimize else np.argmax(fitness))
    best_nest = deepcopy(nests[best_idx])
    best_fitness = fitness[best_idx]

    # Mantegna's algorithm: standard deviation of the numerator variable u.
    # It depends only on levy_lambda, so compute it once.
    sigma = (math.gamma(1 + levy_lambda) * math.sin(math.pi * levy_lambda / 2) /
             (math.gamma((1 + levy_lambda) / 2) * levy_lambda *
              (2 ** ((levy_lambda - 1) / 2)))) ** (1 / levy_lambda)

    # Main loop
    for _ in trange(n_iter, desc=f"{verbose_prefix}Cuckoo Search"):
        # Generate new solutions (cuckoos) via Levy flights toward the best
        # nest known at the start of this iteration
        best_vec = as_vector(best_nest)
        new_nests = []
        for cuckoo in nests:
            # np.random.normal takes the standard deviation (not the variance)
            u = np.random.normal(0, sigma, n_params)
            v = np.random.normal(0, 1, n_params)
            step = u / (np.abs(v) ** (1 / levy_lambda))

            # Scale step and apply to solution
            step_size = alpha * step * (best_vec - as_vector(cuckoo))

            new_cuckoo = {}
            for j, name in enumerate(param_names):
                min_val, max_val, p_type = bounds[name]
                raw = cuckoo[name] + step_size[j]
                if p_type == 'int':
                    new_cuckoo[name] = int(max(min_val, min(max_val, round(raw))))
                else:
                    new_cuckoo[name] = float(max(min_val, min(max_val, raw)))
            new_nests.append(new_cuckoo)

        # Evaluate new cuckoos and replace old nests if better
        new_fitness = [objective_func(nest) for nest in new_nests]
        for i in range(n_nests):
            if is_better(new_fitness[i], fitness[i]):
                nests[i] = new_nests[i]
                fitness[i] = new_fitness[i]
                # Record a new global best right away: this nest may be
                # abandoned below, which would otherwise lose the improvement
                if is_better(fitness[i], best_fitness):
                    best_nest = deepcopy(nests[i])
                    best_fitness = fitness[i]

        # Discover alien eggs (abandon nests); clamp so pa > 1 cannot ask
        # random.sample for more nests than exist
        n_abandon = min(n_nests, max(0, int(pa * n_nests)))
        if n_abandon > 0:
            for idx in random.sample(range(n_nests), n_abandon):
                nests[idx] = random_nest()
                fitness[idx] = objective_func(nests[idx])
                if is_better(fitness[idx], best_fitness):
                    best_nest = deepcopy(nests[idx])
                    best_fitness = fitness[idx]

    return best_nest, best_fitness


# Simple ResNet Block
class BasicBlock(nn.Module):
    """
    Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    in_channels: Number of channels of the input tensor.
    out_channels: Number of channels produced by the block.
    stride: Stride of the first convolution (and of the projection shortcut).

    When the stride is not 1 or the channel count changes, the skip path is a
    1x1 convolution followed by batch-norm; otherwise it is an empty
    nn.Sequential, which passes the input through unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3,
            stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=3,
            stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Project the input only when its shape would not match the main path
        projection = []
        if stride != 1 or in_channels != out_channels:
            projection = [
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        main = self.conv1(x)
        main = self.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.relu(main)


class SimpleResNet(nn.Module):
    """
    Small two-stage residual network for single-channel images.

    n_filters: Channel width of the stem and the first stage; the second
               stage uses twice as many channels.
    n_blocks: Number of BasicBlock modules in each stage.
    drop: Dropout probability applied before the final linear layer.
    num_classes: Size of the output logit vector.
    """

    def __init__(self, n_filters=16, n_blocks=2, drop=0.3, num_classes=10):
        super().__init__()
        wide = n_filters * 2

        # Stem: 1 input channel -> n_filters feature maps
        self.conv1 = nn.Conv2d(1, n_filters, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(n_filters)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages; the second one uses stride 2 and doubles the width
        self.layer1 = self._make_layer(n_filters, n_filters, n_blocks, stride=1)
        self.layer2 = self._make_layer(n_filters, wide, n_blocks, stride=2)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(drop)
        self.fc = nn.Linear(wide, num_classes)

        self.in_channels = n_filters

    def _make_layer(self, in_channels, out_channels, n_blocks, stride):
        """Stack n_blocks BasicBlocks; only the first changes channels/stride."""
        blocks = [BasicBlock(in_channels, out_channels, stride)]
        blocks.extend(
            BasicBlock(out_channels, out_channels, stride=1)
            for _ in range(n_blocks - 1)
        )
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Map a batch of images to class logits."""
        stem = self.relu(self.bn1(self.conv1(x)))
        features = self.layer2(self.layer1(stem))
        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(self.dropout(pooled))


def resnet_mnist_objective(params):
    """
    Train a SimpleResNet briefly on an MNIST subset and score it.

    params: dict with keys 'lr', 'batch', 'n_filters', 'n_blocks' and 'dropout',
            e.g. {'lr':0.01, 'batch':64, 'n_filters':16, 'n_blocks':2, 'dropout':0.3}

    The model is trained for 2 epochs with Adam on the first 10000 training
    images. The return value is the sample-weighted mean cross-entropy loss on
    the first 1000 images of the train=False split (lower is better).
    """
    # Unpack and coerce hyperparameters
    learning_rate = float(params['lr'])
    batch_size = int(params['batch'])
    width = int(params['n_filters'])
    depth = int(params['n_blocks'])
    drop_prob = float(params['dropout'])
    n_epochs = 2  # Kept small so each evaluation stays cheap

    # MNIST with the usual mean/std normalization
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    full_train = torchvision.datasets.MNIST(
        root='./data', train=True, download=True, transform=preprocess)
    full_val = torchvision.datasets.MNIST(
        root='./data', train=False, download=True, transform=preprocess)

    # Restrict to fixed leading slices for faster optimization
    data = torch.utils.data
    train_part = data.Subset(full_train, list(range(0, 10000)))
    val_part = data.Subset(full_val, list(range(0, 1000)))

    train_loader = data.DataLoader(
        train_part, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader = data.DataLoader(
        val_part, batch_size=batch_size, shuffle=False, num_workers=0)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Build model, loss and optimizer
    model = SimpleResNet(n_filters=width, n_blocks=depth,
                         drop=drop_prob, num_classes=10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Short training run
    model.train()
    for _ in range(n_epochs):
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(images), labels)
            batch_loss.backward()
            optimizer.step()

    # Validation: weight each batch's mean loss by its size so a smaller
    # final batch does not skew the average
    model.eval()
    weighted_loss = 0.0
    seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            count = images.size(0)
            weighted_loss += criterion(model(images), labels).item() * count
            seen += count

    return weighted_loss / seen


def run_resnet_mnist_optimization():
    """
    Tune the MNIST ResNet hyperparameters with Cuckoo Search and report them.

    Searches learning rate, batch size, filter count, block count and dropout,
    minimizing the loss returned by resnet_mnist_objective. Prints a summary
    and returns (best_parameters, best_validation_loss).
    """
    print("\n=== ResNet on MNIST: Hyperparameter optimization using Cuckoo Search ===")

    # Search space: name -> (lower bound, upper bound, type)
    search_space = dict(
        lr=(1e-4, 1e-2, 'float'),
        batch=(32, 256, 'int'),
        n_filters=(8, 32, 'int'),
        n_blocks=(1, 3, 'int'),
        dropout=(0.0, 0.5, 'float'),
    )

    # Cuckoo Search settings
    search_settings = dict(
        n_nests=8,                        # Number of nests
        pa=0.25,                          # Discovery rate
        alpha=0.1,                        # Step size
        n_iter=12,                        # Number of iterations
        levy_lambda=1.5,                  # Levy flight exponent
        minimize=True,                    # Lower validation loss is better
        verbose_prefix="[ResNet-MNIST] ",
    )
    best, best_f = cuckoo_search(resnet_mnist_objective, search_space,
                                 **search_settings)

    print("\n[ResNet-MNIST] OPTIMIZATION COMPLETE!")
    print(f"[ResNet-MNIST] Best validation loss: {best_f:.6f}")
    print("[ResNet-MNIST] Best parameters:")
    for param in best:
        print(f"  - {param}: {best[param]}")

    return best, best_f


if __name__ == "__main__":
    # Seed the Python, NumPy and PyTorch RNGs (in that order) for reproducibility
    for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
        _seed_rng(42)

    # Launch the hyperparameter search and keep its result
    best_params, best_loss = run_resnet_mnist_optimization()


=== ResNet on MNIST: Hyperparameter optimization using Cuckoo Search ===


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 483kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.47MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 14.9MB/s]
[ResNet-MNIST] Cuckoo Search:  58%|█████▊    | 7/12 [43:42<31:01, 372.22s/it]

START

  ↓

Initialize nests randomly
  
  ↓

Evaluate all nests

  ↓


Select best_nest

  ↓


For each iteration:

    ↓

  Generate new cuckoos using Levy flight

    ↓

  Evaluate new cuckoos


    ↓

  Replace nests if cuckoo is better

    ↓

  Abandon a fraction pa of the nests (int(pa × n_nests) of them) → replace with random new nests

    ↓


  Update global best_nest

  ↑

Repeat until n_iter iterations are done

  ↓

Return best_nest and best_fitness

  ↓

END
