# In [None]:
# NOTE: This code was used for the experiments as-is. It was run in Environment 2 (Google Colab), as described in the paper.
# Naming and structure may not follow programming best practices.
# The focus is on reproducibility.
# This code was developed for internal experimentation and contains hardcoded values for various test cases.
# It was not refactored for modularity, but the logic matches the experiments reported in the paper.


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import numpy as np
import torchvision.transforms as transforms
from torchvision.models import resnet34
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch.nn.functional as F
import random
def set_all_seeds(seed):
    """Seed the PyTorch, NumPy and Python RNGs and make cuDNN deterministic.

    Args:
        seed: Value passed unchanged to every seeding call. It is also
            printed so the seed in use shows up in the run output.
    """
    # PyTorch generators: CPU first, then every CUDA device.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Request deterministic cuDNN kernels and disable benchmark mode.
    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False

    # NumPy and the standard-library generator.
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(seed)

    print(seed)
#######################################################
# Seed selection: exactly one call below is active per run. The
# commented-out calls are the other seed values kept in this file as
# toggles (40-47).
#######################################################
#set_all_seeds(40)
#set_all_seeds(41)
set_all_seeds(42)
#set_all_seeds(43)
#set_all_seeds(44)
#set_all_seeds(45)
#set_all_seeds(46)
#set_all_seeds(47)
#######################################################

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Per-channel normalization statistics shared by the training and test
# pipelines.
_NORM_MEAN = [0.5071, 0.4865, 0.4409]
_NORM_STD = [0.2673, 0.2564, 0.2762]

# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, then tensor conversion and normalization.
transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Test pipeline: no augmentation, only tensor conversion and the same
# normalization as in training.
transformtest = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Lookup table from fine label to coarse label: the entry at position i is
# the coarse label (0-19) of fine label i (0-99). Ten entries per row; the
# trailing comment gives the fine-label range covered by that row.
fine_to_coarse = [
    4, 1, 14, 8, 0, 6, 7, 7, 18, 3,           # 0-9
    3, 14, 9, 18, 7, 11, 3, 9, 7, 11,         # 10-19
    6, 11, 5, 10, 7, 6, 13, 15, 3, 15,        # 20-29
    0, 11, 1, 10, 12, 14, 16, 9, 11, 5,       # 30-39
    5, 19, 8, 8, 15, 13, 14, 17, 18, 10,      # 40-49
    16, 4, 17, 4, 2, 0, 17, 4, 18, 17,        # 50-59
    10, 3, 2, 12, 12, 16, 12, 1, 9, 19,       # 60-69
    2, 10, 0, 1, 16, 12, 9, 13, 15, 13,       # 70-79
    16, 19, 2, 4, 6, 19, 5, 5, 8, 19,         # 80-89
    18, 1, 2, 15, 6, 0, 17, 8, 14, 13,        # 90-99
]

# CIFAR-100 dataset wrapper that yields coarse labels instead of fine labels.
class CIFAR100Coarse(torchvision.datasets.CIFAR100):
    """CIFAR-100 dataset whose samples carry the coarse label.

    Each item comes from the parent class; the fine label it returns is
    translated through the module-level ``fine_to_coarse`` table.
    """

    def __getitem__(self, index):
        """Return ``(image, coarse_label)`` for the sample at ``index``."""
        image, fine = super().__getitem__(index)
        return image, fine_to_coarse[fine]

# Build the coarse-label train and test datasets (downloaded into ./data when
# needed). The training set uses the augmenting `transform`; the test set uses
# `transformtest`.
train_data_coarse = CIFAR100Coarse(root='./data', train=True, download=True, transform=transform)
test_data_coarse = CIFAR100Coarse(root='./data', train=False, download=True, transform=transformtest)

# Create DataLoaders for batching: batches of 64, shuffled for training and in
# fixed order for testing.
train_loader = DataLoader(train_data_coarse, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data_coarse, batch_size=64, shuffle=False)

# Preview: print the shape and coarse labels of the first training batch, then stop.
# NOTE(review): this pass presumably draws from the global RNG (shuffling and
# random augmentation) before the model is created, so removing it would likely
# change the seeded results - confirm before deleting.
for images, coarse_labels in train_loader:
    print(f'Batch of images shape: {images.shape}')
    print(f'Batch of coarse labels: {coarse_labels}')
    break

import torch.nn.functional as F
class idle(nn.Module):
    """Element-wise activation that stands in for ReLU in the model.

    The active variant is the one labelled MishPlus:
    ``x * tanh(softplus(x)) + 0.025 * x * exp(-0.5 * x**2)``.
    The other variants are kept as commented-out toggles inside ``forward``.
    """

    def forward(self, x):
        """Apply the active activation to ``x`` element-wise."""
        # Alternative activations (uncomment exactly one return to switch):
        #return x * torch.sigmoid(x)  #Swish
        #return 1.25*x * torch.sigmoid(x) #ESwish(UP)
        #return x*(torch.sigmoid(x)+0.125*torch.exp(-0.5*x**2))  #SwishPlus(UP)
        #return x * torch.tanh(F.softplus(x))   #Mish
        #return x * torch.tanh(F.softplus(0.9454113159514*x)/0.9454113159514)  #PMish(UP)
        #return torch.relu(x) #ReLU

        # MishPlus(UP): the Mish term plus a small Gaussian-weighted linear term.
        mish_term = x * torch.tanh(F.softplus(x))
        gaussian_term = 0.025 * x * torch.exp(-0.5 * x ** 2)
        return mish_term + gaussian_term

# ResNet-34 backbone adapted to 32x32 inputs, with every ReLU replaced by `idle`
class CustomResNet18(nn.Module):
    """ResNet classifier for 32x32 images using the custom ``idle`` activation.

    NOTE: despite the class name, the backbone is built with torchvision's
    ``resnet34``; the name is kept so existing references keep working.

    Args:
        num_classes: Size of the final linear layer's output.
        pretrained: Forwarded to ``resnet34``. When true, the loaded backbone
            weights are frozen, while the replacement first convolution and
            the new classification head stay trainable.

    NOTE(review): newer torchvision releases prefer a ``weights=`` argument
    over ``pretrained=`` - confirm against the installed version.
    """

    def __init__(self, num_classes=20, pretrained=False):
        super().__init__()
        self.resnet = resnet34(pretrained=pretrained)

        if pretrained:
            # Freeze only the weights that were actually loaded. This has to
            # happen before the new stem and head are attached: both start
            # from random initialization, and freezing them would leave a
            # random layer in the network that can never be trained.
            for param in self.resnet.parameters():
                param.requires_grad = False

        # Replace the first conv layer with a 3x3, stride-2 one for 32x32 images.
        self.resnet.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)

        # New classification head sized for `num_classes`.
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(in_features, num_classes)

        self.replace_activations(self.resnet)

    def replace_activations(self, module):
        """Recursively replace every ``nn.ReLU`` under ``module`` with ``idle``.

        The replacement is done in place on ``module`` and all of its
        descendants, including those inside ``nn.Sequential`` containers.
        """
        for name, child in module.named_children():
            if isinstance(child, nn.ReLU):
                # setattr keeps the child's position and also works for
                # Sequential containers, whether their children are numbered
                # or named.
                setattr(module, name, idle())
            else:
                # Children without sub-modules make this call a no-op.
                self.replace_activations(child)

    def forward(self, x):
        """Run ``x`` through the adapted ResNet and return its output."""
        return self.resnet(x)

# Verification function to check if replacement worked
def verify_activation_replacement(model):
    """Check that ``model`` contains no ``nn.ReLU`` and report activation counts.

    Walks every module of the model that was passed in (not a module-level
    global), counts ``nn.ReLU`` and ``idle`` instances, and prints both counts.

    Args:
        model: The ``nn.Module`` to inspect.

    Returns:
        True if at least one ``idle`` activation was found, otherwise False.

    Raises:
        AssertionError: If any ``nn.ReLU`` module is still present. It is
            raised explicitly so the check also runs under ``python -O``.
    """
    relu_count = 0
    idle_count = 0
    for layer in model.modules():
        if isinstance(layer, nn.ReLU):
            relu_count += 1
        if isinstance(layer, idle):
            idle_count += 1

    print(f"Found {relu_count} ReLU activations and {idle_count} Idle activations")
    if relu_count != 0:
        raise AssertionError("Some ReLU activations were not replaced!")
    return idle_count > 0

def calculate_batch_accuracy(outputs, labels):
    """Return the fraction of one batch whose highest-scoring class equals the label.

    Args:
        outputs: Score tensor; the predicted class is the index of the
            maximum along dimension 1.
        labels: Tensor of target class indices, compared element-wise with
            the predictions.

    Returns:
        Number of matching predictions divided by ``labels.size(0)``, as a float.
    """
    top_class = torch.max(outputs, dim=1).indices
    num_correct = top_class.eq(labels).sum().item()
    batch_size = labels.size(0)
    return num_correct / batch_size

def calculate_accuracy(model, dataloader, device=None):
    """Return the sample-level accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and left in it. Gradients are
    disabled while the batches are evaluated.

    Args:
        model: The ``nn.Module`` to evaluate.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not depend on a module-level
            ``device`` variable.

    Returns:
        Number of correct top-1 predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

# Training loop with per-epoch metrics tracking.
# Builds the model, optimizer, LR schedule and loss, then alternates one
# training pass and one test pass per epoch, appending the epoch-level loss
# and accuracy to the four lists below.
model_idle = CustomResNet18().to(device)
optimizer = optim.SGD(model_idle.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer,
                                    step_size=10,    # Decay LR every 10 epochs
                                    gamma=0.1)
criterion = nn.CrossEntropyLoss()
# Sanity check before training: fails if any nn.ReLU is still in the model.
verify_activation_replacement(model_idle)
# Per-epoch history, one entry appended per epoch.
train_accuracies = []
test_accuracies = []
train_losses = []
test_losses = []
num_epochs=40
for epoch in range(num_epochs):
    # Training phase
    model_idle.train()
    running_train_loss = 0.0
    running_train_acc = 0.0
    num_train_batches = 0

    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs} (Training)'):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model_idle(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Calculate batch accuracy while in training mode. `outputs` was
        # computed before this step's weight update.
        batch_acc = calculate_batch_accuracy(outputs, labels)

        running_train_loss += loss.item()
        running_train_acc += batch_acc
        num_train_batches += 1

    # Advance the LR schedule once per epoch, after all optimizer steps.
    scheduler.step()
    # NOTE(review): epoch metrics are the unweighted mean of per-batch values,
    # so a smaller final batch counts as much as a full one. The result can
    # differ slightly from sample-level loss/accuracy - confirm this matches
    # how the reported numbers were computed.
    avg_train_loss = running_train_loss / num_train_batches
    avg_train_acc = running_train_acc / num_train_batches

    # Testing phase: eval mode, no gradient tracking.
    model_idle.eval()
    running_test_loss = 0.0
    running_test_acc = 0.0
    num_test_batches = 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc=f'Epoch {epoch + 1}/{num_epochs} (Testing)'):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model_idle(inputs)
            loss = criterion(outputs, labels)

            batch_acc = calculate_batch_accuracy(outputs, labels)

            running_test_loss += loss.item()
            running_test_acc += batch_acc
            num_test_batches += 1

    # Same unweighted per-batch averaging as in the training phase.
    avg_test_loss = running_test_loss / num_test_batches
    avg_test_acc = running_test_acc / num_test_batches

    # Store metrics
    train_accuracies.append(avg_train_acc)
    test_accuracies.append(avg_test_acc)
    train_losses.append(avg_train_loss)
    test_losses.append(avg_test_loss)

    # Report this epoch's metrics.
    print(f'Epoch {epoch + 1}/{num_epochs}')
    print(f'Training Loss: {avg_train_loss:.4f}, Training Accuracy: {avg_train_acc:.4f}')
    print(f'Test Loss: {avg_test_loss:.4f}, Test Accuracy: {avg_test_acc:.4f}')