In [None]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as func
import torch
from math import floor, sqrt
from torchvision import datasets
import torchvision.transforms as transforms
from torch import optim
import time
import math
import json
import matplotlib.pyplot as plt

In [None]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Data preparation
# Images become tensors and are then normalised with mean 0.1307 / std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Build the train split first, then the test split (each is downloaded to ./data if absent).
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training loader shuffles; both use batches of 512 samples.
trainloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=512)
testloader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=512)

100%|██████████| 9.91M/9.91M [00:00<00:00, 16.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 459kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.25MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.4MB/s]


In [None]:
# SGELU - Symmetric Gaussian Error Linear Unit
# Element-wise activation: f(x) = x * erf(x / sqrt(2))
class SGELU(nn.Module):
    """Parameter-free activation computing ``x * erf(x / sqrt(2))`` element-wise."""

    def __init__(self):
        super().__init__()
        # Cached once so the constant is not recomputed on every forward call.
        self.sqrt2 = sqrt(2)

    def forward(self, x):
        """Return ``x * erf(x / sqrt(2))`` for the input tensor ``x``."""
        gate = torch.erf(x / self.sqrt2)
        return x * gate


# Learnable variant of SGELU: f(x) = alpha * x * erf(x / sqrt(2))
class AdaSGELU(nn.Module):
    """SGELU scaled by a single trainable factor ``alpha`` (initialised to 1)."""

    def __init__(self):
        super().__init__()
        self.sqrt2 = sqrt(2)
        # One-element parameter, so it is picked up by the optimiser with the rest of the model.
        self.alpha = nn.Parameter(torch.ones(1))

    def forward(self, x):
        """Return ``alpha * x * erf(x / sqrt(2))`` element-wise."""
        scaled_input = self.alpha * x
        gate = torch.erf(x / self.sqrt2)
        return scaled_input * gate

In [None]:
# Model definition
class MnistTest(nn.Module):
    """One-hidden-layer classifier used to compare activation functions.

    Pipeline: Flatten -> Linear(784, 128) -> BatchNorm1d -> activation
    -> Dropout(0.5) -> Linear(128, 10).

    Args:
        activation: Zero-argument callable (typically an ``nn.Module`` class)
            that builds the activation layer placed after batch normalisation.
    """

    def __init__(self, activation):
        super().__init__()
        hidden_units, num_classes = 128, 10
        stack = [
            nn.Flatten(),
            nn.Linear(28 * 28, hidden_units),
            nn.BatchNorm1d(hidden_units),
            activation(),
            nn.Dropout(0.5),
            nn.Linear(hidden_units, num_classes),
        ]
        self.layers = nn.Sequential(*stack)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        return self.layers(x)

    def predict(self, x):
        """Return the index of the most probable class for every sample in ``x``."""
        logits = self.layers(x)
        probabilities = self.softmax(logits)
        return probabilities.argmax(dim=1)

In [None]:
# Training function
def train_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` and return the mean training loss.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimiser that updates ``model``'s parameters.
        criterion: Loss callable ``criterion(output, target)``. It is expected
            to return the batch-mean loss (the default ``reduction='mean'``),
            because each batch loss is weighted by its number of samples.

    Returns:
        The loss averaged over all samples seen during the epoch.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    # Use the device the model actually lives on; only a parameter-less model
    # falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    model.train()
    loss_sum = 0.0
    n_samples = 0
    for data, target in loader:
        data, target = data.to(run_device), target.to(run_device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the average.
        batch_size = target.size(0)
        loss_sum += loss.item() * batch_size
        n_samples += batch_size

    if n_samples == 0:
        raise ValueError("train_epoch received an empty loader")
    return loss_sum / n_samples


# Evaluation function
def evaluate(model, loader, criterion):
    """Compute the mean loss and the accuracy of ``model`` over ``loader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable ``criterion(output, target)``. It is expected
            to return the batch-mean loss (the default ``reduction='mean'``),
            because each batch loss is weighted by its number of samples.

    Returns:
        ``(mean_loss, accuracy)`` where both values are averaged over all
        samples and ``accuracy`` lies in ``[0, 1]``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    # Use the device the model actually lives on; only a parameter-less model
    # falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(run_device), target.to(run_device)
            output = model(data)
            batch_size = target.size(0)

            # Weight by batch size so a smaller final batch does not skew the average.
            loss_sum += criterion(output, target).item() * batch_size

            # The arg-max of the logits is the predicted class (softmax is
            # monotonic), so reuse `output` instead of a second forward pass.
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("evaluate received an empty loader")
    return loss_sum / total, correct / total

In [None]:
# Benchmark function
def benchmark_activation(activation_name, activation_class, epochs=20):
    """Train a fresh ``MnistTest`` with one activation and record its metrics.

    The model is optimised with Adam (default hyper-parameters) and
    cross-entropy loss on ``trainloader``, and evaluated on ``testloader``
    after every epoch. Progress is printed as training proceeds.

    Args:
        activation_name: Label stored in the results and used in the printed log.
        activation_class: Zero-argument callable that builds the activation layer.
        epochs: Number of training epochs.

    Returns:
        A dict with the keys ``'activation'``, ``'epochs'``, ``'train_loss'``,
        ``'test_loss'``, ``'test_accuracy'`` (one list entry per epoch) and
        ``'training_time'`` (seconds for the whole loop, which includes the
        per-epoch evaluation).
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Training with {activation_name}")
    print(f"{banner}")

    model = MnistTest(activation_class).to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    results = {
        'activation': activation_name,
        'epochs': [],
        'train_loss': [],
        'test_loss': [],
        'test_accuracy': []
    }

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    for epoch in range(epochs):
        train_loss = train_epoch(model, trainloader, optimizer, criterion)
        test_loss, test_acc = evaluate(model, testloader, criterion)

        results['epochs'].append(epoch + 1)
        results['train_loss'].append(train_loss)
        results['test_loss'].append(test_loss)
        results['test_accuracy'].append(test_acc)

        print(f"Epoch {epoch + 1:2d}/{epochs} | "
              f"Train Loss: {train_loss:.4f} | "
              f"Test Loss: {test_loss:.4f} | "
              f"Test Acc: {test_acc:.4f}")

    elapsed_time = time.perf_counter() - start_time
    results['training_time'] = elapsed_time
    print(f"\nTraining completed in {elapsed_time:.2f} seconds")
    # With epochs <= 0 nothing was recorded, so there is no final accuracy to report.
    if results['test_accuracy']:
        print(f"Final Test Accuracy: {results['test_accuracy'][-1]:.4f}")

    return results

In [None]:
# Activation functions to compare: display name -> layer class
activations = {
    'ReLU': nn.ReLU,
    'ELU': nn.ELU,
    'GELU': nn.GELU,
    'SGELU': SGELU,
    'AdaSGELU': AdaSGELU,
}

# Every run starts from the same RNG state so the activations are compared
# on identical initial weights and the same shuffling sequence.
SEED = 42

# Load existing results if they exist
output_file = 'activation_benchmark_results.json'
try:
    with open(output_file, 'r') as f:
        all_results = json.load(f)
    print(f"Loaded existing results from {output_file}")
    print(f"Already trained: {list(all_results.keys())}")
except FileNotFoundError:
    all_results = {}
    print("No existing results found. Will train all activations.")

# Only train activations that haven't been trained yet
for name, activation_class in activations.items():
    if name in all_results:
        print(f"\nSkipping {name} (already trained)")
        continue

    torch.manual_seed(SEED)
    results = benchmark_activation(name, activation_class, epochs=20)
    all_results[name] = results

    # Persist after every finished run: the skip logic above can only resume
    # an interrupted session if completed results are already on disk.
    with open(output_file, 'w') as f:
        json.dump(all_results, f, indent=4)

No existing results found. Will train all activations.

Training with ReLU
Epoch  1/20 | Train Loss: 0.6172 | Test Loss: 0.2708 | Test Acc: 0.9263
Epoch  2/20 | Train Loss: 0.3153 | Test Loss: 0.2084 | Test Acc: 0.9424
Epoch  3/20 | Train Loss: 0.2636 | Test Loss: 0.1790 | Test Acc: 0.9504
Epoch  4/20 | Train Loss: 0.2359 | Test Loss: 0.1603 | Test Acc: 0.9536
Epoch  5/20 | Train Loss: 0.2150 | Test Loss: 0.1445 | Test Acc: 0.9595
Epoch  6/20 | Train Loss: 0.1975 | Test Loss: 0.1374 | Test Acc: 0.9604
Epoch  7/20 | Train Loss: 0.1841 | Test Loss: 0.1269 | Test Acc: 0.9647
Epoch  8/20 | Train Loss: 0.1767 | Test Loss: 0.1223 | Test Acc: 0.9660
Epoch  9/20 | Train Loss: 0.1681 | Test Loss: 0.1137 | Test Acc: 0.9678
Epoch 10/20 | Train Loss: 0.1576 | Test Loss: 0.1112 | Test Acc: 0.9678
Epoch 11/20 | Train Loss: 0.1520 | Test Loss: 0.1073 | Test Acc: 0.9700
Epoch 12/20 | Train Loss: 0.1457 | Test Loss: 0.1056 | Test Acc: 0.9698
Epoch 13/20 | Train Loss: 0.1385 | Test Loss: 0.1022 | Test A

In [None]:
# Write the collected benchmark results to disk as indented JSON
output_file = 'activation_benchmark_results.json'
with open(output_file, 'w') as f:
    json.dump(all_results, fp=f, indent=4)

# Summary header: blank line, rule, title, rule
rule = '=' * 60
print(f"\n{rule}", "Benchmark Summary", rule, sep="\n")

# One line per activation: last recorded accuracy and total training time
for name, results in all_results.items():
    last_acc = results['test_accuracy'][-1]
    elapsed = results['training_time']
    print(f"{name:20s} | Final Acc: {last_acc:.4f} | Time: {elapsed:.2f}s")

print(f"\nResults saved to {output_file}")


Benchmark Summary
ReLU                 | Final Acc: 0.9738 | Time: 267.47s
ELU                  | Final Acc: 0.9685 | Time: 261.74s
GELU                 | Final Acc: 0.9737 | Time: 257.96s
SGELU                | Final Acc: 0.9782 | Time: 258.01s
AdaSGELU             | Final Acc: 0.9782 | Time: 258.83s

Results saved to activation_benchmark_results.json


Visualizing results

In [None]:
# Load the benchmark results
results_file = 'activation_benchmark_results.json'
try:
    with open(results_file, 'r') as f:
        results = json.load(f)
except FileNotFoundError as err:
    # Same exception type as before, but with a hint on how to produce the file.
    raise FileNotFoundError(
        f"{results_file!r} not found - run the benchmark and save cells first"
    ) from err

# Set up the plot style
plt.style.use('seaborn-v0_8-darkgrid')
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', 'black', 'pink']

# Right-hand x limit follows the recorded epochs instead of a fixed 20;
# the floor of 2 avoids a degenerate (1, 1) axis range.
last_epoch = max(
    (max(run['epochs']) for run in results.values() if run['epochs']),
    default=1,
)
x_max = max(last_epoch, 2)


def _plot_metric(ax, runs, metric, ylabel, title, legend_loc, palette, x_limit, scale=1):
    """Draw one line per activation for ``runs[...][metric]`` on ``ax`` and label it.

    ``scale`` multiplies every value before plotting (e.g. 100 for percentages).
    """
    for idx, (label, run) in enumerate(runs.items()):
        values = [value * scale for value in run[metric]]
        # Cycle through the palette so more runs than colours cannot raise IndexError.
        ax.plot(run['epochs'], values, marker='o', linewidth=2,
                markersize=4, label=label, color=palette[idx % len(palette)])

    ax.set_xlabel('Epoch', fontsize=12, fontweight='bold')
    ax.set_ylabel(ylabel, fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(loc=legend_loc, fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(1, x_limit)


# Create figure with 2 subplots
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
ax1, ax2 = axes

# Plot 1: Test Accuracy (converted to percentage)
_plot_metric(ax1, results, 'test_accuracy', 'Test Accuracy (%)',
             'Test Accuracy Comparison', 'lower right', colors, x_max, scale=100)

# Plot 2: Training Loss
_plot_metric(ax2, results, 'train_loss', 'Training Loss',
             'Training Loss Comparison', 'upper right', colors, x_max)

plt.tight_layout()
plt.savefig('activation_comparison.png', dpi=300, bbox_inches='tight')
print("Plot saved as 'activation_comparison.png'")
plt.show()

# Print final statistics
print("\n" + "="*60)
print("Final Performance Summary")
print("="*60)
print(f"{'Activation':<20} {'Final Acc (%)':<15} {'Final Train Loss':<20} {'Time (s)'}")
print("-"*60)
for name, data in results.items():
    final_acc = data['test_accuracy'][-1] * 100
    final_train_loss = data['train_loss'][-1]
    train_time = data['training_time']
    print(f"{name:<20} {final_acc:<15.2f} {final_train_loss:<20.4f} {train_time:.2f}")

# Find best performing activation (highest final test accuracy);
# skipped when the results file holds no runs, where max() would raise.
if results:
    best_activation = max(results.items(), key=lambda x: x[1]['test_accuracy'][-1])
    print("\n" + "="*60)
    print(f"Best Performing Activation: {best_activation[0]}")
    print(f"Final Test Accuracy: {best_activation[1]['test_accuracy'][-1] * 100:.2f}%")
    print("="*60)

FileNotFoundError: [Errno 2] No such file or directory: 'activation_benchmark_results.json'

Autoencoder test