In [1]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as func
import torch
from math import floor, sqrt
from torchvision import datasets
import torchvision.transforms as transforms
from torch import optim
import time
import math
import json
import matplotlib.pyplot as plt

In [2]:
# Select the compute device: use CUDA when a GPU is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [3]:
# Data preparation
# Convert each image to a tensor, then normalise with mean 0.1307 and std 0.3081
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# MNIST train and test splits, downloaded into ./data when not already present
train_dataset = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Test batches keep dataset order; training batches are reshuffled every epoch
testloader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=512, shuffle=False
)
trainloader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=512, shuffle=True
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 59.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 2.00MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 15.2MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.1MB/s]


In [4]:
# Custom activation function: x * erf(x/sqrt(2))
# SGELU - Symmetric Gaussian Error Linear Unit
class SGELU(nn.Module):
    """Parameter-free activation computing ``x * erf(x / sqrt(2))`` element-wise."""

    def __init__(self):
        super().__init__()
        # Constant divisor, computed once at construction time
        self.sqrt2 = sqrt(2)

    def forward(self, x):
        """Return ``x * erf(x / sqrt(2))`` for the input tensor ``x``."""
        gate = torch.erf(torch.div(x, self.sqrt2))
        return torch.mul(x, gate)


# Learnable custom activation: alpha * x * erf(x/sqrt(2))
class AdaSGELU(nn.Module):
    """SGELU with a learnable scalar gain: ``alpha * x * erf(x / sqrt(2))``."""

    def __init__(self):
        super().__init__()
        # Constant divisor, computed once at construction time
        self.sqrt2 = sqrt(2)
        # Single trainable scale factor, initialised to 1
        self.alpha = nn.Parameter(torch.ones(1))

    def forward(self, x):
        """Return ``alpha * x * erf(x / sqrt(2))`` for the input tensor ``x``."""
        scaled = self.alpha * x
        gate = torch.erf(x / self.sqrt2)
        return scaled * gate

In [5]:
class MnistAutoencoderConv(nn.Module):
    """Convolutional autoencoder for single-channel images such as 28x28 MNIST.

    The encoder applies three 3x3 convolutions (1 -> 16 -> 32 -> 64 channels),
    each followed by batch norm and an activation, with a 2x2 max-pool after
    the first two; a 28x28 input therefore becomes a 64x7x7 code. The decoder
    mirrors this with size-preserving transposed convolutions and two
    nearest-neighbour 2x upsamplings back to a single 28x28 channel.

    The output layer is linear (no squashing non-linearity): the notebook
    trains against inputs standardised with ``Normalize((0.1307,), (0.3081,))``,
    whose values fall outside ``[0, 1]``, so a sigmoid output could never
    reproduce them. This also matches ``MnistAutoencoderDense``.

    Args:
        activation: Zero-argument callable (typically an ``nn.Module`` class)
            returning a fresh activation module on every call.
    """

    def __init__(self, activation):
        super().__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            activation(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            activation(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            activation(),
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.BatchNorm2d(64),
            activation(),
            nn.ConvTranspose2d(64, 32, 3, padding=1),
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.BatchNorm2d(32),
            activation(),
            nn.ConvTranspose2d(32, 16, 3, padding=1),
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.BatchNorm2d(16),
            activation(),
            # Final layer stays linear so the reconstruction can cover the
            # full range of the standardised targets.
            nn.ConvTranspose2d(16, 1, 3, padding=1),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same shape as ``x``)."""
        return self.decoder(self.encoder(x))

    def encode(self, x):
        """Return the encoder output (the latent feature map) for ``x``."""
        return self.encoder(x)


In [6]:
class MnistAutoencoderDense(nn.Module):
    """Fully-connected autoencoder for 28x28 single-channel images.

    The encoder flattens the input and maps 784 -> 256 -> 128 -> 64 features;
    the decoder maps 64 -> 128 -> 256 -> 784 and the result is reshaped to
    ``(batch, 1, 28, 28)``. Hidden linear layers are followed by batch norm
    and an activation; the code layer and the output layer are linear.

    Args:
        activation: Zero-argument callable (typically an ``nn.Module`` class)
            returning a fresh activation module on every call.
    """

    def __init__(self, activation):
        super().__init__()

        # Encoder: flatten, then two hidden stages and a linear 64-d code
        encoder_layers = [
            nn.Flatten(),
            nn.Linear(28 * 28, 256),
            nn.BatchNorm1d(256),
            activation(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            activation(),
            nn.Linear(128, 64),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: mirror of the encoder, ending in a linear 784-d output
        decoder_layers = [
            nn.Linear(64, 128),
            nn.BatchNorm1d(128),
            activation(),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            activation(),
            nn.Linear(256, 28 * 28),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` shaped ``(batch, 1, 28, 28)``."""
        latent = self.encoder(x)
        flat_reconstruction = self.decoder(latent)
        return flat_reconstruction.view(x.size(0), 1, 28, 28)

    def encode(self, x):
        """Return the encoder output (the 64-feature code) for ``x``."""
        return self.encoder(x)

In [7]:
# Training function
def train_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Every batch is reconstructed by ``model`` and scored against its own
    input; the labels yielded by the loader are ignored. The returned value
    is the sample-weighted average of the per-batch losses, so a smaller
    final batch does not count as much as a full one.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable yielding ``(data, label)`` pairs of batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor. Assumed to be a per-batch mean (the default for
            ``nn.MSELoss()``); with a summed loss the weighting would be off.

    Returns:
        float: Average loss per sample over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    # Move batches to wherever the model lives rather than relying on a
    # notebook-level global; parameter-free models leave the data in place.
    first_param = next(model.parameters(), None)
    loss_sum = 0.0
    sample_count = 0
    for data, _ in loader:  # Labels are not needed for an autoencoder
        if first_param is not None:
            data = data.to(first_param.device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, data)
        loss.backward()
        optimizer.step()
        batch_len = data.size(0)
        loss_sum += loss.item() * batch_len
        sample_count += batch_len
    if sample_count == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / sample_count


# Evaluation function
def evaluate(model, loader, criterion):
    """Compute the mean reconstruction loss of ``model`` over ``loader``.

    Runs in evaluation mode with gradient tracking disabled. Every batch is
    scored against its own input; labels are ignored. The result is the
    sample-weighted average of the per-batch losses, so a smaller final
    batch does not count as much as a full one.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(data, label)`` pairs of batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor. Assumed to be a per-batch mean (the default for
            ``nn.MSELoss()``); with a summed loss the weighting would be off.

    Returns:
        float: Average loss per sample over the whole loader.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    # Move batches to wherever the model lives rather than relying on a
    # notebook-level global; parameter-free models leave the data in place.
    first_param = next(model.parameters(), None)
    loss_sum = 0.0
    sample_count = 0
    with torch.no_grad():
        for data, _ in loader:
            if first_param is not None:
                data = data.to(first_param.device)
            output = model(data)
            loss = criterion(output, data)
            batch_len = data.size(0)
            loss_sum += loss.item() * batch_len
            sample_count += batch_len
    if sample_count == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / sample_count


# Benchmark function
def benchmark_activation(activation_name, model, activation_class, epochs=20, seed=None):
    """Train one autoencoder variant and record its per-epoch losses.

    A fresh network is built as ``model(activation_class)``, trained with
    Adam and an MSE reconstruction loss on the notebook's ``trainloader``,
    and evaluated on ``testloader`` after every epoch.

    Args:
        activation_name: Label used in log lines and stored in the results.
        model: Model class (or factory) called with ``activation_class`` to
            build the network.
        activation_class: Zero-argument callable producing activation modules.
        epochs: Number of training epochs.
        seed: Optional integer passed to ``torch.manual_seed`` before the
            network is built, so different activations can be compared from
            the same random state. ``None`` leaves the RNG untouched.

    Returns:
        dict: Keys ``activation``, ``model_type``, ``epochs``, ``train_loss``,
        ``test_loss`` (lists with one entry per epoch) and ``training_time``
        (wall-clock seconds for the whole loop, evaluation included).
    """
    print(f"\n{'=' * 60}")
    print(f"Training Autoencoder with {activation_name}")
    print(f"{'=' * 60}")

    if seed is not None:
        torch.manual_seed(seed)

    # `model` is the class; keep the built instance under its own name so the
    # parameter is not shadowed.
    net = model(activation_class).to(device)
    optimizer = optim.Adam(net.parameters())
    criterion = nn.MSELoss()  # Reconstruction loss

    results = {
        'activation': activation_name,
        'model_type': 'autoencoder',
        'epochs': [],
        'train_loss': [],
        'test_loss': []
    }

    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments during a long run.
    start_time = time.perf_counter()

    for epoch in range(epochs):
        train_loss = train_epoch(net, trainloader, optimizer, criterion)
        test_loss = evaluate(net, testloader, criterion)

        results['epochs'].append(epoch + 1)
        results['train_loss'].append(train_loss)
        results['test_loss'].append(test_loss)

        print(f"Epoch {epoch + 1:2d}/{epochs} | "
              f"Train Loss: {train_loss:.6f} | "
              f"Test Loss: {test_loss:.6f}")

    elapsed_time = time.perf_counter() - start_time
    results['training_time'] = elapsed_time
    print(f"\nTraining completed in {elapsed_time:.2f} seconds")
    # With epochs=0 there is no loss to report; skip instead of raising.
    if results['test_loss']:
        print(f"Final Test Loss: {results['test_loss'][-1]:.6f}")

    return results

In [9]:
# Registry of activations to benchmark: display name -> module class
activations = dict(
    ReLU=nn.ReLU,
    ELU=nn.ELU,
    GELU=nn.GELU,
    SGELU=SGELU,
    AdaSGELU=AdaSGELU,
)

# Load existing results (Dense) if they exist
output_file = 'autoencoder_benchmark_results_dense.json'
try:
    with open(output_file, 'r') as f:
        all_results = json.load(f)
    print(f"Loaded existing results from {output_file}")
    print(f"Already trained: {list(all_results.keys())}")
except FileNotFoundError:
    all_results = {}
    print("No existing results found. Will train all activations.")

# Only train activations that haven't been trained yet
for name, activation_class in activations.items():
    if name in all_results:
        print(f"\nSkipping {name} (already trained)")
        continue

    results = benchmark_activation(name, MnistAutoencoderDense, activation_class, epochs=20)
    all_results[name] = results

    # Checkpoint after every activation: an interrupted run keeps what it has
    # finished, and the skip logic above resumes from there on the next run.
    with open(output_file, 'w') as f:
        json.dump(all_results, f, indent=4)

# Final write so the results file exists even when nothing had to be trained
with open(output_file, 'w') as f:
    json.dump(all_results, f, indent=4)

print(f"\n{'=' * 60}")
print("Autoencoder Benchmark Summary")
print(f"{'=' * 60}")
for name, results in all_results.items():
    print(f"{name:20s} | Final Test Loss: {results['test_loss'][-1]:.6f} | "
          f"Time: {results['training_time']:.2f}s")

print(f"\nResults saved to {output_file}")

No existing results found. Will train all activations.

Training Autoencoder with ReLU
Epoch  1/20 | Train Loss: 0.400350 | Test Loss: 0.252095


KeyboardInterrupt: 

In [10]:
# Load existing results (Conv) if they exist
output_file = 'autoencoder_benchmark_results_conv.json'
try:
    with open(output_file, 'r') as f:
        all_results = json.load(f)
    print(f"Loaded existing results from {output_file}")
    print(f"Already trained: {list(all_results.keys())}")
except FileNotFoundError:
    all_results = {}
    print("No existing results found. Will train all activations.")

# Only train activations that haven't been trained yet
for name, activation_class in activations.items():
    if name in all_results:
        print(f"\nSkipping {name} (already trained)")
        continue

    results = benchmark_activation(name, MnistAutoencoderConv, activation_class, epochs=20)
    all_results[name] = results

    # Checkpoint after every activation: an interrupted run keeps what it has
    # finished, and the skip logic above resumes from there on the next run.
    with open(output_file, 'w') as f:
        json.dump(all_results, f, indent=4)

# Final write so the results file exists even when nothing had to be trained
with open(output_file, 'w') as f:
    json.dump(all_results, f, indent=4)

print(f"\n{'=' * 60}")
print("Autoencoder Benchmark Summary")
print(f"{'=' * 60}")
for name, results in all_results.items():
    print(f"{name:20s} | Final Test Loss: {results['test_loss'][-1]:.6f} | "
          f"Time: {results['training_time']:.2f}s")

print(f"\nResults saved to {output_file}")

No existing results found. Will train all activations.

Training Autoencoder with ReLU


KeyboardInterrupt: 