In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
from torch.optim import Adam
from torchvision.datasets import QMNIST

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 50
batch_size = 64
learning_rate = 0.001

# QMNIST dataset transforms
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize to a smaller dimension (e.g., 32x32)
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # For grayscale images
])

# QMNIST dataset
train_dataset = QMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = QMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Custom Initialization: He for shallow layers, Orthogonal for deep layers
def custom_init(model):
    relu_gain = nn.init.calculate_gain('relu')
    layer_idx = 0
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            if layer_idx < 5:  # Shallow layers (first 5 layers)
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            else:  # Deeper layers (after 5 layers)
                nn.init.orthogonal_(m.weight, gain=relu_gain)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            layer_idx += 1

# Custom 9-Layer CNN Architecture
class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()

        self.layer1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)   # Output: (32x32)
        self.layer2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # Output: (32x32)
        self.layer3 = nn.Conv2d(64, 128, kernel_size=3, padding=1) # Output: (32x32)
        self.layer4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)# Output: (32x32)
        self.layer5 = nn.Conv2d(256, 512, kernel_size=3, padding=1)# Output: (32x32)

        self.pool = nn.MaxPool2d(2, 2)  # Reduces H and W by half

        # Pass dummy input to calculate output shape
        dummy_input = torch.zeros(1, 1, 32, 32)
        x = self.pool(torch.relu(self.layer2(torch.relu(self.layer1(dummy_input)))))
        x = self.pool(torch.relu(self.layer4(torch.relu(self.layer3(x)))))
        x = torch.relu(self.layer5(x))
        self.flattened_size = x.view(1, -1).shape[1]  # dynamic calc

        self.fc1 = nn.Linear(self.flattened_size, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

        custom_init(self)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = self.pool(torch.relu(self.layer2(x)))
        x = torch.relu(self.layer3(x))
        x = self.pool(torch.relu(self.layer4(x)))
        x = torch.relu(self.layer5(x))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Initialize model and move to device
model = CustomCNN().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)  # L2 Regularization (weight decay)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

# Train the model
train_acc_list, val_acc_list = [], []
train_loss_list, val_loss_list = [], []
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    total, correct, train_loss = 0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total
    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss / len(train_loader))

    # Validation
    model.eval()
    correct, total, val_loss = 0, 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_acc_list.append(val_acc)
    val_loss_list.append(val_loss / len(test_loader))

    scheduler.step()

    print(f"Epoch [{epoch + 1}/{num_epochs}], "
          f"Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%, "
          f"Train Loss: {train_loss_list[-1]:.4f}, Val Loss: {val_loss_list[-1]:.4f}")

duration = time.time() - start_time
print(f"\nTraining completed in {duration:.2f} seconds.")

# Visualization
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(train_loss_list, label='Train Loss')
plt.plot(val_loss_list, label='Val Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_acc_list, label='Train Acc')
plt.plot(val_acc_list, label='Val Acc')
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.tight_layout()
plt.show()


Epoch [1/50], Train Acc: 95.73%, Val Acc: 97.98%, Train Loss: 0.1949, Val Loss: 0.0687
Epoch [2/50], Train Acc: 98.66%, Val Acc: 98.72%, Train Loss: 0.0442, Val Loss: 0.0423
Epoch [3/50], Train Acc: 99.04%, Val Acc: 98.93%, Train Loss: 0.0332, Val Loss: 0.0360
Epoch [4/50], Train Acc: 99.10%, Val Acc: 99.15%, Train Loss: 0.0301, Val Loss: 0.0276
Epoch [5/50], Train Acc: 99.23%, Val Acc: 98.96%, Train Loss: 0.0253, Val Loss: 0.0401
Epoch [6/50], Train Acc: 99.30%, Val Acc: 99.14%, Train Loss: 0.0234, Val Loss: 0.0305
Epoch [7/50], Train Acc: 99.36%, Val Acc: 99.02%, Train Loss: 0.0218, Val Loss: 0.0347
Epoch [8/50], Train Acc: 99.49%, Val Acc: 98.95%, Train Loss: 0.0176, Val Loss: 0.0405
Epoch [9/50], Train Acc: 99.47%, Val Acc: 99.04%, Train Loss: 0.0173, Val Loss: 0.0380
Epoch [10/50], Train Acc: 99.53%, Val Acc: 99.16%, Train Loss: 0.0158, Val Loss: 0.0312


KeyboardInterrupt: 