In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.models import resnet18
import matplotlib.pyplot as plt
import time

# Device configuration: use the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Release cached CUDA blocks left over from earlier runs in this session.
torch.cuda.empty_cache()
# Enable cuDNN's benchmark mode.
torch.backends.cudnn.benchmark = True

# Hyperparameters
num_epochs, batch_size = 50, 16
learning_rate = 0.001

# STL10 transforms with Data Augmentation
# Both pipelines end with the same tensor conversion and per-channel
# normalisation (mean 0.5, std 0.5).
normalize = transforms.Normalize((0.5, 0.5, 0.5),
                                 (0.5, 0.5, 0.5))

# Training pipeline. The resize has to run BEFORE the random crop: cropping
# 64x64 out of the padded 96x96 original would hand the network a random
# cut-out of the image and turn a later Resize((64, 64)) into a no-op.
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize from 96x96 to 64x64
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(64, padding=4),  # pad by 4, crop back: small random shifts
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: no random augmentation, so the reported metrics are
# deterministic and comparable from epoch to epoch.
test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    normalize,
])

# STL10 dataset
train_dataset = torchvision.datasets.STL10(root='./data', split='train', download=True, transform=transform)
test_dataset = torchvision.datasets.STL10(root='./data', split='test', download=True, transform=test_transform)

# Only the training data is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Custom Initialization: He for shallow, Orthogonal for deep
def custom_init(model: nn.Module, shallow_layers: int = 10) -> None:
    """Re-initialise every Conv2d / Linear layer of ``model`` in place.

    Layers are counted in the order ``model.modules()`` yields them. The
    first ``shallow_layers`` of them receive He (Kaiming) normal weights for
    ReLU; every later one receives orthogonal weights scaled by the ReLU
    gain. Biases, where a layer has one, are set to zero.

    Args:
        model: Module whose Conv2d / Linear sub-modules are re-initialised.
        shallow_layers: How many leading layers use He initialisation. The
            default of 10 reproduces the previously hard-coded threshold.
    """
    relu_gain = nn.init.calculate_gain('relu')
    weighted_layers = (
        m for m in model.modules() if isinstance(m, (nn.Conv2d, nn.Linear))
    )
    for layer_idx, layer in enumerate(weighted_layers):
        if layer_idx < shallow_layers:
            nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
        else:
            nn.init.orthogonal_(layer.weight, gain=relu_gain)
        if layer.bias is not None:
            nn.init.constant_(layer.bias, 0)

# Modify ResNet18 with Dropout in the Fully Connected Layer
class ResNet18Modified(nn.Module):
    """ResNet18 whose classifier head is replaced by Dropout + Linear.

    The backbone is built by ``resnet18()`` with its default arguments, the
    ``fc`` head is swapped out, and then all Conv2d / Linear weights are
    re-initialised by ``custom_init``.
    """

    def __init__(self, num_classes: int = 10, dropout: float = 0.5):
        """Build the network.

        Args:
            num_classes: Size of the output layer. Defaults to 10, the value
                that was previously hard-coded.
            dropout: Drop probability applied just before the final linear
                layer. Defaults to 0.5, the value previously hard-coded.
        """
        super().__init__()
        self.model = resnet18()
        # Read the feature width before the original head is overwritten.
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(in_features, num_classes),
        )
        # Runs after the head swap so the new Linear layer is covered as well.
        custom_init(self.model)

    def forward(self, x):
        """Return the wrapped ResNet's output for the input batch ``x``."""
        return self.model(x)

# Build the network, then move its parameters to the selected device.
model = ResNet18Modified()
model = model.to(device)

# Cross-entropy loss; Adam with weight decay for L2 regularization
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)
# Cosine learning-rate schedule whose period spans the whole training run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_epochs,
)

# Train the model
def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        training: When true the model is put in train mode and the optimizer
            is stepped after every batch; when false the model is put in
            eval mode and gradient tracking is disabled.

    Returns:
        A tuple of the per-sample mean loss and the accuracy in percent.
    """
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # The criterion yields a batch mean; weight it by the batch size
            # so a short final batch does not skew the epoch average.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_count

    return loss_sum / n_seen, 100 * n_correct / n_seen


train_acc_list, val_acc_list = [], []
train_loss_list, val_loss_list = [], []
start_time = time.time()

for epoch in range(num_epochs):
    train_loss, train_acc = _run_epoch(train_loader, training=True)
    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)

    # Validation (measured on test_loader)
    val_loss, val_acc = _run_epoch(test_loader, training=False)
    val_acc_list.append(val_acc)
    val_loss_list.append(val_loss)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"Epoch [{epoch + 1}/{num_epochs}], "
          f"Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%, "
          f"Train Loss: {train_loss_list[-1]:.4f}, Val Loss: {val_loss_list[-1]:.4f}")

duration = time.time() - start_time
print(f"\nTraining completed in {duration:.2f} seconds.")

# Visualization: loss on the left panel, accuracy on the right.
# Each entry is (title, y-axis label, [(series, legend label), ...]).
panels = [
    ('Loss over Epochs', 'Loss',
     [(train_loss_list, 'Train Loss'), (val_loss_list, 'Val Loss')]),
    ('Accuracy over Epochs', 'Accuracy (%)',
     [(train_acc_list, 'Train Acc'), (val_acc_list, 'Val Acc')]),
]

plt.figure(figsize=(12, 5))

for position, (title, y_label, curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for series, legend_label in curves:
        plt.plot(series, label=legend_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/50], Train Acc: 20.44%, Val Acc: 26.90%, Train Loss: 2.4926, Val Loss: 1.9923
Epoch [2/50], Train Acc: 25.52%, Val Acc: 29.85%, Train Loss: 2.0124, Val Loss: 1.8017
Epoch [3/50], Train Acc: 28.50%, Val Acc: 32.90%, Train Loss: 1.9002, Val Loss: 1.7105
Epoch [4/50], Train Acc: 31.34%, Val Acc: 31.01%, Train Loss: 1.8287, Val Loss: 1.7921
Epoch [5/50], Train Acc: 34.42%, Val Acc: 35.91%, Train Loss: 1.7443, Val Loss: 1.7347
Epoch [6/50], Train Acc: 36.54%, Val Acc: 41.77%, Train Loss: 1.6866, Val Loss: 1.5277
Epoch [7/50], Train Acc: 39.86%, Val Acc: 42.25%, Train Loss: 1.6191, Val Loss: 1.5363
Epoch [8/50], Train Acc: 43.02%, Val Acc: 43.66%, Train Loss: 1.5576, Val Loss: 1.5063
Epoch [9/50], Train Acc: 43.18%, Val Acc: 43.39%, Train Loss: 1.5375, Val Loss: 1.5301
Epoch [10/50], Train Acc: 44.70%, Val Acc: 41.17%, Train Loss: 1.5042, Val Loss: 1.6210
Epoch [11/50], Train Acc: 45.94%, Val Acc: 45.14%, Tr