In [6]:
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast

In [7]:
# Load CIFAR-10 dataset
train_data = datasets.CIFAR10(root='/home/j597s263/Datasets/cifar10', download=True, transform=ToTensor(), train=True)
test_data = datasets.CIFAR10(root='/home/j597s263/Datasets/cifar10', download=True, transform=ToTensor(), train=False)

# Create DataLoaders for training and testing
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=len(test_data), shuffle=False)

print(f"Training data: {len(train_data)} samples")
print(f"Testing data: {len(test_data)} samples")


Files already downloaded and verified
Files already downloaded and verified
Training data: 50000 samples
Testing data: 10000 samples


In [3]:
import torch.nn as nn

# Residual block
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x):
        return self.fn(x) + x

# ConvMixer model with hard-coded parameters
def ConvMixer():
    dim = 256          # Embedding dimension
    depth = 8          # Number of ConvMixer blocks
    kernel_size = 5    # Kernel size for depthwise convolution
    patch_size = 4     # Patch size for initial convolution
    n_classes = 10     # CIFAR-10 has 10 classes

    return nn.Sequential(
        nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size),
        nn.GELU(),
        nn.BatchNorm2d(dim),
        *[nn.Sequential(
                Residual(nn.Sequential(
                    nn.Conv2d(dim, dim, kernel_size, groups=dim, padding="same"),
                    nn.GELU(),
                    nn.BatchNorm2d(dim)
                )),
                nn.Conv2d(dim, dim, kernel_size=1),
                nn.GELU(),
                nn.BatchNorm2d(dim)
        ) for _ in range(depth)],
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(dim, n_classes)
    )

In [4]:
model = ConvMixer().to('cuda:1')

In [5]:
# Hyperparameters
epochs = 150
learning_rate = 0.01
opt_eps = 1e-3
clip_grad = 1.0
device = 'cuda:1' 

# Optimizer and scheduler
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, eps=opt_eps)
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=learning_rate,
    steps_per_epoch=len(train_loader),
    epochs=epochs
)

# Loss function
criterion = nn.CrossEntropyLoss()

# Automatic Mixed Precision (AMP)
scaler = GradScaler()

# Training and Testing Loop
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Move data to GPU
        images, labels = images.to(device), labels.to(device)

        # Forward and backward pass with AMP
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()

        # Gradient clipping
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

        # Optimizer step
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        running_loss += loss.item()

    # Log training loss for the epoch
    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {running_loss/len(train_loader):.4f}")

    # Testing phase after each epoch
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            # Move data to GPU
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Log test accuracy and loss
    test_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%")

  scaler = GradScaler()
  with autocast():


Epoch [1/150], Training Loss: 1.4949
Epoch [1/150], Test Loss: 1.2500, Test Accuracy: 54.68%
Epoch [2/150], Training Loss: 1.1094
Epoch [2/150], Test Loss: 1.1793, Test Accuracy: 58.09%
Epoch [3/150], Training Loss: 0.8972
Epoch [3/150], Test Loss: 1.0524, Test Accuracy: 62.79%
Epoch [4/150], Training Loss: 0.7327
Epoch [4/150], Test Loss: 1.0550, Test Accuracy: 63.74%
Epoch [5/150], Training Loss: 0.5816
Epoch [5/150], Test Loss: 1.1286, Test Accuracy: 63.64%
Epoch [6/150], Training Loss: 0.4843
Epoch [6/150], Test Loss: 1.2069, Test Accuracy: 63.05%
Epoch [7/150], Training Loss: 0.4460
Epoch [7/150], Test Loss: 1.2190, Test Accuracy: 63.52%
Epoch [8/150], Training Loss: 0.4458
Epoch [8/150], Test Loss: 1.2079, Test Accuracy: 63.24%
Epoch [9/150], Training Loss: 0.4669
Epoch [9/150], Test Loss: 1.1838, Test Accuracy: 63.93%
Epoch [10/150], Training Loss: 0.4806
Epoch [10/150], Test Loss: 1.1144, Test Accuracy: 66.08%
Epoch [11/150], Training Loss: 0.4978
Epoch [11/150], Test Loss: 1.0

In [9]:
torch.save(model, '/home/j597s263/Models/Conv_Cifar.mod')