In [30]:
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, Subset
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.amp import GradScaler, autocast
import os
import random

In [2]:
# Read Imagenette from the local copy on disk (no download) and convert
# every image to a tensor; no resizing is applied at this point.
dataset = datasets.Imagenette(
    root='/home/j597s263/Datasets/imagenette',
    download=False,
    transform=ToTensor(),
)

In [3]:
# Echo the dataset's repr (datapoint count, root location, transform) as the cell output.
dataset

Dataset Imagenette
    Number of datapoints: 9469
    Root location: /home/j597s263/Datasets/imagenette
    StandardTransform
Transform: ToTensor()

In [31]:
# Preprocessing applied to every image when it is read from the dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize every image to 224x224
    transforms.ToTensor()
])

# Load the dataset
dataset = datasets.Imagenette(root='/home/j597s263/Datasets/imagenette', download=False, transform=transform)

# Split sizes, named instead of being buried in slice bounds.
TRAIN_SIZE = 7568
TEST_SIZE = 954

# Slicing would silently return shorter splits if the dataset were smaller
# than expected, so fail loudly instead.
assert TRAIN_SIZE + TEST_SIZE <= len(dataset), (
    f"Requested {TRAIN_SIZE + TEST_SIZE} samples but the dataset only has {len(dataset)}"
)

# Shuffle indices with a fixed seed so the split is the same on every run.
# Note: this reseeds Python's global `random` module.
random.seed(42)
indices = list(range(len(dataset)))
random.shuffle(indices)

# First TRAIN_SIZE shuffled indices -> training, next TEST_SIZE -> testing.
# Indices past TRAIN_SIZE + TEST_SIZE are not assigned to either split here.
train_indices = indices[:TRAIN_SIZE]
test_indices = indices[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]

# Create Subsets
train_data = Subset(dataset, train_indices)
test_data = Subset(dataset, test_indices)

# Create DataLoaders
# shuffle=True reshuffles the order of the training samples at every epoch.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
# The whole test split is served as one batch, in a fixed order.
test_loader = DataLoader(test_data, batch_size=len(test_data), shuffle=False)

# Print dataset sizes
print(f"Total samples: {len(dataset)}")
print(f"Training samples: {len(train_data)}")
print(f"Test samples: {len(test_data)}")


Total samples: 9469
Training samples: 7568
Test samples: 954


In [32]:
import torch.nn as nn

# Residual block
class Residual(nn.Module):
    """Skip-connection wrapper: returns the wrapped callable's output plus its input."""

    def __init__(self, fn):
        """Store ``fn``, the callable whose output is added back onto its input."""
        super().__init__()
        self.fn = fn

    def forward(self, x):
        """Apply ``fn`` to ``x`` and add ``x`` to the result."""
        branch = self.fn(x)
        return branch + x

# ConvMixer model with hard-coded parameters
def ConvMixer(dim=256, depth=8, kernel_size=5, patch_size=4, n_classes=10):
    """Build a ConvMixer classifier as an ``nn.Sequential``.

    The defaults are the values that used to be hard-coded in this function,
    so ``ConvMixer()`` builds the same architecture as before.

    Args:
        dim: Channel width of the patch embedding and of every mixer block.
        depth: Number of mixer blocks.
        kernel_size: Kernel size of the depthwise convolution in each block.
        patch_size: Kernel size and stride of the patch-embedding convolution.
        n_classes: Output size of the final linear layer (10 by default,
            for the Imagenette labels this notebook trains on).

    Returns:
        An ``nn.Sequential`` taking a (batch, 3, H, W) tensor and producing
        (batch, n_classes) logits.
    """
    # Patch embedding: a strided convolution cuts the image into patches.
    layers = [
        nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size),
        nn.GELU(),
        nn.BatchNorm2d(dim),
    ]

    # Mixer blocks: a residual depthwise convolution (groups=dim) followed by
    # a 1x1 convolution. Layers are created in the same order as before, so
    # parameter names and initialisation order are unchanged.
    for _ in range(depth):
        layers.append(nn.Sequential(
            Residual(nn.Sequential(
                nn.Conv2d(dim, dim, kernel_size, groups=dim, padding="same"),
                nn.GELU(),
                nn.BatchNorm2d(dim)
            )),
            nn.Conv2d(dim, dim, kernel_size=1),
            nn.GELU(),
            nn.BatchNorm2d(dim)
        ))

    # Classification head: global average pool, flatten, linear.
    layers += [
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(dim, n_classes),
    ]
    return nn.Sequential(*layers)

In [33]:
# Build the network with its default settings, then move it to GPU index 1.
model = ConvMixer()
model = model.to('cuda:1')

In [34]:
# Hyperparameters
epochs = 150
learning_rate = 0.01
opt_eps = 1e-3
clip_grad = 1.0
device = 'cuda:1'

# The model is moved to its GPU in the cell that creates it. Moving it again
# is a no-op when the two device strings agree, and keeps the model and the
# batches on the same device if only one of the strings is ever edited.
model.to(device)

# Optimizer and scheduler
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, eps=opt_eps)
# The scheduler is stepped once per batch (see the loop below), so it is
# sized with steps_per_epoch * epochs total steps.
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=learning_rate,
    steps_per_epoch=len(train_loader),
    epochs=epochs
)

# Loss function (returns the mean loss over the batch)
criterion = nn.CrossEntropyLoss()

# Automatic Mixed Precision (AMP)
scaler = GradScaler()

# Training and Testing Loop
for epoch in range(epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0   # sum of per-sample training losses this epoch
    seen = 0             # number of training samples processed this epoch

    for images, labels in train_loader:
        # Move data to GPU
        images, labels = images.to(device), labels.to(device)

        # Forward pass under autocast
        with autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()

        # Gradients must be unscaled before clipping so that the threshold
        # applies to their true magnitude.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

        # Optimizer step, scaler update, then one scheduler step per batch
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        # `loss` is a batch mean; weight it by the batch size so a smaller
        # final batch does not count as much as a full one.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        seen += batch_size

    # Log the per-sample mean training loss for the epoch
    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {running_loss/seen:.4f}")

    # ---- Testing phase after each epoch ----
    model.eval()
    correct = 0
    total = 0
    test_loss_sum = 0.0  # sum of per-sample test losses

    with torch.no_grad():
        for images, labels in test_loader:
            # Move data to GPU
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            test_loss_sum += loss.item() * batch_size

            # Predicted class = index of the largest logit
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    # Log test accuracy and per-sample mean loss
    test_loss = test_loss_sum / total
    test_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch [1/150], Training Loss: 1.7073
Epoch [1/150], Test Loss: 1.4579, Test Accuracy: 53.46%
Epoch [2/150], Training Loss: 1.2778
Epoch [2/150], Test Loss: 1.2541, Test Accuracy: 60.69%
Epoch [3/150], Training Loss: 1.0861
Epoch [3/150], Test Loss: 1.2110, Test Accuracy: 61.74%
Epoch [4/150], Training Loss: 0.9343
Epoch [4/150], Test Loss: 1.0506, Test Accuracy: 68.34%
Epoch [5/150], Training Loss: 0.8299
Epoch [5/150], Test Loss: 1.0734, Test Accuracy: 65.41%
Epoch [6/150], Training Loss: 0.7355
Epoch [6/150], Test Loss: 1.0020, Test Accuracy: 68.45%
Epoch [7/150], Training Loss: 0.6664
Epoch [7/150], Test Loss: 1.1042, Test Accuracy: 66.04%
Epoch [8/150], Training Loss: 0.6287
Epoch [8/150], Test Loss: 1.0263, Test Accuracy: 69.39%
Epoch [9/150], Training Loss: 0.5364
Epoch [9/150], Test Loss: 0.9998, Test Accuracy: 69.29%
Epoch [10/150], Training Loss: 0.4953
Epoch [10/150], Test Loss: 0.9250, Test Accuracy: 73.90%
Epoch [11/150], Training Loss: 0.4817
Epoch [11/150], Test Loss: 1.5

In [35]:
# Write the whole model object (not just its state_dict) to disk.
torch.save(
    obj=model,
    f='/home/j597s263/Models/Conv_Imagenette.mod',
)