In [6]:
# Import PyTorch
import torch
from torch import nn

# Import torchvision
import torchvision
from torchvision import datasets
# from torchvision.transforms import ToTensor
import torchvision.transforms as transforms

# Import matplotlib for visualization
import matplotlib.pyplot as plt

# Check versions
# Note: your PyTorch version shouldn't be lower than 1.10.0 and torchvision version shouldn't be lower than 0.11
print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

PyTorch version: 2.5.1+cu121
torchvision version: 0.20.1+cu121


In [7]:
class CIFAR10Dataset(torch.utils.data.Dataset):
    def __init__(self, root='./data', train=True, transform=None, download=True):
        self.data = torchvision.datasets.CIFAR10(root=root, train=train, download=download, transform=transform)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]  # Returns (image, label) tuple

def get_dataloader(batch_size=4, num_workers=0, root='./data'):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = CIFAR10Dataset(root=root, train=True, transform=transform)
    test_dataset = CIFAR10Dataset(root=root, train=False, transform=transform)

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_dataloader, test_dataloader

# Get dataloaders
train_dataloader, test_dataloader = get_dataloader(batch_size=16, num_workers=0)

# Define class names
class_names = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

batch_size = 16

Files already downloaded and verified
Files already downloaded and verified


In [8]:
# Let's check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}")
print(f"Length of train dataloader: {len(train_dataloader)} batches of {batch_size}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {batch_size}")

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x0000020E327CCE20>, <torch.utils.data.dataloader.DataLoader object at 0x0000020E326B8EB0>)
Length of train dataloader: 3125 batches of 16
Length of test dataloader: 625 batches of 16


## Residual Block

In [9]:
class CIFAR10Dataset(torch.utils.data.Dataset):
    def __init__(self, root='./data', train=True, transform=None, download=True):
        self.data = torchvision.datasets.CIFAR10(root=root, train=train, download=download, transform=transform)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]  # Returns (image, label) tuple

def get_dataloader(batch_size=4, num_workers=0, root='./data'):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = CIFAR10Dataset(root=root, train=True, transform=transform)
    test_dataset = CIFAR10Dataset(root=root, train=False, transform=transform)

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_dataloader, test_dataloader

# Get dataloaders
train_dataloader, test_dataloader = get_dataloader(batch_size=16, num_workers=0)

# Define class names
class_names = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

batch_size = 16

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def _init_(self, in_channels, out_channels):
        super(ResidualBlock, self)._init_() # type: ignore
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        
        # Identity mapping (skip connection)
        self.identity = nn.Identity()
        if in_channels != out_channels:
            self.identity = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        out = F.relu(self.conv1(x))  # Conv1: [B, in_channels, H, W] -> [B, out_channels, H, W]
        out = self.conv2(out)  # Conv2: [B, out_channels, H, W] -> [B, out_channels, H, W]
        out += self.identity(x)  # Skip connection: [B, out_channels, H, W] + [B, out_channels, H, W]
        out = F.relu(out)  # ReLU: [B, out_channels, H, W] -> [B, out_channels, H, W]
        return out


In [22]:
class ResidualCNN(nn.Module):
    def _init_(self, num_classes=10):
        super(ResidualCNN, self).__init__() # type: ignore
        
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)  # Conv1: [B, 3, 28, 28] -> [B, 6, 24, 24]
        self.pool = nn.MaxPool2d(2, 2)  # MaxPool: [B, 6, 24, 24] -> [B, 6, 12, 12]
        self.res_block1 = ResidualBlock(6, 6)  # ResidualBlock1: [B, 6, 12, 12] -> [B, 6, 12, 12]
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)  # Conv2: [B, 6, 12, 12] -> [B, 16, 8, 8]
        self.res_block2 = ResidualBlock(16, 16)  # ResidualBlock2: [B, 16, 8, 8] -> [B, 16, 8, 8]

        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # FC1: [B, 16 * 5 * 5] -> [B, 120]
        self.fc2 = nn.Linear(120, 84)  # FC2: [B, 120] -> [B, 84]
        self.fc3 = nn.Linear(84, num_classes)  # FC3: [B, 84] -> [B, num_classes]

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Conv1 + Pool: [B, 1, 28, 28] -> [B, 6, 12, 12]
        x = self.res_block1(x)  # ResidualBlock1: [B, 6, 12, 12] -> [B, 6, 12, 12]
        x = self.pool(F.relu(self.conv2(x)))  # Conv2 + Pool: [B, 6, 12, 12] -> [B, 16, 8, 8]
        x = self.res_block2(x)  # ResidualBlock2: [B, 16, 8, 8] -> [B, 16, 8, 8]
        x = x.view(-1, 16 * 5 * 5)  # Flatten: [B, 16, 8, 8] -> [B, 16 * 5 * 5]
        x = F.relu(self.fc1(x))  # FC1: [B, 16 * 5 * 5] -> [B, 120]
        x = F.relu(self.fc2(x))  # FC2: [B, 120] -> [B, 84]
        x = self.fc3(x)  # FC3: [B, 84] -> [B, num_classes]
        return x

In [23]:
model = ResidualCNN()  # Instantiate the model

# LOSS AND OPTIMIZER
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# move the model to GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

ValueError: optimizer got an empty parameter list

In [19]:
import time
from tqdm.auto import tqdm

# Train in one epoch function
def train_one_epoch(model, train_loader, loss_fn, optimizer, device):
    model.train()
    train_loss, train_correct = 0, 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)
        _, predictions = torch.max(outputs, 1)
        train_correct += torch.sum(predictions == labels.data)

    return train_loss / len(train_loader.dataset), train_correct.double() / len(train_loader.dataset)

# Validation function
def validate(model, val_loader, loss_fn, device):
    model.eval()
    val_loss, val_correct = 0, 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            _, predictions = torch.max(outputs, 1)
            val_correct += torch.sum(predictions == labels.data)

    return val_loss / len(val_loader.dataset), val_correct.double() / len(val_loader.dataset)

# Training and validation loop with timing
def train_and_validate(model, train_loader, val_loader, loss_fn, optimizer, epochs, device='cuda'):
    model.to(device)
    history = {
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }

    for epoch in tqdm(range(epochs), desc="Training Progress", leave=True):
        epoch_start_time = time.time()

        train_loss, train_accuracy = train_one_epoch(model, train_loader, loss_fn, optimizer, device)
        val_loss, val_accuracy = validate(model, val_loader, loss_fn, device)

        history['train_loss'].append(train_loss)
        history['train_accuracy'].append(train_accuracy.item())
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_accuracy.item())

        epoch_end_time = time.time()

        # Use tqdm.write() instead of print() to avoid extra blank lines
        tqdm.write(f'Epoch {epoch+1}/{epochs}: Train loss: {train_loss:.4f}, Train accuracy: {train_accuracy:.4f}, '
                   f'Val loss: {val_loss:.4f}, Val accuracy: {val_accuracy:.4f}, '
                   f'Time: {(epoch_end_time - epoch_start_time):.2f}s')

    return model, history

In [20]:
num_epochs =10
trained_model, history = train_and_validate(model, train_dataloader, test_dataloader, loss_fn, optimizer, num_epochs)

NameError: name 'optimizer' is not defined