In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import sklearn

def plot_train_val_loss(train_losses, val_losses):
    fig, ax = plt.subplots()
    ax.plot(train_losses, label='Train Loss')
    ax.plot(val_losses, label='Val Loss')
    ax.legend()
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Train/Val Loss')
    plt.show()

def plot_train_val_acc(train_accs, val_accs):
    fig, ax = plt.subplots()
    ax.plot(train_accs, label='Train Acc')
    ax.plot(val_accs, label='Val Acc')
    ax.legend()
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Accuracy')
    ax.set_title('Train/Val Accuracy')
    plt.show()


class Network(nn.Module):
    def __init__(self, depth=28, widen_factor=10, dropout_rate=0.3, num_classes=10):
        super(Network, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert ((depth-4)%6 ==0), "depth should be 6n+4"
        n = (depth-4)//6
        BasicBlock = self.BasicBlock
        self.conv1 = nn.Conv2d(1, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.block1 = nn.ModuleList([BasicBlock(nChannels[0], nChannels[1], stride=1, dropRate=dropout_rate) for i in range(n)])
        self.block2 = nn.ModuleList([BasicBlock(nChannels[1], nChannels[2], stride=2, dropRate=dropout_rate) for i in range(n)])
        self.block3 = nn.ModuleList([BasicBlock(nChannels[2], nChannels[3], stride=2, dropRate=dropout_rate) for i in range(n)])
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(nChannels[3], num_classes)
    
    class BasicBlock(nn.Module):
        def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
            super(Network.BasicBlock, self).__init__()
            self.bn1 = nn.BatchNorm2d(in_planes)
            self.relu1 = nn.ReLU(inplace=True)
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(out_planes)
            self.relu2 = nn.ReLU(inplace=True)
            self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
            self.droprate = dropRate
            self.equal_in_out = (in_planes==out_planes)
            self.convShortcut = (not self.equal_in_out) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False) or None
    
        def forward(self, x):
            if not self.equal_in_out:
                x = self.relu1(self.bn1(x))
            else:
                out = self.relu1(self.bn1(x))
            out = self.relu2(self.bn2(self.conv1(out if self.equal_in_out else x)))
            if self.droprate > 0:
                out = F.dropout(out, p=self.droprate, training=self.training)
            out = self.conv2(out)
            return torch.add(x if self.equal_in_out else self.convShortcut(x), out)
    
    def forward(self, x):
        out = self.conv1(x)
        for i in range(len(self.block1)):
            out = self.block1[i](out)
        for i in range(len(self.block2)):
            out = self.block2[i](out)
        for i in range(len(self.block3)):
            out = self.block3[i](out)
        out = self.relu(self.bn1(out))
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

def predict(model, dataloader):
    """Make predictions on a dataset using a trained model"""
    device = next(model.parameters()).device
    model.eval()
    preds = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            preds.extend(predicted.cpu())
    return torch.stack(preds)


def train(model, loader, criterion, optimizer, epoch):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(loader):
        inputs, targets = inputs.to("mps"), targets.to("mps")
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(inputs)}/{len(loader.dataset)} '
                  f'({100. * batch_idx / len(loader):.0f}%)]\tLoss: {loss.item():.6f}')
            
    train_acc = 100.*correct/total
    print(f'Train Epoch: {epoch} Loss: {train_loss/len(loader):.6f} | Acc: {train_acc:.2f}%')
    return train_loss, train_acc
    

def test(model, loader, criterion, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to("mps"), targets.to("mps")
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            if batch_idx % 100 == 0:
                print(f'Test Epoch: {epoch} [{batch_idx * len(inputs)}/{len(loader.dataset)} '
                      f'({100. * batch_idx / len(loader):.0f}%)]\tLoss: {loss.item():.6f}')

    val_acc = 100.*correct/total
    print(f'Test Epoch: {epoch} Loss: {test_loss/len(loader):.6f} | Acc: {val_acc:.2f}%')
    return test_loss, val_acc


In [2]:
if __name__ == '__main__':
    # Set seed for reproducibility
    SEED = 1234
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    # Define hyperparameters
    num_epochs = 100
    batch_size = 128
    learning_rate = 0.001
    weight_decay = 5e-4

    # Prepare data
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.2860,), std=(0.3530,))
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.2860,), std=(0.3530,))
    ])

    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Define model, loss function, and optimizer
    device = torch.device("cuda")
    model = Network(depth=28, widen_factor=10, dropout_rate=0.3, num_classes=10)
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Train model
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    for epoch in range(num_epochs):
        train_loss, train_acc = train(model, trainloader, criterion, optimizer, epoch)
        test_loss, val_acc = test(model, testloader, criterion, epoch)

        train_losses.append(train_loss)
        val_losses.append(test_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

    plot_train_val_loss(train_losses, val_losses)
    plot_train_val_acc(train_accs, val_accs)

    # Calculate and print classification report
    y_true = testset.targets.numpy()
    y_pred = predict(model, testloader).numpy()
    report = sklearn.metrics.classification_report(y_true, y_pred)
    print(report)

    print('Finished Training')
    

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [02:42<00:00, 162332.53it/s]


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 116876.61it/s]


Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:04<00:00, 919286.36it/s] 


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 5160678.06it/s]


Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw



RuntimeError: PyTorch is not linked with support for mps devices