In [None]:
# Delete the sample_data/ directory (relative to the current working directory).
!rm -rf sample_data/  
# Mount Google Drive at /content/drive; google.colab is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')
# Copy everything under the 740_deeplearning Drive folder into /content.
!cp -r /content/drive/MyDrive/740_deeplearning/* /content/

# Training ResNet-18 on the CIFAR-100 dataset

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The block output has ``expansion * planes`` channels. The skip path is an
    empty ``nn.Sequential`` (identity) unless the stride or the channel count
    changes, in which case it is a 1x1 convolution followed by batch norm.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both 3x3 convolutions.
        stride: stride of the first convolution (and of the projection shortcut).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path. Submodules are created in a fixed order so parameter
        # ordering and seeded initialisation stay stable.
        self.conv1 = nn.Conv2d(
            in_planes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip path: project only when the input and output shapes disagree.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        return F.relu(hidden + self.shortcut(x))


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The first two convolutions produce ``planes`` channels; the last one
    produces ``expansion * planes`` channels. The stride is applied by the 3x3
    convolution. The skip path is an empty ``nn.Sequential`` (identity) unless
    the stride or the channel count changes, in which case it is a 1x1
    convolution followed by batch norm.

    Args:
        in_planes: number of input channels.
        planes: channel width of the first two convolutions.
        stride: stride of the 3x3 convolution (and of the projection shortcut).
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path. Creation order is kept fixed so parameter ordering and
        # seeded initialisation stay stable.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Skip path: project only when the input and output shapes disagree.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = self.bn3(self.conv3(hidden))
        return F.relu(hidden + self.shortcut(x))


class ResNet(nn.Module):
    """ResNet with a 3x3 stem, four residual stages and a linear classifier.

    The stem is a stride-1 3x3 convolution to 64 channels followed by batch
    norm. The four stages use 64, 128, 256 and 512 ``planes`` with strides
    1, 2, 2 and 2. The classifier maps ``512 * block.expansion`` features to
    ``num_classes`` logits.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and be constructible as ``block(in_planes, planes, stride)``.
        num_blocks: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=100):
        super().__init__()
        # Running channel count; _make_layer updates it after every block.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        # A distinct loop variable avoids shadowing the ``stride`` parameter.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. When the final feature map is 4x4 (32x32
        # inputs with the blocks in this file) this equals the former fixed
        # F.avg_pool2d(out, 4); unlike the fixed window it also yields a 1x1
        # map for other input resolutions, so the linear layer always receives
        # 512 * expansion features.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18(num_classes=100):
    """Build a ResNet from ``BasicBlock`` with two blocks in each of the four stages.

    Args:
        num_classes: size of the classifier output. The default of 100 matches
            the value that was previously implied by ``ResNet``'s own default,
            so existing ``ResNet18()`` calls are unaffected.

    Returns:
        A freshly initialised ``ResNet`` instance.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)


def test():
    """Smoke test: run one random 1x3x32x32 input through ResNet18 and print the output size."""
    model = ResNet18()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())


In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameters and run configuration (read as module-level globals below)
batch_size = 128
test_batch_size = 128
epochs = 100
weight_decay = 2e-4
lr = 0.1
momentum = 0.9
no_cuda = False
seed = 1
model_dir = './model-cifar-ResNet18'

# Settings
# exist_ok=True creates the output directory only when it is missing, without a
# separate check-then-create step that could race with another process.
os.makedirs(model_dir, exist_ok=True)

# Use the GPU only when it is both allowed (no_cuda is False) and available.
use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device("cuda" if use_cuda else "cpu")
# Extra DataLoader keyword arguments; only set when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# CIFAR-100 datasets and their preprocessing pipelines.
# Training images: random 32x32 crop after 4-pixel padding, random horizontal
# flip, then conversion to a tensor.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Test images are only converted to tensors.
transform_test = transforms.Compose([
    transforms.ToTensor(),
])

# Both splits are stored under ../data and downloaded if not already present.
trainset = datasets.CIFAR100(root='../data', train=True, transform=transform_train, download=True)
testset = datasets.CIFAR100(root='../data', train=False, transform=transform_test, download=True)

# Train function
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader`` with cross-entropy loss.

    Args:
        model: network to train; it is switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that is stepped once per batch.
        epoch: current epoch number; accepted for the caller's convenience and
            not used inside this function.

    Returns:
        The loss of the last processed batch as a Python float (not an epoch
        average), or NaN when ``train_loader`` yields no batches.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()
    # Stays None when the loader is empty, so the return below can report NaN
    # instead of failing on an unbound ``loss`` name.
    loss = None
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    if loss is None:
        return float('nan')
    return loss.item()

# Evaluation functions for train and test data
def eval_model(model, device, data_loader, loader_type, epoch):
    """Evaluate ``model`` on ``data_loader`` and print a one-line summary.

    Averages are taken over the samples actually yielded by the loader, so the
    result stays correct when the loader drops or subsamples items, and any
    iterable of ``(data, target)`` batches is accepted. For a loader that
    yields every item of its dataset exactly once this equals dividing by the
    dataset length.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        device: device each batch is moved to.
        data_loader: iterable yielding ``(data, target)`` batches.
        loader_type: ``'train'`` or ``'test'``; only used as the printed label.
        epoch: epoch number shown in the printed summary.

    Returns:
        ``(average_loss, accuracy)`` with accuracy as a fraction in [0, 1].
        Both values are NaN when the loader yields no samples.

    Raises:
        AssertionError: if ``loader_type`` is not ``'train'`` or ``'test'``.
    """
    assert loader_type in ['train', 'test'], "loader_type must be either 'train' or 'test'"
    model.eval()
    total_loss = 0.0
    correct = 0
    seen = 0
    # reduction='sum' so per-batch losses can be added and divided once at the end.
    criterion = nn.CrossEntropyLoss(reduction='sum')
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    if seen:
        data_loss = total_loss / seen
        data_accuracy = correct / seen
        accuracy_percent = 100. * correct / seen
    else:
        # Nothing was evaluated: report NaN rather than dividing by zero.
        data_loss = data_accuracy = accuracy_percent = float('nan')

    print('Epoch: {}, {}: Average loss: {:.4f}, Accuracy: {:.0f}%'.format(epoch,
                                                                          loader_type.capitalize(),
                                                                          data_loss,
                                                                          accuracy_percent))

    return data_loss, data_accuracy


def plot_metrics(train_losses, train_accuracies, test_losses, test_accuracies, dataset_name):
    """Draw loss and accuracy curves side by side, save the figure, then show it.

    The left panel plots train/test loss and the right panel plots train/test
    accuracy, both against the sequence index (labelled 'Epochs'). The figure
    is written to ``loss_accuracy_plot_<dataset_name>.png`` inside the
    module-level ``model_dir``.
    """
    # One entry per panel: y-axis label followed by its (series, legend label) curves.
    panels = (
        ('Loss', (train_losses, 'Train Loss'), (test_losses, 'Test Loss')),
        ('Accuracy', (train_accuracies, 'Train Accuracy'), (test_accuracies, 'Test Accuracy')),
    )

    plt.figure(figsize=(10, 5))
    for position, (y_label, *curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for series, legend_label in curves:
            plt.plot(series, label=legend_label)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    output_path = os.path.join(model_dir, f'loss_accuracy_plot_{dataset_name}.png')
    plt.savefig(output_path)
    plt.show()


# Adjust learning rate function
def adjust_learning_rate(optimizer, epoch, initial_lr):
    """Set a step-decayed learning rate on every parameter group of ``optimizer``.

    Schedule (thresholds are inclusive):
        epoch < 75   -> initial_lr
        epoch >= 75  -> initial_lr * 0.1
        epoch >= 90  -> initial_lr * 0.01
        epoch >= 100 -> initial_lr * 0.001
    """
    # Check the largest threshold first so the strongest applicable decay wins.
    if epoch >= 100:
        new_lr = initial_lr * 0.001
    elif epoch >= 90:
        new_lr = initial_lr * 0.01
    elif epoch >= 75:
        new_lr = initial_lr * 0.1
    else:
        new_lr = initial_lr

    for group in optimizer.param_groups:
        group['lr'] = new_lr

def main():
    """Train ResNet18 on CIFAR-100, checkpoint the best test accuracy, and plot metrics.

    Each epoch sets the learning rate, trains for one pass, evaluates on the
    train loader and then the test loader, and saves the model and optimizer
    state whenever the test accuracy strictly improves. All hyperparameters,
    datasets and paths come from module-level globals.
    """
    dataset_name = 'cifar100'
    print(f"Training on {dataset_name.upper()} dataset")

    make_loader = torch.utils.data.DataLoader
    train_loader = make_loader(trainset, batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = make_loader(testset, batch_size=test_batch_size, shuffle=False, **kwargs)

    model = ResNet18().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)

    # Checkpoint locations are fixed for the whole run; each new best overwrites them.
    model_ckpt = os.path.join(model_dir, f'model-ResNet18-{dataset_name}-best_epoch.pt')
    optim_ckpt = os.path.join(model_dir, f'opt-ResNet18-{dataset_name}-checkpoint_best_epoch.tar')

    # Per-epoch metric history, keyed by split; evaluation order is train, then test.
    splits = (('train', train_loader), ('test', test_loader))
    losses = {'train': [], 'test': []}
    accuracies = {'train': [], 'test': []}
    best_test_accuracy = 0

    for epoch in range(1, epochs + 1):
        adjust_learning_rate(optimizer, epoch, lr)

        # The returned last-batch loss is not used; metrics come from eval_model.
        train(model, device, train_loader, optimizer, epoch)

        print('================================================================')
        for split, loader in splits:
            split_loss, split_accuracy = eval_model(model, device, loader, split, epoch)
            losses[split].append(split_loss)
            accuracies[split].append(split_accuracy)

        # Save the model if the test accuracy is the best seen so far
        test_accuracy = accuracies['test'][-1]
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy
            best_epoch = epoch
            torch.save(model.state_dict(), model_ckpt)
            torch.save(optimizer.state_dict(), optim_ckpt)

    plot_metrics(losses['train'], accuracies['train'], losses['test'], accuracies['test'], dataset_name)

# Run the full training pipeline when this code executes as the main module.
if __name__ == '__main__':
    main()
