# Homework 2, Step 2. CNN for CIFAR-100

In the step 2, you need to try CNN on the CIFAR-100 classification.

## 1. Prepare the dataset and the model
### Import Library

In [1]:
import os
import sys

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import time

import tqdm

### Hyperparameters
**For step 3, 4, 5,** you need to change the value of momentum, weight decay, data augmentation and batch normalization, to see the difference.

`mmt`: momentum for the optimizer. Use `0` if you do not want to use the momentum.

`wd`: weight decay for the optimizer. Use `0` if you do not want to use the weight decay.

`data_augmentation`: whether to use the data augmentation for the training.

`use_BN`: wheter to use the batch normalization for the training. 

In [2]:
lr = 0.05 # learning rate
opt = 'sgd'
batchsize = 256 # training batchsize

mmt = 0.9 # momentum for optimizer
wd = 0.001 # weight_decay for optimizer
data_augmentation = True
use_BN = False

### Load Dataset
In pytorch, you can use the following API to load the dataset.

The RGB mean and std are pre-calculated values for normalizing the data. **Do not modify them**.

In [3]:
rgb_mean = np.array([0.4914, 0.4822, 0.4465])
rgb_std = np.array([0.2023, 0.1994, 0.2010])
if data_augmentation:
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(),
        transforms.Normalize(rgb_mean, rgb_std),
    ])
else:
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(rgb_mean, rgb_std),
    ])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(rgb_mean, rgb_std),
])

trainset = torchvision.datasets.CIFAR100(
    root='../data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batchsize, shuffle=True, num_workers=1)

testset = torchvision.datasets.CIFAR100(
    root='../data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=500, shuffle=False, num_workers=1)

Files already downloaded and verified
Files already downloaded and verified


### Define the CNN Model

In [4]:
class Identity(nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        return x

def _weights_init(m):
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

def compute_feature_map_size(ks, pks):
    s = 32
    for k, pk in zip(ks, pks):
        s = s-k+1
        s = int(s/pk)
    return s

class LeNet(nn.Module):
    def __init__(self, num_classes, hidden_chns, ks, pks):
        super(LeNet, self).__init__()
        in_chns, out_chns = [3, *hidden_chns[:-1]], hidden_chns
        layers = []
        for ic, oc, k, pk in zip(in_chns, out_chns, ks, pks):
            layers.append(nn.Conv2d(ic, oc, kernel_size=k))
            if use_BN:
                layers.append(nn.BatchNorm2d(oc))
            layers.append(nn.MaxPool2d(kernel_size=pk) if pk>1 else Identity()) 
            layers.append(nn.ReLU(inplace=True))
        self.layers = nn.Sequential(*layers)

        s = compute_feature_map_size(ks, pks)
        print(f"feature size: {s}")

        self.fc = nn.Linear(hidden_chns[-1] * s * s, num_classes)

    def forward(self, x):
        x = self.layers(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

def lenet(hidden_chns, ks, pks):
    return LeNet(num_classes=100, hidden_chns=hidden_chns, ks=ks, pks=pks)

## 2. Define the model and run

### Training settings
You may modify the `num_epochs` for the fast training or the better performance.

In [5]:
num_epochs = 20  # training epochs
best_acc = 0.0  # best accuracy

### Define the model, optimizer, loss function, learning rate scheduler
For step 2, 3, 4, 5, you need to change the network structure of the CNN.

`hidden_chns`: a list of the hidden channels of the conv layer.

`ks`: a list of the kernel sizes of the conv layer.

`pks`: a list of the pooling kernel sizes of the pooling layer. Use `1` if you do not want to use a pooling layer (and it will be an identity function).

In [6]:
net = lenet(hidden_chns=[32,64], ks=[3,3], pks=[2,2])
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
net = net.to(device)  # put the model on the specified device(e.g. gpu/cpu)

# loss function
criterion = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=mmt, weight_decay=wd) # momentum

# learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

feature size: 6
cpu


### Check the total number of parameters

In [7]:
def count_params(net):
    # you can use this function to count amount of your model parameters
    import numpy as np
    total_params = 0

    for x in filter(lambda p: p.requires_grad, net.parameters()):
        total_params += np.prod(x.data.cpu().numpy().shape)
    print("Total number of params", total_params)
    print("Total layers", len(list(filter(lambda p: p.requires_grad and len(p.data.size())>1, net.parameters()))))


count_params(net)

Total number of params 249892
Total layers 3


### Training logs
The training logs are saved in the `exp` folder. You can use tensorboard to see the logs.

In [8]:
if not os.path.exists("exp"):
    os.mkdir("exp")
last_train = max([eval(s.split("-")[-1]) for s in os.listdir("exp")] + [0])
current_train = last_train + 1
save_dir = "exp/cifar100-{}".format(current_train)
os.makedirs(save_dir)
writer = SummaryWriter(save_dir)

best_acc = 0

### Training and testing

In [9]:
# Training
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    with tqdm.tqdm(enumerate(trainloader), total=len(trainloader)) as t:
        t.set_description(f"Epoch {epoch} train")
        for batch_idx, (inputs, targets) in t:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

#             writer.add_scalars(
#                 "loss",
#                 {"train": loss.item()},
#                 global_step=epoch * len(trainloader) + batch_idx,
#             )

            t.set_postfix(
                {
                    "loss": f"{train_loss/(batch_idx+1):.3f}",
                    "acc": f"{100.*correct/total:.3f}%, {correct}/{total}",
                }
            )
    
    writer.add_scalars(
        "loss", {"train": train_loss/len(trainloader)},
        global_step=epoch * len(trainloader)
    )
    writer.add_scalars(
        "accuracy", {"train": correct/total},
        global_step=epoch * len(trainloader)
    )


# validation
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        with tqdm.tqdm(enumerate(testloader), total=len(testloader)) as t:
            t.set_description(f"Epoch {epoch}  test")
            for batch_idx, (inputs, targets) in t:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = net(inputs)
                loss = criterion(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

                t.set_postfix(
                    {
                        "loss": f"{test_loss / (batch_idx + 1):.3f}",
                        "acc": f"{correct*100./total:.3f}%, {correct}/{total}",
                    }
                )

    writer.add_scalars(
        "loss", {"test": test_loss/len(testloader)},
        global_step=epoch * len(trainloader)
    )
    writer.add_scalars(
        "accuracy", {"test": correct/total},
        global_step=epoch * len(trainloader)
    )

    # Save checkpoint.
    acc = 100.0 * correct / total
    if acc > best_acc:
        print("Saving..")
        state = {
            "net": net.state_dict(),
            "acc": acc,
            "epoch": epoch,
        }
        torch.save(state, os.path.join(save_dir, "ckpt.pth"))
        best_acc = acc

    return acc


for epoch in range(0, num_epochs):
    tic = time.time()
    train(epoch)
    test_acc = test(epoch)
    t = time.time() - tic
    print(
        f"Epoch {epoch} | total time: {t:.0f}s, test acc: {test_acc:.3f}%, best acc: {best_acc:.3f}%"
    )
    scheduler.step()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Epoch 0 train: 100%|██████████| 196/196 [00:30<00:00,  6.35it/s, loss=3.791, acc=13.240%, 6620/50000]
Epoch 0  test: 100%|██████████| 20/20 [00:04<00:00,  4.52it/s, loss=3.316, acc=21.390%, 2139/10000]

Saving..
Epoch 0 | total time: 36s, test acc: 21.390%, best acc: 21.390%



Epoch 1 train: 100%|██████████| 196/196 [00:31<00:00,  6.23it/s, loss=3.178, acc=23.684%, 11842/50000]
Epoch 1  test: 100%|██████████| 20/20 [00:04<00:00,  4.43it/s, loss=2.869, acc=29.920%, 2992/10000]

Saving..
Epoch 1 | total time: 36s, test acc: 29.920%, best acc: 29.920%



Epoch 2 train: 100%|██████████| 196/196 [00:31<00:00,  6.22it/s, loss=2.925, acc=28.500%, 14250/50000]
Epoch 2  test: 100%|██████████| 20/20 [00:04<00:00,  4.53it/s, loss=2.704, acc=32.880%, 3288/10000]

Saving..
Epoch 2 | total time: 36s, test acc: 32.880%, best acc: 32.880%



Epoch 3 train: 100%|██████████| 196/196 [00:30<00:00,  6.34it/s, loss=2.803, acc=30.834%, 15417/50000]
Epoch 3  test: 100%|██████████| 20/20 [00:04<00:00,  4.87it/s, loss=2.620, acc=35.030%, 3503/10000]

Saving..
Epoch 3 | total time: 36s, test acc: 35.030%, best acc: 35.030%



Epoch 4 train: 100%|██████████| 196/196 [00:31<00:00,  6.26it/s, loss=2.705, acc=32.784%, 16392/50000]
Epoch 4  test: 100%|██████████| 20/20 [00:04<00:00,  4.22it/s, loss=2.504, acc=36.800%, 3680/10000]

Saving..
Epoch 4 | total time: 36s, test acc: 36.800%, best acc: 36.800%



Epoch 5 train: 100%|██████████| 196/196 [00:30<00:00,  6.33it/s, loss=2.639, acc=34.140%, 17070/50000]
Epoch 5  test: 100%|██████████| 20/20 [00:04<00:00,  4.24it/s, loss=2.473, acc=37.020%, 3702/10000]

Saving..
Epoch 5 | total time: 36s, test acc: 37.020%, best acc: 37.020%



Epoch 6 train: 100%|██████████| 196/196 [00:32<00:00,  6.11it/s, loss=2.586, acc=35.372%, 17686/50000]
Epoch 6  test: 100%|██████████| 20/20 [00:04<00:00,  4.24it/s, loss=2.483, acc=37.730%, 3773/10000]

Saving..
Epoch 6 | total time: 37s, test acc: 37.730%, best acc: 37.730%



Epoch 7 train: 100%|██████████| 196/196 [00:31<00:00,  6.25it/s, loss=2.547, acc=36.090%, 18045/50000]
Epoch 7  test: 100%|██████████| 20/20 [00:04<00:00,  4.08it/s, loss=2.402, acc=39.610%, 3961/10000]

Saving..
Epoch 7 | total time: 36s, test acc: 39.610%, best acc: 39.610%



Epoch 8 train: 100%|██████████| 196/196 [00:30<00:00,  6.37it/s, loss=2.517, acc=36.442%, 18221/50000]
Epoch 8  test: 100%|██████████| 20/20 [00:04<00:00,  4.95it/s, loss=2.429, acc=39.010%, 3901/10000]

Epoch 8 | total time: 35s, test acc: 39.010%, best acc: 39.610%



Epoch 9 train: 100%|██████████| 196/196 [00:31<00:00,  6.29it/s, loss=2.459, acc=37.944%, 18972/50000]
Epoch 9  test: 100%|██████████| 20/20 [00:03<00:00,  5.18it/s, loss=2.352, acc=39.910%, 3991/10000]

Saving..
Epoch 9 | total time: 35s, test acc: 39.910%, best acc: 39.910%



Epoch 10 train: 100%|██████████| 196/196 [00:31<00:00,  6.26it/s, loss=2.443, acc=38.094%, 19047/50000]
Epoch 10  test: 100%|██████████| 20/20 [00:04<00:00,  4.83it/s, loss=2.316, acc=40.950%, 4095/10000]

Saving..
Epoch 10 | total time: 36s, test acc: 40.950%, best acc: 40.950%



Epoch 11 train: 100%|██████████| 196/196 [00:31<00:00,  6.28it/s, loss=2.417, acc=38.544%, 19272/50000]
Epoch 11  test: 100%|██████████| 20/20 [00:04<00:00,  4.47it/s, loss=2.360, acc=40.010%, 4001/10000]

Epoch 11 | total time: 36s, test acc: 40.010%, best acc: 40.950%



Epoch 12 train: 100%|██████████| 196/196 [00:30<00:00,  6.41it/s, loss=2.406, acc=38.836%, 19418/50000]
Epoch 12  test: 100%|██████████| 20/20 [00:03<00:00,  5.02it/s, loss=2.272, acc=42.780%, 4278/10000]

Saving..
Epoch 12 | total time: 35s, test acc: 42.780%, best acc: 42.780%



Epoch 13 train: 100%|██████████| 196/196 [00:30<00:00,  6.35it/s, loss=2.395, acc=39.042%, 19521/50000]
Epoch 13  test: 100%|██████████| 20/20 [00:04<00:00,  4.75it/s, loss=2.268, acc=42.200%, 4220/10000]

Epoch 13 | total time: 36s, test acc: 42.200%, best acc: 42.780%



Epoch 14 train: 100%|██████████| 196/196 [00:30<00:00,  6.33it/s, loss=2.363, acc=39.850%, 19925/50000]
Epoch 14  test: 100%|██████████| 20/20 [00:03<00:00,  5.09it/s, loss=2.273, acc=42.120%, 4212/10000]

Epoch 14 | total time: 35s, test acc: 42.120%, best acc: 42.780%



Epoch 15 train: 100%|██████████| 196/196 [00:31<00:00,  6.26it/s, loss=2.339, acc=40.194%, 20097/50000]
Epoch 15  test: 100%|██████████| 20/20 [00:03<00:00,  5.10it/s, loss=2.270, acc=41.980%, 4198/10000]

Epoch 15 | total time: 36s, test acc: 41.980%, best acc: 42.780%



Epoch 16 train: 100%|██████████| 196/196 [00:31<00:00,  6.24it/s, loss=2.359, acc=40.118%, 20059/50000]
Epoch 16  test: 100%|██████████| 20/20 [00:03<00:00,  5.10it/s, loss=2.257, acc=43.050%, 4305/10000]

Saving..
Epoch 16 | total time: 36s, test acc: 43.050%, best acc: 43.050%



Epoch 17 train: 100%|██████████| 196/196 [00:31<00:00,  6.27it/s, loss=2.336, acc=40.420%, 20210/50000]
Epoch 17  test: 100%|██████████| 20/20 [00:04<00:00,  4.43it/s, loss=2.208, acc=43.820%, 4382/10000]

Saving..
Epoch 17 | total time: 36s, test acc: 43.820%, best acc: 43.820%



Epoch 18 train: 100%|██████████| 196/196 [00:30<00:00,  6.33it/s, loss=2.317, acc=40.784%, 20392/50000]
Epoch 18  test: 100%|██████████| 20/20 [00:03<00:00,  5.16it/s, loss=2.231, acc=43.070%, 4307/10000]

Epoch 18 | total time: 35s, test acc: 43.070%, best acc: 43.820%



Epoch 19 train: 100%|██████████| 196/196 [00:31<00:00,  6.29it/s, loss=2.302, acc=41.090%, 20545/50000]
Epoch 19  test: 100%|██████████| 20/20 [00:04<00:00,  4.42it/s, loss=2.174, acc=44.250%, 4425/10000]

Saving..
Epoch 19 | total time: 36s, test acc: 44.250%, best acc: 44.250%



