# Homework 3, ResNet for CIFAR-100


## 1. Prepare the dataset and the model
### Import Library

In [1]:
import os
import sys

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import time

import tqdm

### Hyperparameters
**For step 1, 2,** you may change the value of momentum, weight decay, data augmentation and batch normalization.

`mmt`: momentum for the optimizer. Use `0` if you do not want to use the momentum.

`wd`: weight decay for the optimizer. Use `0` if you do not want to use the weight decay.

`data_augmentation`: whether to use the data augmentation for the training.

`use_BN`: wheter to use the batch normalization for the training. 

In [2]:
lr = 0.1  # learning rate
opt = 'sgd'
batchsize = 256  # training batchsize

mmt = 0.9  # momentum for optimizer
wd = 5e-4  # weight_decay for optimizer
data_augmentation = True
use_BN = True

### Load Dataset
In pytorch, you can use the following API to load the dataset.

The RGB mean and std are pre-calculated values for normalizing the data. **Do not modify them**.

In [3]:
rgb_mean = np.array([0.4914, 0.4822, 0.4465])
rgb_std = np.array([0.2023, 0.1994, 0.2010])
if data_augmentation:
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(),
        transforms.Normalize(rgb_mean, rgb_std),
    ])
else:
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(rgb_mean, rgb_std),
    ])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(rgb_mean, rgb_std),
])

trainset = torchvision.datasets.CIFAR100(
    root='../data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batchsize, shuffle=True, num_workers=1)

testset = torchvision.datasets.CIFAR100(
    root='../data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=500, shuffle=False, num_workers=1)

Files already downloaded and verified
Files already downloaded and verified


### Define the CNN Model

In [4]:
class Identity(nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        return x

class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)

def compute_feature_map_size(ks, pks):
    s = 32
    for k, pk in zip(ks, pks):
        s = s-k+3
        s = int(s/pk)
    return s

class LeNet(nn.Module):
    def __init__(self, num_classes, hidden_chns, ks, pks):
        super(LeNet, self).__init__()
        in_chns, out_chns = [3, *hidden_chns[:-1]], hidden_chns
        layers = []
        for ic, oc, k, pk in zip(in_chns, out_chns, ks, pks):
            layers.append(nn.Conv2d(ic, oc, kernel_size=k, stride=1, padding=1))
            if use_BN:
                layers.append(nn.BatchNorm2d(oc))
            layers.append(nn.MaxPool2d(kernel_size=pk, stride=2) if pk>1 else Identity()) 
            layers.append(nn.ReLU(inplace=True))
        self.layers = nn.Sequential(*layers)

        s = compute_feature_map_size(ks, pks)
        print(f"feature size: {s}")

#         self.fc = nn.Linear(hidden_chns[-1] * s * s, num_classes)
        self.fc = nn.Sequential(
            nn.Linear(hidden_chns[-1] * s * s, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(128, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.layers(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes) if use_BN else Identity()
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes) if use_BN else Identity()

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            """
            For CIFAR10 ResNet paper uses option A.
            """
            self.shortcut = LambdaLayer(lambda x:
                                        F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, channels, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = channels[0]

        self.conv1 = nn.Conv2d(3, channels[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels[0]) if use_BN else Identity()
        self.layer1 = self._make_layer(block, channels[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, channels[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, channels[2], num_blocks[2], stride=2)
        self.linear = nn.Linear(channels[2], num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def lenet(hidden_chns, ks, pks):
    return LeNet(num_classes=100, hidden_chns=hidden_chns, ks=ks, pks=pks)


def resnet20(channels):
    return ResNet(BasicBlock, [3, 3, 3], channels, num_classes=100)


def resnet32(channels):
    return ResNet(BasicBlock, [5, 5, 5], channels, num_classes=100)


def resnet44(channels):
    return ResNet(BasicBlock, [7, 7, 7], channels, num_classes=100)


def resnet56(channels):
    return ResNet(BasicBlock, [9, 9, 9], channels, num_classes=100)


## 2. Define the model and run

### Training settings
You may modify the `num_epochs` for the fast training or the better performance.

In [5]:
num_epochs = 50  # training epochs
best_acc = 0.0  # best accuracy

### Define the model, optimizer, loss function, learning rate scheduler
For step 1, 2, you need to change the network structure of the CNN.

For CNN:

  - `hidden_chns`: a list of the hidden channels of the conv layer.

  - `ks`: a list of the kernel sizes of the conv layer.

  - `pks`: a list of the pooling kernel sizes of the pooling layer. Use `1` if you do not want to use a pooling layer (and it will be an identity function).

For ResNet:

  - You can use resnet20, 32, 44 and 56 (which are the default settings for the original ResNet).
 
  - `channels`: a list of the hidden channels of the resblock.

In [6]:
## For Step 1
net = lenet(
    hidden_chns=[32],
    ks=[3],
    pks=[2]
)

## For Step 2
# net = resnet20(channels=[16, 32, 64])

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
net = net.to(device)  # put the model on the specified device(e.g. gpu/cpu)

# loss function
criterion = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=mmt, weight_decay=wd) # momentum

# learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

feature size: 16
cuda


### Check the total number of parameters

In [7]:
def count_params(net):
    # you can use this function to count amount of your model parameters
    import numpy as np
    total_params = 0

    for x in filter(lambda p: p.requires_grad, net.parameters()):
        total_params += np.prod(x.data.cpu().numpy().shape)
    print("Total number of params", total_params)
    print("Total layers", len(list(filter(lambda p: p.requires_grad and len(p.data.size())>1, net.parameters()))))


count_params(net)

Total number of params 1079076
Total layers 4


### Training logs
The training logs are saved in the `exp` folder. You can use tensorboard to see the logs.

In [8]:
if not os.path.exists("exp"):
    os.mkdir("exp")
last_train = max([eval(s.split("-")[-1]) for s in os.listdir("exp")] + [0])
current_train = last_train + 1
save_dir = "exp/cifar100-{}".format(current_train)
os.makedirs(save_dir)
writer = SummaryWriter(save_dir)

best_acc = 0

### Training and testing

In [9]:
# Training
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    with tqdm.tqdm(enumerate(trainloader), total=len(trainloader)) as t:
        t.set_description(f"Epoch {epoch} train")
        for batch_idx, (inputs, targets) in t:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            t.set_postfix(
                {
                    "loss": f"{train_loss/(batch_idx+1):.3f}",
                    "acc": f"{100.*correct/total:.3f}%, {correct}/{total}",
                }
            )

    writer.add_scalars(
        "loss", {"train": train_loss / len(trainloader)},
        global_step=epoch * len(trainloader)
    )
    writer.add_scalars(
        "accuracy", {"train": correct/total},
        global_step=epoch
    )


# validation
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        with tqdm.tqdm(enumerate(testloader), total=len(testloader)) as t:
            t.set_description(f"Epoch {epoch}  test")
            for batch_idx, (inputs, targets) in t:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = net(inputs)
                loss = criterion(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

                t.set_postfix(
                    {
                        "loss": f"{test_loss / (batch_idx + 1):.3f}",
                        "acc": f"{correct*100./total:.3f}%, {correct}/{total}",
                    }
                )

    writer.add_scalars(
        "loss", {"test": test_loss/len(testloader)},
        global_step=epoch * len(trainloader)
    )
    writer.add_scalars(
        "accuracy", {"test": correct/total},
        global_step=epoch
    )

    # Save checkpoint.
    acc = 100.0 * correct / total
    if acc > best_acc:
        print("Saving..")
        state = {
            "net": net.state_dict(),
            "acc": acc,
            "epoch": epoch,
        }
        torch.save(state, os.path.join(save_dir, "ckpt.pth"))
        best_acc = acc

    return acc


for epoch in range(0, num_epochs):
    tic = time.time()
    train(epoch)
    test_acc = test(epoch)
    t = time.time() - tic
    print(
        f"Epoch {epoch} | total time: {t:.0f}s, test acc: {test_acc:.3f}%, best acc: {best_acc:.3f}%"
    )
    scheduler.step()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Epoch 0 train: 100%|██████████| 196/196 [00:16<00:00, 11.91it/s, loss=4.448, acc=2.670%, 1335/50000]
Epoch 0  test: 100%|██████████| 20/20 [00:02<00:00,  6.73it/s, loss=4.233, acc=4.700%, 470/10000]

Saving..
Epoch 0 | total time: 20s, test acc: 4.700%, best acc: 4.700%



Epoch 1 train: 100%|██████████| 196/196 [00:16<00:00, 12.24it/s, loss=4.304, acc=3.702%, 1851/50000]
Epoch 1  test: 100%|██████████| 20/20 [00:02<00:00,  8.40it/s, loss=4.077, acc=6.610%, 661/10000]

Saving..
Epoch 1 | total time: 19s, test acc: 6.610%, best acc: 6.610%



Epoch 2 train: 100%|██████████| 196/196 [00:16<00:00, 11.63it/s, loss=4.196, acc=4.450%, 2225/50000]
Epoch 2  test: 100%|██████████| 20/20 [00:03<00:00,  6.47it/s, loss=4.019, acc=8.160%, 816/10000]

Saving..
Epoch 2 | total time: 20s, test acc: 8.160%, best acc: 8.160%



Epoch 3 train: 100%|██████████| 196/196 [00:16<00:00, 11.67it/s, loss=4.140, acc=5.082%, 2541/50000]
Epoch 3  test: 100%|██████████| 20/20 [00:02<00:00,  6.72it/s, loss=3.981, acc=8.470%, 847/10000]

Saving..
Epoch 3 | total time: 20s, test acc: 8.470%, best acc: 8.470%



Epoch 4 train: 100%|██████████| 196/196 [00:16<00:00, 11.74it/s, loss=4.108, acc=5.486%, 2743/50000]
Epoch 4  test: 100%|██████████| 20/20 [00:02<00:00,  7.43it/s, loss=3.951, acc=8.730%, 873/10000]

Saving..
Epoch 4 | total time: 20s, test acc: 8.730%, best acc: 8.730%



Epoch 5 train: 100%|██████████| 196/196 [00:16<00:00, 11.86it/s, loss=4.079, acc=5.884%, 2942/50000]
Epoch 5  test: 100%|██████████| 20/20 [00:03<00:00,  6.45it/s, loss=3.919, acc=9.120%, 912/10000]

Saving..
Epoch 5 | total time: 20s, test acc: 9.120%, best acc: 9.120%



Epoch 6 train: 100%|██████████| 196/196 [00:16<00:00, 12.13it/s, loss=4.055, acc=6.398%, 3199/50000]
Epoch 6  test: 100%|██████████| 20/20 [00:03<00:00,  6.24it/s, loss=3.874, acc=10.050%, 1005/10000]

Saving..
Epoch 6 | total time: 20s, test acc: 10.050%, best acc: 10.050%



Epoch 7 train: 100%|██████████| 196/196 [00:16<00:00, 11.67it/s, loss=4.018, acc=6.742%, 3371/50000]
Epoch 7  test: 100%|██████████| 20/20 [00:02<00:00,  8.16it/s, loss=3.787, acc=10.910%, 1091/10000]

Saving..
Epoch 7 | total time: 20s, test acc: 10.910%, best acc: 10.910%



Epoch 8 train: 100%|██████████| 196/196 [00:16<00:00, 11.73it/s, loss=4.002, acc=7.080%, 3540/50000]
Epoch 8  test: 100%|██████████| 20/20 [00:02<00:00,  6.80it/s, loss=3.785, acc=11.930%, 1193/10000]

Saving..
Epoch 8 | total time: 20s, test acc: 11.930%, best acc: 11.930%



Epoch 9 train: 100%|██████████| 196/196 [00:16<00:00, 11.89it/s, loss=3.984, acc=7.366%, 3683/50000]
Epoch 9  test: 100%|██████████| 20/20 [00:02<00:00,  6.71it/s, loss=3.758, acc=11.850%, 1185/10000]

Epoch 9 | total time: 20s, test acc: 11.850%, best acc: 11.930%



Epoch 10 train: 100%|██████████| 196/196 [00:16<00:00, 11.61it/s, loss=3.966, acc=7.690%, 3845/50000]
Epoch 10  test: 100%|██████████| 20/20 [00:03<00:00,  6.63it/s, loss=3.715, acc=14.410%, 1441/10000]

Saving..
Epoch 10 | total time: 20s, test acc: 14.410%, best acc: 14.410%



Epoch 11 train: 100%|██████████| 196/196 [00:16<00:00, 11.65it/s, loss=3.939, acc=7.966%, 3983/50000]
Epoch 11  test: 100%|██████████| 20/20 [00:03<00:00,  6.32it/s, loss=3.757, acc=12.210%, 1221/10000]

Epoch 11 | total time: 20s, test acc: 12.210%, best acc: 14.410%



Epoch 12 train: 100%|██████████| 196/196 [00:16<00:00, 11.93it/s, loss=3.946, acc=8.014%, 4007/50000]
Epoch 12  test: 100%|██████████| 20/20 [00:03<00:00,  6.57it/s, loss=3.753, acc=12.560%, 1256/10000]

Epoch 12 | total time: 20s, test acc: 12.560%, best acc: 14.410%



Epoch 13 train: 100%|██████████| 196/196 [00:16<00:00, 11.65it/s, loss=3.935, acc=8.022%, 4011/50000]
Epoch 13  test: 100%|██████████| 20/20 [00:03<00:00,  6.63it/s, loss=3.696, acc=13.720%, 1372/10000]

Epoch 13 | total time: 20s, test acc: 13.720%, best acc: 14.410%



Epoch 14 train: 100%|██████████| 196/196 [00:16<00:00, 11.63it/s, loss=3.933, acc=8.300%, 4150/50000]
Epoch 14  test: 100%|██████████| 20/20 [00:03<00:00,  6.52it/s, loss=3.753, acc=11.900%, 1190/10000]

Epoch 14 | total time: 20s, test acc: 11.900%, best acc: 14.410%



Epoch 15 train: 100%|██████████| 196/196 [00:16<00:00, 11.73it/s, loss=3.937, acc=8.262%, 4131/50000]
Epoch 15  test: 100%|██████████| 20/20 [00:03<00:00,  6.41it/s, loss=3.706, acc=13.620%, 1362/10000]

Epoch 15 | total time: 20s, test acc: 13.620%, best acc: 14.410%



Epoch 16 train: 100%|██████████| 196/196 [00:16<00:00, 11.70it/s, loss=3.927, acc=8.606%, 4303/50000]
Epoch 16  test: 100%|██████████| 20/20 [00:02<00:00,  7.66it/s, loss=3.679, acc=13.430%, 1343/10000]

Epoch 16 | total time: 20s, test acc: 13.430%, best acc: 14.410%



Epoch 17 train: 100%|██████████| 196/196 [00:16<00:00, 11.88it/s, loss=3.914, acc=8.854%, 4427/50000]
Epoch 17  test: 100%|██████████| 20/20 [00:03<00:00,  6.45it/s, loss=3.631, acc=14.800%, 1480/10000]

Saving..
Epoch 17 | total time: 20s, test acc: 14.800%, best acc: 14.800%



Epoch 18 train: 100%|██████████| 196/196 [00:16<00:00, 11.69it/s, loss=3.904, acc=8.656%, 4328/50000]
Epoch 18  test: 100%|██████████| 20/20 [00:03<00:00,  6.44it/s, loss=3.657, acc=14.670%, 1467/10000]

Epoch 18 | total time: 20s, test acc: 14.670%, best acc: 14.800%



Epoch 19 train: 100%|██████████| 196/196 [00:16<00:00, 11.86it/s, loss=3.896, acc=8.958%, 4479/50000]
Epoch 19  test: 100%|██████████| 20/20 [00:03<00:00,  6.48it/s, loss=3.687, acc=13.680%, 1368/10000]

Epoch 19 | total time: 20s, test acc: 13.680%, best acc: 14.800%



Epoch 20 train: 100%|██████████| 196/196 [00:16<00:00, 11.78it/s, loss=3.910, acc=8.844%, 4422/50000]
Epoch 20  test: 100%|██████████| 20/20 [00:03<00:00,  6.14it/s, loss=3.652, acc=15.070%, 1507/10000]

Saving..
Epoch 20 | total time: 20s, test acc: 15.070%, best acc: 15.070%



Epoch 21 train: 100%|██████████| 196/196 [00:16<00:00, 11.64it/s, loss=3.873, acc=9.234%, 4617/50000]
Epoch 21  test: 100%|██████████| 20/20 [00:02<00:00,  7.37it/s, loss=3.663, acc=13.960%, 1396/10000]

Epoch 21 | total time: 20s, test acc: 13.960%, best acc: 15.070%



Epoch 22 train: 100%|██████████| 196/196 [00:16<00:00, 11.62it/s, loss=3.879, acc=9.212%, 4606/50000]
Epoch 22  test: 100%|██████████| 20/20 [00:03<00:00,  6.61it/s, loss=3.576, acc=16.350%, 1635/10000]

Saving..
Epoch 22 | total time: 20s, test acc: 16.350%, best acc: 16.350%



Epoch 23 train: 100%|██████████| 196/196 [00:16<00:00, 11.69it/s, loss=3.878, acc=9.532%, 4766/50000]
Epoch 23  test: 100%|██████████| 20/20 [00:03<00:00,  6.28it/s, loss=3.663, acc=14.150%, 1415/10000]

Epoch 23 | total time: 20s, test acc: 14.150%, best acc: 16.350%



Epoch 24 train: 100%|██████████| 196/196 [00:16<00:00, 12.01it/s, loss=3.864, acc=9.220%, 4610/50000]
Epoch 24  test: 100%|██████████| 20/20 [00:03<00:00,  6.55it/s, loss=3.632, acc=14.260%, 1426/10000]

Epoch 24 | total time: 20s, test acc: 14.260%, best acc: 16.350%



Epoch 25 train: 100%|██████████| 196/196 [00:16<00:00, 11.63it/s, loss=3.865, acc=9.506%, 4753/50000]
Epoch 25  test: 100%|██████████| 20/20 [00:03<00:00,  6.63it/s, loss=3.584, acc=15.350%, 1535/10000]

Epoch 25 | total time: 20s, test acc: 15.350%, best acc: 16.350%



Epoch 26 train: 100%|██████████| 196/196 [00:16<00:00, 11.96it/s, loss=3.854, acc=9.812%, 4906/50000]
Epoch 26  test: 100%|██████████| 20/20 [00:03<00:00,  6.66it/s, loss=3.629, acc=13.920%, 1392/10000]

Epoch 26 | total time: 20s, test acc: 13.920%, best acc: 16.350%



Epoch 27 train: 100%|██████████| 196/196 [00:16<00:00, 11.64it/s, loss=3.845, acc=9.748%, 4874/50000]
Epoch 27  test: 100%|██████████| 20/20 [00:03<00:00,  6.58it/s, loss=3.571, acc=16.440%, 1644/10000]

Saving..
Epoch 27 | total time: 20s, test acc: 16.440%, best acc: 16.440%



Epoch 28 train: 100%|██████████| 196/196 [00:16<00:00, 12.02it/s, loss=3.853, acc=9.840%, 4920/50000]
Epoch 28  test: 100%|██████████| 20/20 [00:03<00:00,  6.32it/s, loss=3.530, acc=16.370%, 1637/10000]

Epoch 28 | total time: 20s, test acc: 16.370%, best acc: 16.440%



Epoch 29 train: 100%|██████████| 196/196 [00:16<00:00, 12.14it/s, loss=3.845, acc=9.668%, 4834/50000]
Epoch 29  test: 100%|██████████| 20/20 [00:03<00:00,  6.58it/s, loss=3.527, acc=16.480%, 1648/10000]

Saving..
Epoch 29 | total time: 20s, test acc: 16.480%, best acc: 16.480%



Epoch 30 train: 100%|██████████| 196/196 [00:16<00:00, 12.14it/s, loss=3.834, acc=10.156%, 5078/50000]
Epoch 30  test: 100%|██████████| 20/20 [00:03<00:00,  6.12it/s, loss=3.468, acc=17.160%, 1716/10000]

Saving..
Epoch 30 | total time: 20s, test acc: 17.160%, best acc: 17.160%



Epoch 31 train: 100%|██████████| 196/196 [00:16<00:00, 11.72it/s, loss=3.848, acc=9.664%, 4832/50000]
Epoch 31  test: 100%|██████████| 20/20 [00:03<00:00,  6.61it/s, loss=3.521, acc=17.400%, 1740/10000]

Saving..
Epoch 31 | total time: 20s, test acc: 17.400%, best acc: 17.400%



Epoch 32 train: 100%|██████████| 196/196 [00:16<00:00, 11.67it/s, loss=3.825, acc=10.080%, 5040/50000]
Epoch 32  test: 100%|██████████| 20/20 [00:02<00:00,  7.99it/s, loss=3.515, acc=17.030%, 1703/10000]

Epoch 32 | total time: 20s, test acc: 17.030%, best acc: 17.400%



Epoch 33 train: 100%|██████████| 196/196 [00:16<00:00, 12.16it/s, loss=3.842, acc=10.050%, 5025/50000]
Epoch 33  test: 100%|██████████| 20/20 [00:03<00:00,  6.60it/s, loss=3.529, acc=16.680%, 1668/10000]

Epoch 33 | total time: 20s, test acc: 16.680%, best acc: 17.400%



Epoch 34 train: 100%|██████████| 196/196 [00:16<00:00, 11.88it/s, loss=3.821, acc=10.454%, 5227/50000]
Epoch 34  test: 100%|██████████| 20/20 [00:03<00:00,  6.52it/s, loss=3.505, acc=17.140%, 1714/10000]

Epoch 34 | total time: 20s, test acc: 17.140%, best acc: 17.400%



Epoch 35 train: 100%|██████████| 196/196 [00:16<00:00, 11.86it/s, loss=3.807, acc=10.632%, 5316/50000]
Epoch 35  test: 100%|██████████| 20/20 [00:02<00:00,  6.68it/s, loss=3.605, acc=15.090%, 1509/10000]

Epoch 35 | total time: 20s, test acc: 15.090%, best acc: 17.400%



Epoch 36 train: 100%|██████████| 196/196 [00:16<00:00, 11.69it/s, loss=3.823, acc=10.482%, 5241/50000]
Epoch 36  test: 100%|██████████| 20/20 [00:03<00:00,  6.63it/s, loss=3.504, acc=16.770%, 1677/10000]

Epoch 36 | total time: 20s, test acc: 16.770%, best acc: 17.400%



Epoch 37 train: 100%|██████████| 196/196 [00:16<00:00, 11.75it/s, loss=3.803, acc=10.620%, 5310/50000]
Epoch 37  test: 100%|██████████| 20/20 [00:03<00:00,  6.62it/s, loss=3.480, acc=17.340%, 1734/10000]

Epoch 37 | total time: 20s, test acc: 17.340%, best acc: 17.400%



Epoch 38 train: 100%|██████████| 196/196 [00:16<00:00, 11.66it/s, loss=3.801, acc=10.516%, 5258/50000]
Epoch 38  test: 100%|██████████| 20/20 [00:02<00:00,  7.46it/s, loss=3.464, acc=17.520%, 1752/10000]

Saving..
Epoch 38 | total time: 20s, test acc: 17.520%, best acc: 17.520%



Epoch 39 train: 100%|██████████| 196/196 [00:16<00:00, 12.12it/s, loss=3.803, acc=10.634%, 5317/50000]
Epoch 39  test: 100%|██████████| 20/20 [00:03<00:00,  6.40it/s, loss=3.490, acc=16.660%, 1666/10000]

Epoch 39 | total time: 20s, test acc: 16.660%, best acc: 17.520%



Epoch 40 train: 100%|██████████| 196/196 [00:16<00:00, 11.67it/s, loss=3.797, acc=10.518%, 5259/50000]
Epoch 40  test: 100%|██████████| 20/20 [00:02<00:00,  6.95it/s, loss=3.531, acc=18.210%, 1821/10000]

Saving..
Epoch 40 | total time: 20s, test acc: 18.210%, best acc: 18.210%



Epoch 41 train: 100%|██████████| 196/196 [00:16<00:00, 11.82it/s, loss=3.793, acc=10.700%, 5350/50000]
Epoch 41  test: 100%|██████████| 20/20 [00:03<00:00,  6.59it/s, loss=3.459, acc=18.510%, 1851/10000]

Saving..
Epoch 41 | total time: 20s, test acc: 18.510%, best acc: 18.510%



Epoch 42 train: 100%|██████████| 196/196 [00:16<00:00, 11.89it/s, loss=3.788, acc=10.796%, 5398/50000]
Epoch 42  test: 100%|██████████| 20/20 [00:03<00:00,  6.50it/s, loss=3.545, acc=17.060%, 1706/10000]

Epoch 42 | total time: 20s, test acc: 17.060%, best acc: 18.510%



Epoch 43 train: 100%|██████████| 196/196 [00:16<00:00, 11.77it/s, loss=3.775, acc=10.654%, 5327/50000]
Epoch 43  test: 100%|██████████| 20/20 [00:02<00:00,  6.73it/s, loss=3.507, acc=17.690%, 1769/10000]

Epoch 43 | total time: 20s, test acc: 17.690%, best acc: 18.510%



Epoch 44 train: 100%|██████████| 196/196 [00:16<00:00, 11.64it/s, loss=3.783, acc=10.932%, 5466/50000]
Epoch 44  test: 100%|██████████| 20/20 [00:03<00:00,  6.54it/s, loss=3.466, acc=18.400%, 1840/10000]

Epoch 44 | total time: 20s, test acc: 18.400%, best acc: 18.510%



Epoch 45 train: 100%|██████████| 196/196 [00:17<00:00, 11.53it/s, loss=3.772, acc=11.020%, 5510/50000]
Epoch 45  test: 100%|██████████| 20/20 [00:03<00:00,  6.56it/s, loss=3.449, acc=17.940%, 1794/10000]

Epoch 45 | total time: 20s, test acc: 17.940%, best acc: 18.510%



Epoch 46 train: 100%|██████████| 196/196 [00:16<00:00, 11.83it/s, loss=3.778, acc=11.080%, 5540/50000]
Epoch 46  test: 100%|██████████| 20/20 [00:02<00:00,  6.67it/s, loss=3.487, acc=18.700%, 1870/10000]

Saving..
Epoch 46 | total time: 20s, test acc: 18.700%, best acc: 18.700%



Epoch 47 train: 100%|██████████| 196/196 [00:16<00:00, 12.08it/s, loss=3.783, acc=10.854%, 5427/50000]
Epoch 47  test: 100%|██████████| 20/20 [00:03<00:00,  6.47it/s, loss=3.435, acc=17.900%, 1790/10000]

Epoch 47 | total time: 20s, test acc: 17.900%, best acc: 18.700%



Epoch 48 train: 100%|██████████| 196/196 [00:16<00:00, 12.12it/s, loss=3.784, acc=10.940%, 5470/50000]
Epoch 48  test: 100%|██████████| 20/20 [00:03<00:00,  6.57it/s, loss=3.451, acc=19.020%, 1902/10000]

Saving..
Epoch 48 | total time: 20s, test acc: 19.020%, best acc: 19.020%



Epoch 49 train: 100%|██████████| 196/196 [00:16<00:00, 11.73it/s, loss=3.777, acc=10.798%, 5399/50000]
Epoch 49  test: 100%|██████████| 20/20 [00:02<00:00,  6.69it/s, loss=3.445, acc=18.390%, 1839/10000]

Epoch 49 | total time: 20s, test acc: 18.390%, best acc: 19.020%



