# Homework 3, ResNet for CIFAR-100


## 1. Prepare the dataset and the model
### Import Library

In [1]:
import os
import sys

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import time

import tqdm

### Hyperparameters
**For step 1, 2,** you may change the value of momentum, weight decay, data augmentation and batch normalization.

`mmt`: momentum for the optimizer. Use `0` if you do not want to use the momentum.

`wd`: weight decay for the optimizer. Use `0` if you do not want to use the weight decay.

`data_augmentation`: whether to use the data augmentation for the training.

`use_BN`: wheter to use the batch normalization for the training. 

In [2]:
lr = 0.1  # learning rate
opt = 'sgd'
batchsize = 256  # training batchsize

mmt = 0.9  # momentum for optimizer
wd = 5e-4  # weight_decay for optimizer
data_augmentation = True
use_BN = True

### Load Dataset
In pytorch, you can use the following API to load the dataset.

The RGB mean and std are pre-calculated values for normalizing the data. **Do not modify them**.

In [3]:
rgb_mean = np.array([0.4914, 0.4822, 0.4465])
rgb_std = np.array([0.2023, 0.1994, 0.2010])
if data_augmentation:
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(),
        transforms.Normalize(rgb_mean, rgb_std),
    ])
else:
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(rgb_mean, rgb_std),
    ])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(rgb_mean, rgb_std),
])

trainset = torchvision.datasets.CIFAR100(
    root='../data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batchsize, shuffle=True, num_workers=1)

testset = torchvision.datasets.CIFAR100(
    root='../data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=500, shuffle=False, num_workers=1)

Files already downloaded and verified
Files already downloaded and verified


### Define the CNN Model

In [4]:
class Identity(nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        return x

class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)

def compute_feature_map_size(ks, pks):
    s = 32
    for k, pk in zip(ks, pks):
        s = s-k+3
        s = int(s/pk)
    return s

class LeNet(nn.Module):
    def __init__(self, num_classes, hidden_chns, ks, pks):
        super(LeNet, self).__init__()
        in_chns, out_chns = [3, *hidden_chns[:-1]], hidden_chns
        layers = []
        for ic, oc, k, pk in zip(in_chns, out_chns, ks, pks):
            layers.append(nn.Conv2d(ic, oc, kernel_size=k, stride=1, padding=1))
            if use_BN:
                layers.append(nn.BatchNorm2d(oc))
            layers.append(nn.MaxPool2d(kernel_size=pk, stride=2) if pk>1 else Identity()) 
            layers.append(nn.ReLU(inplace=True))
        self.layers = nn.Sequential(*layers)

        s = compute_feature_map_size(ks, pks)
        print(f"feature size: {s}")

#         self.fc = nn.Linear(hidden_chns[-1] * s * s, num_classes)
        self.fc = nn.Sequential(
            nn.Linear(hidden_chns[-1] * s * s, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(128, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.layers(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes) if use_BN else Identity()
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes) if use_BN else Identity()

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            """
            For CIFAR10 ResNet paper uses option A.
            """
            self.shortcut = LambdaLayer(lambda x:
                                        F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, channels, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = channels[0]

        self.conv1 = nn.Conv2d(3, channels[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels[0]) if use_BN else Identity()
        self.layer1 = self._make_layer(block, channels[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, channels[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, channels[2], num_blocks[2], stride=2)
        self.linear = nn.Linear(channels[2], num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def lenet(hidden_chns, ks, pks):
    return LeNet(num_classes=100, hidden_chns=hidden_chns, ks=ks, pks=pks)


def resnet20(channels):
    return ResNet(BasicBlock, [3, 3, 3], channels, num_classes=100)


def resnet32(channels):
    return ResNet(BasicBlock, [5, 5, 5], channels, num_classes=100)


def resnet44(channels):
    return ResNet(BasicBlock, [7, 7, 7], channels, num_classes=100)


def resnet56(channels):
    return ResNet(BasicBlock, [9, 9, 9], channels, num_classes=100)


## 2. Define the model and run

### Training settings
You may modify the `num_epochs` for the fast training or the better performance.

In [5]:
num_epochs = 50  # training epochs
best_acc = 0.0  # best accuracy

### Define the model, optimizer, loss function, learning rate scheduler
For step 1, 2, you need to change the network structure of the CNN.

For CNN:

  - `hidden_chns`: a list of the hidden channels of the conv layer.

  - `ks`: a list of the kernel sizes of the conv layer.

  - `pks`: a list of the pooling kernel sizes of the pooling layer. Use `1` if you do not want to use a pooling layer (and it will be an identity function).

For ResNet:

  - You can use resnet20, 32, 44 and 56 (which are the default settings for the original ResNet).
 
  - `channels`: a list of the hidden channels of the resblock.

In [6]:
## For Step 1
net = lenet(
    hidden_chns=[32] * 10,
    ks=[3] * 10,
    pks=[1, 2] * 5
)

## For Step 2
# net = resnet20(channels=[16, 32, 64])

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
net = net.to(device)  # put the model on the specified device(e.g. gpu/cpu)

# loss function
criterion = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=mmt, weight_decay=wd) # momentum

# learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

feature size: 1
cuda


### Check the total number of parameters

In [7]:
def count_params(net):
    # you can use this function to count amount of your model parameters
    import numpy as np
    total_params = 0

    for x in filter(lambda p: p.requires_grad, net.parameters()):
        total_params += np.prod(x.data.cpu().numpy().shape)
    print("Total number of params", total_params)
    print("Total layers", len(list(filter(lambda p: p.requires_grad and len(p.data.size())>1, net.parameters()))))


count_params(net)

Total number of params 118404
Total layers 13


### Training logs
The training logs are saved in the `exp` folder. You can use tensorboard to see the logs.

In [8]:
if not os.path.exists("exp"):
    os.mkdir("exp")
last_train = max([eval(s.split("-")[-1]) for s in os.listdir("exp")] + [0])
current_train = last_train + 1
save_dir = "exp/cifar100-{}".format(current_train)
os.makedirs(save_dir)
writer = SummaryWriter(save_dir)

best_acc = 0

### Training and testing

In [9]:
# Training
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    with tqdm.tqdm(enumerate(trainloader), total=len(trainloader)) as t:
        t.set_description(f"Epoch {epoch} train")
        for batch_idx, (inputs, targets) in t:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            t.set_postfix(
                {
                    "loss": f"{train_loss/(batch_idx+1):.3f}",
                    "acc": f"{100.*correct/total:.3f}%, {correct}/{total}",
                }
            )

    writer.add_scalars(
        "loss", {"train": train_loss / len(trainloader)},
        global_step=epoch * len(trainloader)
    )
    writer.add_scalars(
        "accuracy", {"train": correct/total},
        global_step=epoch
    )


# validation
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        with tqdm.tqdm(enumerate(testloader), total=len(testloader)) as t:
            t.set_description(f"Epoch {epoch}  test")
            for batch_idx, (inputs, targets) in t:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = net(inputs)
                loss = criterion(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

                t.set_postfix(
                    {
                        "loss": f"{test_loss / (batch_idx + 1):.3f}",
                        "acc": f"{correct*100./total:.3f}%, {correct}/{total}",
                    }
                )

    writer.add_scalars(
        "loss", {"test": test_loss/len(testloader)},
        global_step=epoch * len(trainloader)
    )
    writer.add_scalars(
        "accuracy", {"test": correct/total},
        global_step=epoch
    )

    # Save checkpoint.
    acc = 100.0 * correct / total
    if acc > best_acc:
        print("Saving..")
        state = {
            "net": net.state_dict(),
            "acc": acc,
            "epoch": epoch,
        }
        torch.save(state, os.path.join(save_dir, "ckpt.pth"))
        best_acc = acc

    return acc


for epoch in range(0, num_epochs):
    tic = time.time()
    train(epoch)
    test_acc = test(epoch)
    t = time.time() - tic
    print(
        f"Epoch {epoch} | total time: {t:.0f}s, test acc: {test_acc:.3f}%, best acc: {best_acc:.3f}%"
    )
    scheduler.step()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Epoch 0 train: 100%|██████████| 196/196 [00:36<00:00,  5.43it/s, loss=4.430, acc=2.544%, 1272/50000]
Epoch 0  test: 100%|██████████| 20/20 [00:05<00:00,  3.59it/s, loss=4.233, acc=3.470%, 347/10000]

Saving..
Epoch 0 | total time: 43s, test acc: 3.470%, best acc: 3.470%



Epoch 1 train: 100%|██████████| 196/196 [00:35<00:00,  5.50it/s, loss=4.147, acc=4.418%, 2209/50000]
Epoch 1  test: 100%|██████████| 20/20 [00:05<00:00,  3.73it/s, loss=4.215, acc=5.370%, 537/10000]

Saving..
Epoch 1 | total time: 41s, test acc: 5.370%, best acc: 5.370%



Epoch 2 train: 100%|██████████| 196/196 [00:35<00:00,  5.56it/s, loss=3.981, acc=5.876%, 2938/50000]
Epoch 2  test: 100%|██████████| 20/20 [00:05<00:00,  3.56it/s, loss=3.898, acc=7.180%, 718/10000]

Saving..
Epoch 2 | total time: 42s, test acc: 7.180%, best acc: 7.180%



Epoch 3 train: 100%|██████████| 196/196 [00:35<00:00,  5.53it/s, loss=3.874, acc=7.512%, 3756/50000]
Epoch 3  test: 100%|██████████| 20/20 [00:05<00:00,  3.92it/s, loss=3.778, acc=9.020%, 902/10000]

Saving..
Epoch 3 | total time: 41s, test acc: 9.020%, best acc: 9.020%



Epoch 4 train: 100%|██████████| 196/196 [00:35<00:00,  5.49it/s, loss=3.747, acc=9.102%, 4551/50000]
Epoch 4  test: 100%|██████████| 20/20 [00:05<00:00,  3.88it/s, loss=3.598, acc=11.270%, 1127/10000]

Saving..
Epoch 4 | total time: 41s, test acc: 11.270%, best acc: 11.270%



Epoch 5 train: 100%|██████████| 196/196 [00:35<00:00,  5.54it/s, loss=3.631, acc=10.656%, 5328/50000]
Epoch 5  test: 100%|██████████| 20/20 [00:05<00:00,  4.00it/s, loss=3.654, acc=10.490%, 1049/10000]

Epoch 5 | total time: 41s, test acc: 10.490%, best acc: 11.270%



Epoch 6 train: 100%|██████████| 196/196 [00:35<00:00,  5.57it/s, loss=3.516, acc=12.480%, 6240/50000]
Epoch 6  test: 100%|██████████| 20/20 [00:05<00:00,  3.60it/s, loss=3.494, acc=14.390%, 1439/10000]

Saving..
Epoch 6 | total time: 42s, test acc: 14.390%, best acc: 14.390%



Epoch 7 train: 100%|██████████| 196/196 [00:36<00:00,  5.40it/s, loss=3.410, acc=14.016%, 7008/50000]
Epoch 7  test: 100%|██████████| 20/20 [00:05<00:00,  3.73it/s, loss=3.354, acc=16.140%, 1614/10000]


Saving..
Epoch 7 | total time: 42s, test acc: 16.140%, best acc: 16.140%


Epoch 8 train: 100%|██████████| 196/196 [00:35<00:00,  5.49it/s, loss=3.317, acc=15.646%, 7823/50000]
Epoch 8  test: 100%|██████████| 20/20 [00:05<00:00,  3.65it/s, loss=3.316, acc=16.650%, 1665/10000]

Saving..
Epoch 8 | total time: 42s, test acc: 16.650%, best acc: 16.650%



Epoch 9 train: 100%|██████████| 196/196 [00:36<00:00,  5.42it/s, loss=3.249, acc=16.726%, 8363/50000]
Epoch 9  test: 100%|██████████| 20/20 [00:05<00:00,  3.91it/s, loss=3.313, acc=16.760%, 1676/10000]

Saving..
Epoch 9 | total time: 42s, test acc: 16.760%, best acc: 16.760%



Epoch 10 train: 100%|██████████| 196/196 [00:35<00:00,  5.45it/s, loss=3.183, acc=18.060%, 9030/50000]
Epoch 10  test: 100%|██████████| 20/20 [00:05<00:00,  3.52it/s, loss=3.289, acc=17.890%, 1789/10000]

Saving..
Epoch 10 | total time: 42s, test acc: 17.890%, best acc: 17.890%



Epoch 11 train: 100%|██████████| 196/196 [00:36<00:00,  5.41it/s, loss=3.136, acc=18.970%, 9485/50000]
Epoch 11  test: 100%|██████████| 20/20 [00:05<00:00,  3.73it/s, loss=3.089, acc=20.330%, 2033/10000]

Saving..
Epoch 11 | total time: 42s, test acc: 20.330%, best acc: 20.330%



Epoch 12 train: 100%|██████████| 196/196 [00:36<00:00,  5.43it/s, loss=3.103, acc=19.982%, 9991/50000]
Epoch 12  test: 100%|██████████| 20/20 [00:05<00:00,  3.94it/s, loss=3.086, acc=20.310%, 2031/10000]

Epoch 12 | total time: 42s, test acc: 20.310%, best acc: 20.330%



Epoch 13 train: 100%|██████████| 196/196 [00:35<00:00,  5.52it/s, loss=3.063, acc=20.776%, 10388/50000]
Epoch 13  test: 100%|██████████| 20/20 [00:05<00:00,  3.55it/s, loss=2.983, acc=22.180%, 2218/10000]


Saving..
Epoch 13 | total time: 42s, test acc: 22.180%, best acc: 22.180%


Epoch 14 train: 100%|██████████| 196/196 [00:35<00:00,  5.54it/s, loss=3.033, acc=21.434%, 10717/50000]
Epoch 14  test: 100%|██████████| 20/20 [00:05<00:00,  3.49it/s, loss=3.131, acc=20.810%, 2081/10000]

Epoch 14 | total time: 42s, test acc: 20.810%, best acc: 22.180%



Epoch 15 train: 100%|██████████| 196/196 [00:35<00:00,  5.58it/s, loss=3.006, acc=21.964%, 10982/50000]
Epoch 15  test: 100%|██████████| 20/20 [00:06<00:00,  3.15it/s, loss=3.009, acc=22.610%, 2261/10000]

Saving..
Epoch 15 | total time: 43s, test acc: 22.610%, best acc: 22.610%



Epoch 16 train: 100%|██████████| 196/196 [00:35<00:00,  5.49it/s, loss=2.983, acc=22.620%, 11310/50000]
Epoch 16  test: 100%|██████████| 20/20 [00:04<00:00,  4.00it/s, loss=3.018, acc=22.230%, 2223/10000]

Epoch 16 | total time: 41s, test acc: 22.230%, best acc: 22.610%



Epoch 17 train: 100%|██████████| 196/196 [00:35<00:00,  5.51it/s, loss=2.964, acc=22.766%, 11383/50000]
Epoch 17  test: 100%|██████████| 20/20 [00:05<00:00,  3.97it/s, loss=2.847, acc=25.200%, 2520/10000]

Saving..
Epoch 17 | total time: 41s, test acc: 25.200%, best acc: 25.200%



Epoch 18 train: 100%|██████████| 196/196 [00:35<00:00,  5.45it/s, loss=2.938, acc=23.698%, 11849/50000]
Epoch 18  test: 100%|██████████| 20/20 [00:05<00:00,  3.84it/s, loss=3.110, acc=21.470%, 2147/10000]

Epoch 18 | total time: 42s, test acc: 21.470%, best acc: 25.200%



Epoch 19 train: 100%|██████████| 196/196 [00:35<00:00,  5.50it/s, loss=2.924, acc=24.074%, 12037/50000]
Epoch 19  test: 100%|██████████| 20/20 [00:05<00:00,  3.52it/s, loss=3.027, acc=22.750%, 2275/10000]

Epoch 19 | total time: 42s, test acc: 22.750%, best acc: 25.200%



Epoch 20 train: 100%|██████████| 196/196 [00:35<00:00,  5.57it/s, loss=2.906, acc=24.270%, 12135/50000]
Epoch 20  test: 100%|██████████| 20/20 [00:05<00:00,  3.70it/s, loss=2.900, acc=25.120%, 2512/10000]

Epoch 20 | total time: 42s, test acc: 25.120%, best acc: 25.200%



Epoch 21 train: 100%|██████████| 196/196 [00:36<00:00,  5.41it/s, loss=2.882, acc=24.746%, 12373/50000]
Epoch 21  test: 100%|██████████| 20/20 [00:05<00:00,  3.57it/s, loss=2.833, acc=25.280%, 2528/10000]

Saving..
Epoch 21 | total time: 42s, test acc: 25.280%, best acc: 25.280%



Epoch 22 train: 100%|██████████| 196/196 [00:35<00:00,  5.48it/s, loss=2.868, acc=25.300%, 12650/50000]
Epoch 22  test: 100%|██████████| 20/20 [00:04<00:00,  4.02it/s, loss=2.999, acc=24.260%, 2426/10000]

Epoch 22 | total time: 42s, test acc: 24.260%, best acc: 25.280%



Epoch 23 train: 100%|██████████| 196/196 [00:36<00:00,  5.41it/s, loss=2.866, acc=25.342%, 12671/50000]
Epoch 23  test: 100%|██████████| 20/20 [00:05<00:00,  3.74it/s, loss=3.049, acc=23.810%, 2381/10000]

Epoch 23 | total time: 42s, test acc: 23.810%, best acc: 25.280%



Epoch 24 train: 100%|██████████| 196/196 [00:35<00:00,  5.55it/s, loss=2.841, acc=25.848%, 12924/50000]
Epoch 24  test: 100%|██████████| 20/20 [00:05<00:00,  3.51it/s, loss=2.935, acc=25.950%, 2595/10000]

Saving..
Epoch 24 | total time: 42s, test acc: 25.950%, best acc: 25.950%



Epoch 25 train: 100%|██████████| 196/196 [00:36<00:00,  5.40it/s, loss=2.825, acc=26.100%, 13050/50000]
Epoch 25  test: 100%|██████████| 20/20 [00:05<00:00,  3.99it/s, loss=2.828, acc=27.020%, 2702/10000]


Saving..
Epoch 25 | total time: 42s, test acc: 27.020%, best acc: 27.020%


Epoch 26 train: 100%|██████████| 196/196 [00:36<00:00,  5.42it/s, loss=2.812, acc=26.282%, 13141/50000]
Epoch 26  test: 100%|██████████| 20/20 [00:05<00:00,  3.57it/s, loss=2.682, acc=29.950%, 2995/10000]

Saving..
Epoch 26 | total time: 43s, test acc: 29.950%, best acc: 29.950%



Epoch 27 train: 100%|██████████| 196/196 [00:31<00:00,  6.19it/s, loss=2.795, acc=26.916%, 13458/50000]
Epoch 27  test: 100%|██████████| 20/20 [00:05<00:00,  3.95it/s, loss=2.603, acc=30.280%, 3028/10000]


Saving..
Epoch 27 | total time: 37s, test acc: 30.280%, best acc: 30.280%


Epoch 28 train: 100%|██████████| 196/196 [00:35<00:00,  5.46it/s, loss=2.798, acc=26.950%, 13475/50000]
Epoch 28  test: 100%|██████████| 20/20 [00:05<00:00,  3.94it/s, loss=2.658, acc=29.530%, 2953/10000]

Epoch 28 | total time: 42s, test acc: 29.530%, best acc: 30.280%



Epoch 29 train: 100%|██████████| 196/196 [00:36<00:00,  5.40it/s, loss=2.783, acc=27.264%, 13632/50000]
Epoch 29  test: 100%|██████████| 20/20 [00:05<00:00,  3.88it/s, loss=2.807, acc=28.130%, 2813/10000]

Epoch 29 | total time: 42s, test acc: 28.130%, best acc: 30.280%



Epoch 30 train: 100%|██████████| 196/196 [00:36<00:00,  5.39it/s, loss=2.771, acc=27.718%, 13859/50000]
Epoch 30  test: 100%|██████████| 20/20 [00:05<00:00,  3.98it/s, loss=2.813, acc=26.570%, 2657/10000]

Epoch 30 | total time: 42s, test acc: 26.570%, best acc: 30.280%



Epoch 31 train: 100%|██████████| 196/196 [00:35<00:00,  5.46it/s, loss=2.756, acc=28.018%, 14009/50000]
Epoch 31  test: 100%|██████████| 20/20 [00:05<00:00,  3.59it/s, loss=2.825, acc=26.590%, 2659/10000]

Epoch 31 | total time: 42s, test acc: 26.590%, best acc: 30.280%



Epoch 32 train: 100%|██████████| 196/196 [00:36<00:00,  5.43it/s, loss=2.752, acc=28.294%, 14147/50000]
Epoch 32  test: 100%|██████████| 20/20 [00:05<00:00,  3.92it/s, loss=2.716, acc=29.550%, 2955/10000]

Epoch 32 | total time: 42s, test acc: 29.550%, best acc: 30.280%



Epoch 33 train: 100%|██████████| 196/196 [00:36<00:00,  5.33it/s, loss=2.742, acc=28.456%, 14228/50000]
Epoch 33  test: 100%|██████████| 20/20 [00:05<00:00,  3.97it/s, loss=2.537, acc=32.420%, 3242/10000]

Saving..
Epoch 33 | total time: 42s, test acc: 32.420%, best acc: 32.420%



Epoch 34 train: 100%|██████████| 196/196 [00:35<00:00,  5.46it/s, loss=2.731, acc=28.820%, 14410/50000]
Epoch 34  test: 100%|██████████| 20/20 [00:05<00:00,  3.99it/s, loss=2.613, acc=31.550%, 3155/10000]

Epoch 34 | total time: 41s, test acc: 31.550%, best acc: 32.420%



Epoch 35 train: 100%|██████████| 196/196 [00:35<00:00,  5.55it/s, loss=2.722, acc=28.738%, 14369/50000]
Epoch 35  test: 100%|██████████| 20/20 [00:05<00:00,  3.95it/s, loss=2.646, acc=31.290%, 3129/10000]

Epoch 35 | total time: 42s, test acc: 31.290%, best acc: 32.420%



Epoch 36 train: 100%|██████████| 196/196 [00:35<00:00,  5.45it/s, loss=2.713, acc=29.196%, 14598/50000]
Epoch 36  test: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s, loss=2.693, acc=30.160%, 3016/10000]

Epoch 36 | total time: 42s, test acc: 30.160%, best acc: 32.420%



Epoch 37 train: 100%|██████████| 196/196 [00:36<00:00,  5.44it/s, loss=2.711, acc=28.958%, 14479/50000]
Epoch 37  test: 100%|██████████| 20/20 [00:05<00:00,  3.58it/s, loss=2.613, acc=30.600%, 3060/10000]

Epoch 37 | total time: 42s, test acc: 30.600%, best acc: 32.420%



Epoch 38 train: 100%|██████████| 196/196 [00:35<00:00,  5.54it/s, loss=2.698, acc=29.692%, 14846/50000]
Epoch 38  test: 100%|██████████| 20/20 [00:05<00:00,  3.90it/s, loss=2.619, acc=31.130%, 3113/10000]

Epoch 38 | total time: 42s, test acc: 31.130%, best acc: 32.420%



Epoch 39 train: 100%|██████████| 196/196 [00:35<00:00,  5.49it/s, loss=2.682, acc=29.886%, 14943/50000]
Epoch 39  test: 100%|██████████| 20/20 [00:05<00:00,  3.58it/s, loss=2.634, acc=31.390%, 3139/10000]

Epoch 39 | total time: 42s, test acc: 31.390%, best acc: 32.420%



Epoch 40 train: 100%|██████████| 196/196 [00:36<00:00,  5.34it/s, loss=2.666, acc=30.302%, 15151/50000]
Epoch 40  test: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s, loss=3.042, acc=25.730%, 2573/10000]

Epoch 40 | total time: 43s, test acc: 25.730%, best acc: 32.420%



Epoch 41 train: 100%|██████████| 196/196 [00:35<00:00,  5.46it/s, loss=2.662, acc=30.252%, 15126/50000]
Epoch 41  test: 100%|██████████| 20/20 [00:05<00:00,  3.93it/s, loss=2.593, acc=32.300%, 3230/10000]

Epoch 41 | total time: 42s, test acc: 32.300%, best acc: 32.420%



Epoch 42 train: 100%|██████████| 196/196 [00:35<00:00,  5.48it/s, loss=2.660, acc=30.346%, 15173/50000]
Epoch 42  test: 100%|██████████| 20/20 [00:05<00:00,  3.59it/s, loss=2.731, acc=30.240%, 3024/10000]

Epoch 42 | total time: 42s, test acc: 30.240%, best acc: 32.420%



Epoch 43 train: 100%|██████████| 196/196 [00:35<00:00,  5.50it/s, loss=2.663, acc=30.440%, 15220/50000]
Epoch 43  test: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s, loss=2.626, acc=31.520%, 3152/10000]

Epoch 43 | total time: 42s, test acc: 31.520%, best acc: 32.420%



Epoch 44 train: 100%|██████████| 196/196 [00:36<00:00,  5.35it/s, loss=2.655, acc=30.666%, 15333/50000]
Epoch 44  test: 100%|██████████| 20/20 [00:05<00:00,  3.53it/s, loss=2.564, acc=32.330%, 3233/10000]

Epoch 44 | total time: 43s, test acc: 32.330%, best acc: 32.420%



Epoch 45 train: 100%|██████████| 196/196 [00:36<00:00,  5.38it/s, loss=2.642, acc=30.808%, 15404/50000]
Epoch 45  test: 100%|██████████| 20/20 [00:05<00:00,  3.85it/s, loss=2.539, acc=32.860%, 3286/10000]

Saving..
Epoch 45 | total time: 42s, test acc: 32.860%, best acc: 32.860%



Epoch 46 train: 100%|██████████| 196/196 [00:35<00:00,  5.45it/s, loss=2.638, acc=30.914%, 15457/50000]
Epoch 46  test: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s, loss=2.760, acc=29.840%, 2984/10000]

Epoch 46 | total time: 42s, test acc: 29.840%, best acc: 32.860%



Epoch 47 train: 100%|██████████| 196/196 [00:36<00:00,  5.38it/s, loss=2.626, acc=31.022%, 15511/50000]
Epoch 47  test: 100%|██████████| 20/20 [00:05<00:00,  3.63it/s, loss=2.731, acc=30.210%, 3021/10000]

Epoch 47 | total time: 42s, test acc: 30.210%, best acc: 32.860%



Epoch 48 train: 100%|██████████| 196/196 [00:35<00:00,  5.50it/s, loss=2.606, acc=31.614%, 15807/50000]
Epoch 48  test: 100%|██████████| 20/20 [00:05<00:00,  3.82it/s, loss=2.605, acc=31.540%, 3154/10000]

Epoch 48 | total time: 42s, test acc: 31.540%, best acc: 32.860%



Epoch 49 train: 100%|██████████| 196/196 [00:36<00:00,  5.44it/s, loss=2.612, acc=31.726%, 15863/50000]
Epoch 49  test: 100%|██████████| 20/20 [00:05<00:00,  3.57it/s, loss=2.654, acc=30.880%, 3088/10000]

Epoch 49 | total time: 42s, test acc: 30.880%, best acc: 32.860%



