In [1]:
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import csv

load_pickle() => is used to read the CIFAR 10 dataset and stores in binary format 

Data files are loaded in 5 batches, then the images and label's were extracted and were transformed from (C,H,W) to (H,W,C) using transpose(1,2,0)

Data augmentation and normalization was done using tensor, mean and standard deviation to stablize and speedup training. 

Images were randomly cropeed to 32x32 pixels and padding of 4 pixels was added to help in training so that images with slight shifts were also classified correctly 

DataLoader is used to help the model in learning data in random instead of leaning in a particular order every epoch, 4 worker threads were used to speedup the training process by parallelizing it. 

Batch size of 256 is used to process many samples of data simultaneously 

In [2]:
import pickle
import numpy as np

import pickle
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

def load_pickle(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
    return data

batch_files = [
    "./data_batch_1",
    "./data_batch_2",
    "./data_batch_3",
    "./data_batch_4",
    "./data_batch_5"
]

train_data, train_labels = [], []
for file in batch_files:
    batch = load_pickle(file)
    train_data.append(batch[b'data'])       
    train_labels.extend(batch[b'labels'])

train_data = np.vstack(train_data).reshape(-1, 3, 32, 32)
train_labels = np.array(train_labels)

class CIFAR10Dataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx].transpose(1, 2, 0)  
        img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
        label = self.labels[idx]
        return img, label

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010])
])

trainset = CIFAR10Dataset(train_data, train_labels, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=512, shuffle=True, num_workers=4)

mish activation function is used to provide smooth gradients and improving the convergence of the network
Mish(x) = x . tanh(ln(1+e^x))

SE block generates attention weights which scales the output of the convolutional layers

Basic block has 2 convolutional layers with batch normalization and mish activation, skip connection is added to allow the flow of gradients solving the vanishing gradient problem


Modified ResNet18 - there are 4 major layers to capture different levels of abstraction that is built using stacking the basic blocks

Adaptive Average Pooling layers compresses the spatial dimensions into 1x1 per channel - used to summerize each feature map into a single value per channel

Normalization is added here as well for stability and speeding up the training 


While training the model all the transformations and conversions that were used while reading and procressing was added here.

Using cross entropy loss to evaluate how well it will align with the prediction 

SGD is used for stable learning over time 

Weight decay is used to reguarlize the model, tried with 5e-3, 1e-3

CosineAnnealingWarmRestarts is used to dynamically change learning rate based on the loss and number of epochs 

Training Loss

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from tqdm import tqdm
from torchsummary import summary

class Mish(nn.Module):
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels // reduction, 1),
            Mish(),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return x * self.se(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SEBlock(planes)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = Mish()(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        out += self.shortcut(x)
        return Mish()(out)

class ResNet18(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, dropout_prob=0.5):
        super(ResNet18, self).__init__()
        self.in_planes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)

        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_prob)  
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = Mish()(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = self.dropout(out) 
        out = torch.flatten(out, 1)
        return self.fc(out)

def train():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(),  
        transforms.ToTensor(),         
        transforms.RandomErasing(),     
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])
    
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])
    
    trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=4)
    
    testset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    testloader = DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)
    
    model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-3)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    
    num_epochs = 100
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        train_correct = 0
        total_train = 0
        
        for inputs, labels in tqdm(trainloader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
        
        avg_train_loss = train_loss / total_train
        train_acc = 100. * train_correct / total_train
        
        model.eval()
        test_loss = 0.0
        test_correct = 0
        total_test = 0
        
        with torch.no_grad():
            for inputs, labels in tqdm(testloader, desc=f"Testing Epoch {epoch+1}/{num_epochs}"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total_test += labels.size(0)
                test_correct += predicted.eq(labels).sum().item()
        
        avg_test_loss = test_loss / total_test
        test_acc = 100. * test_correct / total_test
        
        scheduler.step()
        
        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"    Training Loss: {avg_train_loss:.4f} | Training Acc: {train_acc:.2f}%")
        print(f"    Testing Loss: {avg_test_loss:.4f}  | Testing Acc: {test_acc:.2f}%")
        torch.save(model.state_dict(), "1optimized_resnet18.pth")
    
    print("\n Model Summary After Training:")
    summary(model, (3, 32, 32))
    
if __name__ == '__main__':
    train()


Training Epoch 1/100: 100%|██████████| 196/196 [00:15<00:00, 12.72it/s]
Testing Epoch 1/100: 100%|██████████| 40/40 [00:00<00:00, 49.60it/s]


Epoch 1/100:
    Training Loss: 2.0521 | Training Acc: 27.17%
    Testing Loss: 1.6034  | Testing Acc: 48.82%


Training Epoch 2/100: 100%|██████████| 196/196 [00:12<00:00, 15.29it/s]
Testing Epoch 2/100: 100%|██████████| 40/40 [00:00<00:00, 55.84it/s]


Epoch 2/100:
    Training Loss: 1.7645 | Training Acc: 41.86%
    Testing Loss: 1.4413  | Testing Acc: 56.87%


Training Epoch 3/100: 100%|██████████| 196/196 [00:12<00:00, 15.41it/s]
Testing Epoch 3/100: 100%|██████████| 40/40 [00:00<00:00, 55.28it/s]


Epoch 3/100:
    Training Loss: 1.6096 | Training Acc: 49.60%
    Testing Loss: 1.3369  | Testing Acc: 63.08%


Training Epoch 4/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 4/100: 100%|██████████| 40/40 [00:00<00:00, 56.01it/s]


Epoch 4/100:
    Training Loss: 1.5003 | Training Acc: 55.22%
    Testing Loss: 1.1873  | Testing Acc: 70.47%


Training Epoch 5/100: 100%|██████████| 196/196 [00:12<00:00, 15.22it/s]
Testing Epoch 5/100: 100%|██████████| 40/40 [00:00<00:00, 53.93it/s]


Epoch 5/100:
    Training Loss: 1.4177 | Training Acc: 59.60%
    Testing Loss: 1.1826  | Testing Acc: 70.35%


Training Epoch 6/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 6/100: 100%|██████████| 40/40 [00:00<00:00, 56.23it/s]


Epoch 6/100:
    Training Loss: 1.3543 | Training Acc: 62.63%
    Testing Loss: 1.1156  | Testing Acc: 73.65%


Training Epoch 7/100: 100%|██████████| 196/196 [00:12<00:00, 15.40it/s]
Testing Epoch 7/100: 100%|██████████| 40/40 [00:00<00:00, 55.40it/s]


Epoch 7/100:
    Training Loss: 1.3028 | Training Acc: 64.89%
    Testing Loss: 1.0513  | Testing Acc: 76.98%


Training Epoch 8/100: 100%|██████████| 196/196 [00:12<00:00, 15.48it/s]
Testing Epoch 8/100: 100%|██████████| 40/40 [00:00<00:00, 56.22it/s]


Epoch 8/100:
    Training Loss: 1.2497 | Training Acc: 67.48%
    Testing Loss: 0.9830  | Testing Acc: 80.17%


Training Epoch 9/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 9/100: 100%|██████████| 40/40 [00:00<00:00, 56.11it/s]


Epoch 9/100:
    Training Loss: 1.2047 | Training Acc: 69.35%
    Testing Loss: 0.9654  | Testing Acc: 80.94%


Training Epoch 10/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 10/100: 100%|██████████| 40/40 [00:00<00:00, 56.53it/s]


Epoch 10/100:
    Training Loss: 1.1753 | Training Acc: 70.70%
    Testing Loss: 0.9365  | Testing Acc: 82.29%


Training Epoch 11/100: 100%|██████████| 196/196 [00:12<00:00, 15.31it/s]
Testing Epoch 11/100: 100%|██████████| 40/40 [00:00<00:00, 53.67it/s]


Epoch 11/100:
    Training Loss: 1.3911 | Training Acc: 60.89%
    Testing Loss: 1.1006  | Testing Acc: 74.48%


Training Epoch 12/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 12/100: 100%|██████████| 40/40 [00:00<00:00, 55.49it/s]


Epoch 12/100:
    Training Loss: 1.3424 | Training Acc: 63.08%
    Testing Loss: 1.0712  | Testing Acc: 75.88%


Training Epoch 13/100: 100%|██████████| 196/196 [00:12<00:00, 15.34it/s]
Testing Epoch 13/100: 100%|██████████| 40/40 [00:00<00:00, 53.79it/s]


Epoch 13/100:
    Training Loss: 1.3215 | Training Acc: 64.03%
    Testing Loss: 1.0379  | Testing Acc: 77.48%


Training Epoch 14/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 14/100: 100%|██████████| 40/40 [00:00<00:00, 55.47it/s]


Epoch 14/100:
    Training Loss: 1.2997 | Training Acc: 64.95%
    Testing Loss: 1.0570  | Testing Acc: 76.57%


Training Epoch 15/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 15/100: 100%|██████████| 40/40 [00:00<00:00, 55.77it/s]


Epoch 15/100:
    Training Loss: 1.2824 | Training Acc: 65.92%
    Testing Loss: 1.0647  | Testing Acc: 76.30%


Training Epoch 16/100: 100%|██████████| 196/196 [00:12<00:00, 15.32it/s]
Testing Epoch 16/100: 100%|██████████| 40/40 [00:00<00:00, 55.51it/s]


Epoch 16/100:
    Training Loss: 1.2642 | Training Acc: 66.78%
    Testing Loss: 1.0264  | Testing Acc: 77.64%


Training Epoch 17/100: 100%|██████████| 196/196 [00:12<00:00, 15.31it/s]
Testing Epoch 17/100: 100%|██████████| 40/40 [00:00<00:00, 56.67it/s]


Epoch 17/100:
    Training Loss: 1.2438 | Training Acc: 68.02%
    Testing Loss: 0.9876  | Testing Acc: 79.63%


Training Epoch 18/100: 100%|██████████| 196/196 [00:12<00:00, 15.40it/s]
Testing Epoch 18/100: 100%|██████████| 40/40 [00:00<00:00, 55.53it/s]


Epoch 18/100:
    Training Loss: 1.2288 | Training Acc: 68.49%
    Testing Loss: 0.9801  | Testing Acc: 80.12%


Training Epoch 19/100: 100%|██████████| 196/196 [00:12<00:00, 15.43it/s]
Testing Epoch 19/100: 100%|██████████| 40/40 [00:00<00:00, 55.83it/s]


Epoch 19/100:
    Training Loss: 1.2097 | Training Acc: 69.47%
    Testing Loss: 0.9884  | Testing Acc: 79.81%


Training Epoch 20/100: 100%|██████████| 196/196 [00:12<00:00, 15.37it/s]
Testing Epoch 20/100: 100%|██████████| 40/40 [00:00<00:00, 55.23it/s]


Epoch 20/100:
    Training Loss: 1.1962 | Training Acc: 69.85%
    Testing Loss: 0.9490  | Testing Acc: 81.16%


Training Epoch 21/100: 100%|██████████| 196/196 [00:12<00:00, 15.30it/s]
Testing Epoch 21/100: 100%|██████████| 40/40 [00:00<00:00, 54.88it/s]


Epoch 21/100:
    Training Loss: 1.1718 | Training Acc: 71.13%
    Testing Loss: 0.9437  | Testing Acc: 81.77%


Training Epoch 22/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 22/100: 100%|██████████| 40/40 [00:00<00:00, 55.36it/s]


Epoch 22/100:
    Training Loss: 1.1569 | Training Acc: 71.70%
    Testing Loss: 0.9215  | Testing Acc: 82.75%


Training Epoch 23/100: 100%|██████████| 196/196 [00:12<00:00, 15.36it/s]
Testing Epoch 23/100: 100%|██████████| 40/40 [00:00<00:00, 55.27it/s]


Epoch 23/100:
    Training Loss: 1.1304 | Training Acc: 73.18%
    Testing Loss: 0.9230  | Testing Acc: 82.51%


Training Epoch 24/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 24/100: 100%|██████████| 40/40 [00:00<00:00, 56.15it/s]


Epoch 24/100:
    Training Loss: 1.1036 | Training Acc: 74.43%
    Testing Loss: 0.8865  | Testing Acc: 84.40%


Training Epoch 25/100: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 25/100: 100%|██████████| 40/40 [00:00<00:00, 54.35it/s]


Epoch 25/100:
    Training Loss: 1.0820 | Training Acc: 75.38%
    Testing Loss: 0.8666  | Testing Acc: 85.16%


Training Epoch 26/100: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 26/100: 100%|██████████| 40/40 [00:00<00:00, 55.46it/s]


Epoch 26/100:
    Training Loss: 1.0571 | Training Acc: 76.19%
    Testing Loss: 0.8429  | Testing Acc: 86.04%


Training Epoch 27/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 27/100: 100%|██████████| 40/40 [00:00<00:00, 55.43it/s]


Epoch 27/100:
    Training Loss: 1.0326 | Training Acc: 77.53%
    Testing Loss: 0.8271  | Testing Acc: 86.94%


Training Epoch 28/100: 100%|██████████| 196/196 [00:12<00:00, 15.42it/s]
Testing Epoch 28/100: 100%|██████████| 40/40 [00:00<00:00, 55.30it/s]


Epoch 28/100:
    Training Loss: 1.0152 | Training Acc: 78.33%
    Testing Loss: 0.8184  | Testing Acc: 87.32%


Training Epoch 29/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 29/100: 100%|██████████| 40/40 [00:00<00:00, 55.88it/s]


Epoch 29/100:
    Training Loss: 1.0008 | Training Acc: 79.10%
    Testing Loss: 0.8069  | Testing Acc: 87.51%


Training Epoch 30/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 30/100: 100%|██████████| 40/40 [00:00<00:00, 53.81it/s]


Epoch 30/100:
    Training Loss: 0.9955 | Training Acc: 79.52%
    Testing Loss: 0.8041  | Testing Acc: 87.70%


Training Epoch 31/100: 100%|██████████| 196/196 [00:12<00:00, 15.36it/s]
Testing Epoch 31/100: 100%|██████████| 40/40 [00:00<00:00, 55.59it/s]


Epoch 31/100:
    Training Loss: 1.2981 | Training Acc: 65.21%
    Testing Loss: 1.0259  | Testing Acc: 77.67%


Training Epoch 32/100: 100%|██████████| 196/196 [00:12<00:00, 15.31it/s]
Testing Epoch 32/100: 100%|██████████| 40/40 [00:00<00:00, 54.72it/s]


Epoch 32/100:
    Training Loss: 1.2489 | Training Acc: 67.55%
    Testing Loss: 0.9789  | Testing Acc: 80.79%


Training Epoch 33/100: 100%|██████████| 196/196 [00:12<00:00, 15.37it/s]
Testing Epoch 33/100: 100%|██████████| 40/40 [00:00<00:00, 55.71it/s]


Epoch 33/100:
    Training Loss: 1.2271 | Training Acc: 68.34%
    Testing Loss: 0.9902  | Testing Acc: 79.59%


Training Epoch 34/100: 100%|██████████| 196/196 [00:12<00:00, 15.43it/s]
Testing Epoch 34/100: 100%|██████████| 40/40 [00:00<00:00, 56.03it/s]


Epoch 34/100:
    Training Loss: 1.2245 | Training Acc: 68.73%
    Testing Loss: 1.0179  | Testing Acc: 78.03%


Training Epoch 35/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 35/100: 100%|██████████| 40/40 [00:00<00:00, 55.34it/s]


Epoch 35/100:
    Training Loss: 1.2132 | Training Acc: 69.14%
    Testing Loss: 0.9993  | Testing Acc: 78.75%


Training Epoch 36/100: 100%|██████████| 196/196 [00:12<00:00, 15.43it/s]
Testing Epoch 36/100: 100%|██████████| 40/40 [00:00<00:00, 55.69it/s]


Epoch 36/100:
    Training Loss: 1.2072 | Training Acc: 69.49%
    Testing Loss: 0.9852  | Testing Acc: 79.75%


Training Epoch 37/100: 100%|██████████| 196/196 [00:12<00:00, 15.12it/s]
Testing Epoch 37/100: 100%|██████████| 40/40 [00:00<00:00, 55.79it/s]


Epoch 37/100:
    Training Loss: 1.2009 | Training Acc: 69.63%
    Testing Loss: 0.9698  | Testing Acc: 79.96%


Training Epoch 38/100: 100%|██████████| 196/196 [00:12<00:00, 15.17it/s]
Testing Epoch 38/100: 100%|██████████| 40/40 [00:00<00:00, 52.64it/s]


Epoch 38/100:
    Training Loss: 1.1826 | Training Acc: 70.61%
    Testing Loss: 0.9492  | Testing Acc: 81.02%


Training Epoch 39/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 39/100: 100%|██████████| 40/40 [00:00<00:00, 56.02it/s]


Epoch 39/100:
    Training Loss: 1.1851 | Training Acc: 70.51%
    Testing Loss: 1.0109  | Testing Acc: 78.06%


Training Epoch 40/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 40/100: 100%|██████████| 40/40 [00:00<00:00, 53.79it/s]


Epoch 40/100:
    Training Loss: 1.1739 | Training Acc: 71.00%
    Testing Loss: 1.0003  | Testing Acc: 79.23%


Training Epoch 41/100: 100%|██████████| 196/196 [00:13<00:00, 15.02it/s]
Testing Epoch 41/100: 100%|██████████| 40/40 [00:00<00:00, 54.65it/s]


Epoch 41/100:
    Training Loss: 1.1656 | Training Acc: 71.19%
    Testing Loss: 0.9182  | Testing Acc: 82.81%


Training Epoch 42/100: 100%|██████████| 196/196 [00:13<00:00, 14.86it/s]
Testing Epoch 42/100: 100%|██████████| 40/40 [00:00<00:00, 53.40it/s]


Epoch 42/100:
    Training Loss: 1.1523 | Training Acc: 72.14%
    Testing Loss: 0.9318  | Testing Acc: 81.82%


Training Epoch 43/100: 100%|██████████| 196/196 [00:12<00:00, 15.37it/s]
Testing Epoch 43/100: 100%|██████████| 40/40 [00:00<00:00, 56.34it/s]


Epoch 43/100:
    Training Loss: 1.1478 | Training Acc: 71.94%
    Testing Loss: 0.9272  | Testing Acc: 81.85%


Training Epoch 44/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 44/100: 100%|██████████| 40/40 [00:00<00:00, 55.59it/s]


Epoch 44/100:
    Training Loss: 1.1384 | Training Acc: 72.50%
    Testing Loss: 0.9231  | Testing Acc: 82.13%


Training Epoch 45/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 45/100: 100%|██████████| 40/40 [00:00<00:00, 55.35it/s]


Epoch 45/100:
    Training Loss: 1.1300 | Training Acc: 72.92%
    Testing Loss: 0.9289  | Testing Acc: 81.49%


Training Epoch 46/100: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 46/100: 100%|██████████| 40/40 [00:00<00:00, 55.43it/s]


Epoch 46/100:
    Training Loss: 1.1224 | Training Acc: 73.39%
    Testing Loss: 0.9081  | Testing Acc: 83.05%


Training Epoch 47/100: 100%|██████████| 196/196 [00:13<00:00, 14.94it/s]
Testing Epoch 47/100: 100%|██████████| 40/40 [00:00<00:00, 53.12it/s]


Epoch 47/100:
    Training Loss: 1.1152 | Training Acc: 73.49%
    Testing Loss: 0.8998  | Testing Acc: 83.73%


Training Epoch 48/100: 100%|██████████| 196/196 [00:13<00:00, 14.90it/s]
Testing Epoch 48/100: 100%|██████████| 40/40 [00:00<00:00, 55.70it/s]


Epoch 48/100:
    Training Loss: 1.1016 | Training Acc: 74.14%
    Testing Loss: 0.8911  | Testing Acc: 83.53%


Training Epoch 49/100: 100%|██████████| 196/196 [00:12<00:00, 15.12it/s]
Testing Epoch 49/100: 100%|██████████| 40/40 [00:00<00:00, 55.16it/s]


Epoch 49/100:
    Training Loss: 1.0935 | Training Acc: 74.48%
    Testing Loss: 0.8900  | Testing Acc: 83.56%


Training Epoch 50/100: 100%|██████████| 196/196 [00:12<00:00, 15.16it/s]
Testing Epoch 50/100: 100%|██████████| 40/40 [00:00<00:00, 55.60it/s]


Epoch 50/100:
    Training Loss: 1.0862 | Training Acc: 74.82%
    Testing Loss: 0.8698  | Testing Acc: 84.69%


Training Epoch 51/100: 100%|██████████| 196/196 [00:12<00:00, 15.32it/s]
Testing Epoch 51/100: 100%|██████████| 40/40 [00:00<00:00, 55.33it/s]


Epoch 51/100:
    Training Loss: 1.0723 | Training Acc: 75.50%
    Testing Loss: 0.8659  | Testing Acc: 84.61%


Training Epoch 52/100: 100%|██████████| 196/196 [00:12<00:00, 15.34it/s]
Testing Epoch 52/100: 100%|██████████| 40/40 [00:00<00:00, 55.61it/s]


Epoch 52/100:
    Training Loss: 1.0725 | Training Acc: 75.39%
    Testing Loss: 0.8706  | Testing Acc: 84.53%


Training Epoch 53/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 53/100: 100%|██████████| 40/40 [00:00<00:00, 55.38it/s]


Epoch 53/100:
    Training Loss: 1.0585 | Training Acc: 76.25%
    Testing Loss: 0.8572  | Testing Acc: 85.03%


Training Epoch 54/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 54/100: 100%|██████████| 40/40 [00:00<00:00, 55.81it/s]


Epoch 54/100:
    Training Loss: 1.0470 | Training Acc: 76.69%
    Testing Loss: 0.8521  | Testing Acc: 85.38%


Training Epoch 55/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 55/100: 100%|██████████| 40/40 [00:00<00:00, 55.79it/s]


Epoch 55/100:
    Training Loss: 1.0341 | Training Acc: 77.19%
    Testing Loss: 0.8316  | Testing Acc: 86.07%


Training Epoch 56/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 56/100: 100%|██████████| 40/40 [00:00<00:00, 55.53it/s]


Epoch 56/100:
    Training Loss: 1.0198 | Training Acc: 77.74%
    Testing Loss: 0.8165  | Testing Acc: 86.87%


Training Epoch 57/100: 100%|██████████| 196/196 [00:13<00:00, 14.75it/s]
Testing Epoch 57/100: 100%|██████████| 40/40 [00:00<00:00, 54.54it/s]


Epoch 57/100:
    Training Loss: 1.0129 | Training Acc: 78.04%
    Testing Loss: 0.8135  | Testing Acc: 86.80%


Training Epoch 58/100: 100%|██████████| 196/196 [00:12<00:00, 15.17it/s]
Testing Epoch 58/100: 100%|██████████| 40/40 [00:00<00:00, 56.17it/s]


Epoch 58/100:
    Training Loss: 0.9983 | Training Acc: 78.82%
    Testing Loss: 0.8070  | Testing Acc: 87.36%


Training Epoch 59/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 59/100: 100%|██████████| 40/40 [00:00<00:00, 55.20it/s]


Epoch 59/100:
    Training Loss: 0.9901 | Training Acc: 78.97%
    Testing Loss: 0.7921  | Testing Acc: 87.90%


Training Epoch 60/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 60/100: 100%|██████████| 40/40 [00:00<00:00, 55.06it/s]


Epoch 60/100:
    Training Loss: 0.9720 | Training Acc: 79.87%
    Testing Loss: 0.7804  | Testing Acc: 88.49%


Training Epoch 61/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 61/100: 100%|██████████| 40/40 [00:00<00:00, 54.71it/s]


Epoch 61/100:
    Training Loss: 0.9607 | Training Acc: 80.60%
    Testing Loss: 0.7745  | Testing Acc: 88.38%


Training Epoch 62/100: 100%|██████████| 196/196 [00:12<00:00, 15.36it/s]
Testing Epoch 62/100: 100%|██████████| 40/40 [00:00<00:00, 54.91it/s]


Epoch 62/100:
    Training Loss: 0.9480 | Training Acc: 80.83%
    Testing Loss: 0.7750  | Testing Acc: 88.79%


Training Epoch 63/100: 100%|██████████| 196/196 [00:12<00:00, 15.36it/s]
Testing Epoch 63/100: 100%|██████████| 40/40 [00:00<00:00, 56.04it/s]


Epoch 63/100:
    Training Loss: 0.9297 | Training Acc: 82.00%
    Testing Loss: 0.7629  | Testing Acc: 89.31%


Training Epoch 64/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 64/100: 100%|██████████| 40/40 [00:00<00:00, 55.58it/s]


Epoch 64/100:
    Training Loss: 0.9271 | Training Acc: 82.02%
    Testing Loss: 0.7495  | Testing Acc: 89.67%


Training Epoch 65/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 65/100: 100%|██████████| 40/40 [00:00<00:00, 54.78it/s]


Epoch 65/100:
    Training Loss: 0.9172 | Training Acc: 82.47%
    Testing Loss: 0.7411  | Testing Acc: 90.09%


Training Epoch 66/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 66/100: 100%|██████████| 40/40 [00:00<00:00, 54.95it/s]


Epoch 66/100:
    Training Loss: 0.9018 | Training Acc: 83.16%
    Testing Loss: 0.7336  | Testing Acc: 90.32%


Training Epoch 67/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 67/100: 100%|██████████| 40/40 [00:00<00:00, 55.48it/s]


Epoch 67/100:
    Training Loss: 0.8954 | Training Acc: 83.30%
    Testing Loss: 0.7302  | Testing Acc: 90.53%


Training Epoch 68/100: 100%|██████████| 196/196 [00:12<00:00, 15.35it/s]
Testing Epoch 68/100: 100%|██████████| 40/40 [00:00<00:00, 55.64it/s]


Epoch 68/100:
    Training Loss: 0.8900 | Training Acc: 83.68%
    Testing Loss: 0.7291  | Testing Acc: 90.77%


Training Epoch 69/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 69/100: 100%|██████████| 40/40 [00:00<00:00, 55.40it/s]


Epoch 69/100:
    Training Loss: 0.8847 | Training Acc: 84.06%
    Testing Loss: 0.7254  | Testing Acc: 90.73%


Training Epoch 70/100: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 70/100: 100%|██████████| 40/40 [00:00<00:00, 47.32it/s]


Epoch 70/100:
    Training Loss: 0.8827 | Training Acc: 84.06%
    Testing Loss: 0.7249  | Testing Acc: 90.70%


Training Epoch 71/100: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 71/100: 100%|██████████| 40/40 [00:00<00:00, 56.37it/s]


Epoch 71/100:
    Training Loss: 1.2394 | Training Acc: 67.97%
    Testing Loss: 1.0006  | Testing Acc: 78.51%


Training Epoch 72/100: 100%|██████████| 196/196 [00:12<00:00, 15.41it/s]
Testing Epoch 72/100: 100%|██████████| 40/40 [00:00<00:00, 56.43it/s]


Epoch 72/100:
    Training Loss: 1.1780 | Training Acc: 70.37%
    Testing Loss: 0.9420  | Testing Acc: 81.03%


Training Epoch 73/100: 100%|██████████| 196/196 [00:12<00:00, 15.42it/s]
Testing Epoch 73/100: 100%|██████████| 40/40 [00:00<00:00, 55.72it/s]


Epoch 73/100:
    Training Loss: 1.1540 | Training Acc: 71.62%
    Testing Loss: 0.9493  | Testing Acc: 80.88%


Training Epoch 74/100: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 74/100: 100%|██████████| 40/40 [00:00<00:00, 54.88it/s]


Epoch 74/100:
    Training Loss: 1.1440 | Training Acc: 72.14%
    Testing Loss: 0.9316  | Testing Acc: 81.45%


Training Epoch 75/100: 100%|██████████| 196/196 [00:12<00:00, 15.37it/s]
Testing Epoch 75/100: 100%|██████████| 40/40 [00:00<00:00, 55.56it/s]


Epoch 75/100:
    Training Loss: 1.1383 | Training Acc: 72.50%
    Testing Loss: 0.9092  | Testing Acc: 82.55%


Training Epoch 76/100: 100%|██████████| 196/196 [00:12<00:00, 15.42it/s]
Testing Epoch 76/100: 100%|██████████| 40/40 [00:00<00:00, 55.83it/s]


Epoch 76/100:
    Training Loss: 1.1366 | Training Acc: 72.41%
    Testing Loss: 0.8970  | Testing Acc: 83.09%


Training Epoch 77/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 77/100: 100%|██████████| 40/40 [00:00<00:00, 55.93it/s]


Epoch 77/100:
    Training Loss: 1.1288 | Training Acc: 72.80%
    Testing Loss: 0.9239  | Testing Acc: 82.27%


Training Epoch 78/100: 100%|██████████| 196/196 [00:12<00:00, 15.34it/s]
Testing Epoch 78/100: 100%|██████████| 40/40 [00:00<00:00, 55.35it/s]


Epoch 78/100:
    Training Loss: 1.1270 | Training Acc: 72.92%
    Testing Loss: 0.9391  | Testing Acc: 81.12%


Training Epoch 79/100: 100%|██████████| 196/196 [00:12<00:00, 15.24it/s]
Testing Epoch 79/100: 100%|██████████| 40/40 [00:00<00:00, 55.98it/s]


Epoch 79/100:
    Training Loss: 1.1212 | Training Acc: 73.22%
    Testing Loss: 0.8705  | Testing Acc: 84.40%


Training Epoch 80/100: 100%|██████████| 196/196 [00:12<00:00, 15.41it/s]
Testing Epoch 80/100: 100%|██████████| 40/40 [00:00<00:00, 55.09it/s]


Epoch 80/100:
    Training Loss: 1.1172 | Training Acc: 73.33%
    Testing Loss: 0.9025  | Testing Acc: 83.11%


Training Epoch 81/100: 100%|██████████| 196/196 [00:12<00:00, 15.42it/s]
Testing Epoch 81/100: 100%|██████████| 40/40 [00:00<00:00, 55.33it/s]


Epoch 81/100:
    Training Loss: 1.1173 | Training Acc: 73.38%
    Testing Loss: 0.8970  | Testing Acc: 83.04%


Training Epoch 82/100: 100%|██████████| 196/196 [00:12<00:00, 15.39it/s]
Testing Epoch 82/100: 100%|██████████| 40/40 [00:00<00:00, 56.87it/s]


Epoch 82/100:
    Training Loss: 1.1160 | Training Acc: 73.39%
    Testing Loss: 0.8727  | Testing Acc: 84.35%


Training Epoch 83/100: 100%|██████████| 196/196 [00:13<00:00, 14.84it/s]
Testing Epoch 83/100: 100%|██████████| 40/40 [00:00<00:00, 56.59it/s]


Epoch 83/100:
    Training Loss: 1.1103 | Training Acc: 73.60%
    Testing Loss: 0.9221  | Testing Acc: 82.23%


Training Epoch 84/100: 100%|██████████| 196/196 [00:12<00:00, 15.36it/s]
Testing Epoch 84/100: 100%|██████████| 40/40 [00:00<00:00, 56.31it/s]


Epoch 84/100:
    Training Loss: 1.1136 | Training Acc: 73.55%
    Testing Loss: 0.8643  | Testing Acc: 84.82%


Training Epoch 85/100:  67%|██████▋   | 131/196 [00:08<00:04, 14.61it/s]

Code to Create the submissions.csv file after unpickling the nolabel data and running an inference on it

In [4]:
import torch
import torchvision.transforms as transforms
import pandas as pd
import pickle

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
state_dict = torch.load("optimized_resnet18.pth", map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  
print("Model loaded successfully!")

def load_cifar_batch(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch

file_path = "./cifar_test_nolabel.pkl"
cifar10_batch = load_cifar_batch(file_path)

test_images = cifar10_batch[b'data']  
image_ids = cifar10_batch[b'ids'] 

print(f"Test set loaded: {test_images.shape}")

test_images = torch.tensor(test_images, dtype=torch.float32).permute(0, 3, 1, 2) / 255.0
transform = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
test_images = torch.stack([transform(img) for img in test_images])


test_images = test_images.to(device)  
batch_size = 64  

predictions = []
with torch.no_grad():
    for i in range(0, len(test_images), batch_size):
        batch = test_images[i:i + batch_size]  
        outputs = model(batch)  
        _, predicted_labels = torch.max(outputs, 1) 
        predictions.extend(predicted_labels.cpu().numpy())  

print("Inference completed!")

submission_df = pd.DataFrame({
    "ID": image_ids,
    "Labels": predictions
})
submission_df.to_csv("submission.csv", index=False)
print("submission.csv generated.")

Model loaded successfully!
Test set loaded: (10000, 32, 32, 3)
Inference completed!
submission.csv generated.


In [5]:
import pandas as pd

df = pd.read_csv("./submission.csv")
print(df.head())

   ID  Labels
0   0       6
1   1       1
2   2       8
3   3       6
4   4       9


Code to check the distribution of labels for a total of 10,000 images

In [6]:
unique_labels = df['Labels'].nunique()
print(f"Total unique labels: {unique_labels}")

label_counts = df['Labels'].value_counts()
print(label_counts)

Total unique labels: 10
Labels
3    1207
2    1112
8    1079
1    1060
5    1004
7     989
4     943
6     904
9     883
0     819
Name: count, dtype: int64


Code to check the accuracy of the model trained using the test_batch

In [7]:
import torch
import torchvision.transforms as transforms
import pickle

def load_cifar_batch(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
state_dict = torch.load("optimized_resnet18.pth", map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  
print("Model loaded successfully!")

file_path = "test_batch"  
cifar_batch = load_cifar_batch(file_path)

test_images = cifar_batch[b'data']   
true_labels = cifar_batch[b'labels']  

print(f"Test set loaded: {test_images.shape}")

if test_images.shape[1] == 3072:
    test_images = test_images.reshape(-1, 3, 32, 32)
elif test_images.shape[1] == 32:
    test_images = torch.tensor(test_images, dtype=torch.float32).permute(0, 3, 1, 2) / 255.0
else:
    test_images = torch.tensor(test_images, dtype=torch.float32) / 255.0

if not isinstance(test_images, torch.Tensor):
    test_images = torch.tensor(test_images, dtype=torch.float32) / 255.0

if test_images.shape[1] != 3:
    test_images = test_images.permute(0, 3, 1, 2)

normalize_transform = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                             std=[0.2470, 0.2435, 0.2616])

test_images = torch.stack([normalize_transform(img) for img in test_images])
test_images = test_images.to(device)

true_labels = torch.tensor(true_labels, dtype=torch.long).to(device)

batch_size = 64
total = test_images.size(0)
correct = 0

with torch.no_grad():
    for i in range(0, total, batch_size):
        batch = test_images[i:i+batch_size]
        outputs = model(batch)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == true_labels[i:i+batch_size]).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on test set: {accuracy:.2f}%")

Model loaded successfully!
Test set loaded: (10000, 3072)
Accuracy on test set: 92.72%


Code to Count the number of parameters = 4.5 million

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from tqdm import tqdm
from torchsummary import summary

class Mish(nn.Module):
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels // reduction, 1),
            Mish(),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return x * self.se(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SEBlock(planes)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = Mish()(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        out += self.shortcut(x)
        return Mish()(out)

class ResNet18(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, dropout_prob=0.5):
        super(ResNet18, self).__init__()
        self.in_planes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)

        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_prob)  
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = Mish()(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = F.dropout(out, p=0.5, training=self.training) 
        return self.fc(out)

def continue_training():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
   
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(),
        transforms.ToTensor(),          
        transforms.RandomErasing(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])
    
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])
    
    trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=4)
    
    testset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    testloader = DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)
    
    model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
    model.load_state_dict(torch.load("optimized_resnet18.pth"))

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    
    num_epochs = 200
    for epoch in range(1):  
        model.train()
        train_loss = 0.0
        train_correct = 0
        total_train = 0
        
        for inputs, labels in tqdm(trainloader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
        
        avg_train_loss = train_loss / total_train
        train_acc = 100. * train_correct / total_train
        
        model.eval()
        test_loss = 0.0
        test_correct = 0
        total_test = 0
        
        with torch.no_grad():
            for inputs, labels in tqdm(testloader, desc=f"Testing Epoch {epoch+1}/{num_epochs}"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total_test += labels.size(0)
                test_correct += predicted.eq(labels).sum().item()
        
        avg_test_loss = test_loss / total_test
        test_acc = 100. * test_correct / total_test
        
        scheduler.step()
        
        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"    Training Loss: {avg_train_loss:.4f} | Training Acc: {train_acc:.2f}%")
        print(f"    Testing Loss: {avg_test_loss:.4f}  | Testing Acc: {test_acc:.2f}%")
        
        torch.save(model.state_dict(), "optimized_resnet18.pth")
    
    print("\n Model Summary After Training:")
    summary(model, (3, 32, 32))
    
if __name__ == '__main__':
    continue_training()

Training Epoch 1/200: 100%|██████████| 196/196 [00:15<00:00, 12.88it/s]
Testing Epoch 1/200: 100%|██████████| 40/40 [00:00<00:00, 49.44it/s]


Epoch 1/200:
    Training Loss: 0.7354 | Training Acc: 90.15%
    Testing Loss: 0.6711  | Testing Acc: 92.73%

 Model Summary After Training:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             864
       BatchNorm2d-2           [-1, 32, 32, 32]              64
            Conv2d-3           [-1, 32, 32, 32]           9,216
       BatchNorm2d-4           [-1, 32, 32, 32]              64
            Conv2d-5           [-1, 32, 32, 32]           9,216
       BatchNorm2d-6           [-1, 32, 32, 32]              64
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 2, 1, 1]              66
              Mish-9              [-1, 2, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]              96
          Sigmoid-11             [-1, 32, 1, 1]               0
          SEBlock-12     

This block of code was used to continue training the model from where it had left off due to HPC crashing. 

In our code, we are uploading the model after every epoch. When HPC crashes and we have ro restart out training, then we continue training from the same number of epoch using the latest model. This way we could train the model for 200 epochs over multiple HPC sessions even if HPC crashed. 

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from tqdm import tqdm
from torchsummary import summary

class Mish(nn.Module):
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels // reduction, 1),
            Mish(),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return x * self.se(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SEBlock(planes)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = Mish()(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        out += self.shortcut(x)
        return Mish()(out)

class ResNet18(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, dropout_prob=0.5):
        super(ResNet18, self).__init__()
        self.in_planes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)

        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_prob)  
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = Mish()(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = F.dropout(out, p=0.5, training=self.training) 
        return self.fc(out)

def continue_training():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
   
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.AutoAugment(),
        transforms.ToTensor(),          
        transforms.RandomErasing(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])
    
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])
    
    trainset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = DataLoader(trainset, batch_size=256, shuffle=True, num_workers=4)
    
    testset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    testloader = DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)
    
    model = ResNet18(BasicBlock, [2, 2, 2, 2]).to(device)
    model.load_state_dict(torch.load("optimized_resnet18.pth"))

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    
    num_epochs = 200
    for epoch in range(84,200):  
        model.train()
        train_loss = 0.0
        train_correct = 0
        total_train = 0
        
        for inputs, labels in tqdm(trainloader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
        
        avg_train_loss = train_loss / total_train
        train_acc = 100. * train_correct / total_train
        
        model.eval()
        test_loss = 0.0
        test_correct = 0
        total_test = 0
        
        with torch.no_grad():
            for inputs, labels in tqdm(testloader, desc=f"Testing Epoch {epoch+1}/{num_epochs}"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total_test += labels.size(0)
                test_correct += predicted.eq(labels).sum().item()
        
        avg_test_loss = test_loss / total_test
        test_acc = 100. * test_correct / total_test
        
        scheduler.step()
        
        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"    Training Loss: {avg_train_loss:.4f} | Training Acc: {train_acc:.2f}%")
        print(f"    Testing Loss: {avg_test_loss:.4f}  | Testing Acc: {test_acc:.2f}%")
        
        torch.save(model.state_dict(), "optimized_resnet18.pth")
    
    print("\n Model Summary After Training:")
    summary(model, (3, 32, 32))
    
if __name__ == '__main__':
    continue_training()

Training Epoch 85/200: 100%|██████████| 196/196 [00:15<00:00, 12.85it/s]
Testing Epoch 85/200: 100%|██████████| 40/40 [00:00<00:00, 47.41it/s]


Epoch 85/200:
    Training Loss: 1.2218 | Training Acc: 73.90%
    Testing Loss: 0.8299  | Testing Acc: 86.35%


Training Epoch 86/200: 100%|██████████| 196/196 [00:12<00:00, 15.37it/s]
Testing Epoch 86/200: 100%|██████████| 40/40 [00:00<00:00, 58.09it/s]


Epoch 86/200:
    Training Loss: 1.0564 | Training Acc: 79.53%
    Testing Loss: 0.8231  | Testing Acc: 86.20%


Training Epoch 87/200: 100%|██████████| 196/196 [00:13<00:00, 15.00it/s]
Testing Epoch 87/200: 100%|██████████| 40/40 [00:00<00:00, 54.99it/s]


Epoch 87/200:
    Training Loss: 0.9776 | Training Acc: 81.98%
    Testing Loss: 0.7608  | Testing Acc: 89.04%


Training Epoch 88/200: 100%|██████████| 196/196 [00:12<00:00, 15.12it/s]
Testing Epoch 88/200: 100%|██████████| 40/40 [00:00<00:00, 56.52it/s]


Epoch 88/200:
    Training Loss: 0.9246 | Training Acc: 83.92%
    Testing Loss: 0.7149  | Testing Acc: 90.98%


Training Epoch 89/200: 100%|██████████| 196/196 [00:13<00:00, 15.08it/s]
Testing Epoch 89/200: 100%|██████████| 40/40 [00:00<00:00, 57.17it/s]


Epoch 89/200:
    Training Loss: 0.8894 | Training Acc: 85.10%
    Testing Loss: 0.7120  | Testing Acc: 90.89%


Training Epoch 90/200: 100%|██████████| 196/196 [00:12<00:00, 15.41it/s]
Testing Epoch 90/200: 100%|██████████| 40/40 [00:00<00:00, 56.25it/s]


Epoch 90/200:
    Training Loss: 0.8553 | Training Acc: 86.56%
    Testing Loss: 0.6809  | Testing Acc: 92.34%


Training Epoch 91/200: 100%|██████████| 196/196 [00:12<00:00, 15.09it/s]
Testing Epoch 91/200: 100%|██████████| 40/40 [00:00<00:00, 56.87it/s]


Epoch 91/200:
    Training Loss: 0.8246 | Training Acc: 87.95%
    Testing Loss: 0.6640  | Testing Acc: 92.93%


Training Epoch 92/200: 100%|██████████| 196/196 [00:13<00:00, 14.90it/s]
Testing Epoch 92/200: 100%|██████████| 40/40 [00:00<00:00, 54.42it/s]


Epoch 92/200:
    Training Loss: 0.7965 | Training Acc: 89.06%
    Testing Loss: 0.6506  | Testing Acc: 93.65%


Training Epoch 93/200: 100%|██████████| 196/196 [00:12<00:00, 15.46it/s]
Testing Epoch 93/200: 100%|██████████| 40/40 [00:00<00:00, 54.52it/s]


Epoch 93/200:
    Training Loss: 0.7779 | Training Acc: 89.80%
    Testing Loss: 0.6408  | Testing Acc: 94.04%


Training Epoch 94/200: 100%|██████████| 196/196 [00:12<00:00, 15.30it/s]
Testing Epoch 94/200: 100%|██████████| 40/40 [00:00<00:00, 54.20it/s]


Epoch 94/200:
    Training Loss: 0.7687 | Training Acc: 90.25%
    Testing Loss: 0.6389  | Testing Acc: 93.99%


Training Epoch 95/200: 100%|██████████| 196/196 [00:13<00:00, 14.91it/s]
Testing Epoch 95/200: 100%|██████████| 40/40 [00:00<00:00, 54.52it/s]


Epoch 95/200:
    Training Loss: 0.8915 | Training Acc: 84.81%
    Testing Loss: 0.7150  | Testing Acc: 90.98%


Training Epoch 96/200: 100%|██████████| 196/196 [00:12<00:00, 15.16it/s]
Testing Epoch 96/200: 100%|██████████| 40/40 [00:00<00:00, 56.43it/s]


Epoch 96/200:
    Training Loss: 0.8946 | Training Acc: 84.45%
    Testing Loss: 0.7353  | Testing Acc: 90.17%


Training Epoch 97/200: 100%|██████████| 196/196 [00:12<00:00, 15.18it/s]
Testing Epoch 97/200: 100%|██████████| 40/40 [00:00<00:00, 56.26it/s]


Epoch 97/200:
    Training Loss: 0.8842 | Training Acc: 84.82%
    Testing Loss: 0.7209  | Testing Acc: 90.61%


Training Epoch 98/200: 100%|██████████| 196/196 [00:13<00:00, 14.59it/s]
Testing Epoch 98/200: 100%|██████████| 40/40 [00:00<00:00, 54.45it/s]


Epoch 98/200:
    Training Loss: 0.8639 | Training Acc: 85.43%
    Testing Loss: 0.7029  | Testing Acc: 91.48%


Training Epoch 99/200: 100%|██████████| 196/196 [00:12<00:00, 15.20it/s]
Testing Epoch 99/200: 100%|██████████| 40/40 [00:00<00:00, 54.12it/s]


Epoch 99/200:
    Training Loss: 0.8521 | Training Acc: 86.04%
    Testing Loss: 0.6955  | Testing Acc: 91.83%


Training Epoch 100/200: 100%|██████████| 196/196 [00:12<00:00, 15.60it/s]
Testing Epoch 100/200: 100%|██████████| 40/40 [00:00<00:00, 54.19it/s]


Epoch 100/200:
    Training Loss: 0.8412 | Training Acc: 86.51%
    Testing Loss: 0.6997  | Testing Acc: 91.48%


Training Epoch 101/200: 100%|██████████| 196/196 [00:12<00:00, 15.62it/s]
Testing Epoch 101/200: 100%|██████████| 40/40 [00:00<00:00, 56.70it/s]


Epoch 101/200:
    Training Loss: 0.8266 | Training Acc: 86.96%
    Testing Loss: 0.6895  | Testing Acc: 92.16%


Training Epoch 102/200: 100%|██████████| 196/196 [00:12<00:00, 15.53it/s]
Testing Epoch 102/200: 100%|██████████| 40/40 [00:00<00:00, 56.33it/s]


Epoch 102/200:
    Training Loss: 0.8181 | Training Acc: 87.29%
    Testing Loss: 0.6796  | Testing Acc: 92.28%


Training Epoch 103/200: 100%|██████████| 196/196 [00:12<00:00, 15.58it/s]
Testing Epoch 103/200: 100%|██████████| 40/40 [00:00<00:00, 55.54it/s]


Epoch 103/200:
    Training Loss: 0.7995 | Training Acc: 88.22%
    Testing Loss: 0.6784  | Testing Acc: 92.39%


Training Epoch 104/200: 100%|██████████| 196/196 [00:13<00:00, 15.05it/s]
Testing Epoch 104/200: 100%|██████████| 40/40 [00:00<00:00, 55.20it/s]


Epoch 104/200:
    Training Loss: 0.7887 | Training Acc: 88.45%
    Testing Loss: 0.6640  | Testing Acc: 93.16%


Training Epoch 105/200: 100%|██████████| 196/196 [00:12<00:00, 15.49it/s]
Testing Epoch 105/200: 100%|██████████| 40/40 [00:00<00:00, 55.40it/s]


Epoch 105/200:
    Training Loss: 0.7721 | Training Acc: 89.30%
    Testing Loss: 0.6635  | Testing Acc: 92.97%


Training Epoch 106/200: 100%|██████████| 196/196 [00:12<00:00, 15.52it/s]
Testing Epoch 106/200: 100%|██████████| 40/40 [00:00<00:00, 53.53it/s]


Epoch 106/200:
    Training Loss: 0.7704 | Training Acc: 89.19%
    Testing Loss: 0.6549  | Testing Acc: 93.43%


Training Epoch 107/200: 100%|██████████| 196/196 [00:12<00:00, 15.53it/s]
Testing Epoch 107/200: 100%|██████████| 40/40 [00:00<00:00, 53.64it/s]


Epoch 107/200:
    Training Loss: 0.7533 | Training Acc: 89.98%
    Testing Loss: 0.6506  | Testing Acc: 93.72%


Training Epoch 108/200: 100%|██████████| 196/196 [00:12<00:00, 15.11it/s]
Testing Epoch 108/200: 100%|██████████| 40/40 [00:00<00:00, 56.65it/s]


Epoch 108/200:
    Training Loss: 0.7440 | Training Acc: 90.51%
    Testing Loss: 0.6462  | Testing Acc: 93.94%


Training Epoch 109/200: 100%|██████████| 196/196 [00:12<00:00, 15.47it/s]
Testing Epoch 109/200: 100%|██████████| 40/40 [00:00<00:00, 56.87it/s]


Epoch 109/200:
    Training Loss: 0.7318 | Training Acc: 91.04%
    Testing Loss: 0.6381  | Testing Acc: 94.27%


Training Epoch 110/200: 100%|██████████| 196/196 [00:12<00:00, 15.47it/s]
Testing Epoch 110/200: 100%|██████████| 40/40 [00:00<00:00, 56.70it/s]


Epoch 110/200:
    Training Loss: 0.7276 | Training Acc: 91.05%
    Testing Loss: 0.6338  | Testing Acc: 94.39%


Training Epoch 111/200: 100%|██████████| 196/196 [00:12<00:00, 15.46it/s]
Testing Epoch 111/200: 100%|██████████| 40/40 [00:00<00:00, 56.51it/s]


Epoch 111/200:
    Training Loss: 0.7210 | Training Acc: 91.37%
    Testing Loss: 0.6331  | Testing Acc: 94.46%


Training Epoch 112/200: 100%|██████████| 196/196 [00:13<00:00, 14.94it/s]
Testing Epoch 112/200: 100%|██████████| 40/40 [00:00<00:00, 55.03it/s]


Epoch 112/200:
    Training Loss: 0.7151 | Training Acc: 91.63%
    Testing Loss: 0.6302  | Testing Acc: 94.48%


Training Epoch 113/200: 100%|██████████| 196/196 [00:12<00:00, 15.21it/s]
Testing Epoch 113/200: 100%|██████████| 40/40 [00:00<00:00, 56.49it/s]


Epoch 113/200:
    Training Loss: 0.7072 | Training Acc: 91.97%
    Testing Loss: 0.6300  | Testing Acc: 94.54%


Training Epoch 114/200: 100%|██████████| 196/196 [00:12<00:00, 15.31it/s]
Testing Epoch 114/200: 100%|██████████| 40/40 [00:00<00:00, 54.13it/s]


Epoch 114/200:
    Training Loss: 0.7109 | Training Acc: 91.90%
    Testing Loss: 0.6302  | Testing Acc: 94.45%


Training Epoch 115/200: 100%|██████████| 196/196 [00:12<00:00, 15.28it/s]
Testing Epoch 115/200: 100%|██████████| 40/40 [00:00<00:00, 54.85it/s]


Epoch 115/200:
    Training Loss: 0.8180 | Training Acc: 87.00%
    Testing Loss: 0.6886  | Testing Acc: 92.09%


Training Epoch 116/200: 100%|██████████| 196/196 [00:13<00:00, 14.99it/s]
Testing Epoch 116/200: 100%|██████████| 40/40 [00:00<00:00, 53.95it/s]


Epoch 116/200:
    Training Loss: 0.8218 | Training Acc: 86.86%
    Testing Loss: 0.6954  | Testing Acc: 91.84%


Training Epoch 117/200: 100%|██████████| 196/196 [00:12<00:00, 15.29it/s]
Testing Epoch 117/200: 100%|██████████| 40/40 [00:00<00:00, 56.07it/s]


Epoch 117/200:
    Training Loss: 0.8231 | Training Acc: 86.81%
    Testing Loss: 0.6986  | Testing Acc: 91.66%


Training Epoch 118/200: 100%|██████████| 196/196 [00:12<00:00, 15.23it/s]
Testing Epoch 118/200: 100%|██████████| 40/40 [00:00<00:00, 57.30it/s]


Epoch 118/200:
    Training Loss: 0.8146 | Training Acc: 87.27%
    Testing Loss: 0.6866  | Testing Acc: 92.31%


Training Epoch 119/200: 100%|██████████| 196/196 [00:12<00:00, 15.45it/s]
Testing Epoch 119/200: 100%|██████████| 40/40 [00:00<00:00, 57.04it/s]


Epoch 119/200:
    Training Loss: 0.8107 | Training Acc: 87.32%
    Testing Loss: 0.6944  | Testing Acc: 91.87%


Training Epoch 120/200: 100%|██████████| 196/196 [00:12<00:00, 15.28it/s]
Testing Epoch 120/200: 100%|██████████| 40/40 [00:00<00:00, 56.64it/s]


Epoch 120/200:
    Training Loss: 0.8007 | Training Acc: 87.82%
    Testing Loss: 0.7004  | Testing Acc: 91.34%


Training Epoch 121/200: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 121/200: 100%|██████████| 40/40 [00:00<00:00, 54.87it/s]


Epoch 121/200:
    Training Loss: 0.7989 | Training Acc: 87.71%
    Testing Loss: 0.6943  | Testing Acc: 91.96%


Training Epoch 122/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 122/200: 100%|██████████| 40/40 [00:00<00:00, 55.33it/s]


Epoch 122/200:
    Training Loss: 0.7984 | Training Acc: 87.80%
    Testing Loss: 0.6771  | Testing Acc: 92.71%


Training Epoch 123/200: 100%|██████████| 196/196 [00:12<00:00, 15.50it/s]
Testing Epoch 123/200: 100%|██████████| 40/40 [00:00<00:00, 55.78it/s]


Epoch 123/200:
    Training Loss: 0.7908 | Training Acc: 88.11%
    Testing Loss: 0.6790  | Testing Acc: 92.31%


Training Epoch 124/200: 100%|██████████| 196/196 [00:13<00:00, 14.88it/s]
Testing Epoch 124/200: 100%|██████████| 40/40 [00:00<00:00, 54.30it/s]


Epoch 124/200:
    Training Loss: 0.7854 | Training Acc: 88.21%
    Testing Loss: 0.6826  | Testing Acc: 92.44%


Training Epoch 125/200: 100%|██████████| 196/196 [00:13<00:00, 15.03it/s]
Testing Epoch 125/200: 100%|██████████| 40/40 [00:00<00:00, 57.05it/s]


Epoch 125/200:
    Training Loss: 0.7799 | Training Acc: 88.62%
    Testing Loss: 0.6587  | Testing Acc: 93.39%


Training Epoch 126/200: 100%|██████████| 196/196 [00:13<00:00, 15.03it/s]
Testing Epoch 126/200: 100%|██████████| 40/40 [00:00<00:00, 56.42it/s]


Epoch 126/200:
    Training Loss: 0.7753 | Training Acc: 88.70%
    Testing Loss: 0.6924  | Testing Acc: 91.83%


Training Epoch 127/200: 100%|██████████| 196/196 [00:12<00:00, 15.30it/s]
Testing Epoch 127/200: 100%|██████████| 40/40 [00:00<00:00, 53.58it/s]


Epoch 127/200:
    Training Loss: 0.7734 | Training Acc: 88.82%
    Testing Loss: 0.6673  | Testing Acc: 92.73%


Training Epoch 128/200: 100%|██████████| 196/196 [00:13<00:00, 15.00it/s]
Testing Epoch 128/200: 100%|██████████| 40/40 [00:00<00:00, 55.74it/s]


Epoch 128/200:
    Training Loss: 0.7670 | Training Acc: 89.04%
    Testing Loss: 0.6524  | Testing Acc: 93.47%


Training Epoch 129/200: 100%|██████████| 196/196 [00:13<00:00, 14.98it/s]
Testing Epoch 129/200: 100%|██████████| 40/40 [00:00<00:00, 54.42it/s]


Epoch 129/200:
    Training Loss: 0.7623 | Training Acc: 89.30%
    Testing Loss: 0.6629  | Testing Acc: 93.05%


Training Epoch 130/200: 100%|██████████| 196/196 [00:12<00:00, 15.61it/s]
Testing Epoch 130/200: 100%|██████████| 40/40 [00:00<00:00, 54.80it/s]


Epoch 130/200:
    Training Loss: 0.7535 | Training Acc: 89.63%
    Testing Loss: 0.6584  | Testing Acc: 93.53%


Training Epoch 131/200: 100%|██████████| 196/196 [00:12<00:00, 15.11it/s]
Testing Epoch 131/200: 100%|██████████| 40/40 [00:00<00:00, 55.95it/s]


Epoch 131/200:
    Training Loss: 0.7484 | Training Acc: 89.84%
    Testing Loss: 0.6534  | Testing Acc: 93.53%


Training Epoch 132/200: 100%|██████████| 196/196 [00:12<00:00, 15.53it/s]
Testing Epoch 132/200: 100%|██████████| 40/40 [00:00<00:00, 56.48it/s]


Epoch 132/200:
    Training Loss: 0.7465 | Training Acc: 89.98%
    Testing Loss: 0.6597  | Testing Acc: 93.35%


Training Epoch 133/200: 100%|██████████| 196/196 [00:12<00:00, 15.33it/s]
Testing Epoch 133/200: 100%|██████████| 40/40 [00:00<00:00, 56.60it/s]


Epoch 133/200:
    Training Loss: 0.7373 | Training Acc: 90.38%
    Testing Loss: 0.6498  | Testing Acc: 93.44%


Training Epoch 134/200: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 134/200: 100%|██████████| 40/40 [00:00<00:00, 56.90it/s]


Epoch 134/200:
    Training Loss: 0.7321 | Training Acc: 90.47%
    Testing Loss: 0.6529  | Testing Acc: 93.53%


Training Epoch 135/200: 100%|██████████| 196/196 [00:13<00:00, 15.05it/s]
Testing Epoch 135/200: 100%|██████████| 40/40 [00:00<00:00, 55.71it/s]


Epoch 135/200:
    Training Loss: 0.7272 | Training Acc: 90.68%
    Testing Loss: 0.6539  | Testing Acc: 93.47%


Training Epoch 136/200: 100%|██████████| 196/196 [00:13<00:00, 15.04it/s]
Testing Epoch 136/200: 100%|██████████| 40/40 [00:00<00:00, 55.76it/s]


Epoch 136/200:
    Training Loss: 0.7231 | Training Acc: 90.98%
    Testing Loss: 0.6481  | Testing Acc: 94.02%


Training Epoch 137/200: 100%|██████████| 196/196 [00:12<00:00, 15.34it/s]
Testing Epoch 137/200: 100%|██████████| 40/40 [00:00<00:00, 55.99it/s]


Epoch 137/200:
    Training Loss: 0.7248 | Training Acc: 90.94%
    Testing Loss: 0.6342  | Testing Acc: 94.37%


Training Epoch 138/200: 100%|██████████| 196/196 [00:13<00:00, 14.86it/s]
Testing Epoch 138/200: 100%|██████████| 40/40 [00:00<00:00, 56.30it/s]


Epoch 138/200:
    Training Loss: 0.7173 | Training Acc: 91.22%
    Testing Loss: 0.6356  | Testing Acc: 94.11%


Training Epoch 139/200: 100%|██████████| 196/196 [00:12<00:00, 15.57it/s]
Testing Epoch 139/200: 100%|██████████| 40/40 [00:00<00:00, 56.84it/s]


Epoch 139/200:
    Training Loss: 0.7085 | Training Acc: 91.52%
    Testing Loss: 0.6413  | Testing Acc: 94.10%


Training Epoch 140/200: 100%|██████████| 196/196 [00:12<00:00, 15.64it/s]
Testing Epoch 140/200: 100%|██████████| 40/40 [00:00<00:00, 55.77it/s]


Epoch 140/200:
    Training Loss: 0.7032 | Training Acc: 91.76%
    Testing Loss: 0.6359  | Testing Acc: 94.21%


Training Epoch 141/200: 100%|██████████| 196/196 [00:12<00:00, 15.58it/s]
Testing Epoch 141/200: 100%|██████████| 40/40 [00:00<00:00, 56.37it/s]


Epoch 141/200:
    Training Loss: 0.7041 | Training Acc: 91.79%
    Testing Loss: 0.6343  | Testing Acc: 94.38%


Training Epoch 142/200: 100%|██████████| 196/196 [00:13<00:00, 15.00it/s]
Testing Epoch 142/200: 100%|██████████| 40/40 [00:00<00:00, 56.62it/s]


Epoch 142/200:
    Training Loss: 0.6994 | Training Acc: 91.98%
    Testing Loss: 0.6359  | Testing Acc: 94.30%


Training Epoch 143/200: 100%|██████████| 196/196 [00:12<00:00, 15.32it/s]
Testing Epoch 143/200: 100%|██████████| 40/40 [00:00<00:00, 54.79it/s]


Epoch 143/200:
    Training Loss: 0.6928 | Training Acc: 92.28%
    Testing Loss: 0.6337  | Testing Acc: 94.48%


Training Epoch 144/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 144/200: 100%|██████████| 40/40 [00:00<00:00, 56.68it/s]


Epoch 144/200:
    Training Loss: 0.6907 | Training Acc: 92.41%
    Testing Loss: 0.6276  | Testing Acc: 94.52%


Training Epoch 145/200: 100%|██████████| 196/196 [00:12<00:00, 15.51it/s]
Testing Epoch 145/200: 100%|██████████| 40/40 [00:00<00:00, 56.32it/s]


Epoch 145/200:
    Training Loss: 0.6855 | Training Acc: 92.62%
    Testing Loss: 0.6296  | Testing Acc: 94.53%


Training Epoch 146/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 146/200: 100%|██████████| 40/40 [00:00<00:00, 56.94it/s]


Epoch 146/200:
    Training Loss: 0.6803 | Training Acc: 92.82%
    Testing Loss: 0.6240  | Testing Acc: 94.75%


Training Epoch 147/200: 100%|██████████| 196/196 [00:12<00:00, 15.50it/s]
Testing Epoch 147/200: 100%|██████████| 40/40 [00:00<00:00, 56.06it/s]


Epoch 147/200:
    Training Loss: 0.6817 | Training Acc: 92.81%
    Testing Loss: 0.6249  | Testing Acc: 94.72%


Training Epoch 148/200: 100%|██████████| 196/196 [00:13<00:00, 14.94it/s]
Testing Epoch 148/200: 100%|██████████| 40/40 [00:00<00:00, 56.05it/s]


Epoch 148/200:
    Training Loss: 0.6760 | Training Acc: 92.97%
    Testing Loss: 0.6258  | Testing Acc: 94.66%


Training Epoch 149/200: 100%|██████████| 196/196 [00:12<00:00, 15.58it/s]
Testing Epoch 149/200: 100%|██████████| 40/40 [00:00<00:00, 55.56it/s]


Epoch 149/200:
    Training Loss: 0.6755 | Training Acc: 92.94%
    Testing Loss: 0.6259  | Testing Acc: 94.68%


Training Epoch 150/200: 100%|██████████| 196/196 [00:13<00:00, 15.07it/s]
Testing Epoch 150/200: 100%|██████████| 40/40 [00:00<00:00, 56.81it/s]


Epoch 150/200:
    Training Loss: 0.6745 | Training Acc: 93.15%
    Testing Loss: 0.6237  | Testing Acc: 94.72%


Training Epoch 151/200: 100%|██████████| 196/196 [00:12<00:00, 15.22it/s]
Testing Epoch 151/200: 100%|██████████| 40/40 [00:00<00:00, 52.31it/s]


Epoch 151/200:
    Training Loss: 0.6713 | Training Acc: 93.20%
    Testing Loss: 0.6251  | Testing Acc: 94.79%


Training Epoch 152/200: 100%|██████████| 196/196 [00:13<00:00, 15.08it/s]
Testing Epoch 152/200: 100%|██████████| 40/40 [00:00<00:00, 55.29it/s]


Epoch 152/200:
    Training Loss: 0.6687 | Training Acc: 93.44%
    Testing Loss: 0.6227  | Testing Acc: 94.84%


Training Epoch 153/200: 100%|██████████| 196/196 [00:12<00:00, 15.27it/s]
Testing Epoch 153/200: 100%|██████████| 40/40 [00:00<00:00, 54.88it/s]


Epoch 153/200:
    Training Loss: 0.6682 | Training Acc: 93.26%
    Testing Loss: 0.6236  | Testing Acc: 94.81%


Training Epoch 154/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 154/200: 100%|██████████| 40/40 [00:00<00:00, 56.30it/s]


Epoch 154/200:
    Training Loss: 0.6693 | Training Acc: 93.26%
    Testing Loss: 0.6238  | Testing Acc: 94.76%


Training Epoch 155/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 155/200: 100%|██████████| 40/40 [00:00<00:00, 56.35it/s]


Epoch 155/200:
    Training Loss: 0.7682 | Training Acc: 88.90%
    Testing Loss: 0.6790  | Testing Acc: 92.41%


Training Epoch 156/200: 100%|██████████| 196/196 [00:12<00:00, 15.41it/s]
Testing Epoch 156/200: 100%|██████████| 40/40 [00:00<00:00, 57.68it/s]


Epoch 156/200:
    Training Loss: 0.7777 | Training Acc: 88.41%
    Testing Loss: 0.6908  | Testing Acc: 92.13%


Training Epoch 157/200: 100%|██████████| 196/196 [00:12<00:00, 15.63it/s]
Testing Epoch 157/200: 100%|██████████| 40/40 [00:00<00:00, 54.92it/s]


Epoch 157/200:
    Training Loss: 0.7768 | Training Acc: 88.42%
    Testing Loss: 0.6826  | Testing Acc: 92.17%


Training Epoch 158/200: 100%|██████████| 196/196 [00:12<00:00, 15.56it/s]
Testing Epoch 158/200: 100%|██████████| 40/40 [00:00<00:00, 54.15it/s]


Epoch 158/200:
    Training Loss: 0.7786 | Training Acc: 88.40%
    Testing Loss: 0.6759  | Testing Acc: 92.62%


Training Epoch 159/200: 100%|██████████| 196/196 [00:12<00:00, 15.49it/s]
Testing Epoch 159/200: 100%|██████████| 40/40 [00:00<00:00, 56.17it/s]


Epoch 159/200:
    Training Loss: 0.7726 | Training Acc: 88.59%
    Testing Loss: 0.6752  | Testing Acc: 92.69%


Training Epoch 160/200: 100%|██████████| 196/196 [00:12<00:00, 15.53it/s]
Testing Epoch 160/200: 100%|██████████| 40/40 [00:00<00:00, 55.98it/s]


Epoch 160/200:
    Training Loss: 0.7698 | Training Acc: 88.70%
    Testing Loss: 0.6792  | Testing Acc: 92.46%


Training Epoch 161/200: 100%|██████████| 196/196 [00:12<00:00, 15.44it/s]
Testing Epoch 161/200: 100%|██████████| 40/40 [00:00<00:00, 55.16it/s]


Epoch 161/200:
    Training Loss: 0.7705 | Training Acc: 88.81%
    Testing Loss: 0.6654  | Testing Acc: 93.00%


Training Epoch 162/200: 100%|██████████| 196/196 [00:12<00:00, 15.48it/s]
Testing Epoch 162/200: 100%|██████████| 40/40 [00:00<00:00, 54.33it/s]


Epoch 162/200:
    Training Loss: 0.7673 | Training Acc: 89.03%
    Testing Loss: 0.6758  | Testing Acc: 92.42%


Training Epoch 163/200: 100%|██████████| 196/196 [00:12<00:00, 15.59it/s]
Testing Epoch 163/200: 100%|██████████| 40/40 [00:00<00:00, 56.06it/s]


Epoch 163/200:
    Training Loss: 0.7656 | Training Acc: 88.89%
    Testing Loss: 0.6739  | Testing Acc: 92.54%


Training Epoch 164/200: 100%|██████████| 196/196 [00:13<00:00, 15.02it/s]
Testing Epoch 164/200: 100%|██████████| 40/40 [00:00<00:00, 56.31it/s]


Epoch 164/200:
    Training Loss: 0.7631 | Training Acc: 89.01%
    Testing Loss: 0.6726  | Testing Acc: 92.74%


Training Epoch 165/200: 100%|██████████| 196/196 [00:12<00:00, 15.52it/s]
Testing Epoch 165/200: 100%|██████████| 40/40 [00:00<00:00, 54.59it/s]


Epoch 165/200:
    Training Loss: 0.7612 | Training Acc: 89.19%
    Testing Loss: 0.6717  | Testing Acc: 92.39%


Training Epoch 166/200: 100%|██████████| 196/196 [00:13<00:00, 15.05it/s]
Testing Epoch 166/200: 100%|██████████| 40/40 [00:00<00:00, 55.84it/s]


Epoch 166/200:
    Training Loss: 0.7571 | Training Acc: 89.31%
    Testing Loss: 0.6642  | Testing Acc: 93.04%


Training Epoch 167/200: 100%|██████████| 196/196 [00:12<00:00, 15.38it/s]
Testing Epoch 167/200: 100%|██████████| 40/40 [00:00<00:00, 56.08it/s]


Epoch 167/200:
    Training Loss: 0.7595 | Training Acc: 89.29%
    Testing Loss: 0.6597  | Testing Acc: 93.05%


Training Epoch 168/200: 100%|██████████| 196/196 [00:12<00:00, 15.15it/s]
Testing Epoch 168/200: 100%|██████████| 40/40 [00:00<00:00, 53.64it/s]


Epoch 168/200:
    Training Loss: 0.7555 | Training Acc: 89.26%
    Testing Loss: 0.6579  | Testing Acc: 93.00%


Training Epoch 169/200: 100%|██████████| 196/196 [00:13<00:00, 15.06it/s]
Testing Epoch 169/200: 100%|██████████| 40/40 [00:00<00:00, 55.01it/s]


Epoch 169/200:
    Training Loss: 0.7563 | Training Acc: 89.25%
    Testing Loss: 0.6755  | Testing Acc: 92.59%


Training Epoch 170/200: 100%|██████████| 196/196 [00:12<00:00, 15.47it/s]
Testing Epoch 170/200: 100%|██████████| 40/40 [00:00<00:00, 56.58it/s]


Epoch 170/200:
    Training Loss: 0.7533 | Training Acc: 89.42%
    Testing Loss: 0.6633  | Testing Acc: 93.19%


Training Epoch 171/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 171/200: 100%|██████████| 40/40 [00:00<00:00, 56.34it/s]


Epoch 171/200:
    Training Loss: 0.7529 | Training Acc: 89.32%
    Testing Loss: 0.6624  | Testing Acc: 92.85%


Training Epoch 172/200: 100%|██████████| 196/196 [00:12<00:00, 15.54it/s]
Testing Epoch 172/200: 100%|██████████| 40/40 [00:00<00:00, 56.63it/s]


Epoch 172/200:
    Training Loss: 0.7447 | Training Acc: 89.76%
    Testing Loss: 0.6612  | Testing Acc: 93.18%


Training Epoch 173/200: 100%|██████████| 196/196 [00:12<00:00, 15.49it/s]
Testing Epoch 173/200: 100%|██████████| 40/40 [00:00<00:00, 56.07it/s]


Epoch 173/200:
    Training Loss: 0.7456 | Training Acc: 89.70%
    Testing Loss: 0.6729  | Testing Acc: 92.86%


Training Epoch 174/200: 100%|██████████| 196/196 [00:12<00:00, 15.50it/s]
Testing Epoch 174/200: 100%|██████████| 40/40 [00:00<00:00, 56.38it/s]


Epoch 174/200:
    Training Loss: 0.7456 | Training Acc: 89.71%
    Testing Loss: 0.6525  | Testing Acc: 93.69%


Training Epoch 175/200: 100%|██████████| 196/196 [00:12<00:00, 15.51it/s]
Testing Epoch 175/200: 100%|██████████| 40/40 [00:00<00:00, 55.97it/s]


Epoch 175/200:
    Training Loss: 0.7393 | Training Acc: 89.94%
    Testing Loss: 0.6632  | Testing Acc: 93.08%


Training Epoch 176/200: 100%|██████████| 196/196 [00:12<00:00, 15.20it/s]
Testing Epoch 176/200: 100%|██████████| 40/40 [00:00<00:00, 55.80it/s]


Epoch 176/200:
    Training Loss: 0.7395 | Training Acc: 90.13%
    Testing Loss: 0.6609  | Testing Acc: 93.35%


Training Epoch 177/200: 100%|██████████| 196/196 [00:12<00:00, 15.57it/s]
Testing Epoch 177/200: 100%|██████████| 40/40 [00:00<00:00, 56.20it/s]


Epoch 177/200:
    Training Loss: 0.7377 | Training Acc: 90.02%
    Testing Loss: 0.6593  | Testing Acc: 93.32%


Training Epoch 178/200: 100%|██████████| 196/196 [00:13<00:00, 14.94it/s]
Testing Epoch 178/200: 100%|██████████| 40/40 [00:00<00:00, 55.59it/s]


Epoch 178/200:
    Training Loss: 0.7370 | Training Acc: 90.14%
    Testing Loss: 0.6550  | Testing Acc: 93.53%


Training Epoch 179/200: 100%|██████████| 196/196 [00:12<00:00, 15.60it/s]
Testing Epoch 179/200: 100%|██████████| 40/40 [00:00<00:00, 56.29it/s]


Epoch 179/200:
    Training Loss: 0.7315 | Training Acc: 90.34%
    Testing Loss: 0.6513  | Testing Acc: 93.82%


Training Epoch 180/200: 100%|██████████| 196/196 [00:12<00:00, 15.21it/s]
Testing Epoch 180/200: 100%|██████████| 40/40 [00:00<00:00, 57.88it/s]


Epoch 180/200:
    Training Loss: 0.7373 | Training Acc: 90.06%
    Testing Loss: 0.6461  | Testing Acc: 93.82%


Training Epoch 181/200: 100%|██████████| 196/196 [00:12<00:00, 15.15it/s]
Testing Epoch 181/200: 100%|██████████| 40/40 [00:00<00:00, 56.59it/s]


Epoch 181/200:
    Training Loss: 0.7333 | Training Acc: 90.33%
    Testing Loss: 0.6554  | Testing Acc: 93.64%


Training Epoch 182/200: 100%|██████████| 196/196 [00:12<00:00, 15.52it/s]
Testing Epoch 182/200: 100%|██████████| 40/40 [00:00<00:00, 56.30it/s]


Epoch 182/200:
    Training Loss: 0.7292 | Training Acc: 90.36%
    Testing Loss: 0.6578  | Testing Acc: 93.17%


Training Epoch 183/200: 100%|██████████| 196/196 [00:12<00:00, 15.51it/s]
Testing Epoch 183/200: 100%|██████████| 40/40 [00:00<00:00, 53.25it/s]


Epoch 183/200:
    Training Loss: 0.7250 | Training Acc: 90.58%
    Testing Loss: 0.6541  | Testing Acc: 93.58%


Training Epoch 184/200: 100%|██████████| 196/196 [00:13<00:00, 15.07it/s]
Testing Epoch 184/200: 100%|██████████| 40/40 [00:00<00:00, 56.71it/s]


Epoch 184/200:
    Training Loss: 0.7263 | Training Acc: 90.64%
    Testing Loss: 0.6485  | Testing Acc: 93.69%


Training Epoch 185/200: 100%|██████████| 196/196 [00:13<00:00, 14.83it/s]
Testing Epoch 185/200: 100%|██████████| 40/40 [00:00<00:00, 56.80it/s]


Epoch 185/200:
    Training Loss: 0.7250 | Training Acc: 90.65%
    Testing Loss: 0.6464  | Testing Acc: 93.76%


Training Epoch 186/200: 100%|██████████| 196/196 [00:12<00:00, 15.48it/s]
Testing Epoch 186/200: 100%|██████████| 40/40 [00:00<00:00, 54.43it/s]


Epoch 186/200:
    Training Loss: 0.7169 | Training Acc: 91.05%
    Testing Loss: 0.6521  | Testing Acc: 93.62%


Training Epoch 187/200: 100%|██████████| 196/196 [00:12<00:00, 15.50it/s]
Testing Epoch 187/200: 100%|██████████| 40/40 [00:00<00:00, 53.48it/s]


Epoch 187/200:
    Training Loss: 0.7171 | Training Acc: 91.17%
    Testing Loss: 0.6424  | Testing Acc: 94.04%


Training Epoch 188/200: 100%|██████████| 196/196 [00:12<00:00, 15.50it/s]
Testing Epoch 188/200: 100%|██████████| 40/40 [00:00<00:00, 55.50it/s]


Epoch 188/200:
    Training Loss: 0.7205 | Training Acc: 90.93%
    Testing Loss: 0.6447  | Testing Acc: 93.76%


Training Epoch 189/200: 100%|██████████| 196/196 [00:13<00:00, 15.03it/s]
Testing Epoch 189/200: 100%|██████████| 40/40 [00:00<00:00, 56.51it/s]


Epoch 189/200:
    Training Loss: 0.7092 | Training Acc: 91.36%
    Testing Loss: 0.6459  | Testing Acc: 93.74%


Training Epoch 190/200: 100%|██████████| 196/196 [00:12<00:00, 15.46it/s]
Testing Epoch 190/200: 100%|██████████| 40/40 [00:00<00:00, 55.69it/s]


Epoch 190/200:
    Training Loss: 0.7085 | Training Acc: 91.33%
    Testing Loss: 0.6384  | Testing Acc: 94.27%


Training Epoch 191/200: 100%|██████████| 196/196 [00:12<00:00, 15.57it/s]
Testing Epoch 191/200: 100%|██████████| 40/40 [00:00<00:00, 57.18it/s]


Epoch 191/200:
    Training Loss: 0.7103 | Training Acc: 91.28%
    Testing Loss: 0.6445  | Testing Acc: 93.98%


Training Epoch 192/200: 100%|██████████| 196/196 [00:12<00:00, 15.51it/s]
Testing Epoch 192/200: 100%|██████████| 40/40 [00:00<00:00, 56.75it/s]


Epoch 192/200:
    Training Loss: 0.7080 | Training Acc: 91.37%
    Testing Loss: 0.6454  | Testing Acc: 93.90%


Training Epoch 193/200:  21%|██        | 41/196 [00:03<00:12, 12.79it/s]