In [1]:

import logging
import os

import torch
from torch import nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# Compute device: use CUDA when a GPU is visible, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Logging setup: records at INFO level and above are written to res.log,
# each line prefixed with the record's timestamp.
logging.basicConfig(filename='res.log', format='%(asctime)s %(message)s', level=logging.INFO)

# Named logger that the training loop sends its progress messages to.
logger_all = logging.getLogger("logger_all")

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class ResBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut branch.

    The main path is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    The block input is passed through ``self.shortcut`` and added to the main
    path before the final ReLU. The shortcut is an empty ``nn.Sequential``
    (i.e. it returns its input) unless the stride or the channel count
    changes, in which case it is a 1x1 convolution followed by BatchNorm.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution and of the shortcut
            projection. Defaults to 2.
    """

    def __init__(self, in_channels, out_channels, stride=2):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # A projection is only required when the main path changes the
        # spatial resolution or the number of channels.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            projection = []
        else:
            projection = [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        return F.relu(hidden + self.shortcut(x))



class ResNet(nn.Module):
    """ResNet-style image classifier: 3x3 stem, residual stages, linear head.

    ``self.li`` lists the channel width at each stage boundary. For every
    consecutive pair of widths the network stacks a stride-1 ``ResBlock`` that
    keeps the current width, followed by a ``ResBlock`` with its default
    stride that moves to the next width. The resulting feature map is
    average-pooled to 1x1, flattened and passed to a linear layer.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.li = [128, 256, 256, 512, 512]

        # Stem: 3 input channels -> first width, resolution unchanged.
        stem_width = self.li[0]
        self.conv1 = nn.Conv2d(3, stem_width, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(stem_width)

        # Two residual blocks per transition between consecutive widths.
        stages = []
        for width_in, width_out in zip(self.li[:-1], self.li[1:]):
            stages.append(ResBlock(width_in, width_in, stride=1))
            stages.append(ResBlock(width_in, width_out))
        self.blocks = nn.Sequential(*stages)

        self.fc = nn.Linear(self.li[-1], num_classes)

    def forward(self, x):
        """Map a batch of 3-channel images to class logits."""
        features = self.blocks(F.relu(self.bn1(self.conv1(x))))
        pooled = F.adaptive_avg_pool2d(features, (1, 1))
        return self.fc(torch.flatten(pooled, 1))





In [4]:

# Pre-processing: images are only converted to tensors (no resizing is applied).
transform = transforms.Compose([transforms.ToTensor()])

# --- CIFAR-10 training split -------------------------------------------------
train_batch = 1024
trainset = torchvision.datasets.CIFAR10(
    root="./cifar", train=True, download=True, transform=transform
)
# num_workers is fixed at 16 here; it should match the number of CPU cores.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=train_batch, shuffle=True, num_workers=16
)

# --- CIFAR-10 test split -----------------------------------------------------
test_batch = 1024
testset = torchvision.datasets.CIFAR10(
    root="./cifar", train=False, download=True, transform=transform
)
# Test batches are served in a fixed order (shuffle=False), same worker count.
testloader = torch.utils.data.DataLoader(
    testset, batch_size=test_batch, shuffle=False, num_workers=16
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Build the network with its default number of classes, then place it on `device`.
model = ResNet()
model = model.to(device)


In [6]:

# Loss function for classification (returns the mean loss over the batch).
criterion = nn.CrossEntropyLoss()

lr = 5e-4
optimizer = optim.Adam(model.parameters(), lr=lr)

# Directory the checkpoints are written to. torch.save does not create
# missing parent directories, so make sure it exists before training starts.
ckpt_dir = './ckpt/res'
os.makedirs(ckpt_dir, exist_ok=True)

# Training loop
num_epochs = 500
for epoch in range(num_epochs):
    # Set model to training mode
    model.train()

    # Sum of per-sample losses accumulated over the current epoch.
    running_loss = 0.0

    # Number of samples processed so far in the current epoch.
    cnt = 0

    for i, (images, labels) in enumerate(trainloader):

        # Move data to the selected device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean, so weight it by the batch size. Averaging the
        # batch means directly would over-weight the (smaller) final batch.
        b = images.shape[0]
        running_loss += loss.item() * b
        cnt += b

        # Log the running per-sample mean loss after every iteration
        msg = f'Epoch [{epoch + 1}/{num_epochs}], Item [{cnt}/{len(trainset)}], Loss: {running_loss / cnt:.4f}'
        logger_all.info(msg)
        print(msg)

    # Log the per-sample mean loss of the whole epoch
    # (max(..., 1) only guards against an empty loader).
    msg = f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / max(cnt, 1):.4f}'
    logger_all.info(msg)
    print(msg)

    # Save a checkpoint every 5 epochs
    if (epoch + 1) % 5 == 0:
        model_save_path = f'{ckpt_dir}/res_{epoch+1}.pth'
        torch.save(model.state_dict(), model_save_path)
        print(f'Model parameters saved to {model_save_path}')

# Save the final weights once all epochs have completed
model_save_path = f'{ckpt_dir}/res_last.pth'
torch.save(model.state_dict(), model_save_path)
print(f'Model parameters saved to {model_save_path}')

Epoch [1/500], Item [1024/50000], Loss: 2.3783
Epoch [1/500], Item [2048/50000], Loss: 2.3820
Epoch [1/500], Item [3072/50000], Loss: 2.3305
Epoch [1/500], Item [4096/50000], Loss: 2.2740
Epoch [1/500], Item [5120/50000], Loss: 2.2180
Epoch [1/500], Item [6144/50000], Loss: 2.1628
Epoch [1/500], Item [7168/50000], Loss: 2.1044
Epoch [1/500], Item [8192/50000], Loss: 2.0588
Epoch [1/500], Item [9216/50000], Loss: 2.0194
Epoch [1/500], Item [10240/50000], Loss: 1.9858
Epoch [1/500], Item [11264/50000], Loss: 1.9591
Epoch [1/500], Item [12288/50000], Loss: 1.9331
Epoch [1/500], Item [13312/50000], Loss: 1.9041
Epoch [1/500], Item [14336/50000], Loss: 1.8797
Epoch [1/500], Item [15360/50000], Loss: 1.8588
Epoch [1/500], Item [16384/50000], Loss: 1.8357
Epoch [1/500], Item [17408/50000], Loss: 1.8162
Epoch [1/500], Item [18432/50000], Loss: 1.8007
Epoch [1/500], Item [19456/50000], Loss: 1.7816
Epoch [1/500], Item [20480/50000], Loss: 1.7644
Epoch [1/500], Item [21504/50000], Loss: 1.7481
E

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7fbe63a4c2e0>>
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 

KeyboardInterrupt



In [7]:
# Evaluate the checkpoints saved at epochs 5, 10, 15, 20 and 25 on the test set.
num_classes = 10
res = []  # test accuracy (in percent) of each evaluated checkpoint
mat = torch.zeros((num_classes, num_classes))
for e in range(5, 25 + 5, 5):
    print(e)
    model = ResNet().to(device)

    # Load the checkpoint. map_location remaps the stored tensors onto the
    # current device, so weights saved on a GPU also load on a CPU-only machine.
    model_save_path = f'./ckpt/res/res_{e}.pth'
    model.load_state_dict(torch.load(model_save_path, map_location=device))

    # Set model to evaluation mode
    model.eval()

    # Fresh confusion matrix for this checkpoint (rows: true label, columns:
    # predicted label). Without the reset, counts of all checkpoints would be
    # summed together; after the loop `mat` describes the last checkpoint.
    mat = torch.zeros((num_classes, num_classes))

    # Accuracy counters for this checkpoint
    total = 0
    correct = 0
    with torch.no_grad():
        for images, labels in testloader:

            # Move data to the selected device
            images = images.to(device)
            labels = labels.to(device)

            # Predicted class = index of the largest logit
            outputs = model(images)
            predicted = outputs.argmax(dim=1)

            # Update counters
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Encode every (label, prediction) pair as one flat index, count
            # the occurrences on the tensors' own device, then bring the
            # counts back to the CPU where `mat` lives.
            pair_index = labels * num_classes + predicted
            pair_counts = torch.bincount(pair_index, minlength=num_classes * num_classes)
            mat += pair_counts.view(num_classes, num_classes).cpu()

    # Test accuracy of this checkpoint
    accuracy = 100 * correct / total
    res.append(accuracy)
    print(accuracy, "%")
    

5
73.27 %
10
78.17 %
15
79.69 %
20
80.29 %
25
80.47 %


In [None]:
from thop import profile

# Profile a freshly constructed network (not moved to `device`) on a single
# random 3x32x32 input.
model = ResNet()
x = torch.randn(1, 3, 32, 32)
# NOTE(review): thop's usage example names the first return value "macs";
# confirm whether the figure printed as FLOPs is really multiply-accumulates.
flops, params = profile(model, inputs=(x,))
print(f'FLOPs = {flops / 1000**3!s}G')
print(f'Params = {params / 1000**2!s}M')

In [7]:
# Evaluate the epoch-25 checkpoint on the test set and build its confusion matrix.
num_classes = 10
model = ResNet().to(device)

# Confusion matrix kept on the CPU (rows: true label, columns: predicted label).
mat = torch.zeros((num_classes, num_classes))

# Load the checkpoint. map_location remaps the stored tensors onto the current
# device, so weights saved on a GPU also load on a CPU-only machine.
model_save_path = './ckpt/res/res_25.pth'
model.load_state_dict(torch.load(model_save_path, map_location=device))

# Set model to evaluation mode
model.eval()

# Accuracy counters
total = 0
correct = 0
with torch.no_grad():
    for images, labels in testloader:

        # Move data to the selected device
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest logit
        outputs = model(images)
        predicted = outputs.argmax(dim=1)

        # Update counters
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Encode every (label, prediction) pair as one flat index, count the
        # occurrences on the tensors' own device, then bring the counts back
        # to the CPU where `mat` lives.
        pair_index = labels * num_classes + predicted
        pair_counts = torch.bincount(pair_index, minlength=num_classes * num_classes)
        mat += pair_counts.view(num_classes, num_classes).cpu()

# Test accuracy and confusion matrix
accuracy = 100 * correct / total
print(accuracy, "%")
print(mat)

80.47 %
tensor([[866.,   9.,  26.,  13.,  15.,   9.,   3.,   9.,  34.,  16.],
        [ 12., 907.,   0.,   3.,   3.,   3.,   5.,   0.,  24.,  43.],
        [ 60.,   6., 684.,  51.,  45.,  69.,  31.,  40.,   6.,   8.],
        [ 13.,   2.,  35., 636.,  44., 165.,  40.,  46.,  12.,   7.],
        [  8.,   2.,  53.,  45., 766.,  41.,  14.,  63.,   6.,   2.],
        [  5.,   2.,  12., 124.,  25., 778.,   7.,  41.,   1.,   5.],
        [  4.,   3.,  31.,  65.,  29.,  33., 804.,   9.,  14.,   8.],
        [ 14.,   2.,  10.,  17.,  32.,  49.,   1., 870.,   2.,   3.],
        [ 64.,  11.,   8.,   7.,   3.,   5.,   2.,   7., 878.,  15.],
        [ 28.,  56.,   5.,   7.,   2.,   3.,   4.,   9.,  28., 858.]])


In [8]:
# Show the predicted classes of the epoch-25 checkpoint for the first 100 test images.
model = ResNet().to(device)

# Load the checkpoint. map_location remaps the stored tensors onto the current
# device, so weights saved on a GPU also load on a CPU-only machine.
model_save_path = './ckpt/res/res_25.pth'
model.load_state_dict(torch.load(model_save_path, map_location=device))

# Set model to evaluation mode
model.eval()

with torch.no_grad():
    # Only the first test batch is needed; the labels are not used here.
    images, _ = next(iter(testloader))

    # Slice before the transfer so only the 100 images used reach the device.
    images = images[:100].to(device)

    # Predicted class = index of the largest logit
    outputs = model(images)
    predicted = outputs.argmax(dim=1)

print(predicted)

tensor([3, 8, 8, 0, 6, 6, 1, 6, 3, 1, 0, 9, 5, 7, 9, 8, 5, 7, 8, 6, 7, 0, 4, 9,
        4, 2, 4, 0, 9, 6, 6, 5, 4, 5, 9, 1, 4, 9, 9, 5, 4, 6, 7, 6, 0, 9, 3, 9,
        7, 2, 9, 8, 3, 5, 8, 8, 7, 7, 5, 4, 7, 5, 6, 3, 6, 2, 1, 2, 5, 7, 2, 3,
        8, 8, 0, 2, 9, 3, 3, 0, 8, 8, 1, 7, 2, 7, 7, 0, 8, 9, 0, 5, 8, 6, 4, 6,
        3, 0, 0, 7], device='cuda:0')
