In [1]:
import torch
import torch.nn as nn
import torchvision
from torchinfo import summary         
import numpy as np
import torchvision.transforms as tf
import transformers                    
from tensorboardX import SummaryWriter 
from pkg_resources import packaging    
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from openTSNE import TSNE              
import pandas as pd
import seaborn as sns

from tqdm import tqdm
import random
import re
import time
import math

  from .autonotebook import tqdm as notebook_tqdm


set seed

In [2]:
seed = 2023
random.seed(seed)  
np.random.seed(seed)  
torch.manual_seed(seed) 
torch.cuda.manual_seed(seed)  
torch.cuda.manual_seed_all(seed) 
torch.backends.cudnn.benchmark = False  
torch.backends.cudnn.deterministic = True  

load data

In [3]:
train_set = torchvision.datasets.CIFAR10("./data", train=True, download=True)
test_set = torchvision.datasets.CIFAR10("./data", train=False, download=True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
simple_tf = tf.Compose([
    tf.PILToTensor(),
    tf.ConvertImageDtype(torch.float),
    tf.Resize((224, 224)),# interpolation=torchvision.transforms.InterpolationMode.BICUBIC),
    tf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

train_tf = tf.Compose([
    tf.PILToTensor(),
    tf.ConvertImageDtype(torch.float),
    tf.RandomHorizontalFlip(),
    tf.Resize((224, 224)),# interpolation=torchvision.transforms.InterpolationMode.BICUBIC),
    tf.RandomErasing(),
    tf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

In [5]:
train_set = torchvision.datasets.CIFAR10("./data", train=True, download=False, transform=train_tf)
test_set = torchvision.datasets.CIFAR10("./data", train=False, download=False, transform=simple_tf)

In [6]:
if packaging.version.parse(torchvision.__version__) < packaging.version.parse("0.13"):
    new_version = False
else:
    new_version = True
print(f"torchvision version: {torchvision.__version__}")

torchvision version: 0.14.1+cpu


load resnet model

In [7]:
def build_model():
    if new_version:
        model = torchvision.models.resnet50(weights="IMAGENET1K_V2")
    else:
        model = torchvision.models.resnet50(pretrained=True)
    model.fc = nn.Linear(2048, 10)
    nn.init.xavier_uniform_(model.fc.weight)
    return model
model = build_model()
summary(model)
# for k,v in model.named_parameters():
#     print(f"{k}:{v.shape}:{v.requires_grad}")

Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─Bottleneck: 2-1                   --
│    │    └─Conv2d: 3-1                  4,096
│    │    └─BatchNorm2d: 3-2             128
│    │    └─Conv2d: 3-3                  36,864
│    │    └─BatchNorm2d: 3-4             128
│    │    └─Conv2d: 3-5                  16,384
│    │    └─BatchNorm2d: 3-6             512
│    │    └─ReLU: 3-7                    --
│    │    └─Sequential: 3-8              16,896
│    └─Bottleneck: 2-2                   --
│    │    └─Conv2d: 3-9                  16,384
│    │    └─BatchNorm2d: 3-10            128
│    │    └─Conv2d: 3-11                 36,864
│    │    └─BatchNorm2d: 3-12            128
│    │    └─Conv2d: 3-13               

set optimizer

In [8]:
def freeze_backbone(pt_model):
    for k,v in model.named_parameters():
        if not k.startswith('fc.'):
            v.requires_grad = False

In [9]:
def _get_resnet_name_to_layer(pt_model):
    name_to_layer = {}
    for k,v in model.named_parameters():
        if k.startswith('conv1.') or k.startswith('bn1.'):
            name_to_layer[k] = 0
        elif k.startswith('layer1.'):
            name_to_layer[k] = 1
        elif k.startswith('layer2.'):
            name_to_layer[k] = 2
        elif k.startswith('layer3.'):
            name_to_layer[k] = 3
        elif k.startswith('layer4.'):
            name_to_layer[k] = 4
        elif k.startswith('fc.'):
            name_to_layer[k] = 5
        else:
            print("ERROR")
    return name_to_layer, 5

def _get_resnet_no_decay_param_names(pt_model):
    no_decay_param_names = []
    for k,v in model.named_parameters():
        if re.search("\.?bn[0-9][\.]", k) is not None:  
            no_decay_param_names.append(k)
            #print(k)
        elif k.endswith('.bias'):
            no_decay_param_names.append(k)
            #print(k)
    return set(no_decay_param_names)

In [10]:
def get_optim(pt_model, optim_name="adam", lr=1e-5, weight_decay=0.01,
              filter_bias_and_bn=True, lr_decay_factor=None):
    param_groups = []
    if lr_decay_factor is not None:
        name_to_layer, num_layers = _get_resnet_name_to_layer(pt_model)
        layer_scales = list(lr_decay_factor ** (num_layers - i) for i in range(num_layers + 1))
    if filter_bias_and_bn is True and weight_decay != 0.0:
        no_decay_param_names = _get_resnet_no_decay_param_names(pt_model)
        for k, v in pt_model.named_parameters():
            if k in no_decay_param_names:
                param_groups.append({'params': v, 'lr': lr * layer_scales[name_to_layer[k]]})
            else:
                param_groups.append({'params': v, 'lr': lr * layer_scales[name_to_layer[k]], 'weight_decay': weight_decay})
    else:
        for k, v in pt_model.named_parameters():
            param_groups.append({'params': v, 'lr': lr * layer_scales[name_to_layer[k]], 'weight_decay': weight_decay})

    if optim_name.lower() == 'adam':
        optimizer = torch.optim.AdamW(param_groups, lr=lr)
    elif optim_name.lower() == 'sgd':
        optimizer = torch.optim.SGD(param_groups, lr=lr)
        
    return optimizer

def get_scheduler(optim, num_warmup_steps, num_training_steps):
    return transformers.get_linear_schedule_with_warmup(optim, num_warmup_steps, num_training_steps)

train

In [11]:
def _cal_accuracy(y_hat, y):
    y_hat = torch.argmax(y_hat, dim=1)
    accuracy = (y == y_hat).float().sum() / y.shape[0]
    return accuracy

def _log_lr(writer, optimizer, step):
    unique_lr_list = list(set([i['lr'] for i in optimizer.state_dict()['param_groups']]))
    unique_lr_list.sort()
    writer.add_scalars('Learning_Rates', {str(i): lr for i, lr in enumerate(unique_lr_list)}, step)
    

def train(model, dataloaders, optimizer, scheduler, **kwargs):
    train_loader, test_loader = dataloaders
    device = kwargs['device']
    writer = SummaryWriter(kwargs['logger_name'])
    loss = nn.CrossEntropyLoss().to(device)
    model = model.to(device)
    
    for epoch in range(kwargs['max_epochs']):
        ########  Train  ########
        loss_list, acc_list = [], []
        model.train()
        last_time = time.time()
        for local_step, (x, y) in enumerate(train_loader):
            step = epoch * len(train_loader) + local_step
            x, y = x.to(device), y.to(device)
            
            optimizer.zero_grad()
            logits = model(x)
            l = loss(logits, y)
            acc = _cal_accuracy(logits, y)
            l.backward()
            optimizer.step()
            scheduler.step()
            
            # log
            loss_list.append(l.detach().cpu().item())
            acc_list.append(acc.detach().cpu().item())
            if (local_step % 100 == 0 and local_step != 0) or local_step == len(train_loader) - 1:
                print("Epoch {}/{} | Step {}/{} | loss:{:.5f} accuracy: {:.4f} time: {:.1f}s".format(
                    epoch, kwargs['max_epochs'], local_step, len(train_loader),
                    sum(loss_list)/len(loss_list), sum(acc_list)/len(acc_list),
                    time.time() - last_time
                ))
                last_time = time.time()
            writer.add_scalar('Train/Loss', l, step)
            writer.add_scalar('Train/Acc', acc, step)
            writer.add_scalar('Epoch', epoch, step)
            if kwargs['log_lr'] is True and local_step % 200 == 0:
                _log_lr(writer, optimizer, step)
        ########  Test  ########
        loss_list, acc_list = [], []
        print("-"*20 + "   Testing   " + "-"*20)
        model.eval()
        with torch.no_grad():
            for local_step, (x, y) in enumerate(test_loader):
                x, y = x.to(device), y.to(device)
                logits = model(x)
                l = loss(logits, y)
                acc = _cal_accuracy(logits, y)
                loss_list.append(l.cpu().item())
                acc_list.append(acc.cpu().item())
            # log
            print("Epoch {}/{} | loss:{:.5f} accuracy: {:.4f}".format(
                epoch, kwargs['max_epochs'],
                sum(loss_list)/len(loss_list), sum(acc_list)/len(acc_list),
            ))
            writer.add_scalar('Test/Loss', sum(loss_list)/len(loss_list), epoch * len(train_loader))
            writer.add_scalar('Test/Acc', sum(acc_list)/len(acc_list), epoch * len(train_loader))
        print("=" * 53)
        

In [12]:
batch_size = 64
device = torch.device('cuda')
max_epochs = 20
lr = 5e-6
weight_decay = 0.05
num_warmup_steps = 3 * math.ceil(len(train_set) / batch_size)
num_training_steps = max_epochs * math.ceil(len(train_set) / batch_size)
lr_decay_factor = .75
filter_bias_and_bn = True
optim_name = 'adam'
log_lr = True
only_train_fc = False
logger_name = "ResNet-50_v1"

In [13]:
train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
optimizer = get_optim(model, lr_decay_factor=lr_decay_factor)
scheduler = get_scheduler(optimizer, num_warmup_steps, num_training_steps)
if only_train_fc is True:
    freeze_backbone(model)

train(model, (train_dataloader, test_dataloader), optimizer, scheduler,
      device=device, max_epochs=max_epochs, log_lr=log_lr, logger_name=logger_name)

Epoch 0/20 | Step 100/782 | loss:2.34350 accuracy: 0.1151 time: 46.3s
Epoch 0/20 | Step 200/782 | loss:2.33771 accuracy: 0.1173 time: 41.6s
Epoch 0/20 | Step 300/782 | loss:2.33127 accuracy: 0.1218 time: 42.3s
Epoch 0/20 | Step 400/782 | loss:2.32070 accuracy: 0.1290 time: 42.0s
Epoch 0/20 | Step 500/782 | loss:2.30730 accuracy: 0.1372 time: 42.3s
Epoch 0/20 | Step 600/782 | loss:2.29198 accuracy: 0.1495 time: 43.7s
Epoch 0/20 | Step 700/782 | loss:2.27284 accuracy: 0.1639 time: 43.1s
Epoch 0/20 | Step 781/782 | loss:2.25371 accuracy: 0.1792 time: 34.1s
--------------------   Testing   --------------------
Epoch 0/20 | loss:2.01291 accuracy: 0.3780
Epoch 1/20 | Step 100/782 | loss:1.99025 accuracy: 0.3838 time: 43.6s
Epoch 1/20 | Step 200/782 | loss:1.93135 accuracy: 0.4146 time: 43.0s
Epoch 1/20 | Step 300/782 | loss:1.85460 accuracy: 0.4547 time: 42.5s
Epoch 1/20 | Step 400/782 | loss:1.76305 accuracy: 0.4911 time: 42.5s
Epoch 1/20 | Step 500/782 | loss:1.66940 accuracy: 0.5235 time:

save model

In [14]:
torch.save(model, 'model_classification_resnet50.pth')