In [1]:
import resnet
import utils
from pytorch_nndct.apis import torch_quantizer, dump_xmodel

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'

[0;32m[NNDCT_NOTE]: Loading NNDCT kernels...[0m


In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_data(batch_size, calib_size):
    # load the data
    train_transform = transforms.Compose(
        [
         transforms.RandomCrop(32, padding=4),
         transforms.RandomHorizontalFlip(),
         transforms.ToTensor(),
         transforms.Normalize((0.49139968, 0.48215841, 0.44653091), (0.24703223, 0.24348513, 0.26158784))])

    test_transform = transforms.Compose(
        [
         transforms.ToTensor(),
         transforms.Normalize((0.49139968, 0.48215841, 0.44653091), (0.24703223, 0.24348513, 0.26158784))])

    ds = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=train_transform)


    test_ds = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=test_transform)

    # split the training set and validation set
    torch.manual_seed(50)
    test_size = len(test_ds)
    val_size = 2000
    train_size = len(ds) - val_size - calib_size



    train_ds, val_ds, calib_ds = random_split(ds, [train_size, val_size, calib_size])
    
    calib_loader = torch.utils.data.DataLoader(calib_ds, batch_size=batch_size, shuffle=True, num_workers=4)

    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4)

    val_loader = torch.utils.data.DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4)

    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=4)
    
    return train_loader, val_loader, test_loader, calib_loader

In [3]:
checkpoint = torch.load('resnet20-vitis-e100.pth', map_location=torch.device('cpu'))

In [4]:
model = resnet.resnet20().cpu()
model.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [5]:
def quant_and_evaluate(model, quant_mode, deploy=False):
    batch_size = 128
    calib_size = 1024
    if(deploy):
        batch_size = 1
        
    if(quant_mode == 'test'):
        calib_size = 1
    elif(quant_mode == 'calib'):
        calib_size = 1024
    
    quant_model = model
    print(f'batch_size: ${batch_size}, calib_size: ${calib_size}')
    if(quant_mode != 'float'):
        tmp_input = torch.randn([batch_size, 3, 32, 32])
        quantizer = torch_quantizer(quant_mode, model, (tmp_input), device=device, bitwidth=8)
        quant_model = quantizer.quant_model
    
    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    train_loader, val_loader, test_loader, calib_loader = load_data(batch_size, calib_size)
    
    
    if quant_mode == 'calib':
        quantizer.fast_finetune(utils.evaluate, (quant_model, calib_loader, loss_fn))
    elif quant_mode == 'test':
        quantizer.load_ft_param()
    
    acc1_gen, acc5_gen, loss_gen = utils.evaluate(quant_model, test_loader, loss_fn)

    print('loss: %g' % (loss_gen))
    print('top-1 / top-5 accuracy: %g / %g' % (acc1_gen, acc5_gen))
    
    if quant_mode == 'calib':
        quantizer.export_quant_config()
    if deploy:
        quantizer.export_xmodel(deploy_check=False)


In [6]:
quant_and_evaluate(model, 'float', False)

batch_size: $128, calib_size: $1024
Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 79/79 [00:04<00:00, 16.63it/s]

loss: 0.00283016
top-1 / top-5 accuracy: 90.75 / 99.7





In [7]:
quant_and_evaluate(model, 'calib', False)

batch_size: $128, calib_size: $1024

[0;32m[NNDCT_NOTE]: Quantization calibration process start up...[0m

[0;32m[NNDCT_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[NNDCT_NOTE]: =>Parsing CifarResNet...[0m


  out = nn.functional.avg_pool2d(out, out.size()[3].item())



[0;32m[NNDCT_NOTE]: =>Doing weights equalization...[0m

[0;32m[NNDCT_NOTE]: =>Quantizable module is generated.(quantize_result/CifarResNet.py)[0m

[0;32m[NNDCT_NOTE]: =>Get module with quantization.[0m
Files already downloaded and verified
Files already downloaded and verified

[0;32m[NNDCT_NOTE]: =>Finetuning module parameters for better quantization accuracy... [0m


100%|██████████| 8/8 [00:00<00:00, 17.53it/s]
100%|██████████| 8/8 [00:26<00:00,  3.26s/it]



[0;32m[NNDCT_NOTE]: =>Exporting quant model parameters.(quantize_result/param.pth)[0m


100%|██████████| 79/79 [04:10<00:00,  3.17s/it]

loss: 0.00285211
top-1 / top-5 accuracy: 90.62 / 99.67

[0;32m[NNDCT_NOTE]: =>Exporting quant config.(quantize_result/quant_info.json)[0m





In [8]:
quant_and_evaluate(model, 'test', True)

batch_size: $1, calib_size: $1

[0;32m[NNDCT_NOTE]: Quantization test process start up...[0m

[0;32m[NNDCT_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[NNDCT_NOTE]: =>Parsing CifarResNet...[0m

[0;32m[NNDCT_NOTE]: =>Doing weights equalization...[0m

[0;32m[NNDCT_NOTE]: =>Quantizable module is generated.(quantize_result/CifarResNet.py)[0m

[0;32m[NNDCT_NOTE]: =>Get module with quantization.[0m
Files already downloaded and verified
Files already downloaded and verified

[0;32m[NNDCT_NOTE]: =>Loading quant model parameters.(quantize_result/param.pth)[0m


100%|██████████| 10000/10000 [01:08<00:00, 145.62it/s]

loss: 0.359618
top-1 / top-5 accuracy: 90.51 / 99.67

[0;32m[NNDCT_NOTE]: =>Converting to xmodel ...[0m






[0;32m[NNDCT_NOTE]: =>Successfully convert 'CifarResNet' to xmodel.(quantize_result/CifarResNet_int.xmodel)[0m
