In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from dongnet import dongnet12

In [2]:
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available. Training on CPU ...')
else:
    print('CUDA is available! Training on GPU ...')

CUDA is not available. Training on CPU ...


# 모델 선언 및 불러오기

In [10]:
model = dongnet12()
model_dict = torch.load('model.pth', map_location=torch.device('cpu'))  # 상태 사전 로드
model.load_state_dict(model_dict)  # 모델에 상태 사전 로드

for name, param in model.named_parameters():
    print(name)


conv1.weight
batchnorm1.weight
batchnorm1.bias
conv2.weight
batchnorm2.weight
batchnorm2.bias
conv3.weight
batchnorm3.weight
batchnorm3.bias
conv4.weight
batchnorm4.weight
batchnorm4.bias
conv5.weight
batchnorm5.weight
batchnorm5.bias
conv6.weight
batchnorm6.weight
batchnorm6.bias
conv7.weight
batchnorm7.weight
batchnorm7.bias
conv8.weight
batchnorm8.weight
batchnorm8.bias
conv9.weight
batchnorm9.weight
batchnorm9.bias
linear_relu.0.weight
linear_relu.0.bias
linear_relu.3.weight
linear_relu.3.bias
linear_relu.6.weight
linear_relu.6.bias


# Quantize 함수

In [4]:
def quantize(X, NBIT):
    # 1. find threshold
    alpha = np.max(X)
    beta = np.min(X)
    alpha_q = -2**(NBIT - 1)
    beta_q = 2**(NBIT - 1) - 1

    s = (beta - alpha) / (beta_q - alpha_q)
    z = int((beta*alpha_q - alpha * beta_q) / (beta - alpha))

    data_q = np.round(1/s * X + z, decimals=0)
    data_q = np.clip(data_q, alpha_q, beta_q)    
    data_q = data_q.astype(np.int8)
        
    data_qn = data_q
    data_qn = data_qn.astype(np.int32)
    data_qn = s * (data_qn - z)
    data_qn = data_qn.astype(np.float32)
    
    return data_q

# Weight quantize 함수

In [5]:
def weightquantize(NBIT=8):
    #quantize시킬 모듈 리스트
    quant_module=[nn.Linear, nn.Conv2d, nn.BatchNorm2d]
    #quantize시킬 모듈 튜플
    quant_module_tuple=tuple(quant_module)
    #quntize 모듈에 해당하는 layer 리스트 선언
    quant_layer = []

    #quantize할 layer 추출
    for name, module in model.named_modules():
        if isinstance(module, quant_module_tuple):
            quant_layer.append(name)
            
    for name, param in model.named_parameters():
        convbatchlayer = '.'.join(name.split('.')[:1])
        linearlayer = '.'.join(name.split('.')[:2])
        if convbatchlayer in quant_layer:
            weight = param.cpu().detach().numpy()
            dqn = quantize(weight, NBIT)
            param.data = torch.from_numpy(dqn)
        elif linearlayer in quant_layer:
            weight = param.cpu().detach().numpy()
            dqn = quantize(weight, NBIT)
            param.data = torch.from_numpy(dqn)
    
    print("{}bit quantize Complete".format(NBIT))

# evaluate 함수

In [6]:
def evaluate_model(model, test_loader, device, NBIT):
    #batch norm 고정, dropout 안함, gradient 계산안함
    model.eval()
    #model 파라미터를 지정한 device 메모리에 올림
    model.to(device)

    running_corrects = 0
    criterion = nn.CrossEntropyLoss()

    for inputs, labels in test_loader:

        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        #텐서의 최대값과 index를 반환
        _, preds = torch.max(outputs, 1)

        if criterion is not None:
            loss = criterion(outputs, labels).item()
        else:
            loss = 0

        # inputs.size(0) 현재 mini-batch의 크기(input의 0번째 dimension 크기)
        running_corrects += torch.sum(preds == labels.data)
    
    eval_accuracy = running_corrects / len(test_loader.dataset)

    print("{}bit model cifar100 Accuracy : {:.4f}".format(NBIT, eval_accuracy))

# test data 가져오기

In [7]:
test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

test_set = torchvision.datasets.CIFAR100(root="data", train=False, download=True, transform=test_transform)
test_sampler = torch.utils.data.SequentialSampler(test_set)

test_loader = torch.utils.data.DataLoader(
    dataset=test_set, batch_size=128,
    sampler=test_sampler, num_workers=8)

Files already downloaded and verified


# 모델 인퍼런스

In [8]:
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=32)

32bit model cifar100 Accuracy : 0.6911


# quantize 모델 인퍼런스

In [9]:
weightquantize(NBIT=8)
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=8)


8bit quantize Complete
8bit model cifar100 Accuracy : 0.0100


In [None]:
torch.save(model.state_dict(), 'int8qmodel.pth')
weightquantize(NBIT=7)
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=7)
weightquantize(NBIT=6)
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=6)
weightquantize(NBIT=5)
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=5)
weightquantize(NBIT=4)
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=4)