In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from dongnet import dongnet12

In [3]:
# Record whether a CUDA device is visible to PyTorch and report which
# kind of device is going to be used.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available! Training on GPU...')
else:
    print('CUDA is not available. Training on CPU...')

CUDA is not abailable. Training on CPU...


# evaluate 함수

In [5]:
def evaluate_model(model, test_loader, device, NBIT):
    """Measure the top-1 accuracy of ``model`` on ``test_loader`` and print it.

    Args:
        model: ``nn.Module`` whose output has the class scores along dim 1.
        test_loader: iterable yielding ``(inputs, labels)`` batches; it must
            expose a ``dataset`` attribute whose length is the number of
            evaluated samples.
        device: device the model and every batch are moved to.
        NBIT: bit-width label; it is only used in the printed report.

    Returns:
        float: fraction of samples whose arg-max prediction equals the label.
        (Earlier versions only printed this value and returned ``None``.)
    """
    # Evaluation mode: batch-norm uses its running statistics and dropout is
    # disabled. This does NOT turn off autograd by itself.
    model.eval()
    # Move the model parameters to the requested device.
    model.to(device)

    running_corrects = 0

    # Disable gradient tracking so no autograd graph is built per batch.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # torch.max returns (values, indices); the indices are the
            # predicted classes.
            _, preds = torch.max(outputs, 1)

            running_corrects += torch.sum(preds == labels).item()

    eval_accuracy = running_corrects / len(test_loader.dataset)

    print("{}bit model cifar100 Accuracy : {:.4f}".format(NBIT, eval_accuracy))

    return eval_accuracy

# layer에 quantization 추가

In [15]:
class dongnet12q(nn.Module):
    """Convolutional 100-class classifier with fake-quantized activations.

    The network has nine 3x3 convolution stages (conv -> batch-norm ->
    LeakyReLU(0.1)), each followed by :meth:`quantize`, with 2x2 max pooling
    after stages 2, 5 and 9, and a three-layer fully connected head. The
    head's input size of 4096 (256 channels x 4 x 4 after three poolings)
    corresponds to 32x32 input images.

    Attribute names are matched by name when a state dict is loaded, so they
    must not be renamed if existing checkpoints are to keep loading.
    """

    def __init__(self):
        super(dongnet12q, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.leaky_relu1 = nn.LeakyReLU(negative_slope=0.1)

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.leaky_relu2 = nn.LeakyReLU(negative_slope=0.1)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.leaky_relu3 = nn.LeakyReLU(negative_slope=0.1)

        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm4 = nn.BatchNorm2d(128)
        self.leaky_relu4 = nn.LeakyReLU(negative_slope=0.1)

        self.conv5 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm5 = nn.BatchNorm2d(128)
        self.leaky_relu5 = nn.LeakyReLU(negative_slope=0.1)

        self.conv6 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm6 = nn.BatchNorm2d(256)
        self.leaky_relu6 = nn.LeakyReLU(negative_slope=0.1)

        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm7 = nn.BatchNorm2d(256)
        self.leaky_relu7 = nn.LeakyReLU(negative_slope=0.1)

        self.conv8 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm8 = nn.BatchNorm2d(256)
        self.leaky_relu8 = nn.LeakyReLU(negative_slope=0.1)

        self.conv9 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm9 = nn.BatchNorm2d(256)
        self.leaky_relu9 = nn.LeakyReLU(negative_slope=0.1)

        self.max_pool2d = nn.MaxPool2d(2, stride=2)
        # Classifier head: 4096 flattened features -> 100 class scores.
        self.linear_relu = nn.Sequential(
            nn.Linear(4096, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 100)
        )

    def quantize(self, X, NBIT=8):
        """Round-trip ``X`` through an ``NBIT``-bit signed integer grid.

        The value range of the whole tensor is mapped affinely onto
        ``[-2**(NBIT-1), 2**(NBIT-1) - 1]``, rounded, clipped, and mapped
        back to floating point ("fake" quantization).

        Args:
            X: tensor to quantize; its global min and max define the range.
            NBIT: number of bits of the integer grid, between 1 and 24.

        Returns:
            A float32 tensor with the shape of ``X`` holding the
            de-quantized values.

        Raises:
            ValueError: if ``NBIT`` is outside 1..24.
        """
        # float32 carries 24 significand bits, so wider codes could not be
        # represented exactly before the integer cast.
        if not 1 <= NBIT <= 24:
            raise ValueError("NBIT must be between 1 and 24, got {}".format(NBIT))

        # 1. Find the value range of the tensor.
        x_max = torch.max(X)
        x_min = torch.min(X)
        if x_max == x_min:
            # A constant tensor has a zero-width range; the scale would be 0
            # and the zero-point undefined. There is nothing to quantize.
            return X.to(torch.float32, copy=True)

        q_low = -2 ** (NBIT - 1)
        q_high = 2 ** (NBIT - 1) - 1

        # 2. Affine parameters. The mapping is deliberately kept as in the
        # original implementation: x_max maps to q_low and x_min to q_high,
        # so the scale is negative. De-quantization below uses the same
        # scale and zero-point, so the round trip is consistent.
        scale = (x_min - x_max) / (q_high - q_low)
        zero_point = int((x_min * q_low - x_max * q_high) / (x_min - x_max))

        # 3. Quantize: round to the grid and clip to the representable codes.
        codes = torch.round(1 / scale * X + zero_point)
        codes = torch.clip(codes, q_low, q_high)
        # Store the codes in an integer type wide enough for NBIT bits;
        # a fixed int8 would overflow for NBIT > 8.
        code_dtype = torch.int8 if NBIT <= 8 else torch.int32
        codes = codes.to(code_dtype)

        # 4. De-quantize; widen first so subtracting the zero-point cannot
        # overflow the storage type.
        restored = scale * (codes.to(torch.int32) - zero_point)
        return restored.to(torch.float32)

    def _quantized_stage(self, x, conv, norm, activation):
        """Apply conv -> batch-norm -> activation and quantize the result."""
        return self.quantize(activation(norm(conv(x))))

    def forward(self, x):
        """Compute class scores (raw logits, no softmax) for a batch ``x``."""
        out = self._quantized_stage(x, self.conv1, self.batchnorm1, self.leaky_relu1)
        out = self._quantized_stage(out, self.conv2, self.batchnorm2, self.leaky_relu2)
        out = self.max_pool2d(out)

        out = self._quantized_stage(out, self.conv3, self.batchnorm3, self.leaky_relu3)
        out = self._quantized_stage(out, self.conv4, self.batchnorm4, self.leaky_relu4)
        out = self._quantized_stage(out, self.conv5, self.batchnorm5, self.leaky_relu5)
        out = self.max_pool2d(out)

        out = self._quantized_stage(out, self.conv6, self.batchnorm6, self.leaky_relu6)
        out = self._quantized_stage(out, self.conv7, self.batchnorm7, self.leaky_relu7)
        out = self._quantized_stage(out, self.conv8, self.batchnorm8, self.leaky_relu8)
        out = self._quantized_stage(out, self.conv9, self.batchnorm9, self.leaky_relu9)
        out = self.max_pool2d(out)

        # Flatten everything but the batch dimension for the linear head.
        out = torch.flatten(out, 1)
        out = self.linear_relu(out)

        return out

In [16]:
# Test-time preprocessing: convert to a tensor, then normalise each channel
# with the fixed mean / standard deviation below.
normalize_mean = (0.4914, 0.4822, 0.4465)
normalize_std = (0.2023, 0.1994, 0.2010)
test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(normalize_mean, normalize_std)]
)

# CIFAR-100 test split, kept under the local "data" directory.
test_set = torchvision.datasets.CIFAR100(
    root="data",
    train=False,
    download=True,
    transform=test_transform,
)

# Visit the samples in dataset order so every run sees the same batches.
test_sampler = torch.utils.data.SequentialSampler(test_set)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=128,
    sampler=test_sampler,
    num_workers=8,
)

Files already downloaded and verified


In [17]:
# Rebuild the quantized network and restore its parameters on the CPU.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint_path = 'int8qmodel.pth'
model = dongnet12q()
model_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))  # read the state dict from disk
model.load_state_dict(model_dict)  # copy the stored parameters into the model

# Report accuracy on the test loader, evaluating on the CPU.
evaluate_model(model=model, test_loader=test_loader, device='cpu', NBIT=32)

32bit model cifar100 Accuracy : 0.6901
