# Квантование с PyTorch


In [None]:
#!pip3 install torch==1.5.0 torchvision==0.6.0

import os
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader

import torch.quantization
from torch.quantization import QuantStub, DeQuantStub

import time

Загрузим данные MNIST для обучения и тестирования

In [None]:
# Hyper-parameters and runtime switches read by the cells below.
args = {
    'batch_size': 500,        # mini-batch size intended for training
    'test_batch_size': 500,   # mini-batch size for evaluation
    'epochs': 5,              # number of full passes over the training set
    'lr': 0.005,              # learning rate handed to the optimizer
    'seed': 1,                # random seed (applied below)
    'log_interval': 20,       # print the training loss every N batches
    # Use the GPU only when one is actually present, so the notebook still
    # runs (on the CPU) on machines without CUDA instead of crashing.
    'cuda': torch.cuda.is_available(),
}

# The seed used to be declared but never applied. Seed PyTorch's default RNG
# so every run starts from the same random state.
torch.manual_seed(args['seed'])

In [None]:
# Convert images to tensors, then normalise with mean 0.5 / std 0.5
# (maps ToTensor's [0, 1] range onto [-1, 1]).
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
# Use the *training* batch size here (not 'test_batch_size'): the two are
# configured independently and only happen to be equal by default.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=args['batch_size'],
                                          shuffle=True, num_workers=16, pin_memory=True)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
# Evaluation order does not matter, so the test loader is not shuffled.
test_loader = torch.utils.data.DataLoader(testset, batch_size=args['test_batch_size'],
                                         shuffle=False, num_workers=16, pin_memory=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 35224429.57it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1132130.45it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 9426767.47it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 11658830.34it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Определим несколько вспомогательных функций и классов, которые помогут нам отслеживать статистику и точность на обучающих и тестовых данных.

In [None]:
def print_size_of_model(model):
    """Print the on-disk size of the model's serialized ``state_dict``.

    The state dict is written to a scratch file ``temp.p`` in the current
    working directory, its size is printed in megabytes (bytes / 1e6), and
    the scratch file is removed again.

    Args:
        model: object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
    """
    scratch_path = "temp.p"
    try:
        torch.save(model.state_dict(), scratch_path)
        print('Size (MB):', os.path.getsize(scratch_path)/1e6)
    finally:
        # Clean up even when saving or measuring fails part-way, so a stale
        # scratch file is never left behind in the working directory.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

def load_model(quantized_model, model):
    """Copy the weights of ``model`` into ``quantized_model``.

    Side effect: ``model`` is moved to the CPU in place.

    Args:
        quantized_model: destination module; its ``load_state_dict`` must
            accept the state dict produced by ``model``.
        model: source module holding the trained weights.
    """
    # Move to the CPU *before* taking the snapshot, so the state dict handed
    # to load_state_dict() holds CPU tensors. (nn.Module.to() works in place;
    # the old `model = model.to('cpu')` after the snapshot only rebound a
    # local name and left the snapshot on the original device.)
    model.to('cpu')
    quantized_model.load_state_dict(model.state_dict())

In [None]:
def train(model, epoch, loss_fn, optimizer, train_loader):
    """Run one pass over ``train_loader``, taking an optimizer step per batch.

    The model is put into training mode and placed on the GPU or the CPU
    according to ``args['cuda']``. Every ``args['log_interval']`` batches the
    current loss is printed together with a progress indicator.

    Args:
        model: network being optimised.
        epoch: epoch number, used only in the progress message.
        loss_fn: callable ``loss_fn(output, target)`` returning the loss.
        optimizer: optimizer bound to ``model``'s parameters.
        train_loader: iterable yielding ``(data, target)`` batches.
    """
    on_gpu = args['cuda']

    model.train()
    if not on_gpu:
        model.to('cpu')
    else:
        model.cuda()

    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        if on_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Standard update: clear old gradients, forward, backward, step.
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Only report on every log_interval-th batch.
        if step % args['log_interval'] != 0:
            continue

        seen = step * len(inputs)
        percent = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, len(train_loader.dataset), percent, loss.item()))

def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and report its accuracy.

    The model is switched to eval mode and placed on the GPU or the CPU
    according to ``args['cuda']``. The number of correct predictions, the
    accuracy and the elapsed wall-clock time (which includes the device move)
    are printed.

    Args:
        model: network to evaluate.
        test_loader: iterable yielding ``(data, target)`` batches; must
            expose ``.dataset`` with a length.

    Returns:
        Accuracy in percent. The correct count is accumulated as a tensor,
        so the result is a 0-dim tensor once at least one batch was seen.
    """
    model.eval()

    start = time.time()
    if args['cuda']:
        model.cuda()
    else:
        model.to('cpu')

    correct = 0
    # Inference only: with autograd disabled no graph is built per batch,
    # which saves memory and time without changing the predictions.
    with torch.no_grad():
        for data, target in test_loader:
            if args['cuda']:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Index of the highest score along the class dimension.
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).long().cpu().sum()

    end = time.time()
    acc = 100. * correct / len(test_loader.dataset)
    print('\nTest set:  Accuracy: {}/{} ({:.0f}%)  Time: {}\n'.format(correct, len(test_loader.dataset),acc, end - start))

    return acc


def train_and_eval(model, train_loader, test_loader):
    """Train ``model`` for ``args['epochs']`` epochs, evaluating after each.

    Uses cross-entropy loss and Adam with learning rate ``args['lr']``.

    Args:
        model: network to train (moved to the GPU when ``args['cuda']``).
        train_loader: loader passed to ``train`` each epoch.
        test_loader: loader passed to ``test`` each epoch.

    Returns:
        The value returned by ``test`` after the final epoch, or ``None``
        when ``args['epochs']`` is less than 1 (previously that case raised
        ``UnboundLocalError``).
    """
    if args['cuda']:
        model.cuda()

    history = []
    # NOTE(review): ignore_index=255 looks like a leftover from another task;
    # with a 10-way classifier no target is expected to equal 255 — confirm.
    loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
    optimizer = optim.Adam(model.parameters(), lr=args['lr'])
    for epoch in range(1, args['epochs'] + 1):
        train(model, epoch, loss_fn, optimizer, train_loader)
        history.append(test(model, test_loader))

    # NOTE(review): the per-epoch history is collected, but only the last
    # entry is returned to keep the existing return contract. Returning
    # `history` itself may have been the original intent.
    return history[-1] if history else None

In [None]:
class SimpleCNN(nn.Module):
    """Small CNN: two bias-free 3x3 convolutions followed by two linear layers.

    Takes 1-channel images and produces 10 scores per sample.

    Args:
        q: when True, the forward pass is wrapped in ``QuantStub`` /
            ``DeQuantStub`` so the module can be used with
            ``torch.quantization.prepare`` / ``convert``.
    """

    def __init__(self, q=False):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, stride=1, bias=False)
        self.conv2 = nn.Conv2d(8, 8, 3, stride=5, bias=False)
        # For 28x28 inputs: conv1 -> 26x26, conv2 (stride 5) -> 5x5,
        # 2x2 max-pool -> 2x2, hence 8 channels * 2 * 2 = 32 features.
        self.fc1 = nn.Linear(32, 1000)
        self.fc2 = nn.Linear(1000, 10)

        self.q = q
        if self.q:
            # Mark where tensors enter and leave the quantized domain.
            self.quant = QuantStub()
            self.dequant = DeQuantStub()

    def forward(self, x):
        """Return the class scores for a batch ``x`` of 1-channel images."""
        if self.q:
            x = self.quant(x)

        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        # NOTE(review): a bare `F.relu(x)` whose result was discarded used to
        # sit here; it had no effect and was removed. There is no
        # non-linearity between fc1 and fc2, so the two layers compose into a
        # single affine map. If an activation after fc1 was intended, add it
        # deliberately, since that changes the trained model.
        x = self.fc1(x)
        output = self.fc2(x)

        if self.q:
            output = self.dequant(output)
        return output

Определим простую CNN, которая классифицирует изображения MNIST.

In [None]:
# Float baseline (no quant/dequant stubs); report the size of its saved weights.
model = SimpleCNN(q=False)
print_size_of_model(model)

Size (MB): 0.176938


In [None]:
# Train the float baseline. Despite the name, `hist` holds only the accuracy
# measured after the last epoch (that is what train_and_eval returns).
hist = train_and_eval(model, train_loader, test_loader)

  self.pid = os.fork()




  self.pid = os.fork()



Test set:  Accuracy: 8361/10000 (84%)  Time: 2.3063459396362305


Test set:  Accuracy: 8679/10000 (87%)  Time: 2.345824718475342


Test set:  Accuracy: 8777/10000 (88%)  Time: 2.244311571121216


Test set:  Accuracy: 8756/10000 (88%)  Time: 2.3323636054992676


Test set:  Accuracy: 8883/10000 (89%)  Time: 2.3445191383361816



### Post-training quantization

Определим новую архитектуру квантованной сети, в которой мы также определим заглушки квантования и деквантования (QuantStub/DeQuantStub); они нужны в начале и в конце сети.


In [None]:
# Same architecture, this time with the quant/dequant stubs enabled.
qmodel = SimpleCNN(q=True)

# Copy the trained float weights into it (load_model also moves `model` to the CPU).
load_model(qmodel, model)

# Sanity check before any quantization step: evaluate the copy on the test set.
test(qmodel, test_loader)


Test set:  Accuracy: 8883/10000 (89%)  Time: 2.3049731254577637



tensor(88.8300)

In [None]:
# prepare()/convert() have not run yet, so this is still the un-quantized size.
print_size_of_model(qmodel)

Size (MB): 0.177066


Статическое квантование после обучения включает в себя не только преобразование весов из чисел с плавающей запятой в целые числа, как при динамическом квантовании, но и дополнительный этап: сначала через сеть пропускаются пакеты данных и вычисляются результирующие распределения различных активаций (в частности, это делается путём вставки в разные точки сети модулей-наблюдателей, которые записывают эти данные). Затем эти распределения используются для определения того, как именно следует квантовать различные активации во время вывода (простейшим методом было бы просто разделить весь диапазон активаций на 256 уровней).
Важно отметить, что этот дополнительный шаг позволяет передавать между операциями квантованные значения, вместо того чтобы между каждой парой операций преобразовывать их в числа с плавающей запятой и обратно в целые, что приводит к значительному ускорению.

In [None]:
# Attach the default quantization config to the model and display it.
qmodel.qconfig = torch.quantization.default_qconfig
print(qmodel.qconfig)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})


In [None]:
# Post-training static quantization: prepare -> calibrate -> convert.
# The order of these steps matters.
# qmodel.to('cpu')
# With args['cuda'] False, test() moves the model to the CPU and keeps the
# data there, so calibration and the later evaluation run on the CPU.
args['cuda']=False

# Same assignment as in the previous cell; repeated so this cell is self-contained.
qmodel.qconfig = torch.quantization.default_qconfig
print(qmodel.qconfig)

# Step 1: insert observer modules in place.
torch.quantization.prepare(qmodel, inplace=True)
print('Post Training Quantization Prepare: Inserting Observers')
print('\n Conv1: After observer insertion \n\n', qmodel.conv1)


# Step 2: calibration. test() is reused only to push the training set through
# the prepared model so the observers see data; the accuracy it prints is incidental.
test(qmodel, train_loader)
print('Post Training Quantization: Calibration done')
# Step 3: convert the observed model to its quantized form in place.
torch.quantization.convert(qmodel, inplace=True)
print('Post Training Quantization: Convert done')
# NOTE(review): the message mentions "fusion", but no fuse_modules() call is
# visible in this notebook — only quantization is performed here.
print('\n Conv1: After fusion and quantization \n\n', qmodel.conv1)
print("Size of model after quantization")
print_size_of_model(qmodel)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})
Post Training Quantization Prepare: Inserting Observers

 Conv1: After observer insertion 

 Conv2d(
  1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False
  (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
)

Test set:  Accuracy: 53107/60000 (89%)  Time: 15.302648782730103

Post Training Quantization: Calibration done
Post Training Quantization: Convert done

 Conv1: After fusion and quantization 

 QuantizedConv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), scale=0.031006284058094025, zero_point=64, bias=False)
Size of model after quantization
Size (MB): 0.05201


In [None]:
# Evaluate the converted model on the test set; args['cuda'] stays False so
# test() keeps the model and the data on the CPU.
args['cuda']=False

a = test(qmodel, test_loader)


Test set:  Accuracy: 8885/10000 (89%)  Time: 3.023811101913452



In [None]:
# Size of the saved state dict after convert() (quantized model).
print_size_of_model(qmodel)

Size (MB): 0.05201


In [None]:
# Size of the original float model, for comparison with the quantized one.
print_size_of_model(model)

Size (MB): 0.176938
