In [21]:
import os
import torch
import numpy as np
import pandas as pd

from PIL import Image
from torchvision import transforms

In [22]:
# Load the pretrained `cifar10_resnet20` entry point from the
# chenyaofo/pytorch-cifar-models hub repository (downloads on first use,
# afterwards served from the local torch hub cache).
hub_repo = "chenyaofo/pytorch-cifar-models"
hub_entry = "cifar10_resnet20"
model = torch.hub.load(hub_repo, hub_entry, pretrained=True)

Using cache found in C:\Users\dlwl9/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


### Basic 모델 스크립트

In [23]:
# Make sure the output directory exists. `exist_ok=True` replaces the separate
# isdir()/mkdir() check, so there is no window between the check and the
# creation, and re-running the cell stays a no-op.
os.makedirs('script', exist_ok=True)

# Put the model in evaluation mode before scripting it, then convert it to
# TorchScript and write the scripted module to disk.
model.eval()
scripted_model = torch.jit.script(model)
scripted_model.save("script/resnet_scripted.pt")

### 양자화 모델 스크립트
추론 정확도를 거의 동일하게 유지하면서 훈련된 모델 크기를 크게 줄이기 위해 모델에 양자화를 적용할 수 있습니다.

In [24]:
# Select the quantized-kernel backend and make it the active engine.
backend = "fbgemm"
torch.backends.quantized.engine = backend
model.qconfig = torch.quantization.get_default_qconfig(backend)

# Dynamic quantization restricted to nn.Linear modules, using int8 weights.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)

# Script the quantized model and persist it next to the unquantized script.
scripted_quantized_model = torch.jit.script(quantized_model)
scripted_quantized_model.save("script/resnet_scripted_quantized.pt")



### 양자화 모델 최적화 스크립트

In [25]:
from torch.utils.mobile_optimizer import optimize_for_mobile

# Run the mobile optimizer over the scripted, quantized module and save the
# resulting module alongside the other exported variants.
optimized_scripted_quantized_model = optimize_for_mobile(
    scripted_quantized_model
)
optimized_scripted_quantized_model.save(
    "script/resnet_optimized_scripted_quantized.pt"
)

### 라이트 인터프리터(Lite interpreter)

In [26]:
# Export the optimized module in lite-interpreter format, then load the same
# file back so it can be compared against the other variants.
# NOTE(review): the file is reloaded with torch.jit.load, not a lite-interpreter
# specific loader -- confirm this exercises the runtime you intend to measure.
lite_model_path = "script/resnet_optimized_scripted_quantized_lite.ptl"
optimized_scripted_quantized_model._save_for_lite_interpreter(lite_model_path)
ptl = torch.jit.load(lite_model_path)

In [27]:
# Build one random input batch of shape (1, 3, 32, 32) with integer values in
# [0, 255], scale it to [0, 1] and convert it to a float32 tensor.
image = np.random.randint(256, size=(1, 3, 32, 32))
# `image / 255.0` is float64; request float32 directly. The original cell cast
# an undefined `image_tensor`, which raised NameError on a fresh kernel.
img = torch.tensor(image / 255.0, dtype=torch.float32)

In [28]:
def _profile_forward(net):
    """Run one forward pass of ``net`` on ``img`` under the autograd profiler.

    Returns a ``(profiler, output)`` pair; the profiler context is closed
    before returning.
    """
    with torch.autograd.profiler.profile(use_cuda=False) as recorder:
        result = net(img)
    return recorder, result


# Profile every variant once, in the same order as the report below.
# `out` is overwritten each time and ends up holding the last model's output.
prof1, out = _profile_forward(model)
prof2, out = _profile_forward(scripted_model)
prof3, out = _profile_forward(scripted_quantized_model)
prof4, out = _profile_forward(optimized_scripted_quantized_model)
prof5, out = _profile_forward(ptl)

# Report each profiler's total self CPU time, divided by 1000 and labelled ms.
for _template, _prof in (
    ("original model: {:.2f}ms", prof1),
    ("scripted model: {:.2f}ms", prof2),
    ("scripted & quantized model: {:.2f}ms", prof3),
    ("scripted & quantized & optimized model: {:.2f}ms", prof4),
    ("lite model: {:.2f}ms", prof5),
):
    print(_template.format(_prof.self_cpu_time_total/1000))

original model: 4.95ms
scripted model: 4.12ms
scripted & quantized model: 4.06ms
scripted & quantized & optimized model: 32.93ms
lite model: 30.68ms


### 다음 결과는 각 모델이 소요한 추론 시간과 원본 모델 대비 각 모델의 추론 시간 감소율을 요약한 것입니다. (감소율이 음수이면 원본 모델보다 느리다는 뜻입니다.)



In [29]:
# Summarise the profiling results: one row per model variant with its
# inference time and its reduction relative to the original model (prof1).
_profiles = [prof1, prof2, prof3, prof4, prof5]
_baseline = prof1.self_cpu_time_total

_inference_times = [
    "{:.2f}ms".format(p.self_cpu_time_total/1000) for p in _profiles
]
# The baseline row is reported as the literal "0%"; the remaining rows are
# the relative difference from the baseline, as a percentage.
_reductions = ["0%"] + [
    "{:.2f}%".format((_baseline-p.self_cpu_time_total)/_baseline*100)
    for p in _profiles[1:]
]

df = pd.DataFrame({
    'Model': [
        'original model',
        'scripted model',
        'scripted & quantized model',
        'scripted & quantized & optimized model',
        'lite model',
    ],
    'Inference Time': _inference_times,
    'Reduction': _reductions,
})

print(df)

                                    Model Inference Time Reduction
0                          original model         4.95ms        0%
1                          scripted model         4.12ms    16.88%
2              scripted & quantized model         4.06ms    18.09%
3  scripted & quantized & optimized model        32.93ms  -564.78%
4                              lite model        30.68ms  -519.34%
