In [1]:
## Connect to gg driver
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/AI Tutor/CV/Topic2: Object Classification/day06

/content/drive/.shortcut-targets-by-id/1g_hBCGxmI5lTFXyvD-igJcroxCjDzObt/AI Tutor/CV/Topic2: Object Classification/day06


### Common function usage


In [3]:
import torch
# Load models pytorch
model_pytorch_cuda = torch.load("models/torch/resnet18.pth").to('cuda')
# convert model to static graph
sample_input_cuda = torch.randn(1,3,224,224).cuda()
traced_cuda = torch.jit.trace(model_pytorch_cuda, sample_input_cuda)
torch.jit.save(traced_cuda, "cuda.pt")


# Load jit model
model_jit = torch.jit.load("cuda.pt")

# Inference model
model_jit(sample_input_cuda)

tensor([[-4.6765, -0.1002,  0.8028, -5.5643,  6.3375, -1.2884]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [4]:
# Một cách khác, nhưng cách này giúp tối ưu, nhưng không lưu được mô hình
frozen_mod = torch.jit.optimize_for_inference(torch.jit.script(model_pytorch_cuda.eval()))
frozen_mod(sample_input_cuda)

tensor([[-4.6765, -0.1002,  0.8028, -5.5643,  6.3375, -1.2884]],
       device='cuda:0')

## Estimate perfomances

In [8]:
import torch
import time
from function.pytorch.utils import preprocess_image

# Load pre-trained resnet18 model and set to evaluation mode
model_pytorch = torch.load("models/torch/resnet18.pth")
path_image = '/content/drive/MyDrive/AI Tutor/CV/Topic1: Introduce DL and CV/day02/dataset/test/forest/20056.jpg'
sample = preprocess_image(path_image)

In [9]:
## warmup time
for _ in range(100):
    model_pytorch.cuda()
    with torch.no_grad():
        sample_cuda = sample.cuda()
        model_pytorch(sample_cuda)

In [10]:
total_time = 0
n_time = 100
sample_input = sample.cuda()
for i in range(n_time):
    model_pytorch.cuda()
    with torch.no_grad():
        start = time.time()
        out = model_pytorch(sample_input)
        total_time = time.time() - start

print(f"Pytorch with cuda: {total_time/n_time*1000: .4f} ms")
out

Pytorch with cuda:  0.0465 ms


tensor([[-5.8363, 10.6493,  0.2802, -4.7604, -3.6545, -4.2747]],
       device='cuda:0')

In [11]:
total_time = 0
n_time = 100
sample_input = sample.cpu()
for i in range(n_time):
    model_pytorch.to('cpu')
    with torch.no_grad():
        start = time.time()
        out = model_pytorch(sample_input)
        total_time = time.time() - start

print(f"Pytorch with cpu: {total_time/n_time*1000: .4f} ms")
out

Pytorch with cpu:  0.5799 ms


tensor([[-5.8363, 10.6493,  0.2802, -4.7604, -3.6545, -4.2747]])

In [12]:
sample_input = preprocess_image(path_image).cuda()
model_pytorch.cuda()
traced_model = torch.jit.trace(model_pytorch, sample_input)
total_time = 0

n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = traced_model(sample_input)
        total_time = time.time() - start

print(f"Torchscript with cuda: {total_time/n_time*1000: .4f} ms")

Torchscript with cuda:  0.0299 ms


In [17]:
sample_input = preprocess_image(path_image).cpu()
model_pytorch.cpu()
traced_model = torch.jit.trace(model_pytorch, sample_input)

total_time = 0
n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = traced_model(sample_input)
        total_time = time.time() - start

print(f"Torchscript with cpu: {total_time/n_time*1000: .4f} ms")

Torchscript with cpu:  0.5388 ms


In [14]:

sample_input = preprocess_image(path_image).cuda()
model_pytorch.cuda()
frozen_mod = torch.jit.optimize_for_inference(torch.jit.script(model_pytorch.eval()))
total_time = 0

n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = frozen_mod(sample_input)
        total_time = time.time() - start

print(f"Torchscript_Optimizer with cuda: {total_time/n_time*1000: .4f} ms")

Torchscript_Optimizer with cuda:  0.0182 ms


In [16]:
sample_input = preprocess_image(path_image).cpu()
model_pytorch.cpu()
frozen_mod = torch.jit.optimize_for_inference(torch.jit.script(model_pytorch.eval()))
total_time = 0

n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = frozen_mod(sample_input)
        total_time = time.time() - start

print(f"Torchscript_Optimizer with cpu: {total_time/n_time*1000: .4f} ms")

Torchscript_Optimizer with cpu:  0.3700 ms
