In [1]:
## Connect to gg driver
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/AI Tutor/CV/Topic2: Object Classification/day06

/content/drive/.shortcut-targets-by-id/1g_hBCGxmI5lTFXyvD-igJcroxCjDzObt/AI Tutor/CV/Topic2: Object Classification/day06


### Common function usage


In [1]:
import torch
# Load models pytorch
model_pytorch_cuda = torch.load("models/torch/resnet18.pth").to('cuda')
# convert model to static graph
sample_input_cuda = torch.randn(1,3,224,224).cuda()
traced_cuda = torch.jit.trace(model_pytorch_cuda, sample_input_cuda)
torch.jit.save(traced_cuda, "cuda.pt")


# Load jit model
model_jit = torch.jit.load("cuda.pt")

# Inference model
model_jit(sample_input_cuda)

tensor([[-4.6809,  0.3873,  0.5750, -5.2969,  5.9679, -1.2956]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [2]:
# Một cách khác, nhưng cách này giúp tối ưu, nhưng không lưu được mô hình
frozen_mod = torch.jit.optimize_for_inference(torch.jit.script(model_pytorch_cuda.eval()))
frozen_mod(sample_input_cuda)

tensor([[-4.6825,  0.3887,  0.5750, -5.2941,  5.9691, -1.2988]],
       device='cuda:0')

## Estimate perfomances

In [3]:
import torch
import time
from function.pytorch.utils import preprocess_image

# Load pre-trained resnet18 model and set to evaluation mode
model_pytorch = torch.load("models/torch/resnet18.pth")
path_image = '../../dataset/test/forest/20056.jpg'
sample = preprocess_image(path_image)

In [4]:
## warmup time
for _ in range(100):
    model_pytorch.cuda()
    with torch.no_grad():
        sample_cuda = sample.cuda()
        model_pytorch(sample_cuda)

In [5]:
total_time = 0
n_time = 100
sample_input = sample.cuda()
for i in range(n_time):
    model_pytorch.cuda()
    with torch.no_grad():
        start = time.time()
        out = model_pytorch(sample_input)
        total_time = time.time() - start

print(f"Pytorch with cuda: {total_time/n_time*1000: .4f} ms")
out

Pytorch with cuda:  0.0601 ms


tensor([[-5.8352, 10.6493,  0.2812, -4.7578, -3.6570, -4.2765]],
       device='cuda:0')

In [6]:
total_time = 0
n_time = 100
sample_input = sample.cpu()
for i in range(n_time):
    model_pytorch.to('cpu')
    with torch.no_grad():
        start = time.time()
        out = model_pytorch(sample_input)
        total_time = time.time() - start

print(f"Pytorch with cpu: {total_time/n_time*1000: .4f} ms")
out

Pytorch with cpu:  0.4651 ms


tensor([[-5.8363, 10.6493,  0.2802, -4.7604, -3.6545, -4.2747]])

In [7]:
sample_input = preprocess_image(path_image).cuda()
model_pytorch.cuda()
traced_model = torch.jit.trace(model_pytorch, sample_input)
total_time = 0

n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = traced_model(sample_input)
        total_time = time.time() - start

print(f"Torchscript with cuda: {total_time/n_time*1000: .4f} ms")

Torchscript with cuda:  0.0200 ms


In [8]:
sample_input = preprocess_image(path_image).cpu()
model_pytorch.cpu()
traced_model = torch.jit.trace(model_pytorch, sample_input)

total_time = 0
n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = traced_model(sample_input)
        total_time = time.time() - start

print(f"Torchscript with cpu: {total_time/n_time*1000: .4f} ms")

Torchscript with cpu:  0.4459 ms


In [9]:

sample_input = preprocess_image(path_image).cuda()
model_pytorch.cuda()
frozen_mod = torch.jit.optimize_for_inference(torch.jit.script(model_pytorch.eval()))
total_time = 0

n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = frozen_mod(sample_input)
        total_time = time.time() - start

print(f"Torchscript_Optimizer with cuda: {total_time/n_time*1000: .4f} ms")

Torchscript_Optimizer with cuda:  0.0999 ms


In [10]:
sample_input = preprocess_image(path_image).cpu()
model_pytorch.cpu()
frozen_mod = torch.jit.optimize_for_inference(torch.jit.script(model_pytorch.eval()))
total_time = 0

n_time = 100
for i in range(n_time):
    with torch.no_grad():
        start = time.time()
        out = frozen_mod(sample_input)
        total_time = time.time() - start

print(f"Torchscript_Optimizer with cpu: {total_time/n_time*1000: .4f} ms")

Torchscript_Optimizer with cpu:  0.1800 ms
