In [23]:
## Connect to gg driver
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
%cd /content/drive/MyDrive/AI Tutor/CV/Topic2: Object Classification/day06

/content/drive/.shortcut-targets-by-id/1g_hBCGxmI5lTFXyvD-igJcroxCjDzObt/AI Tutor/CV/Topic2: Object Classification/day06


In [1]:
import torch
from function.pytorch.utils import preprocess_image
import os
import time
import numpy as np
def get_size_file(path):
    size_in_bytes = os.path.getsize(path)
    size_in_megabytes = size_in_bytes / (1024 * 1024)  # Convert bytes to megabytes
    return size_in_megabytes

### Convert model

In [2]:
# Load pre-trained resnet18 model and set to evaluation mode
model = torch.load("models/torch/resnet18.pth").to('cpu')
torch.save(model, 'models/torch/resnet18.pth')
model.float()
model.half()
torch.save(model, 'models/torch/resnet18_float16.pth')

### Verify model

In [3]:
size = get_size_file('models/torch/resnet18.pth')
print(f"Size of model resnet18: {size:.1f} MB")
size = get_size_file('models/torch/resnet18_float16.pth')
print(f"Size of model resnet18_float16: {size:.1f} MB")

Size of model resnet18: 42.7 MB
Size of model resnet18_float16: 21.4 MB


In [6]:
path_image = '../../dataset/test/forest/20056.jpg'
input = preprocess_image(path_image)
float_32 = torch.load("models/torch/resnet18.pth").cuda()
float_16 = torch.load("models/torch/resnet18_float16.pth").cuda()
batch_size = 4
input = input.repeat(batch_size, 1, 1, 1)
input.shape

torch.Size([4, 3, 224, 224])

In [7]:
## warming GPU
with torch.no_grad():
    for _ in range(10):
        output_32bit = float_32(input.cuda())
        output_16bit = float_16(input.half().cuda())

In [8]:
import numpy as np
np.mean(output_32bit.cpu().numpy() - output_16bit.cpu().numpy())

-0.0001958112

In [9]:
time_infer_16bit = []
time_infer_32bit = []
with torch.no_grad():
    for _ in range(100):
        start = time.time()
        output_32bit = float_32(input.cuda())
        time_infer_32bit.append(time.time() - start)

        start = time.time()
        output_16bit = float_16(input.half().cuda())
        time_infer_16bit.append(time.time() - start)

In [10]:
print(f"Time infer per image of resnet18_float32: {np.mean(time_infer_32bit): .4f} s")
print(f"Time infer per image of resnet18_float16: {np.mean(time_infer_16bit): .4f} s")

Time infer per image of resnet18_float32:  0.0117 s
Time infer per image of resnet18_float16:  0.0151 s


In [11]:
import torch
import numpy as np

batch_size = [2, 4, 8, 16, 32, 64, 128, 256, 512]

for batch in batch_size:
    with torch.cuda.stream(torch.cuda.current_stream()):
        input = torch.randn(batch, 3, 224, 224)
        input = input.cuda()  # Chuyển input lên GPU bên trong stream
        with torch.no_grad():
            output_32bit = float_32(input)
            output_16bit = float_16(input.half())
    error = np.mean(output_32bit.cpu().numpy() - output_16bit.cpu().numpy())
    print(f"Batch {batch}: {error:.5f}")


Batch 2: 0.00097
Batch 4: 0.00035
Batch 8: 0.00015
Batch 16: 0.00042
Batch 32: 0.00050
Batch 64: 0.00048
Batch 128: 0.00047
Batch 256: 0.00037
Batch 512: 0.00042
