In [2]:
import torch 
import torchvision
import time
from data_loader import get_test_loader
from onnx_file import engine_infer_single_batch
import tensorrt as trt
from fuse_models import fuse_resnet

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data_loader = get_test_loader(batch_size = 16)



"""
Change from here 
"""


model = torchvision.models.efficientnet_v2_s()


"""
STAGE 0 PATHS
"""

# Model without layer fuion without tensorrt 
PATH_NO_LAYER_NO_TENSORRT_MODEL_0 = r"stage_0/efficientNet_no_layer_no_tensorrt_0.pth"

# Model without fusion and with tensorrt
PATH_NO_LAYER_TENSORRT_ONNX_0 = r"stage_0/efficientNet_no_layer_tensorrt_0.onnx"
PATH_NO_LAYER_TENSORRT_ENGINE_0 = r"stage_0/efficientNet_no_layer_tensorrt_0.trt"

# Model with Fusion and witout Tensorrt 
PATH_LAYER_NO_TENSORRT_0 = r"stage_0/efficientNet_layer_no_tensorrt_0.pth"

# Model with Layer and with TensorRT
PATH_LAYER_TENSORRT_ONNX_0 = r"stage_0/efficientNet_layer_tensorrt_0.onnx"
PATH_LAYER_TENSORRT_ENGINE_0 = r"stage_0/efficientNet_layer_tensorrt_0.trt"

# ---------------------------------------------------------------------------------WAIT

"""
STAGE 1 PATHS
"""

# Model without layer fuion without tensorrt 
PATH_NO_LAYER_NO_TENSORRT_MODEL_1 = r"stage_1/efficientNet_no_layer_no_tensorrt_1.pth"

# Model without fusion and with tensorrt
PATH_NO_LAYER_TENSORRT_ONNX_1 = r"stage_1/efficientNet_no_layer_tensorrt_1.onnx"    
PATH_NO_LAYER_TENSORRT_ENGINE_1 = r"stage_1/efficientNet_no_layer_tensorrt_1.trt"

# Model with Fusion and witout Tensorrt 
PATH_LAYER_NO_TENSORRT_MODEL_1 = r"stage_1/efficientNet_layer_no_tensorrt_1.pth"

# Model with Layer and with TensorRT
PATH_LAYER_TENSORRT_ONNX_1 = r"stage_1/efficientNet_layer_tensorrt_1.onnx"
PATH_LAYER_TENSORRT_ENGINE_1 = r"stage_1/efficientNet_layer_tensorrt_1.trt"


"""
Stage 2
"""

# Model without layer fuion without tensorrt 
PATH_NO_LAYER_NO_TENSORRT_MODEL_2 = r"stage_2/efficientNet_no_layer_no_tensorrt_2.pth"

# Model without fusion and with tensorrt
PATH_NO_LAYER_TENSORRT_ONNX_2 = r"stage_2/efficientNet_no_layer_tensorrt_2.onnx"    
PATH_NO_LAYER_TENSORRT_ENGINE_2 = r"stage_2/efficientNet_no_layer_tensorrt_2.trt"

# Model with Fusion and witout Tensorrt 
PATH_LAYER_NO_TENSORRT_MODEL_2 = r"stage_2/efficientNet_layer_no_tensorrt_2.pth"

# Model with Layer and with TensorRT
PATH_LAYER_TENSORRT_ONNX_2 = r"stage_2/efficientNet_layer_tensorrt_2.onnx"
PATH_LAYER_TENSORRT_ENGINE_2 = r"stage_2/efficientNet_layer_tensorrt_2.trt"


Files already downloaded and verified


#### Without Layer Fusion and without TensorRT

In [4]:
def without_fusion_without_tensorrt():
    model = torchvision.models.efficientnet_v2_s()

    model = model.to(device)

    total_inference_time_time = 0

    start_inference = time.time()

    for (i, batch) in enumerate(data_loader):
        
        (input_data, label) = batch
        input_data = input_data.to(device)

        start_time = time.time()
        output_activation = model(input_data)
        batch_time = time.time() - start_time

        output_activation = output_activation.to("cpu")
        
        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_inference_time_time += batch_time
    end_inference = time.time()
    print(f"Inferene Time Taken: {total_inference_time_time * 1000:.3f} ms")
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")

without_fusion_without_tensorrt()

Batch 0 : Sent | Time Taken: 113.886 ms
Batch 1 : Sent | Time Taken: 99.022 ms
Batch 2 : Sent | Time Taken: 93.992 ms
Batch 3 : Sent | Time Taken: 91.002 ms
Batch 4 : Sent | Time Taken: 92.001 ms
Batch 5 : Sent | Time Taken: 89.448 ms
Batch 6 : Sent | Time Taken: 93.004 ms
Batch 7 : Sent | Time Taken: 101.577 ms
Batch 8 : Sent | Time Taken: 98.218 ms
Batch 9 : Sent | Time Taken: 94.871 ms
Batch 10 : Sent | Time Taken: 95.449 ms
Batch 11 : Sent | Time Taken: 90.993 ms
Batch 12 : Sent | Time Taken: 103.340 ms
Batch 13 : Sent | Time Taken: 100.704 ms
Batch 14 : Sent | Time Taken: 96.366 ms
Batch 15 : Sent | Time Taken: 94.004 ms
Batch 16 : Sent | Time Taken: 89.223 ms
Batch 17 : Sent | Time Taken: 92.287 ms
Batch 18 : Sent | Time Taken: 89.992 ms
Batch 19 : Sent | Time Taken: 90.992 ms
Batch 20 : Sent | Time Taken: 90.991 ms
Batch 21 : Sent | Time Taken: 89.992 ms
Batch 22 : Sent | Time Taken: 88.993 ms
Batch 23 : Sent | Time Taken: 89.994 ms
Batch 24 : Sent | Time Taken: 90.000 ms
Batch 

In [6]:
def without_fusion_with_tensorrt():

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)

    with open(PATH_NO_LAYER_TENSORRT_ENGINE_0, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    total_time = 0
    start_inference = time.time()
    for(i, batch) in enumerate(data_loader):
        start_time = time.time()
        (input_data, label) = batch 

        activation = engine_infer_single_batch(engine, context, input_data, i)
        end_time = time.time() 
        
        batch_time = end_time - start_time 
        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_time += batch_time
    end_inference = time.time()
    print(f"Total time taken: {total_time * 1000:.3f} ms")
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")

without_fusion_with_tensorrt()
    

FileNotFoundError: [Errno 2] No such file or directory: 'stage_0/efficientNet_no_layer_tensorrt_0.trt'

In [None]:
def with_fusion_without_tensorrt():
    model = torchvision.models.efficientnet_v2_s()

    # Fuse the stage 0
    fused_part_0 = fuse_resnet(model)
    fused_part_0 = fused_part_0.to(device)

    total_time = 0
    start_inference = time.time()
    for (i, batch) in enumerate(data_loader):
        
        (input_data, label) = batch
        input_data = input_data.to(device)

        start_time = time.time()
        output_activation = fused_part_0(input_data)
        batch_time = time.time() - start_time

        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_time += batch_time
        
    end_inference = time.time()
    print(f"Total Time Taken: {total_time * 1000:.3f} ms")
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")
with_fusion_without_tensorrt()


Fusing model.conv1 and model.bn1...
Fusion done
Batch 0 : Sent | Time Taken: 121.000 ms
Batch 1 : Sent | Time Taken: 122.999 ms
Batch 2 : Sent | Time Taken: 90.002 ms
Batch 3 : Sent | Time Taken: 89.000 ms
Batch 4 : Sent | Time Taken: 144.000 ms
Batch 5 : Sent | Time Taken: 126.335 ms
Batch 6 : Sent | Time Taken: 93.043 ms
Batch 7 : Sent | Time Taken: 86.984 ms
Batch 8 : Sent | Time Taken: 86.328 ms
Batch 9 : Sent | Time Taken: 88.448 ms
Batch 10 : Sent | Time Taken: 86.993 ms
Batch 11 : Sent | Time Taken: 86.992 ms
Batch 12 : Sent | Time Taken: 97.230 ms
Batch 13 : Sent | Time Taken: 93.993 ms
Batch 14 : Sent | Time Taken: 88.427 ms
Batch 15 : Sent | Time Taken: 88.993 ms
Batch 16 : Sent | Time Taken: 88.449 ms
Batch 17 : Sent | Time Taken: 86.988 ms
Batch 18 : Sent | Time Taken: 88.498 ms
Batch 19 : Sent | Time Taken: 87.188 ms
Batch 20 : Sent | Time Taken: 87.993 ms
Batch 21 : Sent | Time Taken: 87.992 ms
Batch 22 : Sent | Time Taken: 88.146 ms
Batch 23 : Sent | Time Taken: 92.989 m

In [25]:
def with_fusion_with_tensorrt():
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)

    with open(PATH_RESNET_LAYER_TENSORRT_ENGINE_0, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    total_time = 0
    start_inference = time.time()
    for(i, batch) in enumerate(data_loader):

        start_time = time.time()
        (input_data, label) = batch 
        activation = engine_infer_single_batch(engine, context, input_data, i)
        end_time = time.time()

        batch_time = end_time - start_time 
        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_time += batch_time
    end_inference = time.time()
    print(f"Total time taken: {total_time * 1000:.3f} ms")    
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")

with_fusion_with_tensorrt()

Batch 0 : Sent | Time Taken: 7.000 ms
Batch 1 : Sent | Time Taken: 6.067 ms
Batch 2 : Sent | Time Taken: 7.000 ms
Batch 3 : Sent | Time Taken: 7.000 ms
Batch 4 : Sent | Time Taken: 5.999 ms
Batch 5 : Sent | Time Taken: 6.000 ms
Batch 6 : Sent | Time Taken: 3.999 ms
Batch 7 : Sent | Time Taken: 5.001 ms
Batch 8 : Sent | Time Taken: 5.005 ms
Batch 9 : Sent | Time Taken: 3.997 ms
Batch 10 : Sent | Time Taken: 4.000 ms
Batch 11 : Sent | Time Taken: 4.000 ms
Batch 12 : Sent | Time Taken: 4.000 ms
Batch 13 : Sent | Time Taken: 4.999 ms
Batch 14 : Sent | Time Taken: 5.001 ms
Batch 15 : Sent | Time Taken: 5.000 ms
Batch 16 : Sent | Time Taken: 5.000 ms
Batch 17 : Sent | Time Taken: 4.000 ms
Batch 18 : Sent | Time Taken: 4.000 ms
Batch 19 : Sent | Time Taken: 4.001 ms
Batch 20 : Sent | Time Taken: 5.000 ms
Batch 21 : Sent | Time Taken: 3.998 ms
Batch 22 : Sent | Time Taken: 4.000 ms
Batch 23 : Sent | Time Taken: 4.998 ms
Batch 24 : Sent | Time Taken: 5.000 ms
Batch 25 : Sent | Time Taken: 4.002