In [1]:
import torch 
import torchvision
import time
from data_loader import get_test_loader
from onnx_file import engine_infer_single_batch
import tensorrt as trt
from fuse_models import fuse_resnet

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data_loader = get_test_loader(batch_size = 16)



"""
Change from here 
"""


model = torchvision.models.GoogLeNet()


"""
STAGE 0 PATHS
"""

# Model without layer fuion without tensorrt 
PATH_NO_LAYER_NO_TENSORRT_MODEL_0 = r"stage_0/googlelenet_no_layer_no_tensorrt_0.pth"

# Model without fusion and with tensorrt
PATH_NO_LAYER_TENSORRT_ONNX_0 = r"stage_0/googlelenet_no_layer_tensorrt_0.onnx"
PATH_NO_LAYER_TENSORRT_ENGINE_0 = r"stage_0/googlelenet_no_layer_tensorrt_0.trt"

# Model with Fusion and witout Tensorrt 
PATH_LAYER_NO_TENSORRT_0 = r"stage_0/googlelenet_layer_no_tensorrt_0.pth"

# Model with Layer and with TensorRT
PATH_LAYER_TENSORRT_ONNX_0 = r"stage_0/googlelenet_layer_tensorrt_0.onnx"
PATH_LAYER_TENSORRT_ENGINE_0 = r"stage_0/googlelenet_layer_tensorrt_0.trt"

# ---------------------------------------------------------------------------------WAIT

"""
STAGE 1 PATHS
"""

# Model without layer fuion without tensorrt 
PATH_NO_LAYER_NO_TENSORRT_MODEL_1 = r"stage_1/googlelenet_no_layer_no_tensorrt_1.pth"

# Model without fusion and with tensorrt
PATH_NO_LAYER_TENSORRT_ONNX_1 = r"stage_1/googlelenet_no_layer_tensorrt_1.onnx"    
PATH_NO_LAYER_TENSORRT_ENGINE_1 = r"stage_1/googlelenet_no_layer_tensorrt_1.trt"

# Model with Fusion and witout Tensorrt 
PATH_LAYER_NO_TENSORRT_MODEL_1 = r"stage_1/googlelenet_layer_no_tensorrt_1.pth"

# Model with Layer and with TensorRT
PATH_LAYER_TENSORRT_ONNX_1 = r"stage_1/googlelenet_layer_tensorrt_1.onnx"
PATH_LAYER_TENSORRT_ENGINE_1 = r"stage_1/googlelenet_layer_tensorrt_1.trt"


"""
Stage 2
"""

# Model without layer fuion without tensorrt 
PATH_NO_LAYER_NO_TENSORRT_MODEL_2 = r"stage_2/googlelenet_no_layer_no_tensorrt_2.pth"

# Model without fusion and with tensorrt
PATH_NO_LAYER_TENSORRT_ONNX_2 = r"stage_2/googlelenet_no_layer_tensorrt_2.onnx"    
PATH_NO_LAYER_TENSORRT_ENGINE_2 = r"stage_2/googlelenet_no_layer_tensorrt_2.trt"

# Model with Fusion and witout Tensorrt 
PATH_LAYER_NO_TENSORRT_MODEL_2 = r"stage_2/googlelenet_layer_no_tensorrt_2.pth"

# Model with Layer and with TensorRT
PATH_LAYER_TENSORRT_ONNX_2 = r"stage_2/googlelenet_layer_tensorrt_2.onnx"
PATH_LAYER_TENSORRT_ENGINE_2 = r"stage_2/googlelenet_layer_tensorrt_2.trt"


Files already downloaded and verified




#### Without Layer Fusion and without TensorRT

In [3]:
def without_fusion_without_tensorrt():
    model = torchvision.models.resnet152().eval()

    model = model.to(device)

    total_inference_time_time = 0

    start_inference = time.time()

    for (i, batch) in enumerate(data_loader):
        
        (input_data, label) = batch
        input_data = input_data.to(device)

        start_time = time.time()
        output_activation = model(input_data)
        batch_time = time.time() - start_time

        output_activation = output_activation.to("cpu")
        
        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_inference_time_time += batch_time
    end_inference = time.time()
    print(f"Inferene Time Taken: {total_inference_time_time * 1000:.3f} ms")
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")

without_fusion_without_tensorrt()

Batch 0 : Sent | Time Taken: 516.001 ms
Batch 1 : Sent | Time Taken: 458.000 ms
Batch 2 : Sent | Time Taken: 235.353 ms
Batch 3 : Sent | Time Taken: 195.000 ms
Batch 4 : Sent | Time Taken: 203.000 ms
Batch 5 : Sent | Time Taken: 111.999 ms
Batch 6 : Sent | Time Taken: 102.000 ms
Batch 7 : Sent | Time Taken: 104.000 ms
Batch 8 : Sent | Time Taken: 105.001 ms
Batch 9 : Sent | Time Taken: 107.000 ms
Batch 10 : Sent | Time Taken: 104.002 ms
Batch 11 : Sent | Time Taken: 108.000 ms
Batch 12 : Sent | Time Taken: 107.000 ms
Batch 13 : Sent | Time Taken: 112.001 ms
Batch 14 : Sent | Time Taken: 107.997 ms
Batch 15 : Sent | Time Taken: 108.001 ms
Batch 16 : Sent | Time Taken: 112.002 ms
Batch 17 : Sent | Time Taken: 108.001 ms
Batch 18 : Sent | Time Taken: 108.000 ms
Batch 19 : Sent | Time Taken: 111.000 ms
Batch 20 : Sent | Time Taken: 105.999 ms
Batch 21 : Sent | Time Taken: 143.002 ms
Batch 22 : Sent | Time Taken: 179.989 ms
Batch 23 : Sent | Time Taken: 170.000 ms
Batch 24 : Sent | Time Tak

In [5]:
def without_fusion_with_tensorrt():

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)

    with open(PATH_NO_LAYER_TENSORRT_ENGINE_0, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    total_time = 0
    start_inference = time.time()
    for(i, batch) in enumerate(data_loader):
        start_time = time.time()
        (input_data, label) = batch 

        activation = engine_infer_single_batch(engine, context, input_data, i)
        end_time = time.time() 
        
        batch_time = end_time - start_time 
        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_time += batch_time
    end_inference = time.time()
    print(f"Total time taken: {total_time * 1000:.3f} ms")
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")

without_fusion_with_tensorrt()
    

Batch 0 : Sent | Time Taken: 38.000 ms
Batch 1 : Sent | Time Taken: 14.912 ms
Batch 2 : Sent | Time Taken: 13.489 ms
Batch 3 : Sent | Time Taken: 15.646 ms
Batch 4 : Sent | Time Taken: 14.009 ms
Batch 5 : Sent | Time Taken: 14.345 ms
Batch 6 : Sent | Time Taken: 18.154 ms
Batch 7 : Sent | Time Taken: 14.939 ms
Batch 8 : Sent | Time Taken: 13.536 ms
Batch 9 : Sent | Time Taken: 12.937 ms
Batch 10 : Sent | Time Taken: 14.024 ms
Batch 11 : Sent | Time Taken: 14.317 ms
Batch 12 : Sent | Time Taken: 14.012 ms
Batch 13 : Sent | Time Taken: 14.127 ms
Batch 14 : Sent | Time Taken: 12.999 ms
Batch 15 : Sent | Time Taken: 15.094 ms
Batch 16 : Sent | Time Taken: 15.315 ms
Batch 17 : Sent | Time Taken: 15.332 ms
Batch 18 : Sent | Time Taken: 14.320 ms
Batch 19 : Sent | Time Taken: 12.923 ms
Batch 20 : Sent | Time Taken: 14.001 ms
Batch 21 : Sent | Time Taken: 13.027 ms
Batch 22 : Sent | Time Taken: 14.659 ms
Batch 23 : Sent | Time Taken: 15.133 ms
Batch 24 : Sent | Time Taken: 13.605 ms
Batch 25 :

In [6]:
def with_fusion_without_tensorrt():
    model = model = torchvision.models.GoogLeNet()

    # Fuse the stage 0
    fused_part_0 = fuse_resnet(model)
    fused_part_0 = fused_part_0.to(device)

    total_time = 0
    start_inference = time.time()
    for (i, batch) in enumerate(data_loader):
        
        (input_data, label) = batch
        input_data = input_data.to(device)

        start_time = time.time()
        output_activation = fused_part_0(input_data)
        batch_time = time.time() - start_time

        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_time += batch_time
        
    end_inference = time.time()
    print(f"Total Time Taken: {total_time * 1000:.3f} ms")
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")
with_fusion_without_tensorrt()


Fusion done
Batch 0 : Sent | Time Taken: 37.501 ms
Batch 1 : Sent | Time Taken: 26.990 ms
Batch 2 : Sent | Time Taken: 26.994 ms
Batch 3 : Sent | Time Taken: 27.993 ms
Batch 4 : Sent | Time Taken: 27.373 ms
Batch 5 : Sent | Time Taken: 25.999 ms
Batch 6 : Sent | Time Taken: 27.994 ms
Batch 7 : Sent | Time Taken: 26.993 ms
Batch 8 : Sent | Time Taken: 26.992 ms
Batch 9 : Sent | Time Taken: 26.994 ms
Batch 10 : Sent | Time Taken: 26.991 ms
Batch 11 : Sent | Time Taken: 26.993 ms
Batch 12 : Sent | Time Taken: 26.991 ms
Batch 13 : Sent | Time Taken: 26.993 ms
Batch 14 : Sent | Time Taken: 27.993 ms
Batch 15 : Sent | Time Taken: 26.994 ms
Batch 16 : Sent | Time Taken: 27.037 ms
Batch 17 : Sent | Time Taken: 27.998 ms
Batch 18 : Sent | Time Taken: 26.993 ms
Batch 19 : Sent | Time Taken: 27.991 ms
Batch 20 : Sent | Time Taken: 25.993 ms
Batch 21 : Sent | Time Taken: 28.007 ms
Batch 22 : Sent | Time Taken: 28.992 ms
Batch 23 : Sent | Time Taken: 27.992 ms
Batch 24 : Sent | Time Taken: 28.387 m

In [7]:
def with_fusion_with_tensorrt():
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(TRT_LOGGER)

    with open(PATH_LAYER_TENSORRT_ENGINE_0, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    total_time = 0
    start_inference = time.time()
    for(i, batch) in enumerate(data_loader):

        start_time = time.time()
        (input_data, label) = batch 
        activation = engine_infer_single_batch(engine, context, input_data, i)
        end_time = time.time()

        batch_time = end_time - start_time 
        print(f"Batch {i} : Sent | Time Taken: {batch_time * 1000:.3f} ms")
        total_time += batch_time
    end_inference = time.time()
    print(f"Total time taken: {total_time * 1000:.3f} ms")    
    print(f"Total Execution time: {(end_inference - start_inference) * 1000} ms")

with_fusion_with_tensorrt()

Batch 0 : Sent | Time Taken: 16.000 ms
Batch 1 : Sent | Time Taken: 17.002 ms
Batch 2 : Sent | Time Taken: 16.000 ms
Batch 3 : Sent | Time Taken: 15.359 ms
Batch 4 : Sent | Time Taken: 15.084 ms
Batch 5 : Sent | Time Taken: 14.103 ms
Batch 6 : Sent | Time Taken: 14.099 ms
Batch 7 : Sent | Time Taken: 14.992 ms
Batch 8 : Sent | Time Taken: 14.141 ms
Batch 9 : Sent | Time Taken: 14.002 ms
Batch 10 : Sent | Time Taken: 14.835 ms
Batch 11 : Sent | Time Taken: 14.410 ms
Batch 12 : Sent | Time Taken: 15.301 ms
Batch 13 : Sent | Time Taken: 14.247 ms
Batch 14 : Sent | Time Taken: 14.999 ms
Batch 15 : Sent | Time Taken: 14.925 ms
Batch 16 : Sent | Time Taken: 15.980 ms
Batch 17 : Sent | Time Taken: 13.916 ms
Batch 18 : Sent | Time Taken: 14.308 ms
Batch 19 : Sent | Time Taken: 13.987 ms
Batch 20 : Sent | Time Taken: 13.128 ms
Batch 21 : Sent | Time Taken: 14.129 ms
Batch 22 : Sent | Time Taken: 14.007 ms
Batch 23 : Sent | Time Taken: 14.076 ms
Batch 24 : Sent | Time Taken: 13.007 ms
Batch 25 :