<a href="https://colab.research.google.com/github/MeghanaShanthappa/TensorRT_features/blob/main/Profiling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision.models as models
from torch.profiler import profile, ProfilerActivity, record_function

In [2]:
model = models.resnet18()
inputs = torch.randn(5, 3, 224, 224)

In [3]:
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    with record_function("model_inference"):
        model(inputs)

In [4]:
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         3.20%      16.799ms       100.00%     524.415ms     524.415ms             1  
                     aten::conv2d         0.52%       2.726ms        66.78%     350.212ms      17.511ms            20  
                aten::convolution         0.60%       3.132ms        66.26%     347.486ms      17.374ms            20  
               aten::_convolution         0.45%       2.342ms        65.66%     344.354ms      17.218ms            20  
         aten::mkldnn_convolution        64.37%     337.574ms        65.22%     342.012ms      17.101ms            20  
                 aten::batch_norm       

In [5]:
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         3.20%      16.799ms       100.00%     524.415ms     524.415ms             1  
                     aten::conv2d         0.52%       2.726ms        66.78%     350.212ms      17.511ms            20  
                aten::convolution         0.60%       3.132ms        66.26%     347.486ms      17.374ms            20  
               aten::_convolution         0.45%       2.342ms        65.66%     344.354ms      17.218ms            20  
         aten::mkldnn_convolution        64.37%     337.574ms        65.22%     342.012ms      17.101ms            20  
                 aten::batch_norm       

In [6]:
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    device = "cuda"
    activities += [ProfilerActivity.CUDA]
else:
    print(
        "Neither CUDA nor XPU devices are available to demonstrate profiling on acceleration devices"
    )
    # Removed sys.exit(0) to allow profiling on CPU even if no accelerator is present
    device = "cpu" # Explicitly set device to CPU if no accelerator is found

sort_by_keyword = "cpu_time_total" # Always sort by cpu_time_total if no accelerator was found initially

model = models.resnet18().to(device)
inputs = torch.randn(5, 3, 224, 224).to(device)

with profile(activities=activities, record_shapes=True) as prof:
    with record_function("model_inference"):
        model(inputs)

print(prof.key_averages().table(sort_by=sort_by_keyword, row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.37%       3.316ms       100.00%     902.776ms     902.776ms       0.000us         0.00%      25.252ms      25.252ms             1  
                                           aten::conv2d         0.01%     111.347us        74.82%     675.435ms      33.772ms       0.000us         0.00%      21.512ms       1.076ms            20  
         