In [None]:
import torch
from torch.profiler import profile, record_function, ProfilerActivity
from src.model import GPT1CoreFast

In [2]:
def time_model(n, batch_size=5,use_gpu=False,sort_by="self_cpu_time_total"):

    model = GPT1CoreFast(30000)
        
    inputs = torch.tensor([[i for i in range(512)] for _ in range(batch_size)],dtype=torch.int)
    mask = torch.ones((batch_size,512),dtype=torch.bool)

    if use_gpu:
        gpu_device = torch.device("cuda")
        inputs = inputs.to(gpu_device)
        mask = mask.to(gpu_device)
        model.to(gpu_device)

    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
        with record_function("model_inference"):
            for k in range(n):
                model(inputs, mask)

    print(prof.key_averages().table(sort_by=sort_by, row_limit=550))

    return prof

In [3]:
prof = time_model(1,16,False,"self_cpu_time_total")

---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                aten::addmm        88.90%      663.690s        88.92%      663.813s        9.220s      663.718s        88.90%      663.830s        9.220s            72  
                  aten::bmm        10.31%       76.959s        10.34%       77.170s     267.952ms       76.917s        10.30%       77.098s     267.702ms           288  
             aten::_softmax         0.41%        3.058s         0.41%        3.058s      21.238ms        3.103s         0.42%        3.103s      21.5

In [7]:
time_model(1,16,True,"self_cuda_time_total")

---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                aten::addmm         0.79%       4.609ms         0.98%       5.730ms      79.583us     198.303ms        33.52%     199.438ms       2.770ms            72  
                  aten::bmm         1.62%       9.447ms         1.62%       9.447ms      32.802us      45.560ms         7.70%      45.560ms     158.194us           288  
                  aten::div         0.53%       3.079ms         0.53%       3.079ms      21.382us      43.190ms         7.30%      43.190ms     299.9

<torch.profiler.profiler.profile at 0x1ef24133c90>