<a href="https://colab.research.google.com/github/LeSaUi/DLtest/blob/main/better_transformer_with_torchtext_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# load xlm-r model refer https://pytorch.org/text/main/models.html

import torch, torchtext
import torch.nn
from torchtext.models import RobertaClassificationHead
from torchtext.functional import to_tensor

print(f'torch version: {torch.__version__}')

cuda_use = torch.cuda.is_available()

DEVICE = torch.device('cuda' if cuda_use else 'cpu')

print(f'torch cuda available: {cuda_use}')

xlmr_large = torchtext.models.XLMR_LARGE_ENCODER
classifier_head = torchtext.models.RobertaClassificationHead(num_classes=2, input_dim=1024)
model = xlmr_large.get_model(head=classifier_head)
transform = xlmr_large.transform()


torch version: 2.0.1+cu118
torch cuda available: True


In [2]:
small_input_batch = [
               "Hello world",
               "How are you!"
]
big_input_batch = [
               "Hello world",
               "How are you!",
               """`Well, Prince, so Genoa and Lucca are now just family estates of the
Buonapartes. But I warn you, if you don't tell me that this means war,
if you still try to defend the infamies and horrors perpetrated by
that Antichrist- I really believe he is Antichrist- I will have
nothing more to do with you and you are no longer my friend.
"""

#               `Well, Prince, so Genoa and Lucca are now just family estates of the
#Buonapartes. But I warn you, if you don't tell me that this means war,
#if you still try to defend the infamies and horrors perpetrated by
#that Antichrist- I really believe he is Antichrist- I will have
#nothing more to do with you and you are no longer my friend, no longer
#my 'faithful slave,' as you call yourself! But how do you do? I see
#I have frightened you- sit down and tell me all the news.`

#It was in July, 1805, and the speaker was the well-known Anna
#Pavlovna Scherer, maid of honor and favorite of the Empress Marya
#Fedorovna. With these words she greeted Prince Vasili Kuragin, a man
#of high rank and importance, who was the first to arrive at her
#reception. Anna Pavlovna had had a cough for some days. She was, as
#she said, suffering from la grippe; grippe being then a new word in
#St. Petersburg, used only by the elite."""
]

In [3]:
input_batch=big_input_batch

model_input = to_tensor(transform(input_batch), padding_value=1)
output = model(model_input)
output.shape

torch.Size([3, 2])

In [4]:
ITERATIONS=10

In [5]:
print('slow path:')
print('==========')
model.train()
with torch.autograd.profiler.profile(use_cuda=False) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

print('fast path:')
print('==========')
model.eval()
with torch.autograd.profiler.profile(use_cuda=False) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:
--------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                    aten::eq         0.00%      35.000us         0.00%      35.000us      35.000us             1  
                             aten::embedding         0.00%       1.422ms         0.02%       5.302ms       5.302ms             1  
                               aten::reshape         0.01%       1.771ms         0.01%       1.777ms       1.777ms             1  
                        aten::_reshape_alias         0.00%       6.000us         0.00%       6.000us       6.000us             1  
                          aten::index_select         0.01%       2.074ms

  output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not(), mask_check=False)


-------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
-------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                   aten::eq         0.00%      41.000us         0.00%      41.000us      41.000us             1  
                            aten::embedding         0.00%      28.000us         0.00%     313.000us     313.000us             1  
                              aten::reshape         0.00%      14.000us         0.00%      19.000us      19.000us             1  
                       aten::_reshape_alias         0.00%       5.000us         0.00%       5.000us       5.000us             1  
                         aten::index_select         0.00%     219.000us         0.00%     

In [6]:
model.encoder.transformer.layers.enable_nested_tensor=False # disable BT(Better Transformer) sparsity

In [7]:
model.to(DEVICE)
model_input = model_input.to(DEVICE)

print("slow path:")
print("==========")
model.train()
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

print("fast path:")
print("==========")
model.eval()
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.41%      12.646ms         0.41%      12.683ms      12.683ms      12.708ms         0.39%      12.708ms      12.708ms             1  
                                       cudaLaunchKernel         0.00%      37.000us         0.00%      37.000us      37.000us       0.000us         0.00%       0.000us       0.000us             1 

  return torch._transformer_encoder_layer_fwd(


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.01%      87.000us         0.02%     114.000us     114.000us     126.000us         0.02%     126.000us     126.000us             1  
                                       cudaLaunchKernel         0.00%      27.000us         0.00%      27.000us      27.000us       0.000us         0.00%       0.000us       0.000us             1  
         

In [8]:
model.encoder.transformer.layers.enable_nested_tensor=True

In [11]:
model.to(DEVICE)
model_input = model_input.to(DEVICE)

print("slow path:")
print("==========")
model.train()
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

print("fast path:")
print("==========")
model.eval()
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.01%      74.000us         0.02%     101.000us     101.000us     110.000us         0.01%     110.000us     110.000us             1  
                                       cudaLaunchKernel         0.00%      27.000us         0.00%      27.000us      27.000us       0.000us         0.00%       0.000us       0.000us             1 