In [1]:
import torch

import pandas as pd

from main import load_imdb_model

In [2]:
# Sanity check: show the index of the currently selected CUDA device.
torch.cuda.current_device()

0

In [3]:
# Show the name of CUDA device 0. The index is hard-coded rather than taken
# from the currently selected device.
torch.cuda.get_device_name(0)

'GeForce GTX 1660 SUPER'

In [4]:
# load_imdb_model is imported from the project-local `main` module and is
# unpacked here into a (tokenizer, model) pair used by the cells below.
# NOTE(review): device placement and train/eval mode of `model` are decided
# inside that helper and are not visible from this notebook — confirm.
tokenizer, model = load_imdb_model()

Test du tokenizer

```python
tokenizer.encode("Hi, how are you ?")

tokenizer.decode([101, 8790, 117, 1293, 1132, 1128, 136, 102])
```

In [5]:
# Load the IMDB reviews dataset. The path is relative, so it resolves against
# the notebook's working directory.
df = pd.read_csv("imdb/imdb.csv")

In [6]:
# (rows, columns) of the loaded frame.
df.shape

(50000, 2)

In [7]:
# Preview the first rows: a `review` text column and a `sentiment` label column.
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [8]:
# Draw a 64-row random subset to use as a single batch. random_state pins the
# draw so a rerun selects the same rows.
sample = df.sample(n=64, random_state=2021)
sample.head()

Unnamed: 0,review,sentiment
17557,Giant crabs cursing in Japanese? What was in t...,negative
5312,The cast is admirably chosen and well-balanced...,positive
2210,One of the better movies to come out of the 19...,positive
9261,Move over Manos. Back off Boogens. It doesn't ...,negative
39404,Being advertised as the most expensive movie e...,positive


In [9]:
# Tokenize all sampled reviews as one batch. Sequences are padded to a common
# length and truncated to at most 512 tokens.
# NOTE(review): despite its name, `embedded` holds tokenizer output (token
# ids and related fields), not embedding vectors.
embedded = tokenizer.batch_encode_plus(
    sample["review"].tolist(),
    padding=True,
    truncation=True,
    max_length=512,
)

In [10]:
# Build a tensor from the token ids and move it to the GPU, then display it.
# NOTE(review): `input` shadows the Python builtin of the same name. Later
# cells reference this variable, so a rename has to touch all of them at once.
# NOTE(review): only "input_ids" is used here. If the tokenizer also produced
# an attention mask, it is not forwarded to the model, so padding positions
# may be attended to — confirm this is acceptable for the measurement.
input = torch.tensor(embedded["input_ids"]).cuda()
input

tensor([[  101, 12510, 24121,  ...,     0,     0,     0],
        [  101,  1109,  2641,  ...,     0,     0,     0],
        [  101,  1448,  1104,  ...,     0,     0,     0],
        ...,
        [  101,   146,   112,  ...,     0,     0,     0],
        [  101,  1130,  1126,  ...,     0,     0,     0],
        [  101,  1109, 15882,  ...,     0,     0,     0]], device='cuda:0')

In [14]:
# Check the batch dimensions of the token-id tensor.
input.shape

torch.Size([64, 512])

In [15]:
# Profile one forward pass, recording CUDA kernel timings alongside CPU time.
# Gradients are disabled because this is an inference-only measurement:
# otherwise autograd would record the graph and keep intermediate activations
# alive for the whole batch, inflating memory use and adding overhead that
# has nothing to do with the forward computation being measured.
# NOTE(review): assumes load_imdb_model() already put the model in eval mode;
# if not, dropout is active during this run — confirm.
with torch.no_grad(), torch.autograd.profiler.profile(use_cuda=True) as prof:
    output = model(input)

In [16]:
# Print the per-operator averages collected by the profiler, ordered by the
# time spent in each operator's own CUDA kernels.
print(prof.key_averages().table(sort_by="self_cuda_time_total"))

---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                   aten::mm         2.37%      78.676ms         2.41%      80.047ms     555.880us        4.582s        71.43%        4.582s      31.816ms           144  
                  aten::bmm         0.07%       2.480ms         0.15%       4.945ms     103.014us     588.100ms         9.17%     588.100ms      12.252ms            48  
                  aten::add         3.39%     112.646ms         3.42%     113.555ms       1.556ms     279.911ms         4.36%     279.911ms       3.8

In [18]:
import cProfile

# Profile the Python-side call tree of one forward pass and write the stats
# to 'output.pstats' in the working directory.
# CUDA kernels are launched asynchronously, so the profiled statement also
# waits for the GPU to finish. Without the synchronize call the profile can
# end while kernels are still running, and the recorded total would not
# reflect the real duration of the forward pass.
cProfile.run('model(input); torch.cuda.synchronize()', 'output.pstats')

In [1]:
from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
# Benchmark configuration: a single model, four batch sizes, and one
# sequence length.
args = PyTorchBenchmarkArguments(
    models=["lvwerra/bert-imdb"],
    batch_sizes=[8, 16, 32, 64],
    sequence_lengths=[512],
)
benchmark = PyTorchBenchmark(args)

In [2]:
# Run the configured benchmark and keep the returned results object. The run
# prints time and memory tables for each batch size.
results = benchmark.run()

1 / 1

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
      lvwerra/bert-imdb              8              512            0.666     
      lvwerra/bert-imdb              16             512            1.331     
      lvwerra/bert-imdb              32             512            2.659     
      lvwerra/bert-imdb              64             512            5.342     
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length    Memory in MB 
--------------------------------------------------------------------------------
      lvwerra/bert-imdb              8              512             3119     
      lvwerra/bert-imdb              16      