"""Profile one inference pass of a fine-tuned sequence-classification model.

Loads a tokenizer and model from a local checkpoint, runs 10 warmup forward
passes, then times and profiles a single forward pass with torch.profiler.
"""
import os

# Cap intra-op parallelism; must be set before torch/transformers spawn
# their thread pools, hence before those imports below.
os.environ['OMP_NUM_THREADS'] = '2'

import time

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Single source of truth for the checkpoint path (was duplicated).
CHECKPOINT = "my_awesome_model2/checkpoint-5"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
text = (
    "This was a masterpiece. Not completely faithful to the books, but "
    "enthralling from beginning to end. Might be my favorite of the three."
)
inputs = tokenizer(text, return_tensors="pt")

model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT)
# Fix: switch to eval mode — from_pretrained leaves the module in train mode,
# so dropout would otherwise run during the profiled pass.
model.eval()

with torch.no_grad():
    # Warmup: first calls pay one-time costs (allocator growth, kernel
    # selection), which would otherwise pollute the measurement.
    for _ in range(10):
        model(**inputs)

    # Profile after warmup.
    prof = torch.profiler.profile(record_shapes=True, with_stack=True)
    prof.start()
    st = time.time()
    # Fix: the timed/profiled pass now also runs under no_grad; previously it
    # sat outside the context and built an autograd graph, skewing the timing.
    model(**inputs)
    et = time.time()
    prof.stop()

# logits = model(**inputs).logits
print(prof.key_averages().table(row_limit=10000))
print('Inference time:')
print(et - st)

# Restore the environment so a subsequent import in the same process
# isn't silently pinned to 2 threads.
del os.environ['OMP_NUM_THREADS']