"""Profile one inference pass of a fine-tuned sequence-classification model.

Loads a tokenizer and model from a local checkpoint, runs 10 warmup forward
passes, then times and profiles a single forward pass with torch.profiler.
"""
import os

# Cap intra-op parallelism; must be set before torch/transformers spawn
# their thread pools, hence before those imports below.
os.environ['OMP_NUM_THREADS'] = '2'

import time

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Single source of truth for the checkpoint path (was duplicated).
CHECKPOINT = "my_awesome_model2/checkpoint-5"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
text = (
    "This was a masterpiece. Not completely faithful to the books, but "
    "enthralling from beginning to end. Might be my favorite of the three."
)
inputs = tokenizer(text, return_tensors="pt")

model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT)
# Fix: switch to eval mode — from_pretrained leaves the module in train mode,
# so dropout would otherwise run during the profiled pass.
model.eval()

with torch.no_grad():
    # Warmup: first calls pay one-time costs (allocator growth, kernel
    # selection), which would otherwise pollute the measurement.
    for _ in range(10):
        model(**inputs)

    # Profile after warmup.
    prof = torch.profiler.profile(record_shapes=True, with_stack=True)
    prof.start()
    st = time.time()
    # Fix: the timed/profiled pass now also runs under no_grad; previously it
    # sat outside the context and built an autograd graph, skewing the timing.
    model(**inputs)
    et = time.time()
    prof.stop()

# logits = model(**inputs).logits
print(prof.key_averages().table(row_limit=10000))
print('Inference time:')
print(et - st)

# Restore the environment so a subsequent import in the same process
# isn't silently pinned to 2 threads.
del os.environ['OMP_NUM_THREADS']