In [1]:
# auto reload modules
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
from src.base import CustomPreTrainedModel
from src.utils import measure_inference_speed, measure_memory_usage

In [4]:
# Example input text
input_text = "This is a sample input text to measure the inference speed of the model."

In [5]:
from transformers import AutoModel, AutoTokenizer, AutoConfig, BertModel, RobertaModel, DistilBertModel, AlbertModel, GPT2Model, T5Model, XLNetModel, ElectraModel, BartModel, DebertaModel

In [6]:
model_classes = {
    "bert-base-uncased": BertModel,
    "bert-large-uncased": BertModel,
    "roberta-base": RobertaModel,
    # "roberta-large": RobertaModel,
    # "distilbert-base-uncased": DistilBertModel,
    # "albert-base-v2": AlbertModel,
    # "albert-large-v2": AlbertModel,
    # "gpt2": GPT2Model,
    # "gpt2-medium": GPT2Model,
    # "t5-small": T5Model,
    # "t5-base": T5Model,
    # "t5-large": T5Model,
    # "xlnet-base-cased": XLNetModel,
    # "xlnet-large-cased": XLNetModel,
    # "google/electra-small-discriminator": ElectraModel,
    # "google/electra-base-discriminator": ElectraModel,
    # "facebook/bart-base": BartModel,
    # "facebook/bart-large": BartModel,
    # "microsoft/deberta-base": DebertaModel,
    # "microsoft/deberta-large": DebertaModel
}

In [7]:
for model_name in model_classes:
    print(f"Testing model: {model_name}")
    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    config = AutoConfig.from_pretrained(model_name)
    
    custom_model = CustomPreTrainedModel(config, model_classes[model_name])

    speed = measure_inference_speed(model, tokenizer, input_text)
    memory = measure_memory_usage(model, tokenizer, input_text)
    
    custom_speed = measure_inference_speed(custom_model, tokenizer, input_text)
    custom_memory = measure_memory_usage(custom_model, tokenizer, input_text)

    print(f"Inference speed: {speed:.6f} seconds")
    print(f"Custom model inference speed: {custom_speed:.6f} seconds")
        
    print(f"Memory usage: {memory / (1024 ** 2):.2f} MB")
    print(f"Custom Memory usage: {custom_memory / (1024 ** 2):.2f} MB")

    print(f"Speedup: {speed / custom_speed:.2f}x")
    print(f"% difference in memory usage: {((memory - custom_memory) / memory) * 100:.2f}%")
    
    print("*" * 100)

Testing model: bert-base-uncased
Inference speed: 0.076752 seconds
Custom model inference speed: 0.026537 seconds
Memory usage: 1420.90 MB
Custom Memory usage: 1421.39 MB
Speedup: 2.89x
% difference in memory usage: -0.03%
****************************************************************************************************
Testing model: bert-large-uncased
Inference speed: 0.082815 seconds
Custom model inference speed: 0.080833 seconds
Memory usage: 4791.22 MB
Custom Memory usage: 4791.73 MB
Speedup: 1.02x
% difference in memory usage: -0.01%
****************************************************************************************************
Testing model: roberta-base


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Inference speed: 0.033722 seconds
Custom model inference speed: 0.031844 seconds
Memory usage: 3821.50 MB
Custom Memory usage: 3821.71 MB
Speedup: 1.06x
% difference in memory usage: -0.01%
****************************************************************************************************
