# Setup

In [24]:
import transformers
from pathlib import Path

# Directory that holds the locally stored model checkpoints; load_model
# resolves each model name relative to it
MODEL_DIR = Path("models")

# Fix the random seed so repeated runs are reproducible
transformers.set_seed(0)

In [25]:
def load_model(model_name):
    """Load a tokenizer and a causal language model from the local model directory.

    Args:
        model_name: Location of the model relative to ``MODEL_DIR``
            (for example ``"gpt2"`` or ``"facebook/opt-125m"``).

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Tokenizer and weights are read from the same directory; resolve it once.
    checkpoint_dir = MODEL_DIR / model_name
    # local_files_only=True restricts loading to files already on disk.
    offline = {"local_files_only": True}
    return (
        transformers.AutoTokenizer.from_pretrained(checkpoint_dir, **offline),
        transformers.AutoModelForCausalLM.from_pretrained(checkpoint_dir, **offline),
    )

In [26]:
def generate_text(tokenizer, model, prompt, output_length=20):
    """Generate a continuation of ``prompt`` and return the decoded text.

    Args:
        tokenizer: Tokenizer matching ``model``; it is called on the prompt and
            used to decode the generated ids.
        model: Model exposing ``generate``.
        prompt: Text to continue.
        output_length: Maximum number of new tokens to generate after the prompt.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed.
    """
    # Calling the tokenizer (rather than ``encode``) also yields the attention
    # mask, which ``generate`` cannot reliably infer once the pad token is set
    # to the EOS token below.
    encoded = tokenizer(prompt, return_tensors="pt")

    # ``max_new_tokens`` bounds only the generated part, so the prompt length
    # does not have to be added by hand.
    output_ids = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=output_length,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the first (and only) sequence of the batch and return it
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


# Testing

## GPT2

In [27]:
# Load GPT-2 from the local model directory and report its memory footprint
# (get_memory_footprint() is divided by 1024 * 1024 before rounding)
model_name = "gpt2"
tokenizer, model = load_model(model_name)
print(
    "Usage:",
    round(model.get_memory_footprint() / (1024 * 1024), 1),
    "MB",
)

Usage: 486.7 MB


In [28]:
%%time
# Time a single generate_text call (default output_length=20) with the
# GPT-2 tokenizer and model loaded in the previous cell
prompt = "What is the meaning of life?"
generate_text(tokenizer, model, prompt)

What is the meaning of life?

The meaning of life is that we are living in a world that is not ours.

CPU times: user 413 ms, sys: 91.8 ms, total: 505 ms
Wall time: 721 ms


## OPT

In [29]:
# Load OPT-125M from the local model directory and report its memory footprint
# (get_memory_footprint() is divided by 1024 * 1024 before rounding)
model_name = "facebook/opt-125m"
tokenizer, model = load_model(model_name)
print(
    "Usage:",
    round(model.get_memory_footprint() / (1024 * 1024), 1),
    "MB",
)

Usage: 477.8 MB


In [30]:
%%time
# Time a single generate_text call (default output_length=20) with the
# OPT tokenizer and model loaded in the previous cell
prompt = "What is the meaning of life?"
generate_text(tokenizer, model, prompt)

What is the meaning of life?

Life is a process. It is a process that is not a linear process. It is
CPU times: user 491 ms, sys: 80.9 ms, total: 572 ms
Wall time: 741 ms


## Pythia

In [31]:
# Load Pythia-70M from the local model directory and report its memory footprint
# (get_memory_footprint() is divided by 1024 * 1024 before rounding)
model_name = "EleutherAI/pythia-70m"
tokenizer, model = load_model(model_name)
print(
    "Usage:",
    round(model.get_memory_footprint() / (1024 * 1024), 1),
    "MB",
)

Usage: 292.7 MB


In [32]:
%%time
# Time a single generate_text call (default output_length=20) with the
# Pythia tokenizer and model loaded in the previous cell
prompt = "What is the meaning of life?"
generate_text(tokenizer, model, prompt)

What is the meaning of life?

The meaning of life is not a question of life, but a question of life. It
CPU times: user 142 ms, sys: 21.6 ms, total: 164 ms
Wall time: 168 ms
