# Setup

In [1]:
import transformers
from pathlib import Path

# Folder (relative to the notebook) from which the models are loaded
MODEL_DIR = Path("models")

# Fix the random seed once, up front, so repeated runs are reproducible
SEED = 0
transformers.set_seed(SEED)

In [2]:
# Load tokenizer and model
def load_model(model_name):
    """Load the tokenizer and causal language model stored under ``MODEL_DIR``.

    Args:
        model_name: Name of the model; it is joined onto ``MODEL_DIR`` to
            build the path that is handed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    local_dir = MODEL_DIR / model_name
    # Both loaders receive the same option: only use files already on disk.
    load_options = {"local_files_only": True}
    return (
        transformers.AutoTokenizer.from_pretrained(local_dir, **load_options),
        transformers.AutoModelForCausalLM.from_pretrained(local_dir, **load_options),
    )

In [3]:
def generate_text(tokenizer, model, prompt, output_length=20):
    """Generate a continuation of ``prompt`` and return it as decoded text.

    Args:
        tokenizer: Tokenizer matching ``model``. It is called on the prompt and
            must provide ``decode`` and ``eos_token_id``.
        model: Model exposing ``generate``.
        prompt: Text to continue.
        output_length: Maximum number of tokens to generate on top of the prompt.

    Returns:
        The decoded sequence (prompt followed by the generated continuation),
        with special tokens stripped.
    """
    # Call the tokenizer itself rather than ``encode`` so that the attention
    # mask comes back alongside the token ids.
    encoded = tokenizer(prompt, return_tensors="pt")

    # Pass the mask explicitly: the pad id is set to the EOS id below, so the
    # model cannot reliably infer the mask from the token ids alone.
    # ``max_new_tokens`` expresses the budget directly instead of adding the
    # prompt length to ``output_length`` by hand.
    output_ids = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=output_length,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the first (and only) sequence of the batch
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


# Testing

## GPT2

In [4]:
model_name = "gpt2" # 137M parameters
tokenizer, model = load_model(model_name)

# Report the model's memory footprint, scaled by 1024**2 and labelled MB
footprint_mb = model.get_memory_footprint() / (1024 * 1024)
print("Usage:", round(footprint_mb, 1), "MB")

Usage: 486.7 MB


In [5]:
%%time
prompt = "What is the meaning of life?"
print(generate_text(tokenizer, model, prompt))

What is the meaning of life?

The meaning of life is that we are living in a world that is not ours.

CPU times: user 4min 42s, sys: 33.3 s, total: 5min 16s
Wall time: 5min 19s


## OPT

In [6]:
model_name = "facebook/opt-125m"
tokenizer, model = load_model(model_name)

# Report the model's memory footprint, scaled by 1024**2 and labelled MB
footprint_mb = model.get_memory_footprint() / (1024 * 1024)
print("Usage:", round(footprint_mb, 1), "MB")

Usage: 477.8 MB


In [None]:
%%time
prompt = "What is the meaning of life?"
print(generate_text(tokenizer, model, prompt))

What is the meaning of life?

Life is a process. It is a process that is not a linear process. It is
CPU times: user 5min 30s, sys: 19.8 s, total: 5min 50s
Wall time: 5min 51s


## Pythia

In [None]:
model_name = "EleutherAI/pythia-70m"
tokenizer, model = load_model(model_name)

# Report the model's memory footprint, scaled by 1024**2 and labelled MB
footprint_mb = model.get_memory_footprint() / (1024 * 1024)
print("Usage:", round(footprint_mb, 1), "MB")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Usage: 294.2 MB


In [None]:
%%time
prompt = "What is the meaning of life?"
print(generate_text(tokenizer, model, prompt))

What is the meaning of life?

The meaning of life is not a question of life, but a question of life. It
CPU times: user 2min 6s, sys: 6.48 s, total: 2min 13s
Wall time: 2min 14s
