In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed, GPT2Model, GPT2LMHeadModel, GPT2Tokenizer

In [6]:
# Define the model name
model_name = 'gpt2-medium'  # 370M parameters

# Redownload the model and tokenizer
model1 = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define the prompt
# prompt = "alice -> france,\nalice -> paris,\nbob -> germany,\nbob -> berlin,\njohn -> usa,\njohn"
# prompt = "alice -> france,\nparis -> alice,\nbob -> germany,\nberlin -> bob,\njohn -> usa,\nwashington"
# prompt = "alice france, paris alice, bob germany, berlin bob, john usa, washington"
# prompt = "alice lives in france,\nparis -  alice,\nbob lives in germany,\nberlin - bob,\njohn lives in usa,\nwashington -"

# prompt = "Alice lives in France, John - Berlin, John lives in Germany,  Alice - Paris, Peter lives in USA, Peter -"
prompt = "Alice lives in France, John - Berlin, John lives in Germany,  Alice - Paris, Peter lives in USA, Peter -"

# Tokenize the input prompt
encoded_input = tokenizer(prompt, return_tensors="pt")
input_ids = encoded_input.input_ids
print(f"Length of tokens: {len(input_ids[0])}")
attention_mask = encoded_input.attention_mask

# Generate text
gen_tokens = model1.generate(
    input_ids,
    attention_mask=attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.1,
    max_length=input_ids.size(1) + 3,
)

# Decode the generated tokens to text
gen_text = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]

print("Generated Text: ", gen_text)

Length of tokens: 26
Generated Text:  Alice lives in France, John - Berlin, John lives in Germany,  Alice - Paris, Peter lives in USA, Peter - London, Peter


In [7]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model2 = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")

text = "alice france, alice paris, bob germany, bob berlin, john usa, john "
encoded_input = tokenizer(text, return_tensors='pt')
output = model2(**encoded_input)

In [8]:
generator = pipeline('text-generation', model='gpt2')
set_seed(42)
generator("alice france, alice paris, bob germany, bob berlin, john usa, john ", max_length=30, num_return_sequences=1)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'alice france, alice paris, bob germany, bob berlin, john usa, john xtrombro'}]