In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

In [1]:
checkpoint_path = "../NLPModel/gpt2-recipes/checkpoint-750"

In [3]:
# Load tokenizer and model from the checkpoint
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint_path)
model = GPT2LMHeadModel.from_pretrained(checkpoint_path)

In [4]:
# Required for padding (GPT-2 doesn't have one by default)
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))

Embedding(50258, 768)

In [5]:
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50258, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50258, bias=False)
)

In [6]:
def generate_recipe(ingredients: list[str], max_length: int = 200) -> str:
    prompt = "<|startoftext|>\nIngredients: " + ", ".join(ingredients) + "\nInstructions:\n"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.8,
            pad_token_id=tokenizer.eos_token_id
        )

    result = tokenizer.decode(output[0], skip_special_tokens=True)
    return result


In [None]:
ingredients = ["onion", "garlic", "tomato"]
recipe = generate_recipe(ingredients)
print(recipe) 


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



Ingredients: onion, garlic, tomato
Instructions:
Peel the onion from the ends and set aside. Cut into thin strips.  Add the beef broth; stir well.  Bring to a boil; reduce heat to low and simmer until beef boils.  Let cool slightly.    Remove from heat;  add salt and pepper to taste.  Gradually stir in  cornstarch.   Pour over beef; cook 1 minute or until mixture thickens. Transfer to prepared baking dish; bake  until a lightly golden brown. Cool on wire racks for 15 minutes.  Cool on wire racks for 12 minutes.   Let cool completely. Cool on wire rack for 15 minutes.   Let cool completely on wire racks for 10 minutes.   Sprinkle over flour. Gradually stir in eggs. Stir in cream and flour; beat on low speed. Gradually beat in flour until flour is stiffened. Gradually beat in egg. Gradually
