# Install necessary packages (if not already installed)
# !pip install transformers torch

In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pre-trained GPT-2 model and tokenizer
model_name = "gpt2"  # Other options: "gpt2-medium", "gpt2-large", "gpt2-xl"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [3]:
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# Function to generate text from a prompt
def generate_text(prompt, max_length=100):
    # Tokenize the input text
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    
    # Generate text
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            early_stopping=True
        )
    
    # Decode the generated tokens into text
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text


In [6]:
# Example usage:
prompt = "What is artificial intelligence?"
output_text = generate_text(prompt, max_length=150)
print("Generated text:\n", output_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text:
 What is artificial intelligence?

AI is a new field of research that has been around for a long time. It is not a science, but it is an idea.
. Artificial intelligence is the next frontier in the field. The field is growing rapidly. In the last few years, there have been many developments in artificial-intelligence research. One of them is called artificial neural networks. These are the machines that are able to learn from human behavior. They are very good at learning from humans. But they are not very smart. So they can't learn anything from us. And they don't know what to do with us, so they have to be trained. That is what we call artificial learning. We call it artificial reinforcement learning,


In [8]:
# Save the model and tokenizer to a directory
save_directory = "../../ml-models/gpt2"

model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")


Model and tokenizer saved to ../../ml-models/gpt2
