1. Hugging Face GPT-2-xl Example

In [None]:
from transformers import pipeline, set_seed

# Build a text-generation pipeline backed by the "gpt2-xl" checkpoint.
generator = pipeline("text-generation", model="gpt2-xl")

# Fix the random seed so that any sampling done by the pipeline is repeatable
# from run to run.
set_seed(42)

# max_length bounds the total sequence length (prompt tokens included);
# a single continuation is requested.
text = generator(
    "Once upon a time,",
    max_length=50,
    num_return_sequences=1,
)

# The pipeline returns a list of dicts, one per returned sequence.
first_result = text[0]
print(first_result["generated_text"])


Let’s Start Simple — With distilgpt2

In [None]:
from transformers import pipeline

# distilgpt2 is a much smaller checkpoint, so it is quicker to download and run.
generator = pipeline("text-generation", model="distilgpt2")

prompt = "The future of artificial intelligence in India is"

# Sampling settings, kept together so they are easy to tweak:
#   do_sample   - draw tokens at random instead of always taking the most likely one
#   temperature - values below 1.0 sharpen the distribution (less random output)
#   top_p       - nucleus sampling: keep the smallest token set with 90% of the mass
#   max_length  - upper bound on total length, prompt tokens included
sampling_options = {
    "max_length": 50,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
}
output = generator(prompt, **sampling_options)

print(output[0]["generated_text"])


Next Step: Use AutoModelForCausalLM + Tokenizer (More Control)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Any causal-LM checkpoint id from the Hub can be used here,
# e.g. "gpt2" or "EleutherAI/gpt-j-6b".
model_name = "EleutherAI/gpt-neo-1.3B"

# Load the tokenizer and the model weights separately instead of via pipeline().
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "Write a short poem about AI and humanity.\n\n"

# Encode the prompt as a PyTorch tensor of token ids.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
input_ids = encoded_prompt["input_ids"]

# Only the token ids are handed to generate(); no attention mask or pad token
# id is supplied in this cell.
# NOTE(review): transformers may emit a warning about that - confirm whether
# it matters for your use case.
sampling_options = {
    "do_sample": True,
    "temperature": 0.7,
    "max_length": 100,  # total length, prompt tokens included
    "top_p": 0.95,
    "top_k": 50,
}
gen_tokens = model.generate(input_ids, **sampling_options)

# A single prompt was given, so decode the first (only) generated sequence.
gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)

print(gen_text)


With attention masks, pad token IDs, and additional generation parameters for more reliable output


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = "gpt2-xl"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

input_text = "Explain the importance of clean energy"

# The tokenizer output carries both the token ids and the attention mask.
tokens = tokenizer(input_text, return_tensors="pt")

# Show what will be fed to the model.
print("Input IDs:\n", tokens["input_ids"])
print("Attention Mask:\n", tokens["attention_mask"])
# NOTE(review): GPT-2 tokenizers normally ship without a pad token, so this is
# expected to print None - confirm for the checkpoint in use.
print("Pad token ID:", tokenizer.pad_token_id)

# Decoding settings, grouped so they can be tuned in one place.
decoding_options = {
    "max_length": 350,          # total length, prompt tokens included
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    "top_p": 0.95,
    "repetition_penalty": 1.2,  # values above 1.0 discourage repeated tokens
    "no_repeat_ngram_size": 3,  # never produce the same 3-gram twice
}

# Pass the attention mask explicitly and use the end-of-sequence token as the
# padding id.
output = model.generate(
    tokens["input_ids"],
    attention_mask=tokens["attention_mask"],
    pad_token_id=tokenizer.eos_token_id,
    **decoding_options,
)

decoded_answer = tokenizer.decode(output[0], skip_special_tokens=True)
print(decoded_answer)


1. 📖 Storytelling Task with gpt2-xl

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

def generate_text(model_name: str, prompt: str, generation_config: dict) -> str:
    """Generate a continuation of ``prompt`` with a causal language model.

    The tokenizer and model are loaded from ``model_name`` on every call.

    Args:
        model_name: Checkpoint id or local path understood by
            ``from_pretrained``.
        prompt: Text the model should continue.
        generation_config: Keyword arguments forwarded to
            ``model.generate`` (e.g. ``max_new_tokens``, ``temperature``).
            Entries here take precedence over the defaults set below.

    Returns:
        The decoded first generated sequence (prompt included), with special
        tokens stripped.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Fall back to the end-of-sequence token when the tokenizer defines no
    # dedicated padding token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    inputs = tokenizer(prompt, return_tensors="pt")

    # Forward the attention mask explicitly: once pad and EOS share an id,
    # generate() cannot infer the mask from the input ids and warns about it.
    # The caller's config is merged last so it can override these defaults
    # instead of colliding with them as duplicate keyword arguments.
    generate_kwargs = {
        "attention_mask": inputs["attention_mask"],
        "pad_token_id": tokenizer.pad_token_id,
        **generation_config,
    }
    output_ids = model.generate(inputs["input_ids"], **generate_kwargs)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Task-specific generation settings: a high temperature plus nucleus/top-k
# sampling for varied prose, with repetition controls to limit loops.
# ("early_stopping" is intentionally absent: it only affects beam search and
# merely triggers a warning when sampling with a single beam.)
story_config = {
    "max_new_tokens": 400,
    "temperature": 0.95,
    "top_p": 0.9,
    "top_k": 50,
    "do_sample": True,
    "repetition_penalty": 1.2,
    "no_repeat_ngram_size": 3,
}

# Prompt for storytelling
prompt = "Once upon a time in a world ruled by machines, one human remained who..."

# Generate using GPT-2 XL
output = generate_text("gpt2-xl", prompt, story_config)
print("📖 Story:\n", output)


Question answering using EleutherAI/gpt-neo-2.7B

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

def generate_text(model_name: str, prompt: str, generation_config: dict) -> str:
    """Generate a continuation of ``prompt`` with a causal language model.

    The tokenizer and model are loaded from ``model_name`` on every call.

    Args:
        model_name: Checkpoint id or local path understood by
            ``from_pretrained``.
        prompt: Text the model should continue.
        generation_config: Keyword arguments forwarded to
            ``model.generate`` (e.g. ``max_new_tokens``, ``temperature``).
            Entries here take precedence over the defaults set below.

    Returns:
        The decoded first generated sequence (prompt included), with special
        tokens stripped.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Fall back to the end-of-sequence token when the tokenizer defines no
    # dedicated padding token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    inputs = tokenizer(prompt, return_tensors="pt")

    # Forward the attention mask explicitly: once pad and EOS share an id,
    # generate() cannot infer the mask from the input ids and warns about it.
    # The caller's config is merged last so it can override these defaults
    # instead of colliding with them as duplicate keyword arguments.
    generate_kwargs = {
        "attention_mask": inputs["attention_mask"],
        "pad_token_id": tokenizer.pad_token_id,
        **generation_config,
    }
    output_ids = model.generate(inputs["input_ids"], **generate_kwargs)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Generation settings for short question answering: a modest token budget and
# tighter sampling than a creative-writing setup would use.
qa_config = dict(
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_k=40,
    top_p=0.8,
    repetition_penalty=1.0,  # 1.0 applies no repetition penalty
    no_repeat_ngram_size=2,  # never produce the same 2-gram twice
)

# "Q: ... A:" framing nudges the model to complete the answer line.
prompt = "Q: What is the capital of France?\nA:"

# Generate using GPT-Neo 2.7B
qa_model_name = "EleutherAI/gpt-neo-2.7B"
output = generate_text(qa_model_name, prompt, qa_config)
print("❓ QA:\n", output)

