# Task-01: Text Generation with GPT-2



## Check Dependencies

In [None]:
# Import the submodule explicitly: a bare `import importlib` does not
# guarantee that `importlib.util` is loaded, so `importlib.util.find_spec`
# can raise AttributeError in a fresh interpreter.
import importlib.util

# Collect the required third-party packages that cannot be located on the
# current import path (find_spec returns None for a top-level package that
# is not installed).
missing = [lib for lib in ("transformers", "datasets") if importlib.util.find_spec(lib) is None]
print("Missing:", missing if missing else "All dependencies installed!")

## Import Libraries and Initialize Model

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Pretrained GPT-2 language model that the later cells fine-tune and sample from.
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Matching tokenizer. No pad token is assigned after loading, so the
# end-of-sequence token is reused as the padding token.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token


## Load and Prepare Dataset

In [None]:
from datasets import load_dataset

# First 1% of the WikiText-2 (raw) training split.
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train[:1%]')

# Drop blank / whitespace-only rows: they tokenize to zero tokens and would
# otherwise become examples with nothing to learn from.
non_blank = dataset.filter(lambda example: example['text'].strip() != '')


def tokenize_fun(examples):
    """Tokenize a batch of rows from the dataset's 'text' column.

    Sequences longer than the tokenizer's maximum length are truncated.
    No padding is applied here: padding every row to the tokenizer's full
    model length wastes memory and compute, so padding is left to the
    Trainer's data collator, which pads each batch to its longest sequence.
    """
    return tokenizer(examples['text'], truncation=True)


tokenized = non_blank.map(tokenize_fun, batched=True)
# Expose only the tensor columns that the model consumes.
tokenized.set_format(type='torch', columns=['input_ids', 'attention_mask'])


## Fine-Tune GPT-2

In [None]:
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

# One short pass over the data; keep only the most recent checkpoint on disk.
training_args = TrainingArguments(
    output_dir='output',
    num_train_epochs=1,
    per_device_train_batch_size=2,
    save_steps=200,
    save_total_limit=1,
)

# The tokenized dataset only carries input_ids and attention_mask. Without a
# `labels` entry the model returns no loss and Trainer.train() fails, so use
# the causal-LM collator (mlm=False), which pads each batch and copies
# input_ids into labels with padding positions masked out.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator,
    tokenizer=tokenizer,
)
trainer.train()


## Generate Text

In [None]:
prompt = "Once upon a time"

# Training leaves the model in train mode; switch to eval mode so dropout
# does not perturb generation.
model.eval()

# The Trainer may have moved the model to an accelerator, so place the
# prompt tensors on the same device as the model.
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

# Pass pad_token_id explicitly (the tokenizer reuses EOS as its pad token)
# instead of relying on generate()'s implicit fallback.
outputs = model.generate(
    **inputs,
    max_length=50,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Created by Navraj Amgai