In [None]:

!pip install transformers datasets

from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import Dataset

# Tiny in-memory fine-tuning corpus: a single "text" column holding one
# sentence per training example.
data = dict(
    text=[
        "Once upon a time, there was a magical kingdom surrounded by mountains.",
        "The curious cat jumped over the fence to explore the garden.",
        "Technology has advanced rapidly, changing how we live and interact.",
        "In the heart of the forest, a hidden temple stood untouched for centuries.",
        "On a bright sunny day, children laughed and played in the park.",
        "Artificial intelligence is shaping the future of humanity in incredible ways.",
        "The brave knight ventured into the cave to fight the fierce dragon.",
        "A mysterious ship appeared on the horizon, sparking excitement among the villagers.",
    ],
)

# Wrap the raw sentences in a Hugging Face Dataset so they can be mapped
# and handed to the Trainer.
dataset = Dataset.from_dict(data)

# Load the pretrained checkpoint and its matching tokenizer by name.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# No pad token is configured after loading, so the end-of-sequence token is
# reused for padding on both the model config and the tokenizer.
model.config.pad_token_id = model.config.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize a batch of examples and attach causal-LM labels.

    Args:
        examples: A batch from ``Dataset.map(..., batched=True)``; its
            ``"text"`` entry is a list of strings.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) with an
        added ``labels`` entry. Labels equal ``input_ids`` on real tokens
        and ``-100`` on padding positions.
    """
    tokenized = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=50)

    # Every sequence is padded to 50 tokens and the pad token is the EOS
    # token, so copying input_ids verbatim would make the loss mostly a
    # "predict padding" objective. -100 is the index the Hugging Face LM loss
    # ignores, so only real tokens contribute to training.
    # NOTE(review): this also stops the model from being trained to emit EOS
    # after each sentence; append EOS to the text explicitly if that is wanted.
    tokenized["labels"] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

# Tokenize the whole dataset in batches.
tokenized_dataset = dataset.map(function=tokenize_function, batched=True)

# Training configuration, grouped by concern.
training_args = TrainingArguments(
    # Schedule
    num_train_epochs=3,
    per_device_train_batch_size=2,
    # Checkpointing
    output_dir="./results",
    overwrite_output_dir=True,
    save_steps=500,
    save_total_limit=1,
    # Logging (no external experiment tracker)
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",
)

trainer = Trainer(
    train_dataset=tokenized_dataset,
    args=training_args,
    model=model,
)

# Run the fine-tuning loop.
trainer.train()

# Save the model and the tokenizer into the same directory.
tokenizer.save_pretrained("./fine_tuned_gpt2")
trainer.save_model("./fine_tuned_gpt2")

print("Fine-tuning completed. The model and tokenizer are saved in './fine_tuned_gpt2'.")




Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Step,Training Loss
10,2.6839


Fine-tuning completed. The model and tokenizer are saved in './fine_tuned_gpt2'.
