In [7]:
!pip install transformers datasets torch

from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import torch

# Load the GPT-2 model and tokenizer
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Set pad_token to eos_token
tokenizer.pad_token = tokenizer.eos_token

# Load a small subset of the dataset (1% of the data)
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:1%]")  # Use only 1% of the dataset

# Tokenize the dataset and set labels
def tokenize_function(examples):
    tokenized_text = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)  # Shorten max_length
    tokenized_text["labels"] = tokenized_text["input_ids"].copy()  # Use input_ids as labels
    return tokenized_text

# Apply the tokenization
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# Define the training arguments with fewer epochs and smaller batch size
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,  # Train for only 1 epoch
    per_device_train_batch_size=8,  # Keep the batch size small to fit in memory
    save_steps=10_000,  # Save checkpoints less frequently
    save_total_limit=2,
    prediction_loss_only=False,  # Set to False to ensure loss is calculated
    logging_dir="./logs",  # Add logging directory
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Train the model (this should take around 10 minutes)
trainer.train()

# Example prompt
prompt = "Once upon a time"

# Encode the prompt
input_ids = tokenizer.encode(prompt, return_tensors="pt")

# Generate text with additional parameters to avoid warnings
output = model.generate(
    input_ids,
    max_length=100,
    num_return_sequences=1,
    pad_token_id=tokenizer.pad_token_id,  # Ensure padding token ID is set
    attention_mask=input_ids.ne(tokenizer.pad_token_id).long()  # Set attention mask to avoid warnings
)

# Decode and print the generated text
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print("Generated Text:")
print(generated_text)


model.save_pretrained("./trained_model")
tokenizer.save_pretrained("./trained_tokenizer")

# Load the model and tokenizer
model = GPT2LMHeadModel.from_pretrained("./trained_model")
tokenizer = GPT2Tokenizer.from_pretrained("./trained_tokenizer")

# Generate text again using the loaded model
input_ids = tokenizer.encode("Once upon a time", return_tensors="pt")
output = model.generate(
    input_ids,
    max_length=100,
    num_return_sequences=1,
    pad_token_id=tokenizer.pad_token_id,  # Ensure padding token ID is set
    attention_mask=input_ids.ne(tokenizer.pad_token_id).long()  # Set attention mask to avoid warnings
)
print("Generated Text with Loaded Model:")
print(tokenizer.decode(output[0], skip_special_tokens=True))

import os

import shutil

# Zip the trained_model directory
shutil.make_archive("trained_model", 'zip', "trained_model")

# Zip the trained_tokenizer directory
shutil.make_archive("trained_tokenizer", 'zip', "trained_tokenizer")

from google.colab import files

# Download the trained_model.zip file
files.download("trained_model.zip")

# Download the trained_tokenizer.zip file
files.download("trained_tokenizer.zip")







Step,Training Loss


Generated Text:
Once upon a time when the United States was in the midst of a civil war, the Confederate States were the most powerful and powerful in the country, and were the most powerful in the world. The Confederate States were the most powerful in the world, and were the most powerful in the world in the war. The Confederate States were the most powerful in the world in the war, and the most powerful in the world in the war. The Confederate States were the most powerful in the world in the war
Generated Text with Loaded Model:
Once upon a time, the city was a bustling metropolis, with many merchants and merchants, and many merchants and merchants of all kinds. The city was a bustling metropolis, with many merchants and merchants of all kinds. The city was a bustling metropolis, with many merchants and merchants of all kinds. The city was a bustling metropolis, with many merchants and merchants of all kinds. The city was a bustling metropolis, with many merchants and merchants of 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
drive.mount('/content/drive')