In [2]:
pip install peft transformers datasets accelerate


Note: you may need to restart the kernel to use updated packages.


In [None]:
import os

# Keep transformers on the PyTorch code path. The USE_TF switch is evaluated
# while `transformers` is being imported, so it must be set before the import
# below; setting it afterwards has no effect.
os.environ["USE_TF"] = "0"
# Kept from the original notebook.
# NOTE(review): this does not look like a variable transformers reads -
# confirm and drop it if so.
os.environ["TRANSFORMERS_NO_TENSORFLOW"] = "1"

from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig
from datasets import load_dataset

# Load tokenizer. GPT-2 ships without a pad token, so the EOS token is reused
# for padding.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Load GPT-2 model
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Configure LoRA on the fused attention projection.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["c_attn"],
    # GPT-2's c_attn is a Conv1D layer whose weight is stored as
    # (fan_in, fan_out); PEFT needs to be told so explicitly.
    fan_in_fan_out=True,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model so that only the LoRA adapter weights are trainable.
model = get_peft_model(model, lora_config)

# Load the raw WikiText-103 splits (train / validation / test).
dataset = load_dataset("wikitext", "wikitext-103-raw-v1")

# Tokenize function
def tokenize_function(example):
    """Tokenize text to fixed-length sequences and build causal-LM labels.

    Works on a single example or on a batch (as passed by
    ``dataset.map(..., batched=True)``).

    Args:
        example: Mapping with a ``"text"`` entry holding one string or a list
            of strings.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) padded or
        truncated to 128 tokens, with an added ``labels`` entry. ``labels``
        mirrors ``input_ids`` except that padded positions are set to -100.
        Rows with no real tokens (for example blank lines) therefore end up
        with every label ignored.
    """
    # Label value that the model's cross-entropy loss skips.
    ignore_index = -100

    tokens = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]

    def mask_padding(ids, mask):
        # The pad token is the EOS token here, so padding cannot be recognised
        # by token id; the attention mask is the only reliable marker.
        return [tok if keep else ignore_index for tok, keep in zip(ids, mask)]

    if input_ids and isinstance(input_ids[0], list):
        # Batched call: one list of ids per input string.
        tokens["labels"] = [
            mask_padding(ids, mask)
            for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        # Single example (or an empty batch).
        tokens["labels"] = mask_padding(input_ids, attention_mask)
    return tokens

# Raw WikiText contains many blank lines. Tokenized with max-length padding
# they would become sequences made only of padding, so drop them first.
non_empty_dataset = dataset.filter(lambda row: row["text"].strip() != "")

# Apply tokenization
tokenized_dataset = non_empty_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./lora-gpt2",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,  # effective batch of 32 sequences per device
    logging_steps=10,
    save_steps=500,
    logging_dir="./logs",
    # eval_steps is only consulted when a step-based evaluation strategy is
    # configured; none is set here, so evaluation is run explicitly after
    # training (see below).
    eval_steps=500,
    remove_unused_columns=False,   # keeps the Trainer from pruning dataset columns
    fp16=False,                    # leave off unless training on a GPU that supports it
)

# Create trainer
# NOTE(review): newer transformers releases prefer `processing_class=` over
# `tokenizer=` - switch once the installed version is pinned.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
)

# Train
trainer.train()

# Checkpoints are only written every `save_steps`; persist the final state
# (the LoRA adapter) before doing anything else.
trainer.save_model()

# Report metrics on the validation split.
print(trainer.evaluate())
