In [4]:
!pip install -q transformers datasets peft accelerate bitsandbytes
!pip install -U datasets transformers accelerate peft
# Import necessary libraries
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
import torch



In [2]:
!pip install -U fsspec datasets
!rm -rf ~/.cache/huggingface/datasets  # Clean corrupted cache

Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


In [7]:
from datasets import load_dataset

# Load full train split
dataset = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")

# Select only the first 1000 examples for quick training
dataset = dataset.select(range(1000))



In [8]:
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Needed for padding

model = AutoModelForCausalLM.from_pretrained(model_name)

In [9]:
# ✅ Apply LoRA (Low-Rank Adaptation)
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["c_attn", "q_attn"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, lora_config)




In [10]:
# ✅ Tokenize + add labels
def tokenize(example):
    tokens = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=128,  # Smaller for faster training
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [11]:
# ✅ Define training arguments
training_args = TrainingArguments(
    output_dir="./output",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    save_steps=100,             # Saves every 100 steps
    logging_steps=10,           # Log every 10 steps
    max_steps=200,              # Cap training
    report_to="none",           # Disable logging integrations
    logging_dir="./logs",       # Local log dir
    save_total_limit=1,         # Only keep 1 checkpoint
    remove_unused_columns=False,
)


In [12]:
# ✅ Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [13]:
# ✅ Train the model (will show logs every 10 steps)
trainer.train()


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
10,7.9838
20,7.5025
30,6.5583
40,5.4798
50,5.0953
60,4.0076
70,4.4993
80,5.0047
90,4.151
100,4.2751


TrainOutput(global_step=200, training_loss=4.678085470199585, metrics={'train_runtime': 1332.5733, 'train_samples_per_second': 0.6, 'train_steps_per_second': 0.15, 'total_flos': 52349003366400.0, 'train_loss': 4.678085470199585, 'epoch': 0.8})

In [14]:
# ✅ Save the fine-tuned model
model.save_pretrained("gpt2-lora-finetuned")
tokenizer.save_pretrained("gpt2-lora-finetuned")



('gpt2-lora-finetuned/tokenizer_config.json',
 'gpt2-lora-finetuned/special_tokens_map.json',
 'gpt2-lora-finetuned/vocab.json',
 'gpt2-lora-finetuned/merges.txt',
 'gpt2-lora-finetuned/added_tokens.json',
 'gpt2-lora-finetuned/tokenizer.json')

In [15]:
!pip install gradio



In [16]:
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# ✅ Load tokenizer and base GPT-2 model
tokenizer = AutoTokenizer.from_pretrained("gpt2-lora-finetuned")
base_model = AutoModelForCausalLM.from_pretrained("gpt2")

# ✅ Load the LoRA fine-tuned weights
model = PeftModel.from_pretrained(base_model, "gpt2-lora-finetuned")
model.eval()

# ✅ Generate response function
def generate_text(prompt, max_new_tokens=50, temperature=0.7):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# ✅ Create Gradio Interface
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=3, placeholder="Enter a prompt...", label="Prompt"),
        gr.Slider(20, 200, value=50, step=10, label="Max New Tokens"),
        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="🧠 GPT-2 Fine-Tuned with LoRA",
    description="Enter a prompt and generate text using your fine-tuned GPT-2 model with LoRA!",
)

# ✅ Launch the app
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ffd27ae770ffa4ede7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


