In [1]:
#Import Libraries


import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType

In [None]:
#Load Hugging Face Instruction Dataset


# Download the Alpaca instruction dataset from the Hugging Face Hub.
# The next cell reads its "train" split and the instruction / input / output
# fields of each record.
dataset = load_dataset("tatsu-lab/alpaca")

In [None]:
#Convert Dataset → Instruction Prompt Format


def format_prompt(example):
    """Render one Alpaca record as a single instruction-tuning prompt string.

    Args:
        example: Mapping with the string fields ``instruction``, ``input``
            and ``output``.

    Returns:
        A dict with a single ``"text"`` key holding the prompt, laid out as
        ``### Instruction`` / ``### Input`` / ``### Response`` sections.
    """
    # Records without extra context have an empty ``input``. The inference
    # prompts in this notebook write "None" in that slot, so training must do
    # the same; otherwise the model is queried with a layout it never saw.
    context = example["input"]
    if context is None or not context.strip():
        context = "None"

    return {
        "text": f"""### Instruction:
{example['instruction']}

### Input:
{context}

### Response:
{example['output']}"""
    }

# `load_dataset` returns a dict of splits, and this cell rebinds `dataset` to
# the flat "train" split. Only index by "train" while `dataset` is still that
# dict, so re-running the cell does not fail on the already-flattened split.
# The map adds a "text" column holding the rendered prompt for every record.
dataset = (dataset["train"] if isinstance(dataset, dict) else dataset).map(format_prompt)


In [None]:
#Load Model & Tokenizer


model_name = "distilgpt2"

# Base causal LM; the LoRA adapters are attached to it in a later cell.
model = AutoModelForCausalLM.from_pretrained(model_name)

# Matching tokenizer. It reuses the end-of-sequence token as its padding
# token and pads on the right-hand side.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
#Tokenization


def tokenize(batch, max_length=512):
    """Tokenize a batch of prompt strings for causal-LM training.

    Args:
        batch: Mapping with a ``"text"`` entry (a list of strings when called
            through ``Dataset.map(..., batched=True)``).
        max_length: Longest sequence kept, in tokens; longer prompts are
            truncated to this length.

    Returns:
        The tokenizer's encoding of the batch, truncated but not padded.
    """
    # No padding here on purpose: the data collator used for training pads
    # each batch to its own longest sequence. Padding every example to
    # ``max_length`` up front makes every training step process full-length
    # sequences even when the prompts are short.
    return tokenizer(batch["text"], truncation=True, max_length=max_length)

# Tokenize in batches and drop every pre-existing column, so only the fields
# returned by `tokenize` remain.
tokenized_dataset = dataset.map(tokenize, remove_columns=dataset.column_names, batched=True)


In [None]:
#Apply LoRA


lora_config = LoraConfig(
    r=16,                # rank of the low-rank update matrices
    lora_alpha=32,       # scaling factor applied to the LoRA update
    lora_dropout=0.05,
    target_modules=["c_attn", "c_proj"],
    # The targeted GPT-2 projections are `Conv1D` layers, which store their
    # weight as (fan_in, fan_out). Declaring that here states the intent
    # explicitly instead of relying on PEFT to detect it and warn.
    fan_in_fan_out=True,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# NOTE: this rebinds `model` to the adapter-wrapped model. Re-run the
# model-loading cell first if this cell has to be executed again.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [7]:
#Data Collator


# Collates tokenized examples into training batches; mlm=False switches off
# masked-language-model token masking.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

In [8]:
#Training Arguments


training_args = TrainingArguments(
    # Checkpointing: where, how often, and how many checkpoints to keep.
    output_dir="./output",
    save_steps=500,
    save_total_limit=2,
    # Optimisation settings.
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Mixed precision only when a CUDA device is available.
    fp16=torch.cuda.is_available(),
    # Training-only run: evaluation is switched off.
    do_eval=False,
    logging_steps=100,
)


In [None]:
#Trainer & Training


# Wire the adapter-wrapped model, the tokenized prompts and the collator
# together; no evaluation dataset is supplied.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_dataset,
)

# Run the fine-tuning loop.
trainer.train()

In [None]:
#Mount Google Drive

# Colab-only step: mount Google Drive at /content/drive so the trained adapter
# can be written somewhere that outlives this runtime.
from google.colab import drive
drive.mount('/content/drive')



In [11]:
#Define a Drive save path

# Google Drive folder the adapter and tokenizer are saved to. The loading
# cells further down read from this same path.
SAVE_DIR = "/content/drive/MyDrive/llm_models/lora_distilgpt2_alpaca"


In [None]:
#Save LoRA model + tokenizer

# Persist the fine-tuned (adapter-wrapped) model and the tokenizer side by
# side, so the loading cells can restore both from SAVE_DIR.
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

# Status message with the check-mark emoji restored (it was mis-encoded).
print("✅ LoRA model saved to Google Drive!")


Load the model from Google Drive



In [None]:
#Mount Drive again

# Colab-only step: mount Google Drive again so the saved adapter can be read
# back (needed when this part is run in a fresh runtime).
from google.colab import drive
drive.mount('/content/drive')


In [None]:
#Load base model + tokenizer


import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "distilgpt2"
LORA_PATH = "/content/drive/MyDrive/llm_models/lora_distilgpt2_alpaca"

# The tokenizer was saved next to the adapter, so restore it from there.
tokenizer = AutoTokenizer.from_pretrained(LORA_PATH)
tokenizer.pad_token = tokenizer.eos_token

# Request half precision only when a CUDA device is present and fall back to
# float32 otherwise. This matches the loading code in the UI section and keeps
# the cell usable on a CPU-only runtime.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)


In [None]:
#Attach LoRA adapters

# Attach the saved LoRA adapter to the freshly loaded base model and switch
# to evaluation mode for inference.
model = PeftModel.from_pretrained(base_model, LORA_PATH)
model.eval()

# Status message with the check-mark emoji restored (it was mis-encoded).
print("✅ LoRA model loaded successfully!")


Check that the model works correctly

In [None]:
# Sanity-check prompt in the same section layout used for training.
prompt = """### Instruction:
What is machine learning?

### Input:
None

### Response:
"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=120,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # The tokenizer pads with the end-of-sequence token; pass that id
        # explicitly so generate() does not have to guess a padding token.
        pad_token_id=tokenizer.eos_token_id,
    )

# The decoded text contains the prompt followed by the generated continuation.
print(tokenizer.decode(output[0], skip_special_tokens=True))


Compare BEFORE vs AFTER fine-tuning:

In [None]:
#Base model (no LoRA)

# Separate copy of the base checkpoint with no adapter attached, used as the
# "before fine-tuning" side of the comparison.
# Half precision is requested only when a CUDA device is present (float32
# otherwise), matching the loading code in the UI section.
base_only = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

inputs = tokenizer(prompt, return_tensors="pt").to(base_only.device)

# No sampling arguments are passed, so generate() uses its default decoding
# settings here, the same as the LoRA comparison cell that follows.
out = base_only.generate(**inputs, max_new_tokens=120)
print(tokenizer.decode(out[0], skip_special_tokens=True))


In [None]:
#LoRA fine-tuned model


# Same prompt through the adapter-wrapped model. No sampling arguments are
# passed, so generate() uses its default decoding settings.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)

out = model.generate(max_new_tokens=120, **inputs)
print(tokenizer.decode(out[0], skip_special_tokens=True))


In [None]:
#Programmatic check

# Print the adapter-wrapped model's trainable-parameter summary.
model.print_trainable_parameters()


UI PART:

In [None]:
# Install Dependencies

!pip install -q gradio transformers peft accelerate


# Mount Google Drive


# Colab-only step: mount Google Drive so the UI can load the saved adapter.
from google.colab import drive
drive.mount('/content/drive')



# Imports


import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel



# Paths & Model Loading


BASE_MODEL = "distilgpt2"
LORA_PATH = "/content/drive/MyDrive/llm_models/lora_distilgpt2_alpaca"

# Load tokenizer (saved alongside the adapter); pad with the EOS token.
tokenizer = AutoTokenizer.from_pretrained(LORA_PATH)
tokenizer.pad_token = tokenizer.eos_token

# Load base model: half precision on GPU, float32 on CPU-only runtimes.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

# Load LoRA adapters on top of the base model and switch to inference mode.
model = PeftModel.from_pretrained(base_model, LORA_PATH)
model.eval()

# Status message with the check-mark emoji restored (it was mis-encoded).
print("✅ LoRA model loaded successfully!")



# Inference Function


def generate_response(instruction, user_input):
    """Generate the model's answer for one instruction submitted from the UI.

    Args:
        instruction: Task description typed by the user. ``None`` or a blank
            string yields a warning message instead of a generation.
        user_input: Optional extra context. ``None`` or a blank string is
            replaced by the literal ``"None"`` in the prompt.

    Returns:
        The generated response text without the echoed prompt, or a short
        warning message when the request cannot be served.
    """
    max_new_tokens = 200

    # Treat a missing field like an empty one instead of crashing on None.
    instruction = instruction or ""
    user_input = user_input or ""

    if not instruction.strip():
        return "⚠️ Please enter an instruction."

    if not user_input.strip():
        user_input = "None"

    prompt = f"""### Instruction:
{instruction}

### Input:
{user_input}

### Response:
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    # Prompt plus generated tokens must fit in the model's context window.
    # Shrink the generation budget for long prompts and refuse when nothing
    # fits, rather than letting generation fail on over-long user input.
    context_limit = getattr(model.config, "max_position_embeddings", None)
    if context_limit is not None:
        max_new_tokens = min(max_new_tokens, context_limit - prompt_len)
    if max_new_tokens <= 0:
        return "⚠️ The instruction and input are too long for this model. Please shorten them."

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # The tokenizer pads with EOS; state that explicitly.
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation. Only the continuation belongs
    # in the "Model Response" box, so drop the prompt tokens before decoding.
    new_tokens = outputs[0][prompt_len:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()



# Gradio UI (BIG RESPONSE BOX)


# Two-column layout: prompt fields and buttons on the left, a large response
# box on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:

    # Page header (book emoji restored; it was mis-encoded).
    gr.Markdown(
        """
        # 📘 Instruction Fine-Tuned LLM
        ### LoRA + DistilGPT-2
        This demo uses a DistilGPT-2 model fine-tuned on the Alpaca dataset using **LoRA (PEFT)**.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            instruction = gr.Textbox(
                label="Instruction",
                placeholder="Write a Python function to check palindrome",
                lines=3
            )

            user_input = gr.Textbox(
                label="Input (optional)",
                placeholder="Additional context if any",
                lines=3
            )

            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear")

        with gr.Column(scale=2):
            output = gr.Textbox(
                label="Model Response",
                lines=20,                 # tall box so long responses stay readable
                show_copy_button=True
            )

    # Submit: run the model on the two input fields and show the result.
    submit_btn.click(
        fn=generate_response,
        inputs=[instruction, user_input],
        outputs=output
    )

    # Clear: reset both input fields and the response box to empty strings.
    clear_btn.click(
        fn=lambda: ("", "", ""),
        inputs=None,
        outputs=[instruction, user_input, output]
    )

# share=True asks Gradio for a public share link in addition to the local app.
demo.launch(share=True)
