In [None]:
# 🧩 Install Required Packages
!pip install -U transformers datasets peft trl accelerate bitsandbytes

In [None]:
# 🧠 Load and Format the Dataset
from datasets import load_dataset

# Fetch the first 500 rows of the train split from the labeled PubMedQA config
hf_data = load_dataset(
    path="pubmed_qa",
    name="pqa_labeled",
    split="train[:500]",
)

# Reformat into question-context-answer format
def format_hf(example):
    """Return the question, context and answer fields of one dataset row.

    The answer is read from the row's ``final_decision`` entry; the question
    and context values are passed through unchanged.
    """
    question = example["question"]
    context = example["context"]
    answer = example["final_decision"]
    return dict(question=question, context=context, answer=answer)

# Run format_hf over every row and rebind hf_data to the mapped dataset
hf_data = hf_data.map(function=format_hf)

# Format into prompt-style input for instruction tuning
def format_prompt(example):
    """Build the single-string training prompt for one example.

    Args:
        example: Mapping with ``question``, ``context`` and ``answer`` entries.
            ``context`` may be a plain string, a list/tuple of passages, or a
            mapping that keeps its passages under a ``contexts`` key.

    Returns:
        A dict with a single ``text`` key holding the question, the context
        and the answer, each on its own labeled line.
    """
    context = example["context"]
    # A structured context would otherwise be interpolated as its dict repr
    # (keys, brackets and all), so keep only the passage text.
    # NOTE(review): PubMedQA rows appear to store passages under "contexts" —
    # confirm against the dataset card.
    if isinstance(context, dict):
        context = context.get("contexts", context)
    if isinstance(context, (list, tuple)):
        context = " ".join(str(passage) for passage in context)
    return {
        "text": f"Question: {example['question']}\nContext: {context}\nAnswer: {example['answer']}"
    }

# Add the prompt-style "text" column produced by format_prompt
dataset = hf_data.map(function=format_prompt)


In [None]:
# 🧪 Tokenize Dataset
from transformers import AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token to avoid padding issues
tokenizer.pad_token = tokenizer.eos_token


def _tokenize_batch(batch):
    """Tokenize the ``text`` column, truncating and padding each row to 512 tokens."""
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )


tokenized_dataset = dataset.map(_tokenize_batch, batched=True)
tokenized_dataset.set_format("torch")


In [None]:
# 🔧 Load Base Model with Quantization & Apply LoRA
import torch  # required for torch.float16 below; previously missing (NameError on a fresh kernel)
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Request 8-bit loading of the model weights
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the checkpoint named by model_id with the quantization config above;
# device_map="auto" leaves device placement to the library.
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16
)

# LoRA settings: rank 8, alpha 16, applied to the q_proj and v_proj modules
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Ready the quantized base model for k-bit training, then wrap it with the adapters
base_model = prepare_model_for_kbit_training(base_model)
model = get_peft_model(base_model, lora_config)


In [None]:
# 🏋️ Fine-Tune the Model
from transformers import TrainingArguments
from trl import SFTTrainer

# Options for the fine-tuning run, collected in one place before constructing
# the TrainingArguments object
training_options = {
    "output_dir": "medllama-lora-tiny",
    "per_device_train_batch_size": 2,
    "num_train_epochs": 3,
    "logging_dir": "./logs",
    "logging_steps": 10,
    "save_total_limit": 1,
    "save_strategy": "no",
    "fp16": True,
    "report_to": "none",
}
training_args = TrainingArguments(**training_options)

import inspect

# Newer TRL releases take the tokenizer as `processing_class`; older ones take
# `tokenizer`. The install cell does not pin a TRL version, so pick whichever
# keyword the installed SFTTrainer actually accepts instead of hard-coding one.
_sft_params = inspect.signature(SFTTrainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _sft_params else "tokenizer"

trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    **{_tokenizer_kwarg: tokenizer},
)

# Run the fine-tuning loop
trainer.train()


In [None]:
# 💾 Save the Adapter Model
# Write the trained model and then the tokenizer into the same output directory
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained("medllama-lora-tiny")


In [None]:
# 📦 Optional: Zip and Download Model
import shutil

# Build medllama-lora-tiny.zip with the standard library so the cell does not
# depend on a `zip` executable being available on the host.
shutil.make_archive("medllama-lora-tiny", "zip", root_dir=".", base_dir="medllama-lora-tiny")

try:
    from google.colab import files
except ImportError:
    # Not running in Colab: no browser-download helper exists, so report the path instead.
    print("Archive written to medllama-lora-tiny.zip")
else:
    files.download("medllama-lora-tiny.zip")


In [None]:
# ✅ Inference Example
# Switch to evaluation mode before generating
model.eval()
prompt = "Question: What are the symptoms of diabetes?\nContext: Diabetes is a chronic disease...\nAnswer:"
# Move the inputs to the device the model was actually loaded onto rather than
# assuming "cuda" (the model was placed with device_map="auto").
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
