# Install Unsloth & Dependencies

In [None]:
# Notebook shell commands: install unsloth first, then the remaining
# libraries used below (datasets, accelerate, bitsandbytes, trl).
# The -q flag asks pip for quieter output.
!pip install -q unsloth
!pip install -q datasets accelerate bitsandbytes trl


# Import Libraries

In [None]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments


# Load Base Model (4-bit QLoRA)

In [None]:
# Checkpoint identifier passed to the loader below.
model_name = "unsloth/llama-3-8b-bnb-4bit"

# Load the model together with its tokenizer.
# NOTE(review): dtype=None presumably lets Unsloth choose the compute dtype
# for the current GPU — confirm against the Unsloth documentation.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    load_in_4bit=True,
    dtype=None,
    max_seq_length=2048,
)

# Add LoRA Adapters (PEFT)

In [None]:
# Wrap the loaded model with LoRA adapters on the seven listed projection
# modules; `model` is rebound to the adapter-wrapped version.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=16,  # LoRA rank
    lora_alpha=32,
    lora_dropout=0.05,
    use_gradient_checkpointing="unsloth",
)


# Medical Q&A Dataset

In [None]:
# Take at most the first 2000 rows of the "pqa_labeled" configuration's
# train split.
# NOTE(review): this subset may contain fewer than 2000 rows, in which case
# the cap never binds — confirm the split size on the dataset card.
dataset = load_dataset(
    path="pubmed_qa",
    name="pqa_labeled",
    split="train[:2000]",
)


# Format Dataset

In [None]:
def formatting_func(example):
    """Render one dataset row as a question/answer training prompt.

    Args:
        example: Mapping that provides the ``"question"`` and
            ``"long_answer"`` entries of a row.

    Returns:
        A string with the question under a ``### Question:`` heading and the
        long answer under a ``### Answer:`` heading, separated by a blank line.

    Raises:
        KeyError: If either entry is missing from ``example``.
    """
    question = example["question"]
    answer = example["long_answer"]
    template = "### Question:\n{}\n\n### Answer:\n{}"
    return template.format(question, answer)

# Terminate every training text with the tokenizer's end-of-sequence token so
# the model sees where an answer stops; without it, generation tends to keep
# going until max_new_tokens is exhausted.
EOS_TOKEN = tokenizer.eos_token

# Add a "text" column holding the formatted prompt for each row.
dataset = dataset.map(lambda row: {
    "text": formatting_func(row) + EOS_TOKEN
})

# Configure Training Arguments (QLoRA Training)

In [None]:
# Pick the mixed-precision mode once: bf16 when the GPU reports support for
# it, fp16 otherwise. Exactly one of the two flags ends up True.
bf16_available = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="./medical_llama3",
    # 2 sequences per step x 4 accumulation steps per optimizer update.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    learning_rate=2e-4,
    optim="adamw_8bit",
    fp16=not bf16_available,
    bf16=bf16_available,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
)

# Start Fine-Tuning

In [None]:
# Build the supervised fine-tuning trainer over the "text" column of the
# dataset, using the training arguments configured earlier.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
)

# Run the training loop; left as a bare expression so a notebook still shows
# the returned value.
trainer.train()

# Monitor GPU Memory (Colab)

In [None]:
# memory_summary() returns a multi-line string. Printing it renders the report
# as readable text instead of an escaped repr (notebook) or nothing at all
# (plain script).
print(torch.cuda.memory_summary())

# Save Fine-Tuned Adapter

In [None]:
# Write the adapter-wrapped model and the tokenizer into the same directory.
adapter_dir = "medical_lora_adapter"

model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

# Test Model on New Medical Query

In [None]:
# Put the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Same "### Question / ### Answer" layout that formatting_func produces, with
# the answer section left empty for the model to complete.
prompt = (
    "### Question:\n"
    "What are the symptoms of diabetes?\n"
    "\n"
    "### Answer:\n"
)

# Tokenize to PyTorch tensors and move them to the GPU.
encoded = tokenizer(prompt, return_tensors="pt")
inputs = encoded.to("cuda")

# Sample up to 200 new tokens at temperature 0.7.
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=200,
)

# Decode the first (only) returned sequence, dropping special tokens.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)