![medical-llm-thumbnail-alt.jpg](attachment:3acc2c03-ff29-4a0f-9f78-4bdb172dea40.jpg)

**Setting up the Environment**


In [1]:
# Install required Libraries
# !pip install unsloth datasets trl wandb
# !pip install vllm

In [2]:
!pip install datasets

Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading fsspec-2024.12.0-py3-none-any.whl (183 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.12.0 which is incompatible.
torch 2.5.1+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cublas-cu12 12.8.4.1 which is incompa

In [3]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2025.4.1-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.4/46.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.4.1 (from unsloth)
  Downloading unsloth_zoo-2025.4.1-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.19-py3-none-any.whl.metadata (9.9 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth)
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.4.0->unsloth)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (

In [4]:
import torch

# Fail fast when no CUDA device is visible; otherwise report which GPU
# (device index 0) the rest of the notebook will run on.
cuda_ready = torch.cuda.is_available()
assert cuda_ready, "GPU required for efficient fine‑tuning"

gpu_name = torch.cuda.get_device_name(0)
print("GPU device:", gpu_name)

AssertionError: GPU required for efficient fine‑tuning

In [None]:
import os
from getpass import getpass

import wandb

# Never commit API keys to a notebook: read the key from the WANDB_API_KEY
# environment variable, and fall back to an interactive (non-echoing) prompt.
# NOTE: the key that was previously hardcoded here must be treated as leaked
# and revoked in the W&B account settings.
wandb_api_key = os.environ.get("WANDB_API_KEY") or getpass("Weights & Biases API key: ")
wandb.login(key=wandb_api_key)

**Dataset Preparation & Loading**

In [None]:
# Load the "en" configuration (train split) of the medical reasoning SFT dataset.
from datasets import load_dataset

# `trust_remote_code=True` was removed: it opts in to executing code shipped
# with the dataset repository, which should not be enabled unless required.
# NOTE(review): this dataset appears to ship plain data files only — confirm
# on the dataset card that no loading script is needed.
ds = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "en",
    split="train",
)

In [None]:
# Show the dataset summary as a quick sanity check of the load above.
print(ds)

In [None]:
# Hold out a fixed-size validation slice; every remaining row is training data.
SPLIT_SEED = 42   # seed for the shuffle that precedes the split
N_VALID = 100     # number of rows reserved for validation

# Shuffle into a NEW name instead of rebinding `ds`: re-running this cell then
# always starts from the same input and reproduces the same split.
ds_shuffled = ds.shuffle(seed=SPLIT_SEED)
ds_valid = ds_shuffled.select(range(N_VALID))
ds_train = ds_shuffled.select(range(N_VALID, len(ds_shuffled)))

**Prompt Formatting & Split**

In [None]:
def format_cot(example):
    """Render one dataset row as a single tagged training string.

    Args:
        example: mapping with the keys ``"Question"``, ``"Complex_CoT"`` and
            ``"Response"``.

    Returns:
        A dict with one key, ``"prompt"``, holding four newline-separated
        sections: a fixed ``<system>`` line, the question wrapped in
        ``<user>``, the chain of thought wrapped in ``<think>``, and the
        answer wrapped in ``<response>``.
    """
    sections = [
        "<system>You are a medical expert...</system>",
        "<user>Question: {}</user>".format(example["Question"]),
        "<think>{}</think>".format(example["Complex_CoT"]),
        "<response>{}</response>".format(example["Response"]),
    ]
    return {"prompt": "\n".join(sections)}

# Add the rendered "prompt" column to the training and validation splits.
ds_train = ds_train.map(function=format_cot)
ds_valid = ds_valid.map(function=format_cot)

**Load base model & LoRA**

In [None]:
from unsloth import FastLanguageModel

# Load the 4-bit base model together with its tokenizer. The tokenizer was
# previously discarded (`model, _ = ...`); keeping it means later cells can use
# the tokenizer that actually matches this checkpoint.
# NOTE(review): `fast_inference` / `gpu_memory_utilization` look like
# vLLM-backed options, while the vllm install in the first cell is commented
# out — confirm vLLM is available in the runtime.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    load_in_4bit=True,
    max_seq_length=1024,
    fast_inference=True,
    gpu_memory_utilization=0.7,
)

In [None]:
# Wrap the base model with LoRA adapters (rank 16, alpha 32) on the listed
# projection modules; gradient checkpointing uses Unsloth's own mode.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

**Configure & Run Fine-tuning**

In [None]:
from unsloth import is_bfloat16_supported
from trl import SFTConfig

# Pick the mixed-precision mode from the hardware instead of hard-coding fp16:
# bf16-capable GPUs should train in bf16, older ones (e.g. T4) in fp16.
use_bf16 = is_bfloat16_supported()

training_args = SFTConfig(
    # Batching: effective batch size = 4 * 8 = 32 sequences per optimizer step.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    per_device_eval_batch_size=8,
    # Optimisation schedule.
    num_train_epochs=2,
    learning_rate=2e-5,
    fp16=not use_bf16,
    bf16=use_bf16,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    # Logging / evaluation / checkpointing cadence (all in optimizer steps).
    logging_steps=50,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=5,                 # keep at most 5 checkpoints on disk
    report_to="wandb",
    output_dir="llama3.2-medcot-lora",  # checkpoints are written here
    # Column of the dataset that holds the formatted training text.
    dataset_text_field="prompt",
    seed=42,
)


In [None]:
from trl import SFTTrainer

# Assemble the supervised fine-tuning trainer from the LoRA-wrapped model,
# the training configuration and the two formatted splits.
trainer = SFTTrainer(
    model=model,
    train_dataset=ds_train,
    eval_dataset=ds_valid,
    args=training_args,
    # No tokenizer is passed explicitly; the original author notes that
    # Unsloth handles it internally — TODO confirm.
    tokenizer=None,
)


In [None]:
# Run the fine-tuning loop with the trainer configured above.
trainer.train()

**Save Adapter & Tokenizer**

In [None]:
from transformers import AutoTokenizer

# Write the trained LoRA adapter to its output folder.
trainer.model.save_lora("llama3.2-medcot-lora")

# Fetch the base model's tokenizer and store it in a sibling folder so it can
# be uploaded separately.
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
tokenizer.save_pretrained("llama3.2-medcot-lora_token")

print("Saved Successfully!")


In [None]:
!pip install huggingface_hub

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# The previous call passed the literal placeholder string "HF_API_KEY" as the
# token. Read the real token from the environment instead (HF_API_KEY, then
# the conventional HF_TOKEN) and fall back to a non-echoing prompt, so the
# secret never has to be written into the notebook.
hf_token = (
    os.environ.get("HF_API_KEY")
    or os.environ.get("HF_TOKEN")
    or getpass("Hugging Face token: ")
)
login(token=hf_token)

In [None]:
from huggingface_hub import create_repo, upload_folder

repo_id = "Hums003/PEFT_LlaMA_3.2_MCoT"  # Your HF username/repo
# Relative path: the adapter was saved relative to the working directory, so a
# hard-coded "/content/..." prefix only works when the notebook runs on Colab.
local_path = "llama3.2-medcot-lora"

# Create the repo if it does not exist yet.
create_repo(repo_id, exist_ok=True)

# Upload the adapter folder. This folder is also the trainer's output_dir, so
# skip the intermediate "checkpoint-*" sub-folders and publish only the final
# adapter files.
upload_folder(
    repo_id=repo_id,
    folder_path=local_path,
    commit_message="Upload model from Kaggle",
    ignore_patterns=["checkpoint-*/*"],
)


In [None]:
from huggingface_hub import create_repo, upload_folder

repo_id = "Hums003/PEFT_LlaMA_3.2_MCoT"  # Your HF username/repo
# Relative path: the tokenizer was saved relative to the working directory, so
# a hard-coded "/content/..." prefix only works when the notebook runs on Colab.
local_path = "llama3.2-medcot-lora_token"

# Create the repo if it does not exist yet.
create_repo(repo_id, exist_ok=True)

# Upload the tokenizer files into the same repo as the adapter.
upload_folder(
    repo_id=repo_id,
    folder_path=local_path,
    commit_message="Upload tokenizer from Kaggle",  # fixed "Kaggel" typo
)

**ROUGE-L Score**

In [None]:
!pip install evaluate

In [None]:
!pip install rouge_score

In [None]:
import evaluate
from unsloth import FastLanguageModel

# Upper bound on generated tokens per example. The model now has to write its
# <think> section before the <response>, so the old limit of 256 would often
# cut generation off before the answer. Tune as needed.
MAX_NEW_TOKENS = 768


def extract_response(text):
    """Return the answer part of a generated completion.

    Takes the text after the first ``<response>`` tag, up to ``</response>``
    when the closing tag is present (generation may be truncated before it).
    If no ``<response>`` tag exists, the whole stripped text is returned.
    """
    _, open_tag, tail = text.partition("<response>")
    if not open_tag:
        return text.strip()
    return tail.partition("</response>")[0].strip()


# Initialize ROUGE evaluation metric
rouge = evaluate.load("rouge")

# Switch the Unsloth model to its inference mode before generating.
FastLanguageModel.for_inference(model)

# Generate and collect predictions & references
predictions = []
references = []

for example in ds_valid:
    # Gold answer comes straight from the dataset column.
    references.append(example["Response"].strip())

    # Feed ONLY the system + user part of the formatted prompt. The previous
    # version passed the full training text — including the gold <think> and
    # <response> — to the model and then parsed the decoded prompt, so the
    # "prediction" was the reference itself and ROUGE-L was meaningless.
    gen_prompt = example["prompt"].partition("<think>")[0]

    inputs = tokenizer(gen_prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,  # greedy decoding: deterministic for evaluation
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_len = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
    predictions.append(extract_response(completion))

# Compute ROUGE‑L
results = rouge.compute(predictions=predictions, references=references, rouge_types=["rougeL"])

# The output is directly a float, so just print it
print(f"🍀 ROUGE‑L on validation set: {results['rougeL']:.4f}")