# QLoRA Fine-Tuning for Medical Symptom Classification

This notebook demonstrates step-by-step QLoRA fine-tuning of PubMedBERT for classifying patient symptoms into medical categories.

In [None]:
## 1. Install Dependencies
!pip install -q transformers peft bitsandbytes datasets accelerate sklearn

In [None]:
## 2. Define Categories and Objectives
# Integer class id -> specialty name. The position of each name in the tuple
# is its class id (0 through 9).
medical_categories = dict(enumerate((
    'Cardiac',
    'Respiratory',
    'Neurological',
    'Gastrointestinal',
    'Orthopedic',
    'Dermatological',
    'Endocrine',
    'Urological',
    'Psychiatric',
    'General',
)))

# Stated objectives for the run. Neither constant is read by the other
# cells visible in this notebook; they document the goal only.
TARGET_ACCURACY = 0.85  # 85%
MEMORY_BUDGET = '8GB'

In [None]:
## 3. Load and Prepare Data
import pandas as pd, re
from sklearn.model_selection import train_test_split

# Read the raw cases from CSV. Later cells use the 'symptoms' (free text)
# and 'category' (label) columns of this frame.
df = pd.read_csv(filepath_or_buffer='medical_symptoms_5000_cases.csv')

# Text cleaning function
# Clinical shorthand -> plain-language expansion.
_ABBREVIATIONS = {
    'sob': 'shortness of breath',
    'cp': 'chest pain',
    'n/v': 'nausea vomiting',
    'ha': 'headache',
    'abd': 'abdominal',
}

# One alternation over all abbreviations (longest first), matched only when
# not embedded in a longer word. Plain substring replacement would corrupt
# ordinary words, e.g. "sharp" -> "sheadacherp", "abdominal" -> "abdominalominal".
_ABBREV_RE = re.compile(
    r'(?<!\w)(?:'
    + '|'.join(re.escape(k) for k in sorted(_ABBREVIATIONS, key=len, reverse=True))
    + r')(?!\w)'
)

# Digits followed by an age marker; text after the marker is left untouched.
_AGE_RE = re.compile(r"\d+\s*(year|yo|age)")


def clean_text(text):
    """Normalize a free-text symptom description.

    Steps, in order:
      1. lower-case the text;
      2. expand known clinical abbreviations, but only where they appear as
         standalone tokens (single pass, so expansions are never re-expanded);
      3. replace "<digits><optional spaces>year|yo|age" with "adult"
         (characters following the marker, such as the "s old" in
         "45 years old", are kept as-is);
      4. collapse all whitespace runs to single spaces and strip the ends.

    Args:
        text: The raw symptom string.

    Returns:
        The cleaned string.
    """
    lowered = text.lower()
    expanded = _ABBREV_RE.sub(lambda m: _ABBREVIATIONS[m.group(0)], lowered)
    normalized = _AGE_RE.sub('adult', expanded)
    return ' '.join(normalized.split())

# Add a cleaned copy of the free-text column; the raw column is kept.
df['symptoms_clean'] = df['symptoms'].map(clean_text)

# Two-stage stratified split on 'category' with a fixed seed:
# 30% is held out first, then the hold-out is halved into validation/test.
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
    stratify=df['category'],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42,
    stratify=temp_df['category'],
)

# Final expression of the cell: the three split sizes as a tuple.
tuple(len(part) for part in (train_df, val_df, test_df))

In [None]:
## 4. Configure QLoRA
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Checkpoint identifier passed to the from_pretrained() calls below.
model_name = 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext'

# 4-bit loading: NF4 quantization type, double quantization enabled,
# float16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# LoRA adapter settings for sequence classification: rank 16, alpha 32,
# dropout 0.1, no bias training, adapters on the modules named
# query/key/value/dense.
lora_config = LoraConfig(
    task_type='SEQ_CLS',
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias='none',
    target_modules=['query', 'key', 'value', 'dense'],
)


In [None]:
## 5. Load Model and Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Fallback for tokenizers that ship without a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Derive the head size and label names from medical_categories instead of a
# hard-coded 10, so the model stays in sync if the category table changes
# and the saved config carries human-readable label names.
label2id = {name: idx for idx, name in medical_categories.items()}
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(medical_categories),
    id2label=medical_categories,
    label2id=label2id,
    quantization_config=quant_config,
    device_map='auto',
)

# Prepare the quantized model for k-bit training, then wrap it with the
# LoRA adapters described by lora_config.
base_model = prepare_model_for_kbit_training(base_model)
model = get_peft_model(base_model, lora_config)

def count_trainable(m):
    """Return the total element count of the parameters of ``m`` that have
    ``requires_grad`` set."""
    total = 0
    for param in m.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
# Report how many parameter elements of the PEFT-wrapped model require grad.
trainable_count = count_trainable(model)
print('Trainable params:', trainable_count)

In [None]:
## 6. Tokenize Datasets
from datasets import Dataset

def tokenize(ex):
    """Tokenize the ``symptoms_clean`` field of ``ex`` with the notebook's
    tokenizer, truncating and padding every sequence to 256 tokens."""
    texts = ex['symptoms_clean']
    return tokenizer(
        texts,
        max_length=256,
        padding='max_length',
        truncation=True,
    )
def _build_split(frame):
    """Convert one pandas split into a tokenized, torch-formatted Dataset.

    The label column is renamed from 'category' to 'labels': Trainer drops
    dataset columns that the model's forward() does not accept, so a column
    named 'category' would be removed and the model would never receive
    labels (no loss during training, no label_ids for compute_metrics).
    """
    # NOTE(review): assumes 'category' already holds integer class ids
    # matching medical_categories; string names would need mapping to ids
    # first. Confirm against the CSV.
    ds = Dataset.from_pandas(frame).map(tokenize, batched=True)
    ds = ds.rename_column('category', 'labels')
    ds.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
    return ds


train_ds = _build_split(train_df)
val_ds = _build_split(val_df)
test_ds = _build_split(test_df)


In [None]:
## 7. Train with QLoRA
from transformers import TrainingArguments, Trainer
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(p):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores, argmax'd along
            axis 1) and ``label_ids`` (reference class ids).

    Returns:
        Dict with keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    y_true = p.label_ids
    y_hat = np.argmax(p.predictions, axis=1)
    precision, recall, f_score, _support = precision_recall_fscore_support(
        y_true, y_hat, average='weighted'
    )
    return {
        'accuracy': accuracy_score(y_true, y_hat),
        'precision': precision,
        'recall': recall,
        'f1': f_score,
    }

import inspect

# The evaluation-schedule keyword is `evaluation_strategy` in older
# transformers releases and `eval_strategy` in newer ones. Use whichever the
# installed TrainingArguments accepts so the cell runs on either.
_eval_strategy_key = (
    'eval_strategy'
    if 'eval_strategy' in inspect.signature(TrainingArguments.__init__).parameters
    else 'evaluation_strategy'
)

args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=2,  # effective train batch of 32 per device
    learning_rate=2e-4,
    logging_steps=50,
    eval_steps=200,
    save_steps=200,  # kept equal to eval_steps, as load_best_model_at_end needs
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
    fp16=True,
    **{_eval_strategy_key: 'steps'},
)

# Pass datasets by keyword: Trainer's third positional parameter is
# data_collator, so Trainer(model, args, train_ds, val_ds) would bind the
# training set as the collator and the validation set as the training set.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)

# Score the untrained adapters on the test split first, as a baseline.
print('Baseline eval:', trainer.evaluate(test_ds))
trainer.train()

In [None]:
## 8. Evaluate After Fine-Tuning
# Aggregate metrics on the held-out test split.
res = trainer.evaluate(test_ds)
print(res)

# Per-example predictions for the per-class report.
preds = trainer.predict(test_ds)
y_true, y_pred = preds.label_ids, np.argmax(preds.predictions, axis=1)
from sklearn.metrics import classification_report

# Pass labels explicitly so each name in target_names is tied to its class
# id. Without `labels`, the report infers classes from the data and raises
# (or mislabels rows) if any class is missing from y_true and y_pred.
print(classification_report(
    y_true,
    y_pred,
    labels=list(medical_categories.keys()),
    target_names=list(medical_categories.values()),
))

In [None]:
## 9. Save Adapter and Inference
# Persist the adapter weights and the tokenizer side by side.
model.save_pretrained('./adapter')
tokenizer.save_pretrained('./adapter')

from peft import PeftModel

# Reload a fresh quantized base model and attach the saved adapter.
# num_labels is derived from medical_categories rather than hard-coded so
# the head size always matches the label table used by classify().
base = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(medical_categories),
    quantization_config=quant_config,
    device_map='auto',
)
loaded = PeftModel.from_pretrained(base, './adapter')
# Explicitly switch to inference mode so dropout is disabled when scoring.
loaded.eval()

def classify(text):
    """Classify a free-text symptom description with the reloaded adapter model.

    The text is cleaned with ``clean_text``, tokenized with the same
    truncation/padding settings used for training, and scored by ``loaded``.

    Args:
        text: Raw symptom description.

    Returns:
        Tuple ``(category_name, probability)`` for the highest-scoring class,
        where ``category_name`` comes from ``medical_categories`` and
        ``probability`` is the softmax score as a Python float.
    """
    inp = tokenizer(
        clean_text(text),
        return_tensors='pt',
        truncation=True,
        padding='max_length',
        max_length=256,
    )
    # The tokenizer returns CPU tensors; move them to wherever the model's
    # weights live to avoid a device-mismatch error when it sits on a GPU.
    device = next(loaded.parameters()).device
    inp = {key: tensor.to(device) for key, tensor in inp.items()}
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        out = loaded(**inp)
    scores = torch.softmax(out.logits, dim=-1)[0]
    idx = torch.argmax(scores).item()
    return medical_categories[idx], scores[idx].item()

# Smoke-test the inference helper on one example description.
demo_prediction = classify('severe chest pain radiating to arm cold sweats')
print(demo_prediction)