In [1]:
!pip install -q transformers datasets peft accelerate

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, PeftModel
from sklearn.model_selection import train_test_split
from pathlib import Path
import re, json

In [3]:
DATASET_NAME = "lavita/AlpaCare-MedInstruct-52k"
dataset = load_dataset(DATASET_NAME, split="train")

# Forbidden patterns (dosage, prescription cues)
DOSAGE_PATTERNS = [
    r"\b\d+(\.\d+)?\s*(mg|mcg|g|units|iu|ml)\b",  # numeric amounts with units
    r"\btake\s+\d+", r"\bprescrib(?:e|ed|ing)\b", r"\bdose\b",
    r"\btablet\b"
]

def is_forbidden(text):
    text = text.lower()
    return any(re.search(pat, text) for pat in DOSAGE_PATTERNS)

def normalize_example(item):
    # Dataset fields may vary; prefer 'instruction' or 'prompt', 'response' or 'output'.
    instr = item.get("instruction") or item.get("prompt") or ""
    out = item.get("response") or item.get("output") or ""
    return instr.strip(), out.strip()

kept, removed = [], []
for idx, item in enumerate(dataset):
    instr, out = normalize_example(item)
    if not instr or not out:
        removed.append({"id": idx, "reason": "missing_fields"})
        continue
    if is_forbidden(instr) or is_forbidden(out):
        removed.append({"id": idx, "reason": "forbidden_content"})
        continue
    kept.append({"id": idx, "instruction": instr, "input": "", "output": out})

print(f"Kept examples: {len(kept)}, Removed examples: {len(removed)}")


Kept examples: 47042, Removed examples: 4960


In [4]:
small_kept, _ = train_test_split(kept, test_size=0.90, random_state=42)

train_data, rest = train_test_split(small_kept, test_size=0.10, random_state=42)
val_data, test_data = train_test_split(rest, test_size=0.5, random_state=42)

print(f"Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}")

Train: 4233, Val: 235, Test: 236


In [5]:
out_dir = Path("data")
out_dir.mkdir(exist_ok=True)

for split_name, split_list in [("train", train_data), ("val", val_data), ("test", test_data)]:
    with open(out_dir / f"{split_name}.jsonl", "w", encoding="utf-8") as f:
        for e in split_list:
            f.write(json.dumps(e, ensure_ascii=False) + "\n")
# Record manifest
with open(out_dir / "split_manifest.json", "w") as f:
    json.dump({
        "train_count": len(train_data),
        "val_count": len(val_data),
        "test_count": len(test_data)
    }, f, indent=2)

In [6]:
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Ensure padding token exists (often same as eos for causal LM)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess_fn(examples):
    # Combine instruction and output into one sequence for causal LM
    inputs = [f"Instruction: {instr}\nAnswer:" for instr in examples["instruction"]]
    outputs = [f" {out}" for out in examples["output"]]  # leading space as continuation
    batch_text = [inp + out for inp, out in zip(inputs, outputs)]
    tokenized = tokenizer(batch_text, padding="max_length", truncation=True, max_length=512)
    # We set labels = input_ids for causal LM (since model learns to predict next token)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Load the splitted JSONL as HuggingFace dataset
datasets = load_dataset("json", data_files={
    "train": str(out_dir/"train.jsonl"),
    "validation": str(out_dir/"val.jsonl")
})
tokenized_datasets = datasets.map(preprocess_fn, batched=True, remove_columns=["instruction","input","output","id"])


Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/4233 [00:00<?, ? examples/s]

Map:   0%|          | 0/235 [00:00<?, ? examples/s]

In [7]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Should show only LoRA params as trainable


trainable params: 1,089,536 || all params: 1,778,177,536 || trainable%: 0.0613


In [8]:
training_args = TrainingArguments(
    output_dir="lora-medical-output",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=1,
    num_train_epochs=1,
    learning_rate=1e-4,
    fp16=True,                     # mixed precision to fit GPU memory
    logging_steps=1000,
    eval_strategy="epoch",
    save_strategy="no",            # we'll manually save LoRA adapter
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer
)

trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 151646, 'pad_token_id': 151643}.


Epoch,Training Loss,Validation Loss
1,10.7352,10.468743


TrainOutput(global_step=4233, training_loss=10.746842782434886, metrics={'train_runtime': 1231.9331, 'train_samples_per_second': 3.436, 'train_steps_per_second': 3.436, 'total_flos': 2.008828309929984e+16, 'train_loss': 10.746842782434886, 'epoch': 1.0})

In [9]:

model.save_pretrained("medical_lora_adapter")
tokenizer.save_pretrained("medical_lora_adapter")


('medical_lora_adapter/tokenizer_config.json',
 'medical_lora_adapter/special_tokens_map.json',
 'medical_lora_adapter/chat_template.jinja',
 'medical_lora_adapter/tokenizer.json')

In [10]:
!zip -r medical_lora_adapter.zip medical_lora_adapter


  adding: medical_lora_adapter/ (stored 0%)
  adding: medical_lora_adapter/README.md (deflated 65%)
  adding: medical_lora_adapter/adapter_model.safetensors (deflated 7%)
  adding: medical_lora_adapter/chat_template.jinja (deflated 75%)
  adding: medical_lora_adapter/tokenizer_config.json (deflated 88%)
  adding: medical_lora_adapter/tokenizer.json (deflated 81%)
  adding: medical_lora_adapter/adapter_config.json (deflated 55%)
  adding: medical_lora_adapter/special_tokens_map.json (deflated 73%)


In [11]:
# Get the trained model from the trainer
trained_model = trainer.model
trained_model.save_pretrained("medical_lora_adapter")
tokenizer.save_pretrained("medical_lora_adapter")

('medical_lora_adapter/tokenizer_config.json',
 'medical_lora_adapter/special_tokens_map.json',
 'medical_lora_adapter/chat_template.jinja',
 'medical_lora_adapter/tokenizer.json')

In [12]:
!pip install -U bitsandbytes transformers accelerate peft



In [13]:
from transformers import pipeline

# Load base model
# Removed device_map="auto" to avoid the SafetensorError
base_model = AutoModelForCausalLM.from_pretrained(model_name)
# Load the tokenizer from the base model
lora_tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load LORA adapter
# The first argument is the base model, the second is the path to the adapter
lora_model = PeftModel.from_pretrained(base_model, "medical_lora_adapter")
generator = pipeline("text-generation", model=lora_model, tokenizer=lora_tokenizer)
sample_prompts = [
    "Instruction: How can I lower my blood pressure naturally?\nAnswer:",
    "Instruction: What exercises are safe for arthritis pain?\nAnswer:"
]
for prompt in sample_prompts:
    res = generator(prompt, max_new_tokens=100, do_sample=False)[0]["generated_text"]
    print(res.strip())
    # Append the disclaimer
    print("\n*Disclaimer: This response is for educational purposes only and is not a substitute for professional medical advice. Consult a qualified clinician for personal medical guidance.*\n")

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Instruction: How can I lower my blood pressure naturally?
Answer: To lower your blood pressure naturally, you can try the following steps:

1. Eat a balanced diet rich in fruits, vegetables, whole grains, and lean proteins. Avoid high-sugar, high-fiber foods and excessive salt and sugar intake.

2. Exercise regularly. Aim for at least 150 minutes of moderate exercise per week, including strength training, cardio, and flexibility exercises. This will help improve circulation and reduce blood pressure.

3. Take a warm bath or use warm water to

*Disclaimer: This response is for educational purposes only and is not a substitute for professional medical advice. Consult a qualified clinician for personal medical guidance.*

Instruction: What exercises are safe for arthritis pain?
Answer: To address arthritis pain safely, it is important to choose exercises that are safe and effective. Here are some exercises that are safe for arthritis pain:

1. Stretching exercises: Engage in stretching ex