
This notebook/code has been developed based on the academic publication titled  
**“Fine-tuning a local LLaMA-3 large language model for automated privacy-preserving physician letter generation in radiation oncology.”**

The implementation replicates the methods and configurations described in the paper using QLoRA fine-tuning for structured clinical data, specifically aiming to generate physician-style medical letters from structured oncology case inputs.

This includes:
- Model quantization with 4-bit QLoRA (NF4)
- LoRA configuration (rank, scaling, dropout) as described in the paper
- Training parameters, optimizer choice, and target modules matching the original methodology

💡 The implementation is intended for **academic and non-commercial use** only, in the context of methodological exploration and reproduction of the paper’s pipeline.

📎 Reference:  
Hou Y, Bert C, Gomaa A, Lahmer G, Höfler D, Weissmann T, Voigt R, Schubert P, Schmitter C, Depardon A, Semrau S, Maier A, Fietkau R, Huang Y and Putz F (2025). Fine-tuning a local LLaMA-3 large language model for automated privacy-preserving physician letter generation in radiation oncology. Front. Artif. Intell. 7:1493716. doi: 10.3389/frai.2024.1493716

All original research rights belong to the authors of the referenced paper.

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig,
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
)
from trl import SFTTrainer

In [None]:
# ==========================
# 1. MODEL AND TASK SETTINGS
# ==========================
# Hugging Face model id to fine-tune; alternative: "meta-llama/Llama-2-13b-hf".
# NOTE(review): confirm this repository id resolves on the Hub before running;
# the id is also baked into OUTPUT_DIR below.
MODEL_NAME = "meta-llama/Llama-3-8b-hf"

# (max sequence length, total optimizer steps, task label), chosen by model size:
#   13b model   -> Paper: patient case summarization task
#   otherwise   -> Paper: physician letter generation task
MAX_SEQ_LENGTH, TOTAL_STEPS, TASK = (
    (1500, 500, "patient_case_summarization")
    if "13b" in MODEL_NAME.lower()
    else (2000, 15000, "physician_letter_generation")
)

# Output directory is named after the model (last path component) and the task.
OUTPUT_DIR = "./qlora_{}_{}".format(MODEL_NAME.rsplit("/", 1)[-1], TASK)
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# ==========================
# 2. QLoRA (4-BIT) QUANTIZATION
# ==========================

# bitsandbytes settings applied when the base model is loaded for QLoRA.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # Paper: QLoRA -> 4-bit weights
    bnb_4bit_quant_type="nf4",              # NF4 quantization type
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,   # matches fp16=True in the training arguments
)

In [None]:
# 3. MODEL & TOKENIZER

# Load the base model with the 4-bit quantization config; device_map="auto"
# leaves weight placement to the loader.
# trust_remote_code is deliberately NOT enabled: it allows Python code shipped
# in the model repository to run locally, and the Llama architecture is
# implemented inside transformers itself, so the flag is not needed.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the EOS token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


In [None]:
# 4. LoRA CONFIGURATION (mirrors the paper)

# Prepare the quantized model for k-bit training before adapters are attached.
model = prepare_model_for_kbit_training(model)

# Names of the modules that receive LoRA adapters
# (attention and MLP projections plus lm_head).
target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=target_modules,
    r=32,               # Paper: LoRA rank
    lora_alpha=64,      # Paper: scaling factor
    lora_dropout=0.05,  # Paper: dropout
    bias="none",
)

# Wrap the model with the adapters and report how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
# 5. DATASET LOADING
from datasets import load_dataset

# Read the JSON Lines file and take its "train" split directly.
raw_dataset = load_dataset(
    "json",
    data_files={"train": "your_data.jsonl"},
    split="train",
)

def format_to_text(example):
    """Build the training text for one record.

    The record's "input" and "output" fields are joined with a blank line
    and returned under the single key "text".
    """
    pieces = (example["input"], example["output"])
    return {"text": "\n\n".join(pieces)}

# Run format_to_text over the raw dataset; the result is handed to the trainer
# as train_dataset, which reads its "text" field (dataset_text_field="text").
formatted_dataset = raw_dataset.map(format_to_text)

In [None]:
# 6. TRAINING PARAMETERS

training_args = TrainingArguments(
    # --- output, checkpoints, logging ---
    output_dir=OUTPUT_DIR,
    save_steps=500,
    save_total_limit=3,
    logging_steps=100,
    report_to="tensorboard",
    # --- schedule ---
    max_steps=TOTAL_STEPS,             # Paper: 500 or 15000
    # NOTE(review): TrainingArguments documents that a positive max_steps
    # overrides num_train_epochs, so this value looks redundant; confirm.
    num_train_epochs=1,
    # --- batch size ---
    per_device_train_batch_size=2,     # Paper: 2
    gradient_accumulation_steps=2,     # Paper: 2
    # --- optimizer ---
    optim="paged_adamw_8bit",          # Paper: "8-bit paged AdamW"
    learning_rate=1e-5,                # Paper: 1e-5
    # --- memory / precision ---
    gradient_checkpointing=True,
    fp16=True,
    ddp_find_unused_parameters=False,
)


In [None]:
# Supervised fine-tuning on the "text" field of the formatted dataset.
# `model` has already been wrapped with the LoRA adapters via get_peft_model,
# so no peft_config is passed here: handing the trainer both an adapter-wrapped
# model and a peft_config specifies the adapter twice.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=formatted_dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
)

trainer.train()

In [None]:
# Save the trained model under OUTPUT_DIR/final_model, then print that location.
# The printed message is Turkish for "Training completed. Model saved here: ...".
trainer.save_model(f"{OUTPUT_DIR}/final_model")
print(f"\n✅ Eğitim tamamlandı. Model burada kaydedildi: {OUTPUT_DIR}/final_model")


In [None]:
# Fine-tuning is finished at this point; evaluation comes next.
# Reload from the directory trainer.save_model() wrote to. The path is derived
# from OUTPUT_DIR instead of being hard-coded, so it stays correct when
# MODEL_NAME / TASK change (for the default settings it is the same path).
# trust_remote_code is not enabled because the Llama architecture ships with
# transformers and no repository-provided code needs to run.
# NOTE(review): the saved directory comes from a PEFT-wrapped model; confirm
# that from_pretrained resolves it on the installed transformers/peft versions.
model_path = f"{OUTPUT_DIR}/final_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).to("cuda")

In [None]:
# Example structured case used as the generation prompt (three lines of text).
new_input = (
    "Diagnosis: Breast cancer\n"
    "History: Chemotherapy completed in May\n"
    "Follow-up: MRI in 3 months"
)

In [None]:
# Tokenize the new input and move the tensors to the GPU.
# NOTE(review): the training text is built as input + "\n\n" + output, while
# the prompt here is the bare input without that separator; confirm whether
# the separator should be appended at inference time.
import torch
inputs = tokenizer(new_input, return_tensors="pt").to("cuda")

# Sample a continuation without tracking gradients.
with torch.no_grad():
    generated = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )

# The returned sequence starts with the prompt tokens. Decode only the newly
# generated part so that output_text (and the ROUGE score computed from it)
# reflects the generated letter rather than the echoed input.
output_text = tokenizer.decode(
    generated[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True,
)
print(output_text)

In [None]:
# Score the generated text against a reference letter with the ROUGE metric.
from evaluate import load

rouge = load("rouge")
# NOTE: the reference string below is a placeholder (Turkish for "your
# reference physician letter"); replace it with the real reference letter
# before interpreting the scores.
scores = rouge.compute(predictions=[output_text], references=["Senin referans doktor mektubun"])
print(scores)
