<a href="https://colab.research.google.com/github/Sapphirine/202512-20-Medical_Visual_QA_Agents/blob/main/dpo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **A Simple Notebook Example: Training TinyLlama on the UltraMedical Preference Dataset Using DPO**

In [None]:
import torch, platform, sys, os, textwrap

# Report the interpreter, framework build, and accelerator backing this kernel.
cuda_ready = torch.cuda.is_available()

print("Python:", sys.version)
print("PyTorch:", torch.__version__)
print("CUDA available:", cuda_ready)
if cuda_ready:
    # Device index 0 is the GPU reported here.
    print("GPU:", torch.cuda.get_device_name(0))


Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
PyTorch: 2.9.0+cu126
CUDA available: True
GPU: NVIDIA A100-SXM4-40GB


In [None]:
!pip install -q "transformers>=4.43.0" "datasets>=2.20.0" "accelerate>=0.31.0" "trl>=0.9.4" peft



[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/517.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.2/517.2 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# --- Run configuration -------------------------------------------------------
# Hub identifiers of the base checkpoint and the preference dataset.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
PREF_DATASET = "TsinghuaC3I/UltraMedical-Preference"

# Local directory for run artifacts.
OUTPUT_DIR = "./tinyllama-ultramed-dpo"

# Echo the configuration so it is recorded in the cell output.
print("Base model:", BASE_MODEL)
print("Preference dataset:", PREF_DATASET)
print("Output dir:", OUTPUT_DIR)

# Create the directory up front; exist_ok makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)



Base model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
Preference dataset: TsinghuaC3I/UltraMedical-Preference
Output dir: ./tinyllama-ultramed-dpo


In [None]:
from datasets import load_dataset

# Load the "train" split of the preference dataset named by PREF_DATASET.
ds = load_dataset(PREF_DATASET, split="train")
# Keep the first record around and display it to inspect the raw schema.
sample = ds[0]
sample


{'prompt_id': 'WikiInstruct,8304',
 'label_type': 'length',
 'prompt': 'Investigate the intricacies of immunometabolism, a distinct subfield of immunology that examines the interconnection between cellular metabolic processes and the functional attributes of immune cells. Clarify the mechanisms by which this symbiosis modulates the comprehensive immune response, and analyze its integration within the broader spectrum of immunological studies, with an emphasis on the implications for metabolic diseases and the consequential effects on the proficiency of the immune system.',
 'chosen': [{'content': 'Investigate the intricacies of immunometabolism, a distinct subfield of immunology that examines the interconnection between cellular metabolic processes and the functional attributes of immune cells. Clarify the mechanisms by which this symbiosis modulates the comprehensive immune response, and analyze its integration within the broader spectrum of immunological studies, with an emphasis on 

In [None]:
def extract_prompt_chosen_rejected(ex):
    """Flatten one preference record into plain-text prompt/chosen/rejected fields.

    Args:
        ex: Mapping with a "prompt" value and "chosen" / "rejected" conversations,
            each an iterable of turns carrying "role" and "content" keys.

    Returns:
        A dict with keys "prompt", "chosen" and "rejected". "prompt" is passed
        through unchanged; the other two hold the content of the final
        assistant turn of the respective conversation, or "" when the
        conversation contains no assistant turn.
    """
    prompt_text = ex["prompt"]

    def final_assistant_reply(conversation):
        # Scan front to back so the last matching turn wins.
        reply = ""
        for turn in conversation:
            if turn["role"] == "assistant":
                reply = turn["content"]
        return reply

    flattened = {"prompt": prompt_text}
    flattened["chosen"] = final_assistant_reply(ex["chosen"])
    flattened["rejected"] = final_assistant_reply(ex["rejected"])
    return flattened

# Rewrite every record into flat prompt/chosen/rejected strings.
processed_ds = ds.map(extract_prompt_chosen_rejected)

# Drop every column other than the three preference fields.
extra_columns = [
    name
    for name in processed_ds.column_names
    if name not in ("prompt", "chosen", "rejected")
]
processed_ds = processed_ds.remove_columns(extra_columns)

# Show the first processed record.
processed_ds[0]


Map:   0%|          | 0/109353 [00:00<?, ? examples/s]

{'prompt': 'Investigate the intricacies of immunometabolism, a distinct subfield of immunology that examines the interconnection between cellular metabolic processes and the functional attributes of immune cells. Clarify the mechanisms by which this symbiosis modulates the comprehensive immune response, and analyze its integration within the broader spectrum of immunological studies, with an emphasis on the implications for metabolic diseases and the consequential effects on the proficiency of the immune system.',
 'chosen': ' Immunometabolism is an emerging field of study that explores the intersection between cellular metabolic processes and the functional attributes of immune cells. This subfield has gained significant attention due to the increasing recognition that immune cell functions are heavily influenced by their metabolic programs. The metabolic pathways within immune cells are not merely passive support systems but instead actively modulate and dictate the immune responses.

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Same checkpoint id as in the configuration cell, repeated here.
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse EOS as the padding token when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the weights in bfloat16. `torch_dtype` is kept (newer transformers
# releases warn that it is deprecated in favour of `dtype`) so the call still
# works on the older releases allowed by the install cell.
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16)

# Keep the model config's padding id in sync with the tokenizer.
model.config.pad_token_id = tokenizer.pad_token_id

first_param = next(model.parameters())
print("Model dtype:", first_param.dtype)



tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Model dtype: torch.bfloat16


In [None]:
# Interactive Hugging Face Hub login: the CLI prompts for an access token.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: fineGrained).
The token `new2222` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-auth

In [None]:
from trl import DPOTrainer, DPOConfig

# From here on, OUTPUT_DIR points at the directory used for this A100 run.
OUTPUT_DIR = "./tinyllama-ultramed-dpo-a100"

training_args = DPOConfig(
    output_dir=OUTPUT_DIR,
    seed=42,

    # --- optimisation schedule ---
    num_train_epochs=1,
    learning_rate=1e-6,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,

    # --- DPO objective and sequence-length limits ---
    beta=0.1,
    max_length=512,
    max_prompt_length=512,

    # --- precision and memory ---
    bf16=True,
    fp16=False,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},

    # --- logging and checkpointing ---
    logging_steps=50,
    save_strategy="epoch",
    save_total_limit=3,
    report_to="none",

    # Keep all dataset columns.
    remove_unused_columns=False,
)

# No explicit reference model is supplied (ref_model=None); the tokenizer is
# passed as the processing class.
dpo_trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=training_args,
    processing_class=tokenizer,
    train_dataset=processed_ds,
)





Extracting prompt in train dataset:   0%|          | 0/109353 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/109353 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/109353 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2130 > 2048). Running this sequence through the model will result in indexing errors


The run below is only a short illustrative example; the actual training results are presented in the report.

In [None]:
# Start DPO fine-tuning with the trainer configured above (logging_steps=50).
dpo_trainer.train()



Step,Training Loss
50,0.6921
100,0.6842
150,0.6786
200,0.6735
250,0.6663
300,0.6644
350,0.665
400,0.6558
450,0.6537
500,0.6555


KeyboardInterrupt: 

In [None]:
# Write the trainer's current model to OUTPUT_DIR.
dpo_trainer.save_model(OUTPUT_DIR)
# Save the tokenizer to the same directory, from which the comparison cell
# later loads both the tokenizer and the model.
tokenizer.save_pretrained(OUTPUT_DIR)

print("DPO-tuned model saved to:", OUTPUT_DIR)


DPO-tuned model saved to: ./tinyllama-ultramed-dpo-a100


In [None]:
from transformers import pipeline

def load_generation_pipeline(model_path):
    """Load a tokenizer/model pair and wrap it in a text-generation pipeline.

    Args:
        model_path: Hub id or local directory accepted by `from_pretrained`.

    Returns:
        A `(tokenizer, model, pipe)` tuple, where `pipe` is a "text-generation"
        pipeline built from the other two. The model is loaded in float16 with
        `device_map="auto"`.
    """
    tok = AutoTokenizer.from_pretrained(model_path)
    lm = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return tok, lm, pipeline("text-generation", model=lm, tokenizer=tok)


def greedy_generate(pipe, text, max_new_tokens=256):
    """Run `pipe` on `text` without sampling and return the raw pipeline output.

    Args:
        pipe: A text-generation pipeline.
        text: Prompt string passed to the pipeline.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        Whatever the pipeline returns; the comparison below reads
        `[0]["generated_text"]` from it.
    """
    return pipe(text, max_new_tokens=max_new_tokens, do_sample=False)


# Build both pipelines through the same helper so the base and DPO-tuned models
# are loaded with identical settings (previously two copy-pasted blocks).
base_tokenizer, base_model, base_pipe = load_generation_pipeline(BASE_MODEL)
dpo_tokenizer, dpo_model, dpo_pipe = load_generation_pipeline(OUTPUT_DIR)

# Use the first processed training record as the comparison question.
example = processed_ds[0]
question = example["prompt"]

prompt = (
    "You are a helpful and precise medical assistant.\n\n"
    f"Question: {question}\n\nAnswer:"
)

# The printed text starts with the prompt, followed by the model's continuation.
print("=== Base TinyLlama ===")
out_base = greedy_generate(base_pipe, prompt)
print(out_base[0]["generated_text"])

print("\n=== DPO-tuned TinyLlama ===")
out_dpo = greedy_generate(dpo_pipe, prompt)
print(out_dpo[0]["generated_text"])


Device set to use cuda:0
Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


=== Base TinyLlama ===
You are a helpful and precise medical assistant.

Question: Investigate the intricacies of immunometabolism, a distinct subfield of immunology that examines the interconnection between cellular metabolic processes and the functional attributes of immune cells. Clarify the mechanisms by which this symbiosis modulates the comprehensive immune response, and analyze its integration within the broader spectrum of immunological studies, with an emphasis on the implications for metabolic diseases and the consequential effects on the proficiency of the immune system.

Answer: Immunometabolism is a fascinating field of research that aims to understand the complex interplay between cellular metabolic processes and the immune system. This symbiosis is crucial for the efficient functioning of the immune system, as it enables the immune cells to recognize and respond to pathogens, while also maintaining homeostasis and preventing autoimmune diseases.

The mechanisms by which 