<a href="https://colab.research.google.com/github/RicoStaedeli/NLP2025_CQG/blob/main/Training/3_Training_4_ORPO_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune Llama 3.1 8B Instruct with ORPO


In [1]:
!pip install -qqq -U transformers datasets accelerate peft trl bitsandbytes wandb --progress-bar off

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2025.3.0 which is incompatible.[0m[31m
[0m

In [2]:
import gc
import os
import torch
import wandb
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format
import logging

### Colab
This part is only relevant when running the notebook in Google Colab.

In [3]:
from google.colab import userdata, drive

In [5]:
# Attach Google Drive; the model and checkpoint directories configured further
# down live underneath this mount point.
drive_mount_point = "/content/drive"
drive.mount(drive_mount_point)

# GitHub access token, read from the Colab user data store.
token = userdata.get("GITHUB")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
repo_url = f"https://{token}@github.com/RicoStaedeli/NLP2025_CQG.git"

!git clone {repo_url}

Cloning into 'NLP2025_CQG'...
remote: Enumerating objects: 1772, done.[K
remote: Counting objects: 100% (242/242), done.[K
remote: Compressing objects: 100% (90/90), done.[K
remote: Total 1772 (delta 204), reused 162 (delta 152), pack-reused 1530 (from 1)[K
Receiving objects: 100% (1772/1772), 50.99 MiB | 26.91 MiB/s, done.
Resolving deltas: 100% (1058/1058), done.


In [7]:
################################################################################
#######################   STATIC VARIABLES      ################################
################################################################################

# Identifier of this training run; it is embedded in every output path below.
TRAINING_NUMBER = 5
# Hugging Face repository of the base model that gets fine-tuned.
BASE_MODEL_REPO = "meta-llama/Llama-3.1-8B-Instruct"
# Name under which the adapter is saved locally and the merged model is pushed.
MODEL_NAME = "Meta-Llama-3.1-8B-Instruct_ORPO_1"

################################################################################
#######################   PATH VARIABLES        ################################
################################################################################

# Preference dataset inside the cloned repository.
train_dataset_path = "/content/NLP2025_CQG/Data/Processed/CQ DPO Dataset.json"

# NOTE(review): "Traing_" looks like a typo for "Training_"; it is kept as is so
# that the location of existing logs does not change.
log_base_path = f"/content/NLP2025_CQG/Training/Logs/Traing_{TRAINING_NUMBER}/Tensorboard/"
os.makedirs(log_base_path, exist_ok=True)

log_file_path = f"/content/NLP2025_CQG/Logs/training_{TRAINING_NUMBER}.log"
# logging.FileHandler does not create missing parent directories, so make sure
# the log directory exists before the handler opens the file.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

model_save_path = f"/content/drive/MyDrive/HSG/NLP/Project NLP/Training/Training_{TRAINING_NUMBER}/Model/{MODEL_NAME}_finetuned/"
os.makedirs(model_save_path, exist_ok=True)

model_lora_adapter_save_path = f"/content/drive/MyDrive/HSG/NLP/Project NLP/Training/Training_{TRAINING_NUMBER}/Model/{MODEL_NAME}_lora_adapters/"
os.makedirs(model_lora_adapter_save_path, exist_ok=True)


checkpoint_dir = f"/content/drive/MyDrive/HSG/NLP/Project NLP/Training/Training_{TRAINING_NUMBER}/Checkpoints/"
os.makedirs(checkpoint_dir, exist_ok=True)


################################################################################
#######################   LOGGER                ################################
################################################################################

# Setup logger manually
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Create file handler (only if not already added, so that re-running the cell
# does not attach a second handler and duplicate every log line)
if not logger.handlers:
    fh = logging.FileHandler(log_file_path)
    fh.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

# Detect device: Apple MPS is checked first, then CUDA, with CPU as fallback
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

In [8]:
# Write a short header describing this run to the log.
startup_messages = (
    "--------  Start with ORPO Training  -------------",
    f'Device selected: {device}',
    f'Model: {MODEL_NAME}',
    f'Training number: {TRAINING_NUMBER}',
)
for message in startup_messages:
    logger.info(message)

INFO:__main__:--------  Start with ORPO Training  -------------
INFO:__main__:Device selected: cuda
INFO:__main__:Model: Meta-Llama-3.1-8B-Instruct_ORPO_1
INFO:__main__:Training number: 5


In [9]:
# Compute dtype handed to the 4-bit quantisation config when the model is loaded.
torch_dtype = torch.bfloat16

# The Weights & Biases API key is defined in the secrets tab in Google Colab.
wb_token = userdata.get("wandb")
wandb.login(key=wb_token)

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mricostaedeli[0m ([33mricostaedeli-hsg[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [10]:
# Tokenizer of the base model. Any chat template it ships with is cleared
# before setup_chat_format is applied to the model/tokenizer pair below.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_REPO)
if tokenizer.chat_template is not None:
    tokenizer.chat_template = None

# QLoRA: load the weights in 4-bit NF4 with double quantisation
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch_dtype,
    load_in_4bit=True,
)

# LoRA: adapters on the attention and MLP projection layers
lora_target_modules = ['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Quantised base model, then chat format and k-bit training preparation
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_REPO,
    device_map="auto",
    quantization_config=bnb_config,
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [11]:
# Minimum gap between the two scores for a pair to be kept (strictly greater).
MIN_SCORE_MARGIN = 4


def has_clear_preference(example):
    """Return True when score_chosen exceeds score_rejected by more than MIN_SCORE_MARGIN."""
    return example['score_chosen'] - example['score_rejected'] > MIN_SCORE_MARGIN


# Read the preference pairs from the JSON file and keep only the clear-cut ones.
dataset = load_dataset('json', data_files=train_dataset_path).filter(has_clear_preference)

Generating train split: 0 examples [00:00, ? examples/s]

Filter:   0%|          | 0/3243 [00:00<?, ? examples/s]

In [12]:
# Show the splits, columns and row count of the filtered dataset.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'chosen', 'rejected', 'score_chosen', 'score_rejected', 'schema', 'context'],
        num_rows: 1572
    })
})


In [13]:
def format_chat_template(row):
    """Render the answers of one preference pair through the chat template.

    The ``chosen`` and ``rejected`` entries of ``row`` are each passed to
    ``tokenizer.apply_chat_template`` with ``tokenize=False`` and overwritten
    with the result. ``row`` is modified in place and also returned.
    """
    for answer_key in ("chosen", "rejected"):
        row[answer_key] = tokenizer.apply_chat_template(row[answer_key], tokenize=False)
    return row

# Apply the chat template to every example, using as many worker processes as
# os.cpu_count() reports.
worker_count = os.cpu_count()
dataset = dataset.map(format_chat_template, num_proc=worker_count)

Map (num_proc=12):   0%|          | 0/1572 [00:00<?, ? examples/s]

In [14]:
# Inspect the dataset again after the chat template has been applied.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'chosen', 'rejected', 'score_chosen', 'score_rejected', 'schema', 'context'],
        num_rows: 1572
    })
})


In [15]:
# Columns to discard; 'prompt', 'chosen' and 'rejected' are not listed and stay.
columns_to_remove = ["id", "score_chosen", "score_rejected", "schema", "context"]

# Continue with the single 'train' split as a plain Dataset.
train_split = dataset["train"]
dataset = train_split.remove_columns(columns_to_remove)

In [16]:
# Hold out 5% of the examples for evaluation. The fixed seed makes the split
# reproducible, so evaluation metrics stay comparable between runs.
dataset = dataset.train_test_split(test_size=0.05, seed=42)

In [17]:
# Show the resulting train/test splits and their sizes.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 1493
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 79
    })
})


In [18]:
# Print the first training example to check the final prompt/chosen/rejected format.
print(dataset['train'][0])

{'prompt': [{'content': 'Generate one critical question addressing the provided context. Ensure it matches the schema: FearAppeal\n\nContext: reasons_evidence: Barring the massive amount of misinformation and ignorance involving the nature of capitalism that is typically shown by less educated people, I cannot understand why the MAJORITY of our nation is not pushing very hard for a completely libertarian structure. I believe that any attack on libertarianism must come from either ignorance, or an entitlement syndrome.', 'role': 'user'}], 'chosen': '<|im_start|>assistant\nHave you ever stopped to think that other might see THEIR ideologies that way and YOURS as being argued from ignorance and entitlement?<|im_end|>\n', 'rejected': '<|im_start|>assistant\nIs libertarianism the best economic structure for ensuring equality and prosperity for all individuals, regardless of their education level or societal status?<|im_end|>\n'}


In [19]:
# ORPO training arguments, grouped by concern.
orpo_args = ORPOConfig(
    # optimisation
    optim="paged_adamw_8bit",
    learning_rate=8e-6,
    lr_scheduler_type="linear",
    warmup_steps=10,
    max_steps=40,
    beta=0.1,
    # batching
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    # sequence limits and dataset handling
    max_length=1024,
    max_prompt_length=512,
    remove_unused_columns=False,
    # evaluation and reporting; with max_steps=40 the eval_steps ratio of 0.2
    # produces an evaluation every 8 steps
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    report_to="wandb",
    output_dir="./results/",
)

# Wire the quantised model, the LoRA config and both dataset splits together.
trainer_kwargs = dict(
    model=model,
    args=orpo_args,
    peft_config=peft_config,
    processing_class=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)
trainer = ORPOTrainer(**trainer_kwargs)

# Train, then store the result in a local directory named after the model.
trainer.train()
trainer.save_model(MODEL_NAME)

Map:   0%|          | 0/1493 [00:00<?, ? examples/s]

Map:   0%|          | 0/1493 [00:00<?, ? examples/s]

Map:   0%|          | 0/1493 [00:00<?, ? examples/s]

Map:   0%|          | 0/79 [00:00<?, ? examples/s]

Map:   0%|          | 0/79 [00:00<?, ? examples/s]

Map:   0%|          | 0/79 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
8,20.5258,5.322973,10.2534,7.705,3.901,-0.524357,-0.440286,0.15,-0.08407,-4.402865,-5.243568,-1.423188,-1.335748,5.201055,-1.301799,-0.848231
16,21.0838,5.089161,10.2856,7.681,3.889,-0.507186,-0.426486,0.1375,-0.0807,-4.264859,-5.071862,-1.398532,-1.319001,4.970071,-1.275979,-0.81531
24,21.8929,4.971242,10.2529,7.705,3.901,-0.495899,-0.419332,0.1375,-0.076568,-4.193315,-4.958993,-1.372806,-1.292831,4.855346,-1.245528,-0.774235
32,18.7353,4.897875,10.2447,7.711,3.904,-0.488699,-0.415321,0.1375,-0.073378,-4.153211,-4.886992,-1.350347,-1.265286,4.784364,-1.222003,-0.742411
40,19.0739,4.870995,10.254,7.704,3.901,-0.486059,-0.413924,0.1375,-0.072135,-4.139244,-4.860593,-1.34186,-1.253277,4.758403,-1.212928,-0.729991




In [20]:
# Flush memory: drop the references to the trainer and to the 4-bit training
# model so that their memory can actually be reclaimed before the fp16 base
# model is loaded. Without this the collect/empty_cache calls free nothing.
# pop() with a default (instead of `del`) keeps the cell re-runnable.
for stale_name in ("trainer", "model"):
    globals().pop(stale_name, None)
gc.collect()
gc.collect()
torch.cuda.empty_cache()

# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_REPO)
# Reset chat_template if already set
if tokenizer.chat_template is not None:
    tokenizer.chat_template = None

# Unquantised fp16 copy of the base model; the adapter is merged into this one
fp16_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_REPO,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Apply the same chat format that was applied to the model before training
fp16_model, tokenizer = setup_chat_format(fp16_model, tokenizer)

# Merge adapter with base model (the adapter was saved under MODEL_NAME)
model = PeftModel.from_pretrained(fp16_model, MODEL_NAME)
model = model.merge_and_unload()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [21]:
# Upload the merged model and then the tokenizer to the Hugging Face Hub
# repository named MODEL_NAME.
model.push_to_hub(MODEL_NAME, use_temp_dir=False)
tokenizer.push_to_hub(MODEL_NAME, use_temp_dir=False)

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ricostaedeli/Meta-Llama-3.1-8B-Instruct_ORPO_1/commit/37165bcd83bf17248e49f6635984fe475899d5a6', commit_message='Upload tokenizer', commit_description='', oid='37165bcd83bf17248e49f6635984fe475899d5a6', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ricostaedeli/Meta-Llama-3.1-8B-Instruct_ORPO_1', endpoint='https://huggingface.co', repo_type='model', repo_id='ricostaedeli/Meta-Llama-3.1-8B-Instruct_ORPO_1'), pr_revision=None, pr_num=None)

In [None]:
os.chdir("NLP2025_CQG")
!ls

In [None]:
# Author identity used for the commit created below.
!git config --global user.name "Rico Städeli"
!git config --global user.email "rico@yabriga.ch"


# Stage everything in the current directory, commit it with a message that
# carries the training number, and push to the remote.
commit_message = f"Training Number: {TRAINING_NUMBER}, Training logs in Google Drive"
!git add .
!git commit -m "{commit_message}"
!git push