<a href="https://colab.research.google.com/github/SepKeyPro/genAI/blob/main/llama3_orpo_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Fine-tuning Llama 3 8B using the ORPO technique**

In [21]:
pip install -U transformers datasets accelerate peft trl bitsandbytes wandb

In [18]:
import os

import torch
import wandb
from datasets import load_dataset
from huggingface_hub import login
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

In [21]:
# Authenticate with the Hugging Face Hub and Weights & Biases.
# Credentials are read from the environment (HF_TOKEN / WANDB_API_KEY) rather
# than written into the notebook, where they would be saved and shared along
# with the code. When a variable is unset, None is passed and each library
# falls back to its own stored credentials or interactive prompt.
login(token=os.environ.get("HF_TOKEN"))
wandb.login(key=os.environ.get("WANDB_API_KEY"))

In [21]:
# Checkpoint to fine-tune and the name chosen for the resulting model.
base_model = "meta-llama/Meta-Llama-3-8B"
new_model = "Orpollama3"

# Quantization settings: weights are loaded in 4-bit NF4 with double
# quantization, while computation is carried out in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# LoRA adapter settings: rank-16 adapters (alpha 32, dropout 0.05, no bias
# training) attached to the projection layers listed in `target_modules`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        'up_proj',
        'down_proj',
        'gate_proj',
        'k_proj',
        'q_proj',
        'v_proj',
        'o_proj',
    ],
)

# Tokenizer: the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token

# Quantized base model, placed on the available device(s) automatically.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=bnb_config,
)

# NOTE(review): setup_chat_format(model, tokenizer) is not applied here, so the
# tokenizer keeps whatever chat template the base checkpoint ships with.
# Later cells call tokenizer.apply_chat_template - confirm a template exists.
model = prepare_model_for_kbit_training(model)

In [21]:
# Keep the dataset identifier under its own name and actually use it for the
# load, instead of binding `dataset` to a string that is immediately
# overwritten while the same literal is repeated in the call.
dataset_name = "mlabonne/orpo-dpo-mix-40k"
dataset = load_dataset(dataset_name, split="all")

# Seeded shuffle, then keep only the first 100 rows of the shuffled data.
dataset = dataset.shuffle(seed=42).select(range(100))

def format_chat_template(row):
    """Render a row's "chosen" and "rejected" entries as chat-template text.

    Each of the two fields is passed through the module-level tokenizer's
    ``apply_chat_template`` with ``tokenize=False`` and replaced by the result.

    Args:
        row: Mapping with "chosen" and "rejected" keys (presumably lists of
            role/content messages - verify against the dataset schema).

    Returns:
        The same ``row`` object, updated in place.
    """
    for field in ("chosen", "rejected"):
        row[field] = tokenizer.apply_chat_template(row[field], tokenize=False)
    return row

# Convert the chosen/rejected conversations of every row to template text.
dataset = dataset.map(format_chat_template)

# Hold out 1% of the rows for evaluation. The split is seeded so the same rows
# land in train/eval on every run; without a seed the partition changed on
# each execution even though the earlier shuffle was seeded.
# With the 100-row subset selected above, 1% leaves a single evaluation row.
dataset = dataset.train_test_split(test_size=0.01, seed=42)
train_dataset = dataset['train']
eval_dataset = dataset['test']

In [21]:
# ORPO training configuration, grouped by concern.
orpo_args = ORPOConfig(
    # Output and logging
    output_dir="./results/",
    report_to="wandb",
    # Schedule and optimizer
    num_train_epochs=1,
    learning_rate=8e-6,
    optim="paged_adamw_8bit",
    # Batching: 2 samples per device, gradients accumulated over 4 steps
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # ORPO-specific settings and sequence length limits
    beta=0.1,
    max_length=1024,
    max_prompt_length=256,
)

# The trainer receives the LoRA config and wraps the quantized model itself.
# NOTE(review): newer TRL releases appear to rename the `tokenizer=` argument
# to `processing_class=` - confirm against the installed TRL version.
orpo_trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=orpo_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Run training, then write the trained model and the tokenizer to disk.
orpo_trainer.train()
orpo_trainer.model.save_pretrained("fine_tuned_orpo_model")
tokenizer.save_pretrained("fine_tuned_orpo_tokenizer")

In [21]:
# Reload the tokenizer and the base checkpoint. No quantization config is
# passed this time; the weights are loaded in float16.
tokenizer = AutoTokenizer.from_pretrained(base_model)
original_model = AutoModelForCausalLM.from_pretrained(
    base_model,  # "meta-llama/Meta-Llama-3-8B"
    torch_dtype=torch.float16,
    return_dict=True,
)

# Attach the adapter saved in "fine_tuned_orpo_model" to the base model and
# fold its weights in, leaving a plain (non-PEFT) model.
model = PeftModel.from_pretrained(
    original_model,
    "fine_tuned_orpo_model",
).merge_and_unload()

In [21]:
# Build the prompt text from a short system + user conversation.
chat = [
    {"role": "system", "content": "You are a helpful assistant chatbot."},
    {"role": "user", "content": "What is the biggest city in the world?"}
]

prompt = tokenizer.apply_chat_template(chat, add_generation_prompt=True, tokenize=False)

# Create the generation pipeline under its own name. Assigning the result back
# to `pipeline` would shadow the imported factory function, so re-running this
# cell would call the pipeline object instead of building a new one and fail.
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)

# Generate text with nucleus sampling. `max_length` bounds the total sequence
# length (prompt included), not just the newly generated tokens.
sequences = text_generator(
    prompt,
    do_sample=True,
    truncation=True,
    temperature=0.7,
    top_p=0.9,
    max_length=200,
)
print(sequences[0]['generated_text'])