<a href="https://colab.research.google.com/github/Lior-Baruch/SFT_llama_qlora/blob/main/DPO_llama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install all dependencies
!pip install -q -U peft transformers datasets bitsandbytes trl accelerate wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# all imports
import warnings
# NOTE(review): this silences every Python warning for the rest of the session,
# including deprecation / configuration warnings emitted by the libraries below.
# Consider narrowing or removing the filter while debugging training problems.
warnings.filterwarnings("ignore")
from huggingface_hub import notebook_login
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer


In [3]:
# Log in to the Hugging Face hub (required for private datasets/models and for the
# `push_to_hub=True` setting used in the training arguments).
# `notebook_login` is already imported in the imports cell, so it is not re-imported here.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

LoRA adapter

In [4]:
# LoRA (Low-Rank Adaptation) configuration: rank-8 adapters, no bias training,
# attached to the attention projections and the MLP projections listed below.
# NOTE(review): `lora_config` is not passed to any trainer in the cells shown here —
# confirm whether it was meant to be given to DPOTrainer as `peft_config`.
attention_projections = ["q_proj", "o_proj", "k_proj", "v_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    bias="none",
    r=8,
    target_modules=attention_projections + mlp_projections,
)

Load our saved fine-tuned model from the Hub

In [7]:
import torch
from peft import AutoPeftModelForCausalLM  # used further down to load the reference model
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)

# Hub repository that holds the fine-tuned model (plain string: no f-string placeholders needed)
model_id = "LBK95/llama-7b-qlora-ultrachat"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit quantized weights with float16 as the compute dtype
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

# Loaded through AutoModelForCausalLM (not AutoPeftModelForCausalLM) because the test cell
# below toggles the adapter with `model_loaded.enable_adapters()` / `disable_adapters()`.
model_loaded = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

test our base model vs fine-tuned model

In [8]:
do_test = True

if do_test:
    model_loaded.eval()
    # test our base model vs fine-tuned model
    text = "### USER: How to plan a picnic?### Assistant:"
    # Move the inputs to wherever the model lives instead of assuming GPU index 0
    inputs = tokenizer(text, return_tensors="pt").to(model_loaded.device)

    def _generate_reply():
        """Greedy-decode up to 250 new tokens for `text` and return the decoded string."""
        outputs = model_loaded.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,  # passed explicitly so generate() does not have to guess it
            max_new_tokens=250,
            do_sample=False,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=False)

    # with adaptor (fine-tuned model)
    model_loaded.enable_adapters()
    reply_with_adapter = _generate_reply()

    print("After attaching Lora adapters:")
    print(reply_with_adapter)
    print()

    # without adaptor (base model)
    model_loaded.disable_adapters()
    try:
        reply_without_adapter = _generate_reply()

        print("Before attaching Lora adapters:")
        print(reply_without_adapter)
    finally:
        # Always switch the adapters back on: if generation fails while they are disabled,
        # the later training cells would otherwise run on the bare base model.
        model_loaded.enable_adapters()

After attaching Lora adapters:
<s> ### USER: How to plan a picnic?### Assistant: 1. ### Step 1: Decide on a picnic spot: Choose a picnic spot that's scenic and has a lot of greenery.

2. Plan the menu: Decide on the food you want to eat, and make sure it's easy to transport and store.

3. Pack the food: Pack the food in coolers or baskets, and make sure it's easy to transport.

4. Pack the essentials: Pack the essentials like napkins, plates, utensils, and a cooler with ice.

5. Pack the non-food items: Pack the non-food items like a blanket, a picnic table, and a portable speaker.

6. Plan the transportation: Decide on the transportation method, whether it's a car, a bike, or a public transportation.

7. Pack the entertainment: Pack the entertainment like a portable speaker, a board game, or a book.

8. Pack the sun protection: Pack the sun protection like sunscreen, a hat, and sunglasses.

9

Before attaching Lora adapters:
<s> ### USER: How to plan a picnic?### Assistant: What do yo

# DPO

Load human preference dataset

In [9]:
# First 1% of the train split, read from the "data/rl" directory of the dataset repo
dataset = load_dataset("lvwerra/stack-exchange-paired", data_dir="data/rl", split="train[:1%]")

Resolving data files:   0%|          | 0/20 [00:00<?, ?it/s]

In [10]:
def return_prompt_and_responses(samples):
    """Convert a batch of paired answers into prompt / chosen / rejected columns.

    Args:
        samples: mapping with the list-valued keys "question", "response_j"
            and "response_k" (one entry per example in the batch).

    Returns:
        dict with three keys:
            "prompt"   - each question wrapped as "Question: <q>\\n\\nAnswer: "
            "chosen"   - the "response_j" list, passed through unchanged
            "rejected" - the "response_k" list, passed through unchanged
    """
    # NOTE(review): the smoke-test cell prompts the model with a "### USER: ...### Assistant:"
    # template, which differs from the template built here — confirm this is intended.
    prompts = []
    for question in samples["question"]:
        prompts.append("Question: " + question + "\n\nAnswer: ")

    preferred = samples["response_j"]   # rated better than k
    dispreferred = samples["response_k"]  # rated worse than j
    return {"prompt": prompts, "chosen": preferred, "rejected": dispreferred}

# Remember the raw schema so that every original column can be dropped by the mapping
original_columns = dataset.column_names
print(original_columns)

# Batched conversion to the prompt / chosen / rejected layout
dataset_pref = dataset.map(return_prompt_and_responses, remove_columns=original_columns, batched=True)

# Show the new schema, then the (rows, columns) shape
for summary in (dataset_pref.column_names, dataset_pref.shape):
    print(summary)

['qid', 'question', 'date', 'metadata', 'response_j', 'response_k']
['prompt', 'chosen', 'rejected']
(74359, 3)


simple EDA

In [11]:
# very simple eda on the dataset
# very simple eda on the dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

perform_EDA = False

if perform_EDA:
    # Look only at the first 1000 rows to keep the EDA cheap
    df = pd.DataFrame(dataset_pref[:1000])
    # A bare expression inside an `if` block is not rendered by the notebook,
    # so the tables are shown explicitly with IPython's built-in `display`.
    display(df.head())

    # Whitespace-separated word count for each text column
    text_columns = ["prompt", "chosen", "rejected"]
    for column in text_columns:
        df[f"{column}_length"] = df[column].str.split().str.len()

    display(df.describe())

    # One histogram per length column, each labelled so it can be read on its own
    for column in text_columns:
        ax = sns.histplot(df[f"{column}_length"], bins=20)
        ax.set(title=f"{column} length (words)", xlabel="words")
        plt.show()

training args

In [12]:
from transformers import TrainingArguments

# Define training arguments
YOUR_HF_USERNAME = "LBK95" # Set my HF username

output_dir = f"{YOUR_HF_USERNAME}/llama-7b-qlora-ultrachat-DPO" # Set output directory for fine-tuned model
per_device_train_batch_size = 4  # Set batch size per device for training
gradient_accumulation_steps = 4  # Set the number of gradient accumulation steps
optim = "paged_adamw_32bit" # Set optimizer
save_steps = 10 # Set the number of steps before saving
logging_steps = 10 # Set the number of steps before logging
learning_rate = 2e-4 # Set learning rate
max_grad_norm = 0.3 # Set max gradient norm
max_steps = 200 # Set max steps
warmup_ratio = 0.03 # Set warmup ratio for learning rate scheduler
# The plain "constant" schedule has no warmup phase, so `warmup_ratio` above would be
# silently ignored; "constant_with_warmup" keeps the constant rate and honours the warmup.
lr_scheduler_type = "constant_with_warmup" # Set learning rate scheduler type


# Create TrainingArguments object with all of our defined arguments
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    gradient_checkpointing=True,
    # The DPO data collator needs the prompt / chosen / rejected columns, which are not
    # arguments of the model's forward(); keep them instead of letting Trainer drop them.
    remove_unused_columns=False,
    push_to_hub=True,
)

Load the reference model

In [13]:
# Second copy of the same checkpoint, loaded with the same 4-bit quantization config;
# it is handed to the DPO trainer as the reference model.
model_ref = AutoPeftModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)

# The reference model must never be updated: switch off gradients on every parameter
for frozen_param in model_ref.parameters():
    frozen_param.requires_grad_(False)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
# Train on the first 1000 preference pairs only
dataset_pref_1000 = dataset_pref.select(range(0, 1000))
# Column names on the first line, (rows, columns) shape on the second
print(dataset_pref_1000.column_names, dataset_pref_1000.shape, sep="\n")

['prompt', 'chosen', 'rejected']
(1000, 3)


create DPO trainer and train

In [15]:
from trl import DPOTrainer

# Make sure the padding token can actually be embedded by the model.
# DPO batches are padded to a common length; if the tokenizer has no pad token, or its
# pad id lies outside the model's embedding matrix (e.g. a pad token that was added to the
# tokenizer while the loaded weights kept the original vocabulary size), the embedding
# lookup indexes out of range on the GPU, which surfaces as
# "CUDA error: device-side assert triggered" — the error recorded for the training cell.
# NOTE(review): presumed cause of that failure; confirm by comparing len(tokenizer)
# with the embedding size.
# This must run before DPOTrainer is built so the trainer sees the corrected pad token.
embedding_rows = model_loaded.get_input_embeddings().num_embeddings
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None or pad_token_id >= embedding_rows:
    # Padded positions are masked out of the loss, so reusing EOS as padding is safe
    tokenizer.pad_token = tokenizer.eos_token

model_loaded.train()

# NOTE(review): verify that the LoRA weights of `model_loaded` have requires_grad=True
# before training — a policy with no trainable parameters cannot be optimised.
dpo_trainer = DPOTrainer(
    model_loaded,          # policy: the SFT model that DPO will update
    model_ref,             # frozen copy of the SFT model used as the reference
    beta=0.1,              # temperature hyperparameter of DPO
    train_dataset=dataset_pref_1000, # dataset prepared above
    tokenizer=tokenizer,   # tokenizer
    args=training_arguments,    # training arguments e.g. batch size, lr, etc.
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [16]:
# Optional debugging aid: uncomment to print both model architectures for comparison.
# print("model loaded:")
# print(model_loaded)
# print()
# print("model ref:")
# print(model_ref)

In [18]:
# Run DPO training with the trainer built in the previous cells.
# NOTE(review): the saved output of this cell is "CUDA error: device-side assert triggered".
# Such asserts are typically raised by an out-of-range index (for example a token id that is
# not covered by the embedding matrix) — TODO confirm the root cause. Restart the runtime
# before retrying, and consider setting CUDA_LAUNCH_BLOCKING=1 (as the error message
# suggests) to get an accurate stack trace.
dpo_trainer.train()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
