## imports

In [1]:
from datasets import load_dataset
from trl import DPOConfig, DPOTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments, BitsAndBytesConfig
import torch

from peft import LoraConfig

from dataclasses import dataclass, field
from typing import Dict, Optional

import os

from trl import DPOTrainer
from huggingface_hub import login
import argparse

  from .autonotebook import tqdm as notebook_tqdm


## Dataset

* LLM을 통한 Feedback 자동 수집 자료 https://arxiv.org/abs/2310.01377
* Implicit Dataset

In [1]:
from datasets import load_dataset

# Hub identifier of the raw preference dataset.
raw_dataset_id = "argilla/ultrafeedback-binarized-preferences-cleaned"

# Load the raw data. No split is requested here; ds["train"] is picked out later.
ds = load_dataset(raw_dataset_id)

  from .autonotebook import tqdm as notebook_tqdm


In [29]:
def _assistant_turns(messages):
    """Return only the messages whose role is "assistant", in their original order."""
    return [turn for turn in messages if turn["role"] == "assistant"]


def _to_explicit_prompt(sample):
    """Convert one implicit-prompt record into explicit prompt/chosen/rejected fields.

    The prompt text is wrapped as a single user message, while `chosen` and
    `rejected` keep only their assistant messages.
    """
    return {
        "prompt": [{"role": "user", "content": sample["prompt"]}],
        "chosen": _assistant_turns(sample["chosen"]),
        "rejected": _assistant_turns(sample["rejected"]),
    }


# Halve the raw train split with a fixed seed: one half for SFT, the other for DPO.
ds_split = ds["train"].train_test_split(test_size = 0.5, seed = 42)

## For SFT
# Keep only the "chosen" conversation, expose it as "messages", then hold out 10%.
sft_ds = ds_split["train"]
sft_extra_columns = [name for name in sft_ds.column_names if name != "chosen"]
sft_ds = sft_ds.remove_columns(sft_extra_columns)
sft_ds = sft_ds.rename_column("chosen", "messages")
sft_ds = sft_ds.train_test_split(test_size = 0.1, seed = 42)
sft_ds["train"].to_json("./data/sft_train_dataset.json", orient = "records")
sft_ds["test"].to_json("./data/sft_test_dataset.json", orient = "records")

## Implicit Prompt -> Explicit Prompt
dpo_ds = ds_split["test"].map(_to_explicit_prompt)

# Drop everything except the three preference fields, then hold out 10%.
dpo_keep_columns = ["prompt", "chosen", "rejected"]
dpo_extra_columns = [name for name in dpo_ds.column_names if name not in dpo_keep_columns]
dpo_ds = dpo_ds.remove_columns(dpo_extra_columns)
dpo_ds = dpo_ds.train_test_split(test_size = 0.1, seed = 42)
dpo_ds["train"].to_json("./data/dpo_train_dataset.json", orient = "records")
dpo_ds["test"].to_json("./data/dpo_test_dataset.json", orient = "records")

Creating json from Arrow format: 100%|██████████| 28/28 [00:01<00:00, 15.83ba/s]
Creating json from Arrow format: 100%|██████████| 4/4 [00:00<00:00, 19.12ba/s]
Creating json from Arrow format: 100%|██████████| 28/28 [00:03<00:00,  8.52ba/s]
Creating json from Arrow format: 100%|██████████| 4/4 [00:00<00:00, 10.96ba/s]


10251164

In [None]:
# Four layouts of the same preference pair, shown for comparison.
# Each assignment overwrites the previous one; only the last binding survives.

# --- Standard format ---
## Explicit prompt (recommended): the prompt has its own field.
preference_example = dict(prompt="The sky is", chosen=" blue.", rejected=" green.")
## Implicit prompt: the prompt text is part of both completions.
preference_example = dict(chosen="The sky is blue.", rejected="The sky is green.")

# --- Conversational format ---
user_turn = {"role": "user", "content": "What color is the sky?"}
good_reply = {"role": "assistant", "content": "It is blue."}
bad_reply = {"role": "assistant", "content": "It is green."}

## Explicit prompt (recommended): the user turn lives under "prompt".
preference_example = {
    "prompt": [dict(user_turn)],
    "chosen": [good_reply],
    "rejected": [bad_reply],
}
## Implicit prompt: each conversation repeats the user turn before the reply.
preference_example = {
    "chosen": [dict(user_turn), good_reply],
    "rejected": [dict(user_turn), bad_reply],
}

## SFT

## DPO

DPOTrainer 소스 코드 https://github.com/huggingface/trl/blob/d625c5533a6b1c84d3565c8080857f6bb81c538a/trl/trainer/dpo_trainer.py#L1145-L1149

* SFT보다 learning_rate를 훨씬 작게 설정해줘야 함
* packing은 쌍으로 존재하는 데이터에서 불가능

In [2]:
# Read back the DPO json files written during data preparation.
# A bare json file is exposed under the "train" split, hence split="train" for both.
dpo_data_dir = "./data"
dpo_train_path = os.path.join(dpo_data_dir, "dpo_train_dataset.json")
dpo_test_path = os.path.join(dpo_data_dir, "dpo_test_dataset.json")

train_ds = load_dataset("json", data_files = dpo_train_path, split = "train")
test_ds = load_dataset("json", data_files = dpo_test_path, split = "train")

In [7]:
# Peek at the first DPO training record to check its fields.
train_ds[0]

{'chosen': [{'content': 'how can i create a field in django model that can have many to many relationship with any other models i have in my app',
   'role': 'user'},
  {'content': "To create a ManyToManyField in Django, you need to define a ManyToManyField on both models involved in the relationship.\n\nAssuming you already have the models for which you want to create a many-to-many relationship, you can add the ManyToManyField as follows:\n\n1. In the model where you want to define the many-to-many field, add the following line of code:\n```python\nclass MyModel(models.Model):\n    # fields and other descriptors for MyModel\n    related_models = models.ManyToManyField('OtherModel', related_name='my_model_set')\n```\nReplace 'OtherModel' with the name of the model you want to create a many-to-many relationship with, and 'my\\_model\\_set' with the name you want to use for the reverse relationship on the other model.\n\n2. In the model where you want to define the reverse many-to-many 

In [3]:
# Checkpoint shared by the model and tokenizer loads below.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# 4-bit quantization settings handed to from_pretrained.
bnb_config = BitsAndBytesConfig(
    # Quantize the weights to 4 bits.
    load_in_4bit = True,
    # Double quantization: saves roughly another 0.4 bit per parameter without
    # hurting quality.
    bnb_4bit_use_double_quant = True,
    # Quantization data type; the one used when training on a 4-bit base model.
    bnb_4bit_quant_type = "nf4",
    # Training dtype of Llama-3.1-8B. Weights are stored in 4 bits but are
    # dequantized to this format for computation (e.g. attention).
    bnb_4bit_compute_dtype = torch.bfloat16,
)

# NOTE(review): trust_remote_code=True is kept from the original cell — confirm it is needed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config = bnb_config,
    dtype = torch.bfloat16,
    device_map = "cuda:0",
    attn_implementation = "flash_attention_2",
    use_cache = False,
    low_cpu_mem_usage = True,
    trust_remote_code = True,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    use_fast = True,
    trust_remote_code = True,
)

# Reuse the EOS token for padding and pad on the left.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Jinja chat template: emits bos_token once, then every system/user/assistant
# message as "<header>\n\n" + content + eos_token. Messages with any other role
# produce no output. When add_generation_prompt is set, an empty assistant
# header is appended at the end.
LLAMA_3_CHAT_TEMPLATE = "".join((
    "{{ bos_token }}",
    "{% for message in messages %}",
    "{% if message['role'] == 'system' %}",
    "{{ '<|start_header_id|>system<|end_header_id|>\n\n' + message['content'] + eos_token }}",
    "{% elif message['role'] == 'user' %}",
    "{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] +  eos_token }}",
    "{% elif message['role'] == 'assistant' %}",
    "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n'}}",
    "{{ message['content'] +  eos_token }}",
    "{% endif %}",
    "{% endfor %}",
    "{%- if add_generation_prompt %}",
    "{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
    "{%- endif %}",
))

tokenizer.chat_template = LLAMA_3_CHAT_TEMPLATE

Loading checkpoint shards: 100%|██████████| 4/4 [00:16<00:00,  4.16s/it]


In [32]:
# DPO training hyperparameters.
training_args = DPOConfig(
    output_dir = "./results/dpo-test",
    num_train_epochs = 4,
    learning_rate = 5e-7,
    lr_scheduler_type = "cosine_with_restarts",
    lr_scheduler_kwargs = {"num_cycles": 3},
    warmup_ratio = 0.06,
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    gradient_accumulation_steps = 4,
    gradient_checkpointing = True,
    optim = "adamw_torch_fused",    ## paged_adamw_32bit uses more CPU memory instead and can become a bottleneck
    weight_decay = 0.01,
    bf16 = True,
    tf32 = True,
    # max_length bounds prompt + completion together. The prompt budget must stay
    # below it: with both set to 1024 a long prompt could use the whole window
    # and leave no tokens for the chosen/rejected completions.
    max_length = 1024,
    max_prompt_length = 512,
    logging_steps = 100,
    eval_strategy = "steps",
    eval_steps = 500,
    save_strategy = "epoch",
    report_to = "wandb",
    remove_unused_columns = False,
    run_name = "dpo_llama3",
    beta = 0.1  ## temperature of the DPO loss; the smaller it is, the more the reference model is ignored
)

# LoRA adapter settings passed to the trainer as peft_config.
# NOTE(review): 'o_proj' is absent while the other attention/MLP projections are
# listed — confirm this is intentional.
peft_config = LoraConfig(
    r = 32,
    lora_alpha = 16,
    lora_dropout = 0.05,
    target_modules = ['embed_tokens', 'q_proj', 'k_proj', 'v_proj', 'gate_proj', 'down_proj', 'up_proj', 'lm_head'],
    bias = "none",
    task_type = "CAUSAL_LM"
)

In [33]:
# With ref_model=None, the adapter is detached from the SFT + adapter model to
# act as the reference, and the policy is optimized on top of it. Two models
# never have to be loaded at once, which saves memory.
dpo_trainer = DPOTrainer(
    model = model,
    ref_model = None,
    args = training_args,
    processing_class = tokenizer,
    peft_config = peft_config,
    train_dataset = train_ds,
    eval_dataset = test_ds,
)



In [14]:
# Show the training set as held by the trainer (columns and row count).
dpo_trainer.train_dataset

Dataset({
    features: ['prompt', 'prompt_input_ids', 'chosen_input_ids', 'rejected_input_ids'],
    num_rows: 27413
})

In [13]:
# Decode the chosen completion of the trainer's first training row back to text.
first_row = dpo_trainer.train_dataset[0]
chosen_ids = first_row["chosen_input_ids"]
print(tokenizer.decode(chosen_ids))

To create a ManyToManyField in Django, you need to define a ManyToManyField on both models involved in the relationship.

Assuming you already have the models for which you want to create a many-to-many relationship, you can add the ManyToManyField as follows:

1. In the model where you want to define the many-to-many field, add the following line of code:
```python
class MyModel(models.Model):
    # fields and other descriptors for MyModel
    related_models = models.ManyToManyField('OtherModel', related_name='my_model_set')
```
Replace 'OtherModel' with the name of the model you want to create a many-to-many relationship with, and'my\_model\_set' with the name you want to use for the reverse relationship on the other model.

2. In the model where you want to define the reverse many-to-many field, add the following line of code:
```python
class OtherModel(models.Model):
    # fields and other descriptors for OtherModel
    my_model = models.ManyToManyField('MyModel', related_name='oth

In [34]:
# Start DPO training with the trainer built above.
dpo_trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mhollyriver[0m ([33mhollyriver-jbnu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference, openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/
Casting fp32 inputs back to torch.bfloat16 for flash-attn compatibility.


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7f54cc1b4e30>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f54de273350, execution_count=34 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 7f54de273230, raw_cell="dpo_trainer.train()" transformed_cell="dpo_trainer.train()
" store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2B113.198.65.241/root/HFRL/SFT_DPO/%EC%A0%95%EB%A6%AC.ipynb#X43sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


ConnectionResetError: Connection lost