## imports

In [None]:
from datasets import load_dataset
from trl import DPOConfig, DPOTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, TrainingArguments, BitsAndBytesConfig
import torch

from peft import LoraConfig

from dataclasses import dataclass, field
from typing import Dict, Optional

import os

from trl import DPOTrainer
from huggingface_hub import login
import argparse

  from .autonotebook import tqdm as notebook_tqdm


## Dataset

* LLM을 통한 Feedback 자동 수집 자료 https://arxiv.org/abs/2310.01377
* Implicit Dataset

In [1]:
from datasets import load_dataset

## 원시 데이터 로드
ds = load_dataset("argilla/ultrafeedback-binarized-preferences-cleaned")

  from .autonotebook import tqdm as notebook_tqdm


In [29]:
ds_split = ds["train"].train_test_split(test_size = 0.5, seed = 42)

## For SFT
sft_ds = ds_split["train"]
sft_ds = sft_ds.rename_column("chosen", "messages").remove_columns([col for col in sft_ds.column_names if col != "chosen"]).train_test_split(test_size = 0.1, seed = 42)
sft_ds["train"].to_json("./data/sft_train_dataset.json", orient = "records")
sft_ds["test"].to_json("./data/sft_test_dataset.json", orient = "records")

## Implicit Prompt -> Explicit Prompt
dpo_ds = ds_split["test"].map(
    lambda sample: {
        "prompt": [{"role": "user", "content": sample["prompt"]}],
        "chosen": [content for content in sample["chosen"] if content["role"] == "assistant"],
        "rejected": [content for content in sample["rejected"] if content["role"] == "assistant"]
    }
)

dpo_ds = dpo_ds.remove_columns([col for col in dpo_ds.column_names if col not in ["prompt", "chosen", "rejected"]]).train_test_split(test_size = 0.1, seed = 42)
dpo_ds["train"].to_json("./data/dpo_train_dataset.json", orient = "records")
dpo_ds["test"].to_json("./data/dpo_test_dataset.json", orient = "records")

Creating json from Arrow format: 100%|██████████| 28/28 [00:01<00:00, 15.83ba/s]
Creating json from Arrow format: 100%|██████████| 4/4 [00:00<00:00, 19.12ba/s]
Creating json from Arrow format: 100%|██████████| 28/28 [00:03<00:00,  8.52ba/s]
Creating json from Arrow format: 100%|██████████| 4/4 [00:00<00:00, 10.96ba/s]


10251164

In [None]:
# Standard format
## Explicit prompt (recommended)
preference_example = {"prompt": "The sky is", "chosen": " blue.", "rejected": " green."}
# Implicit prompt
preference_example = {"chosen": "The sky is blue.", "rejected": "The sky is green."}

# Conversational format
## Explicit prompt (recommended)
preference_example = {"prompt": [{"role": "user", "content": "What color is the sky?"}],
                      "chosen": [{"role": "assistant", "content": "It is blue."}],
                      "rejected": [{"role": "assistant", "content": "It is green."}]}
## Implicit prompt
preference_example = {"chosen": [{"role": "user", "content": "What color is the sky?"},
                                 {"role": "assistant", "content": "It is blue."}],
                      "rejected": [{"role": "user", "content": "What color is the sky?"},
                                   {"role": "assistant", "content": "It is green."}]}

## SFT

* 일단 기존 방식대로 수행

## DPO

DPOTrainer 소스 코드 https://github.com/huggingface/trl/blob/d625c5533a6b1c84d3565c8080857f6bb81c538a/trl/trainer/dpo_trainer.py#L1145-L1149

* SFT보다 learning_rate를 훨씬 작게 설정해줘야 함
* packing은 쌍으로 존재하는 데이터에서 불가능. padding_free는 가능

In [13]:
print(tokenizer.decode(dpo_trainer.train_dataset[0]["chosen_input_ids"]))

To create a ManyToManyField in Django, you need to define a ManyToManyField on both models involved in the relationship.

Assuming you already have the models for which you want to create a many-to-many relationship, you can add the ManyToManyField as follows:

1. In the model where you want to define the many-to-many field, add the following line of code:
```python
class MyModel(models.Model):
    # fields and other descriptors for MyModel
    related_models = models.ManyToManyField('OtherModel', related_name='my_model_set')
```
Replace 'OtherModel' with the name of the model you want to create a many-to-many relationship with, and'my\_model\_set' with the name you want to use for the reverse relationship on the other model.

2. In the model where you want to define the reverse many-to-many field, add the following line of code:
```python
class OtherModel(models.Model):
    # fields and other descriptors for OtherModel
    my_model = models.ManyToManyField('MyModel', related_name='oth

## 모델 테스트

In [1]:
import os
import torch
from random import randint
from datasets import load_dataset
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


`-` 테스트에 사용될 데이터

In [3]:
test_dataset = load_dataset("json", data_files = os.path.join("", "data/dpo_test_dataset.json"), split = "train")
random_idx = randint(0, len(test_dataset))

In [11]:
for k, v in test_dataset[random_idx].items():
    print(f"{k}: {v[0]["content"]}\n\n")

prompt: In this task you will be given a list of numbers. You should remove any number that is not an integer (whole number). If every number is not an whole number then an empty list ("[]") should be returned. Otherwise, answer with the list of whole numbers separated by comma inside brackets.

Example Input: [-71, -72, -74.582, -81, 45.11, 19.803, 39.621, -1.112, -11.601, -54]
Example Output: [-71, -72, -81, -54]

Example Input: [57, -7.215, 86, 28, -14.904, 40.471, -10.607]
Example Output: [57, 86, 28]

Example Input: [19.171, -24.194, -31, 93, 22.236, 13]
Example Output:


chosen: [-31, 93, 13]


rejected: []




In [14]:
messages = test_dataset[random_idx]["prompt"]
chosen = test_dataset[random_idx]["chosen"]

`-` 원시 모델 결과

In [39]:
origin_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

origin_model = AutoModelForCausalLM.from_pretrained(origin_model_name, use_cache = False, device_map = "cuda:0", dtype = torch.bfloat16)
origin_tokenizer = AutoTokenizer.from_pretrained(origin_model_name, use_fast = True)
origin_tokenizer.pad_token = origin_tokenizer.eos_token
origin_tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.31s/it]


In [40]:
terminators = [origin_tokenizer.eos_token_id]

input_ids = origin_tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,   ## 생성 시에 맨 뒤 generation 시작하라는 프롬프트 삽입
    return_tensors = "pt").to(origin_model.device)

outputs = origin_model.generate(
    input_ids,
    max_new_tokens = 512,
    eos_token_id = terminators,
    do_sample = True,
    temperature = 0.7,
    top_p = 0.95
)

response = outputs[0][input_ids.shape[-1]:]
print(f"prompt:\n{messages[0]["content"]}\n")
print(f"chosen:\n{chosen[0]["content"]}\n")
print(f"generate:\n{origin_tokenizer.decode(response, skip_special_tokens = True)}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prompt:
In this task you will be given a list of numbers. You should remove any number that is not an integer (whole number). If every number is not an whole number then an empty list ("[]") should be returned. Otherwise, answer with the list of whole numbers separated by comma inside brackets.

Example Input: [-71, -72, -74.582, -81, 45.11, 19.803, 39.621, -1.112, -11.601, -54]
Example Output: [-71, -72, -81, -54]

Example Input: [57, -7.215, 86, 28, -14.904, 40.471, -10.607]
Example Output: [57, 86, 28]

Example Input: [19.171, -24.194, -31, 93, 22.236, 13]
Example Output:

chosen:
[-31, 93, 13]

generate:
### Filtering Whole Numbers from a List

Here's a Python function that filters out non-integer numbers from a given list:

```python
def filter_whole_numbers(numbers):
    """
    Returns a list of whole numbers from the input list.
    If no whole numbers are found, an empty list is returned.
    
    Args:
        numbers (list): A list of numbers.
    
    Returns:
        list:

In [41]:
del origin_model
del origin_tokenizer
torch.cuda.empty_cache()

`-` 파인튜닝 후 모델

In [42]:
SFT_model_name = "./results/test"

SFT_model = AutoModelForCausalLM.from_pretrained(SFT_model_name, use_cache = False, device_map = "cuda:0", dtype = torch.bfloat16)
SFT_tokenizer = AutoTokenizer.from_pretrained(SFT_model_name, use_fast = True)
SFT_tokenizer.pad_token = SFT_tokenizer.eos_token
SFT_tokenizer.pad_token_id = SFT_tokenizer.eos_token_id
SFT_tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


In [43]:
terminators = [SFT_tokenizer.eos_token_id]

input_ids = SFT_tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,   ## 생성 시에 맨 뒤 generation 시작하라는 프롬프트 삽입
    return_tensors = "pt").to(SFT_model.device)

outputs = SFT_model.generate(
    input_ids,
    max_new_tokens = 512,       ## prompt 제외 출력 토큰 수. max_length는 prompt 포함
    eos_token_id = terminators,
    do_sample = True,
    temperature = 0.7,
    top_p = 0.95
)

response = outputs[0][input_ids.shape[-1]:]
print(f"prompt:\n{messages[0]["content"]}\n")
print(f"chosen:\n{chosen[0]["content"]}\n")
print(f"generate:\n{SFT_tokenizer.decode(response, skip_special_tokens = True)}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prompt:
In this task you will be given a list of numbers. You should remove any number that is not an integer (whole number). If every number is not an whole number then an empty list ("[]") should be returned. Otherwise, answer with the list of whole numbers separated by comma inside brackets.

Example Input: [-71, -72, -74.582, -81, 45.11, 19.803, 39.621, -1.112, -11.601, -54]
Example Output: [-71, -72, -81, -54]

Example Input: [57, -7.215, 86, 28, -14.904, 40.471, -10.607]
Example Output: [57, 86, 28]

Example Input: [19.171, -24.194, -31, 93, 22.236, 13]
Example Output:

chosen:
[-31, 93, 13]

generate:
[93, 13]


In [None]:
outputs = SFT_model.generate(
    input_ids,
    max_new_tokens = 512,       ## prompt 제외 출력 토큰 수. max_length는 prompt 포함
    eos_token_id = terminators,
    do_sample = True,
    temperature = 0.7,
    top_p = 0.95
)

response = outputs[0][input_ids.shape[-1]:]
print(f"prompt:\n{messages[0]["content"]}\n")
print(f"chosen:\n{chosen[0]["content"]}\n")
print(f"generate:\n{SFT_tokenizer.decode(response, skip_special_tokens = True)}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prompt:
In this task you will be given a list of numbers. You should remove any number that is not an integer (whole number). If every number is not an whole number then an empty list ("[]") should be returned. Otherwise, answer with the list of whole numbers separated by comma inside brackets.

Example Input: [-71, -72, -74.582, -81, 45.11, 19.803, 39.621, -1.112, -11.601, -54]
Example Output: [-71, -72, -81, -54]

Example Input: [57, -7.215, 86, 28, -14.904, 40.471, -10.607]
Example Output: [57, 86, 28]

Example Input: [19.171, -24.194, -31, 93, 22.236, 13]
Example Output:

chosen:
[-31, 93, 13]

generate:
[-31, 93, 13]


In [45]:
del SFT_model
del SFT_tokenizer
torch.cuda.empty_cache()

`-` DPO

In [None]:
dpo_model_name = "./results/dpo-vanilla"

dpo_model = AutoModelForCausalLM.from_pretrained(dpo_model_name, use_cache = False, device_map = "cuda:0", dtype = torch.bfloat16)
dpo_tokenizer = AutoTokenizer.from_pretrained(dpo_model_name, use_fast = True)
dpo_tokenizer.pad_token = dpo_tokenizer.eos_token
dpo_tokenizer.pad_token_id = dpo_tokenizer.eos_token_id
dpo_tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.13s/it]


In [None]:
terminators = [dpo_tokenizer.eos_token_id]

input_ids = dpo_tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,   ## 생성 시에 맨 뒤 generation 시작하라는 프롬프트 삽입
    return_tensors = "pt").to(dpo_model.device)

outputs = dpo_model.generate(
    input_ids,
    max_new_tokens = 512,       ## prompt 제외 출력 토큰 수. max_length는 prompt 포함
    eos_token_id = terminators,
    do_sample = True,
    temperature = 0.7,
    top_p = 0.95
)

response = outputs[0][input_ids.shape[-1]:]
print(f"prompt:\n{messages[0]["content"]}\n")
print(f"chosen:\n{chosen[0]["content"]}\n")
print(f"생성답변:\n{dpo_tokenizer.decode(response, skip_special_tokens = True)}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prompt:
[{'content': 'In this task you will be given a list of numbers. You should remove any number that is not an integer (whole number). If every number is not an whole number then an empty list ("[]") should be returned. Otherwise, answer with the list of whole numbers separated by comma inside brackets.\n\nExample Input: [-71, -72, -74.582, -81, 45.11, 19.803, 39.621, -1.112, -11.601, -54]\nExample Output: [-71, -72, -81, -54]\n\nExample Input: [57, -7.215, 86, 28, -14.904, 40.471, -10.607]\nExample Output: [57, 86, 28]\n\nExample Input: [19.171, -24.194, -31, 93, 22.236, 13]\nExample Output:', 'role': 'user'}]

chosen:
[{'content': '[-31, 93, 13]', 'role': 'assistant'}]

생성답변:
Here is a Python solution for this task:

```python
def filter_integers(numbers):
    """
    This function filters out non-integer numbers from a list.
    
    Args:
    numbers (list): A list of numbers.
    
    Returns:
    list: A list of integers separated by comma inside brackets if every number in 

In [35]:
del dpo_model
del dpo_tokenizer
torch.cuda.empty_cache()

`-` APO

In [36]:
apo_model_name = "./results/dpo-test"

apo_model = AutoModelForCausalLM.from_pretrained(apo_model_name, use_cache = False, device_map = "cuda:0", dtype = torch.bfloat16)
apo_tokenizer = AutoTokenizer.from_pretrained(apo_model_name, use_fast = True)
apo_tokenizer.pad_token = apo_tokenizer.eos_token
apo_tokenizer.pad_token_id = apo_tokenizer.eos_token_id
apo_tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.36s/it]


In [37]:
terminators = [apo_tokenizer.eos_token_id]

input_ids = apo_tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,   ## 생성 시에 맨 뒤 generation 시작하라는 프롬프트 삽입
    return_tensors = "pt").to(apo_model.device)

outputs = apo_model.generate(
    input_ids,
    max_new_tokens = 512,       ## prompt 제외 출력 토큰 수. max_length는 prompt 포함
    eos_token_id = terminators,
    do_sample = True,
    temperature = 0.7,
    top_p = 0.95
)

response = outputs[0][input_ids.shape[-1]:]
print(f"prompt:\n{messages}\n")
print(f"chosen:\n{chosen}\n")
print(f"생성답변:\n{apo_tokenizer.decode(response, skip_special_tokens = True)}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prompt:
[{'content': 'In this task you will be given a list of numbers. You should remove any number that is not an integer (whole number). If every number is not an whole number then an empty list ("[]") should be returned. Otherwise, answer with the list of whole numbers separated by comma inside brackets.\n\nExample Input: [-71, -72, -74.582, -81, 45.11, 19.803, 39.621, -1.112, -11.601, -54]\nExample Output: [-71, -72, -81, -54]\n\nExample Input: [57, -7.215, 86, 28, -14.904, 40.471, -10.607]\nExample Output: [57, 86, 28]\n\nExample Input: [19.171, -24.194, -31, 93, 22.236, 13]\nExample Output:', 'role': 'user'}]

chosen:
[{'content': '[-31, 93, 13]', 'role': 'assistant'}]

생성답변:
Here is a Python function that accomplishes this task:

```python
def remove_floats(numbers):
    """
    This function removes any number that is not an integer from the input list.
    
    If every number is not an integer, it returns an empty list.
    
    Otherwise, it returns the list of integers sep

In [38]:
del apo_model
del apo_tokenizer
torch.cuda.empty_cache()