In [7]:
import os
import torch
import sys, pathlib
from transformers import AutoModelForCausalLM, AutoTokenizer
# Put the local Self_play_DRPO checkout at the front of sys.path before `trl` is imported below.
# NOTE(review): presumably this is what makes DRPOTrainer/DRPOConfig importable from `trl` — confirm.
LOCAL_TRL_PARENT = "/workspace/Self_play_DRPO"
if LOCAL_TRL_PARENT not in sys.path:
    sys.path.insert(0, LOCAL_TRL_PARENT)
import llm_blender
# Module-level PairRM ranker; get_preference() reads this `blender` global when comparing responses.
blender = llm_blender.Blender()
blender.loadranker("llm-blender/PairRM") 
from trl import (
    DPOTrainer,
    DPOConfig,
    ModelConfig,
    DRPOTrainer,
    DRPOConfig,
)
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
from trl.data_utils import apply_chat_template
from datasets import load_dataset, concatenate_datasets, DatasetDict
# Directory passed to load_dataset as its cache location.
data_cache_path = "/workspace/dataset"
# Train split of the prompt dataset; the cells visible in this notebook only read its 'prompt' column.
ultrafeedback_ds = load_dataset('august66/DRPO_data_from_ultrafeed_new_template', split="train", cache_dir=data_cache_path)



Successfully loaded ranker from  /root/.cache/huggingface/hub/llm-blender/PairRM


In [8]:
# Pull out the whole 'prompt' column.
# NOTE(review): `prompts` is rebound to a 5-row Dataset in a later cell before being used,
# so this assignment looks like leftover exploration — confirm and remove if so.
prompts = ultrafeedback_ds['prompt']

In [None]:
# Use the GPU when one is visible, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Policy model used to generate the candidate responses, and where its weights are cached.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
cache_path = "/workspace/model_cache"

# ModelConfig is only used here to obtain a torch_dtype setting.
# NOTE(review): the positional argument is presumably `model_name_or_path` — confirm against the TRL version in use.
model_args = ModelConfig(model_name)
# Keep "auto"/None untouched; any other value is treated as the name of a torch attribute (e.g. a dtype).
model_torch_dtype = (model_args.torch_dtype if model_args.torch_dtype in ["auto", None] else getattr(torch, model_args.torch_dtype))

lm_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=cache_path,
    torch_dtype=model_torch_dtype,
    trust_remote_code=True,
).to(device)
# Left padding: generate_text() strips the prompt by slicing off the first `input_ids.shape[1]`
# positions of every row, which requires the padded prompts to all end at the same column.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side="left",
    use_fast=True,
    trust_remote_code=True,
    cache_dir=cache_path,
)

# Fall back to TRL's simple chat template only when the tokenizer does not ship one.
if tokenizer.chat_template is None:
    tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE


def generate_text(prompts, tokenizer, model, temperature, num_return_sequences=2, max_new_tokens=2048):
    """Generate completions for a batch of already-templated prompt strings.

    Args:
        prompts: Sequence of prompt strings; tokenized together with padding
            and truncation enabled.
        tokenizer: Tokenizer used to encode the prompts and decode the output.
        model: Object exposing ``.device`` and ``.generate(**kwargs)``.
        temperature: Sampling temperature. Values > 0 enable sampling; any
            other value disables sampling (``do_sample=False``).
        num_return_sequences: Number of completions returned per prompt
            (default 2, the value callers in this notebook rely on).
        max_new_tokens: Upper bound on generated tokens per completion.

    Returns:
        A flat list of decoded strings containing ``num_return_sequences``
        consecutive entries per prompt, in prompt order. Only the tokens after
        the (padded) prompt are decoded.
    """
    # Nothing to tokenize: keep the output shape contract without touching the model.
    if len(prompts) == 0:
        return []

    inputs = tokenizer(
        prompts,
        padding=True,
        truncation=True,
        return_tensors="pt"
    ).to(model.device)

    sampling = temperature > 0

    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "eos_token_id": tokenizer.eos_token_id,
        "pad_token_id": tokenizer.pad_token_id,
        "do_sample": sampling,
        # Without sampling (and without beam search) every returned sequence
        # would be identical and transformers rejects a request for more than
        # one, so ask for a single sequence and replicate it after decoding.
        "num_return_sequences": num_return_sequences if sampling else 1,
    }

    if sampling:
        generate_kwargs["temperature"] = temperature

    outputs = model.generate(
        **inputs,
        **generate_kwargs
    )

    # Drop the prompt columns; with left padding every row's prompt ends at the same index.
    generated_ids = outputs[:, inputs.input_ids.shape[1]:]
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    if not sampling and num_return_sequences > 1:
        # Preserve the "N consecutive responses per prompt" layout for callers.
        decoded = [text for text in decoded for _ in range(num_return_sequences)]
    return decoded

def get_preference(prompts, response_1, response_2):
    """Order each pair of responses by the PairRM comparison result.

    Args:
        prompts: List of prompt strings, one per response pair.
        response_1: List of first candidate responses, aligned with ``prompts``.
        response_2: List of second candidate responses, aligned with ``prompts``.

    Returns:
        Tuple ``(a1, a2)`` of lists aligned with the inputs. For every index,
        ``a1`` holds the response for which ``blender.compare`` returned a
        truthy value in favour of ``response_1`` (otherwise ``response_2``),
        and ``a2`` holds the other response.
    """
    compare_result = blender.compare(prompts, response_1, response_2)
    a1, a2 = [], []
    # Apply every comparison to its own pair; looking only at element 0 would
    # mis-order the rest of the batch whenever more than one pair is passed.
    for first_is_better, first, second in zip(compare_result, response_1, response_2):
        if first_is_better:
            a1.append(first)
            a2.append(second)
        else:
            a1.append(second)
            a2.append(first)
    return a1, a2

def truncate_human(texts):
    r"""Return each text cut off just before its first "\n\nHuman" marker.

    Texts that do not contain the marker are returned unchanged.
    """
    marker = "\n\nHuman"
    truncated = []
    for text in texts:
        head, _, _ = text.partition(marker)
        truncated.append(head)
    return truncated

def extract_dialogue(examples: dict, tokenizer, model, temperature: float) -> dict:
    """Render the batch's prompts with the chat template and generate responses.

    Args:
        examples: Batch dict; its "prompt" entry is iterated and each item is
            passed to ``apply_chat_template`` as ``{"prompt": item}``.
        tokenizer: Tokenizer forwarded to ``apply_chat_template`` and ``generate_text``.
        model: Model forwarded to ``generate_text``.
        temperature: Sampling temperature forwarded to ``generate_text``.

    Returns:
        ``{"generated_response": [...]}`` with the generated texts after
        ``truncate_human`` has been applied.
    """
    rendered_prompts = []
    for conversation in examples["prompt"]:
        templated = apply_chat_template({"prompt": conversation}, tokenizer)
        rendered_prompts.append(templated["prompt"])

    raw_responses = generate_text(rendered_prompts, tokenizer, model, temperature)
    return {"generated_response": truncate_human(raw_responses)}

def prepare_dataset(batch, tokenizer = tokenizer, model = lm_model, temperature = 1.0):
    """Build ranked response pairs for a batch of prompts.

    Generated responses are consumed two at a time (indices ``2*i`` and
    ``2*i + 1`` belong to prompt ``i``) and ordered via ``get_preference``.

    Args:
        batch: Batch dict whose "prompt" entries are conversations; the first
            message's "content" is used as the prompt text.
        tokenizer: Tokenizer forwarded to ``extract_dialogue``.
        model: Model forwarded to ``extract_dialogue``.
        temperature: Sampling temperature forwarded to ``extract_dialogue``.

    Returns:
        Dict of columns: "prompt" (single user message per row), "a1" and "a2"
        (single assistant message per row, in the order returned by
        ``get_preference``), and "rank" (constant 1 for every prompt).
    """
    generated = extract_dialogue(batch, tokenizer, model, temperature)['generated_response']
    pair_count = len(generated) // 2
    user_texts = [turns[0]["content"] for turns in batch["prompt"]]

    first_ranked = []
    second_ranked = []
    for idx in range(pair_count):
        question = user_texts[idx]
        candidate_a = generated[2 * idx]
        candidate_b = generated[2 * idx + 1]
        top, bottom = get_preference([question], [candidate_a], [candidate_b])
        first_ranked.append(top)
        second_ranked.append(bottom)

    def as_message(role, text):
        # Wrap one utterance as a single-message conversation.
        return [{"role": role, "content": text}]

    return {
        "prompt": [as_message("user", text) for text in user_texts],
        "a1": [as_message("assistant", item[0]) for item in first_ranked],
        "a2": [as_message("assistant", item[0]) for item in second_ranked],
        "rank": [1] * len(user_texts),
    }

In [84]:
# First 5 rows, 'prompt' column only — a small subset for trying out the generation pipeline.
# NOTE(review): this rebinds `prompts`, which an earlier cell set to the full 'prompt' column.
prompts = ultrafeedback_ds.select_columns(['prompt']).select(range(5))

In [98]:
# NOTE(review): this repeats the prepare_dataset definition from the model-setup cell;
# whichever cell ran last wins. Keep a single definition once the notebook is cleaned up.
def prepare_dataset(batch, tokenizer = tokenizer, model = lm_model, temperature = 1.0):
    """Generate two responses per prompt and order each pair with ``get_preference``.

    Args:
        batch: Batch dict; each "prompt" entry is a conversation whose first
            message's "content" is taken as the prompt text.
        tokenizer: Tokenizer forwarded to ``extract_dialogue``.
        model: Model forwarded to ``extract_dialogue``.
        temperature: Sampling temperature forwarded to ``extract_dialogue``.

    Returns:
        Dict with "prompt", "a1", "a2" (each row a one-message conversation)
        and "rank" (1 for every prompt).
    """
    responses = extract_dialogue(batch, tokenizer, model, temperature)['generated_response']
    questions = [conversation[0]["content"] for conversation in batch["prompt"]]

    # Even positions hold each prompt's first response, odd positions its second.
    ordered_pairs = []
    for i, (res1, res2) in enumerate(zip(responses[0::2], responses[1::2])):
        ordered_pairs.append(get_preference([questions[i]], [res1], [res2]))

    result = {"prompt": [], "a1": [], "a2": [], "rank": []}
    for question in questions:
        result["prompt"].append([{"role": "user", "content": question}])
        result["rank"].append(1)
    for a1, a2 in ordered_pairs:
        result["a1"].append([{"role": "assistant", "content": a1[0]}])
        result["a2"].append([{"role": "assistant", "content": a2[0]}])
    return result

# Run generation + ranking over `prompts` in batches of up to 64 rows; prepare_dataset
# receives a dict of columns and returns the new columns for the same rows.
# NOTE(review): despite the name, `df_temp` is the object returned by `.map`, not a DataFrame.
df_temp = prompts.map(
    prepare_dataset,
    batched = True,
    batch_size = 64
)

Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 13.17it/s]
Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 13.78it/s]
Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 13.44it/s]
Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 13.31it/s]
Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 13.40it/s]
Map: 100%|██████████| 5/5 [00:08<00:00,  1.64s/ examples]


In [100]:
# Upload the generated preference pairs to the Hugging Face Hub dataset repo.
# NOTE(review): presumably requires an authenticated Hub session with write access — confirm.
df_temp.push_to_hub('august66/drpo_ultrafeedback_qwen2.5-1.5b')

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1025.00ba/s]


Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  3.51it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/august66/drpo_ultrafeedback_qwen2.5-1.5b/commit/45512c9f78edcc10cc1bf9442b9ef4110e9a2c69', commit_message='Upload dataset', commit_description='', oid='45512c9f78edcc10cc1bf9442b9ef4110e9a2c69', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/august66/drpo_ultrafeedback_qwen2.5-1.5b', endpoint='https://huggingface.co', repo_type='dataset', repo_id='august66/drpo_ultrafeedback_qwen2.5-1.5b'), pr_revision=None, pr_num=None)

In [99]:
# Inspect the "a1" responses.
# NOTE(review): this displays the whole column; slicing a few rows would keep the saved output small.
df_temp['a1']

[[{'content': 'Sure! Below is a complete C++ program that does what you\'ve asked for. It prompts the user to enter the name of a country and then checks whether that country borders the Mediterranean Sea.\n\n```cpp\n#include <iostream>\n#include <string>\n\nusing namespace std;\n\nint main() {\n    string country;\n    \n    // Prompt user for input\n    cout << "Enter the name of a country: ";\n    getline(cin, country); // Use getline instead of just cin\n\n    // Check if the entered country borders the Mediterranean Sea\n    if (country == "Italy" || country == "France" || \n        country == "Greece" || country == "Cyprus" || \n        country == "Spain" || country == "Albania") {\n        cout << "The country " << country << " borders the Mediterranean Sea." << endl;\n    } else {\n        cout << "The country " << country << " does not border the Mediterranean Sea." << endl;\n    }\n\n    return 0;\n}\n```\n\n### Explanation:\n\n1. **Include Headers**: The `#include` directive

In [None]:
import re
import torch
from dataclasses import dataclass
from typing import Optional
from datasets import load_dataset, DatasetDict
from transformers import HfArgumentParser, AutoTokenizer, AutoModelForCausalLM
from trl.data_utils import apply_chat_template


import torch
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForCausalLM

# NOTE(review): this cell rebinds `device` and `tokenizer` from the earlier cells, and the
# functions defined below it (generate_text, truncate_human, extract_dialogue) shadow the
# earlier definitions of the same names.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Name of the column added to every temperature split for this model's responses.
METHOD_NAME = "drpo-hh-1e-0066004"
# Checkpoint whose generations are evaluated.
MODEL_NAME = "Eehan/pythia-1b-deduped-hh-drpo-base-1e-temp0.66-beta-0.04"  
# Repo the merged dataset is pushed to. It is the same repo as DATASET_NEED_MERGE,
# so the push writes back to the dataset that was loaded for merging.
OUTPUT_DATASET_NAME = "Eehan/eval-hh"
# Source of the evaluation prompts and the split taken from it.
INPUT_DATASET_NAME = "Kyleyee/train_data_Helpful_explicit_prompt"  
INPUT_DATASET_SPLIT = "test"  
# Existing dataset that receives the new METHOD_NAME column.
DATASET_NEED_MERGE = "Eehan/eval-hh"
# Temperatures to generate at; 0 disables sampling in generate_text (do_sample = temperature > 0).
TEMPERATURES = [0, 0.25, 0.5, 0.75, 1]  

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Left padding so that slicing off `input_ids.shape[1]` columns in generate_text removes exactly the prompt.
tokenizer.padding_side = "left"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
# Show which special tokens (e.g. pad/eos) the tokenizer defines.
print(tokenizer.special_tokens_map)
def generate_text(prompts, tokenizer, model, temperature):
    """Generate one completion per prompt and return only the newly generated text.

    Args:
        prompts: Prompt strings, tokenized as one padded and truncated batch.
        tokenizer: Tokenizer used for encoding and decoding.
        model: Object exposing ``.device`` and ``.generate(**kwargs)``.
        temperature: Values > 0 enable sampling at that temperature; otherwise
            sampling is disabled and no temperature is passed.

    Returns:
        List of decoded strings (special tokens skipped), covering only the
        positions after the padded prompt.
    """
    encoded = tokenizer(prompts, padding=True, truncation=True, return_tensors="pt").to(model.device)

    use_sampling = temperature > 0
    decoding_options = dict(
        max_new_tokens=256,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=use_sampling,
    )
    # Temperature is only forwarded when sampling is on.
    if use_sampling:
        decoding_options["temperature"] = temperature

    sequences = model.generate(**encoded, **decoding_options)

    # Everything past the prompt width is newly generated.
    prompt_width = encoded.input_ids.shape[1]
    continuation_ids = sequences[:, prompt_width:]
    return tokenizer.batch_decode(continuation_ids, skip_special_tokens=True)

def truncate_human(texts):
    r"""Keep only the part of each text that precedes its first "\n\nHuman"."""
    def before_marker(text):
        # A single split is enough: only the leading segment is kept.
        return text.split("\n\nHuman", 1)[0]

    return list(map(before_marker, texts))

def extract_dialogue(examples: dict, tokenizer, model, temperature: float) -> dict:
    """Generate a response for every prompt in a batch.

    Each item of ``examples["prompt"]`` is passed through
    ``apply_chat_template`` (as ``{"prompt": item}``); the resulting "prompt"
    strings are sent to ``generate_text`` and the outputs are trimmed with
    ``truncate_human``.

    Returns:
        ``{"generated_response": [...]}`` holding the trimmed generations.
    """
    def render(conversation):
        return apply_chat_template({"prompt": conversation}, tokenizer)["prompt"]

    model_inputs = list(map(render, examples["prompt"]))
    completions = generate_text(model_inputs, tokenizer, model, temperature)
    trimmed = truncate_human(completions)
    return {"generated_response": trimmed}

# Evaluation driver: generate responses at every temperature in TEMPERATURES and attach
# them as a METHOD_NAME column to the matching split of the merged dataset, then push it.
# NOTE(review): inside a notebook kernel this guard is presumably always true — confirm it is intentional.
if __name__ == "__main__":

    # Evaluation prompts: the configured split, without the reference "rejected"/"chosen" columns.
    dataset = load_dataset(INPUT_DATASET_NAME)[INPUT_DATASET_SPLIT]
    # Existing results; indexed below by split name "temperature_<temp>".
    dataset_merge = load_dataset(DATASET_NEED_MERGE)
    dataset = dataset.remove_columns(["rejected", "chosen"])
    # dataset_merge = dataset_merge.remove_columns(["drdpo", "dpo_hinge", "drpo-hh-0.82e-0066004"])

    for temp in TEMPERATURES:
        # The lambda reads `temp` when map invokes it.
        # NOTE(review): assumes map runs within this loop iteration (non-streaming dataset) — confirm.
        processed_shard = dataset.map(
            lambda examples: extract_dialogue(examples, tokenizer, model, temp),
            batched=True,
            batch_size=64
        )
        # NOTE(review): assumes a "temperature_<temp>" split already exists, has the same rows in the
        # same order as `dataset`, and has no METHOD_NAME column yet — verify before running.
        dataset_merge[f"temperature_{temp}"] = dataset_merge[f"temperature_{temp}"].add_column(
            METHOD_NAME,
            processed_shard["generated_response"]
        )


    # OUTPUT_DATASET_NAME equals DATASET_NEED_MERGE above, so this writes back to the source repo.
    dataset_merge.push_to_hub(OUTPUT_DATASET_NAME)