In [4]:
from datasets import load_from_disk

# Read the saved dataset from disk, then narrow it down to its "test" split.
ds = load_from_disk("./data/sample_pairs")
ds = ds["test"]
ds

Dataset({
    features: ['chosen', 'rejected', 'chosen_rank', 'rejected_rank', 'top_level_parent', 'split', 'chosen_prompt', 'rejected_prompt'],
    num_rows: 1000
})

In [5]:
import polars as pl

# Convert the loaded dataset into a polars DataFrame.
# The guard keeps this cell re-runnable: once `ds` has already been converted,
# calling `.to_polars()` on the resulting DataFrame again would fail.
if not isinstance(ds, pl.DataFrame):
    ds = ds.to_polars()
ds

chosen,rejected,chosen_rank,rejected_rank,top_level_parent,split,chosen_prompt,rejected_prompt
i64,i64,i64,i64,i64,str,str,str
31042685,31041065,1,3,31012025,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
39808386,39806045,1,2,39758712,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
34895313,34894926,1,2,34894667,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
27699072,27697619,1,2,27696055,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
5348842,5347984,1,3,5347543,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
…,…,…,…,…,…,…,…
15301562,15301600,1,2,15301151,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
36831606,36830894,1,3,36829854,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
29624080,29623860,1,2,29622770,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"
31660200,31659387,1,3,31657006,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …"


In [1]:
# Run the following in the terminal:
# uv run python -m sglang.launch_server --model-path ./models/llama_32_8b_merged

In [6]:
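# Launch the server in a terminal before running this cell, e.g.:
# python -m sglang.launch_server --model LxzGordon/URM-LLaMa-3.1-8B --is-embedding
# NOTE(review): the previous cell launches ./models/llama_32_8b_merged instead — confirm which model is being served.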

import requests

# Base address of the locally running server; run_inference_sglang appends "/judge" to it.
url = "http://127.0.0.1:30000"


def run_inference_sglang(prompts: list[str]) -> list[float]:
    """Score prompts with the server reachable at the module-level ``url``.

    All prompts are sent in a single POST to the ``/judge`` endpoint under the
    ``"conv"`` key. The response body is parsed as JSON and iterated; for each
    item the first element of its ``"embedding"`` entry is taken as the score.

    Args:
        prompts: Prompt strings to score.

    Returns:
        One value per item in the server's response, in response order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.post(url + "/judge", json={"conv": prompts})
    # Fail loudly on an HTTP error instead of indexing into an error payload,
    # which would otherwise surface as a confusing TypeError/KeyError below.
    response.raise_for_status()

    return [item["embedding"][0] for item in response.json()]


# Two throwaway prompts used as a quick smoke test of the scoring endpoint.
sample_prompts = ["random prompt 1", "random prompt 2"]

run_inference_sglang(sample_prompts)

[-7.40625, -6.90625]

In [11]:
# Score both sides of every pair through the SGLang-backed helper.
chosen_rewards = run_inference_sglang(ds.get_column("chosen_prompt").to_list())
rejected_rewards = run_inference_sglang(ds.get_column("rejected_prompt").to_list())

# Attach the scores to the frame as two new columns.
ds = ds.with_columns(
    pl.Series("chosen_reward", chosen_rewards),
    pl.Series("rejected_reward", rejected_rewards),
)


chosen,rejected,chosen_rank,rejected_rank,top_level_parent,split,chosen_prompt,rejected_prompt,chosen_reward,rejected_reward
i64,i64,i64,i64,i64,str,str,str,f64,f64
31042685,31041065,1,3,31012025,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-5.53125,-9.25
39808386,39806045,1,2,39758712,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-13.5,-17.25
34895313,34894926,1,2,34894667,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-4.5,-10.4375
27699072,27697619,1,2,27696055,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-2.828125,-3.0
5348842,5347984,1,3,5347543,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-9.0625,-9.25
…,…,…,…,…,…,…,…,…,…
15301562,15301600,1,2,15301151,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-13.3125,-14.5625
36831606,36830894,1,3,36829854,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-4.09375,-6.71875
29624080,29623860,1,2,29622770,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-5.40625,-3.609375
31660200,31659387,1,3,31657006,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-5.625,-7.5


In [12]:
# A pair is marked correct when the chosen prompt scores strictly higher than
# the rejected one (ties count as incorrect).
ds = ds.with_columns(
    correct=pl.col("chosen_reward").gt(pl.col("rejected_reward")),
)

ds

chosen,rejected,chosen_rank,rejected_rank,top_level_parent,split,chosen_prompt,rejected_prompt,chosen_reward,rejected_reward,correct
i64,i64,i64,i64,i64,str,str,str,f64,f64,bool
31042685,31041065,1,3,31012025,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-5.53125,-9.25,true
39808386,39806045,1,2,39758712,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-13.5,-17.25,true
34895313,34894926,1,2,34894667,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-4.5,-10.4375,true
27699072,27697619,1,2,27696055,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-2.828125,-3.0,true
5348842,5347984,1,3,5347543,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-9.0625,-9.25,true
…,…,…,…,…,…,…,…,…,…,…
15301562,15301600,1,2,15301151,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-13.3125,-14.5625,true
36831606,36830894,1,3,36829854,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-4.09375,-6.71875,true
29624080,29623860,1,2,29622770,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-5.40625,-3.609375,false
31660200,31659387,1,3,31657006,"""test""","""<instructions>Your goal is to …","""<instructions>Your goal is to …",-5.625,-7.5,true


In [14]:
# Headline number: fraction of pairs where the chosen prompt out-scored the rejected one.
print(f"Accuracy: {ds['correct'].mean() * 100:.2f}%")

# Kept as the cell's last expression so the per-class counts are actually
# displayed; a bare expression in the middle of a cell is evaluated and discarded.
ds.group_by("correct").agg(pl.col("correct").count().alias("count"))

Accuracy: 76.00%


In [15]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch
from tqdm import tqdm

# Single source of truth for the checkpoint location so the model and the
# tokenizer cannot accidentally be loaded from different paths.
MODEL_PATH = "./models/llama_32_8b_merged"

# Sequence-classification model loaded in bfloat16 with FlashAttention 2;
# device placement is delegated to `device_map="auto"`.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Text-classification pipeline wrapping the model and tokenizer; it is invoked
# once per prompt by run_inference_transformers.
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)


def run_inference_transformers(prompts: list[str]) -> list[float]:
    """Score prompts one at a time with the module-level ``pipe`` pipeline.

    Args:
        prompts: Prompt strings to score.

    Returns:
        The ``"score"`` field of the first result the pipeline returns for
        each prompt, in input order. The pipeline is asked not to apply any
        post-processing function, so the value is the model's raw output
        rather than a sigmoid/softmax probability.
    """
    # Without `function_to_apply="none"` the text-classification pipeline
    # squashes the output with sigmoid/softmax by default. That makes the
    # values incomparable with raw reward scores and lets large-magnitude
    # scores saturate into ties when chosen/rejected are compared.
    # NOTE(review): assumes the checkpoint has a single output label — confirm.
    return [
        pipe(prompt, function_to_apply="none")[0]["score"]
        for prompt in tqdm(prompts, desc="Running inference", unit="prompt")
    ]


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [16]:
# Re-score both sides of every pair, this time through the local transformers pipeline.
chosen_rewards = run_inference_transformers(ds.get_column("chosen_prompt").to_list())
rejected_rewards = run_inference_transformers(ds.get_column("rejected_prompt").to_list())

# Columns of the same name already on the frame are replaced by these values.
ds = ds.with_columns(
    pl.Series("chosen_reward", chosen_rewards),
    pl.Series("rejected_reward", rejected_rewards),
)

Running inference:   1%|          | 10/1000 [00:01<01:23, 11.80prompt/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Running inference: 100%|██████████| 1000/1000 [00:30<00:00, 32.62prompt/s]
Running inference: 100%|██████████| 1000/1000 [00:26<00:00, 38.22prompt/s]


In [17]:
# A pair is marked correct when the chosen prompt scores strictly higher than
# the rejected one (ties count as incorrect).
ds = ds.with_columns(
    (pl.col("chosen_reward") > pl.col("rejected_reward")).alias("correct")
)

print(f"Accuracy (Transformers): {ds['correct'].mean() * 100:.2f}%")

# Kept as the cell's last expression so the per-class counts are actually
# displayed; a bare expression in the middle of a cell is evaluated and discarded.
ds.group_by("correct").agg(pl.col("correct").count().alias("count"))

Accuracy (Transformers): 75.70%
