### libs

In [68]:
import pandas as pd
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
import torch 
from datasets import load_dataset
import random
import ast

### some sources

winning and losing responses dataset for pythia-160m => https://huggingface.co/datasets/cleanrl/summarize_from_feedback_oai_preprocessing_pythia-160m_169

winning and losing responses for the sup1 and ref policies; it is unclear which model they are for, or which models were the supervised and reference ones => https://huggingface.co/datasets/cleanrl/summarize_from_feedback_oai_preprocessing_1704321749

dataset: https://huggingface.co/datasets/cleanrl/summarize_from_feedback_tldr_3_filtered_oai_preprocessing_1705009345

In [22]:
# Load the TL;DR preference dataset; later cells read its "train" and "validation"
# splits and the "prompt" / "chosen" / "rejected" fields of each record.
ds = load_dataset("trl-lib/tldr-preference")

In [6]:
# Display the first training record to see which fields it carries.
ds["train"][0]

{'prompt': "SUBREDDIT: r/relationships\n\nTITLE: Mother [51] not speaking to me [21] because of a trip I am planning\n\nPOST: My boyfriend and I are long distance. We have a trip planned this summer which involves me going over to him in the USA. This will be the second time I have actually been with him in person. I am flying from the UK with my mum to the east coast. The original plan was for me to fly over to my boyfriend in the west coast (my parents are holidaying on the east coast) but because my mum was freaking out so much about me going to meet my boyfriend i said we can all road trip there together. I even invited her on the trip with us. I have given her all of our dates so that she can travel around with us.\nThe plan was for me to stay on the 4th July and fly back on the 5th. Mum knew this. I told her I had booked a flight back already from the west coast to east coast (where she would pick me up and we would fly back to the UK together). She has gone mad at me because she

### models and cuda stuff

In [None]:
# Checkpoint identifiers for the policy and the reward model. Only the policy
# checkpoint is loaded in this cell; the reward model id is just declared.
base_policy_id = "cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr"
reward_model_id = "cleanrl/EleutherAI_pythia-1b-deduped__reward__tldr"

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The tokenizer and the causal-LM weights both come from the policy checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_policy_id)
policy = AutoModelForCausalLM.from_pretrained(base_policy_id)

# Move the policy onto the chosen device; as the cell's last expression the
# returned module is also displayed.
policy.to(device)

### gathering rejected response as -ve samples and chosen response as +ve samples

In [None]:
# One chosen (positive) and one rejected (negative) example taken from the first
# record of the train and validation splits. The id columns start as empty
# lists; `encode` overwrites them with token ids.
sample_columns = ["prompt", "response", "prompt_ids", "response_ids", "desc", "quality"]
sample_rows = []
for split_name in ("train", "validation"):
    first_record = ds[split_name][0]
    for field, label, quality in (
        ("chosen", "good", "positive"),
        ("rejected", "bad", "negative"),
    ):
        sample_rows.append([
            first_record["prompt"],
            first_record[field],
            [],
            [],
            f"{split_name} data {label} sample",
            quality,
        ])
df = pd.DataFrame(sample_rows, columns=sample_columns)

In [16]:
# Add both token-length columns, initialised to 0 for every row.
for length_col in ("prompt_token_len", "response_token_len"):
    df[length_col] = 0

In [20]:
def encode(row):
    """Tokenize one row's prompt and response and store the ids and lengths on it.

    Reads ``row["prompt"]`` and ``row["response"]``, runs each through the
    module-level ``tokenizer`` without special tokens, and writes:

    * ``prompt_ids`` / ``response_ids`` - the tokenizer's ``input_ids`` with
      the leading batch dimension squeezed away;
    * ``prompt_token_len`` / ``response_token_len`` - the size of each id
      tensor along dimension 0.

    The row is modified in place and also returned, so the function can be
    handed to ``DataFrame.apply(..., axis=1)``.
    """
    def to_ids(text):
        # return_tensors="pt" yields a batch of one; squeeze(0) drops that axis.
        batch = tokenizer(text, add_special_tokens=False, return_tensors="pt")
        return batch["input_ids"].squeeze(0)

    # Tokenize both texts first, then write the four fields in a fixed order.
    prompt_ids, response_ids = (to_ids(row[col]) for col in ("prompt", "response"))

    row["prompt_ids"] = prompt_ids
    row["response_ids"] = response_ids
    row["prompt_token_len"] = prompt_ids.size(0)
    row["response_token_len"] = response_ids.size(0)
    return row

# Reset both length columns to None (optional, keeps the frame tidy), then
# tokenize row by row; `encode` fills in the id tensors and the token counts.
df = (
    df
    .assign(prompt_token_len=None, response_token_len=None)
    .apply(encode, axis=1)
)


### loading positive sample from training data

In [10]:
# Load previously exported samples from a local CSV. The id columns come back as
# text and are parsed further down - presumably they were saved as tensor reprs
# (TODO confirm against the notebook that wrote this file).
eval_data = pd.read_csv("training_eval_data.csv")

In [12]:
# Label every row loaded from the CSV as a positive sample.
eval_data["quality"] = "positive"

In [15]:
# Inspect which columns the frame has at this point.
eval_data.columns

Index(['prompt_token_len', 'response_token_len', 'prompt_ids', 'response_ids',
       'prompt', 'response', 'quality'],
      dtype='object')

In [48]:
# Tag the rows with their origin so they stay identifiable once other samples are concatenated.
eval_data["desc"] = "positive samples from training data's test set"

In [49]:
# Row count before any negative sample is appended.
len(eval_data)

10

### Unrelated response from a different sample 

In [51]:
def add_unrelated_response(df, rng=None):
    """Append one negative sample that pairs a prompt with another row's response.

    Two distinct row positions are drawn at random. The prompt of the first
    and the response of the second are tokenized with the module-level
    ``tokenizer`` (no special tokens) and appended as a new row labelled
    ``quality="negative"``.

    Args:
        df: DataFrame with ``prompt`` and ``response`` columns and at least
            two rows. It is not modified.
        rng: Optional ``random.Random`` instance used to draw the two rows,
            which makes the pick reproducible. Defaults to the global
            ``random`` module state.

    Returns:
        A new DataFrame holding the original rows plus the extra row, with a
        fresh 0..n-1 index.

    Raises:
        ValueError: If ``df`` has fewer than two rows.
    """
    if len(df) < 2:
        raise ValueError(
            "need at least two rows to pair a prompt with an unrelated response"
        )

    sampler = random if rng is None else rng
    prompt_pos, response_pos = sampler.sample(range(len(df)), 2)

    # The drawn numbers are row positions, not index labels, so look them up
    # positionally; label-based .loc fails (or silently picks other rows) as
    # soon as the frame's index is not exactly 0..n-1.
    prompt = df["prompt"].iloc[prompt_pos]
    response = df["response"].iloc[response_pos]   # unrelated response

    def to_ids(text):
        # return_tensors="pt" yields a batch of one; squeeze(0) drops that axis.
        batch = tokenizer(text, add_special_tokens=False, return_tensors="pt")
        return batch["input_ids"].squeeze(0)

    prompt_enc = to_ids(prompt)
    response_enc = to_ids(response)

    new_row = {
        "prompt": prompt,
        "response": response,
        "prompt_ids": prompt_enc,
        "response_ids": response_enc,
        "prompt_token_len": prompt_enc.size(0),
        "response_token_len": response_enc.size(0),
        "quality": "negative",
        "desc" : "unrelated response from a postive sample"
    }

    # concat returns a new frame; ignore_index renumbers the rows from 0.
    return pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

# Append one mismatched prompt/response pair as a negative sample. The pair is drawn
# with `random.sample` and no seed is set in the visible cells, so the added row can
# differ from run to run.
eval_data = add_unrelated_response(eval_data)

### adding custom bad samples

In [41]:
# Empty frame that collects the hand-written negative samples; each following cell appends one row.
bad_sample = pd.DataFrame(columns=["prompt", "response", "desc"])

#### 1. reddit post + lorem ipsum text (40 words response)

In [42]:
# Append a row [prompt, response, desc]: a post as the prompt and 40 words of
# lorem ipsum filler as the response.
bad_sample.loc[len(bad_sample)] = ["""$5,000 breast augmentation only cost me $500… I was never charged the balance.
About 11 years ago I decided it was time for me to upgrade “the gals” after years and years of breastfeeding children. I shopped around for a surgeon until I found one whose style was more of the natural look I was going for. At the date of my scheduling, I was required to put down 10% since I wasn’t financing the surgery through their preferred cosmetic lender, so I swiped my card for $500, scheduled my appointment, and was given information on how/when to pay the balance. Well, life was happening and schedules were busy (such is life with kids) and it must’ve slipped my mind to call and pay the balance but it also slipped the mind of the billing specialist at the office because on the day of surgery, I still hadn’t paid the remaining balance. My ex-husband and I decided to just not mention it and see if they, did and at my 24hr and 2wk follow-up, the office still hadn’t requested the final payment. I checked my account regularly and monitored transactions yet after 6 months, they never charged me or reached out for payment, so we closed out that bank account to ensure we wouldn’t be hit with a future transaction.

I took my kids to Disney World with the extra money.

After 11yrs I’ve never been contacted and am still enjoying life with “the gals”. TLDR;""", """Lorem ipsum dolor sit amet consectetur adipiscing elit. Consectetur adipiscing elit quisque faucibus ex sapien vitae. Ex sapien vitae pellentesque sem placerat in id. Placerat in id cursus mi pretium tellus duis. Pretium tellus duis convallis tempus leo eu aenean.""", "lorem ipsum text"]


#### 3. reddit post + jumbled words of the expert summary

In [43]:
# Append a row [prompt, response, desc]: a post as the prompt and, as the response,
# what appears to be a summary of it with the word order scrambled.
bad_sample.loc[len(bad_sample)] = ["""Welcome! (back) to Brrrrrkeley. To the new admits, as well as those returning after Summer break.
University
Don’t worry you didn’t miss much weather wise. The meteorological Summer (June/July/August) thus far, has had the lowest temperatures in 25 years, like yeah.

Things look like there might be a little warm up mid-week (high 60’s-low 70’s?, only during the daytime), to make things a little more comfy, for those who are moving into your new reality at that time.

However, don’t be fooled, and please take time to brush up on the 12 seasons of the Bay Area, as we are still in “Summer, but make it foggy”. Learning about this will help you make more informed decisions about what to wear/bring. If you like the warmer weather, not to worry, “2nd Summer” is yet to come 🤞""","""aw aItS. Summer” “Second —worry Don’t summer.” “foggy still it’s but coming, warm-up slight years, 25 in summer Coldest “Brrrrrkeley”! chilly to (back) Welcome""", "jumbled words of chatgpt summary"]

#### 4. reddit post + jumbled random words 

In [44]:
# Append a row [prompt, response, desc]: a post as the prompt and a string of
# unrelated random words as the response. The prompt previously began with a stray
# pasted filename fused onto the post title; it now starts at the title itself.
bad_sample.loc[len(bad_sample)] = ["""I got refunded $4,500 instead of $40 and I didn't tell anyone
When I was about 15, I ordered some clothes online and ended up returning around $40 worth because they didn’t fit. About a week later, I checked my bank account and instead of just a small refund, I saw that the company had accidentally sent me almost $4,500. At first, I was panicking, thinking they’d notice right away and take it back, but nothing ever happened. I didn’t say anything either, just kind of let it sit there. Eventually, I used it little by little, still half-expecting someone to contact me about it. Now I’m 19, and it’s been years since then — nothing ever came of it.""", """whisper lantern horizon pixel drift canyon velvet orbit scatter prism thunder maple forge spiral echo quartz ember silent ripple cascade ivy dawn meadow shimmer alloy lantern frost glimmer tide anchor flame horizon twine crystal shadow murmur sparkling dune galaxy willow bloom kinetic pulse wander lucid trail""", "random jumbled words"]

### chatgpt bad samples

In [46]:
# Append a row [prompt, response, desc]: a post as the prompt and unrelated
# Latin filler text as the response.
bad_sample.loc[len(bad_sample)] = [
    """I accidentally got free LASIK surgery when I was 25.  
Back then I was working retail, tired of glasses, and saved up for the procedure. The clinic had a promotion: $1,000 down payment, rest due at follow-up. I paid the down, had my surgery, and walked out seeing 20/20. At my follow-ups, nobody asked for the rest. I assumed they’d call me about billing—never happened. A year passed, my eyesight was perfect, and I never got a single invoice. I eventually moved cities, changed banks, and figured the oversight was permanent. To this day—almost 12 years later—I’ve never gotten a bill. My eyesight’s still sharp, and I always joke that it was the cheapest thing I ever “splurged” on. TLDR;""",
    """Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Integer eu nisl magna. Quisque sed sodales nisi. Ut elementum sapien nec quam aliquet, non viverra libero fringilla. Cras vel velit arcu. Sed ultricies sem sed lorem dignissim, vitae fermentum eros aliquet."""
,"completely unrelated text"]


In [33]:
# Append a row [prompt, response, desc]: a post as the prompt and unrelated Latin
# filler text as the response. The frame has three columns, so all three values
# must be supplied; a two-item list raises "cannot set a row with mismatched columns".
bad_sample.loc[len(bad_sample)] = [
    """When I was in college, I joined an anime forum that linked me to a streaming site.  
For two years, I thought it was the “official” continuation of Crunchyroll because the branding looked so polished. It had every episode, zero ads, and even community reviews. Then I learned it was a fan-run clone that had scraped content. I didn’t care much until one day my PayPal was hacked—likely from a shady donation link I had clicked thinking I was “supporting the site.” I lost $300 before resolving it with my bank. Since then I only use paid services. Sometimes “too good to be true” really is. TLDR;""",
    """Donec feugiat, lorem at finibus congue, risus nulla varius elit, eget bibendum eros nulla in massa. Proin pulvinar metus a neque malesuada, ut volutpat arcu bibendum. Suspendisse potenti. Mauris consequat lacus a orci viverra congue.""",
    "completely unrelated text",
]


In [47]:
# Label every hand-written row as negative, add empty length columns, then
# tokenize row by row; `encode` stores the id tensors and the token counts.
bad_sample = (
    bad_sample
    .assign(quality="negative", prompt_token_len=None, response_token_len=None)
    .apply(encode, axis=1)
)

### tokenizing samples and adding it to eval_data

### combine all dfs

In [62]:
# Stack the three sample sources into one frame and renumber the rows from 0.
# NOTE(review): rows loaded from the CSV presumably still hold their ids as text while
# the freshly tokenized rows hold tensors; the following cells normalise both.
result = pd.concat([eval_data, bad_sample, df], ignore_index=True)

In [80]:
def _split_tensor_literal(inner: str):
    """Split the text inside ``tensor(...)`` into (data literal, trailing keyword text)."""
    if not inner.startswith("["):
        # Scalar form such as "5" or "5, dtype=torch.int32".
        literal, _, suffix = inner.partition(",")
        return literal.strip(), suffix

    depth = 0
    for pos, char in enumerate(inner):
        if char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                return inner[: pos + 1], inner[pos + 1:]

    # Unbalanced brackets: hand everything to the parser so it reports the error.
    return inner, ""


def parse_tensor_string(tensor_str: str):
    """Turn the text form of a tensor, e.g. ``"tensor([1, 2, 3])"``, back into a tensor.

    Surrounding whitespace and line breaks inside the literal are accepted.
    Trailing keyword text after the data (``dtype=...``, ``device=...``) is
    tolerated: a ``dtype=torch.<name>`` is applied to the result, anything
    else is ignored, so the tensor is always created on the default device.

    Args:
        tensor_str: The text to parse. Any non-string value (already a
            tensor, a list, ``None``, NaN, ...) is returned unchanged.

    Returns:
        A ``torch.Tensor`` built from the parsed values, or the input itself
        when it is not a string.

    Raises:
        ValueError: If the text is not of the form ``tensor(...)``, if its
            data was abbreviated with ``...`` when it was written, or if the
            data is not a valid literal.
    """
    if not isinstance(tensor_str, str):
        return tensor_str

    text = tensor_str.strip()
    if not (text.startswith("tensor(") and text.endswith(")")):
        raise ValueError(f"Invalid tensor format: {text[:50]}...")

    literal, suffix = _split_tensor_literal(text[len("tensor("):-1].strip())

    # Long tensors are printed in summarised form with "..."; literal_eval would
    # turn that into Ellipsis and fail later with an unhelpful error.
    if "..." in literal:
        raise ValueError(
            "Tensor text was abbreviated with '...' when it was saved; "
            "the full values cannot be recovered"
        )

    try:
        values = ast.literal_eval(literal)
    except (ValueError, SyntaxError) as err:
        raise ValueError(f"Invalid tensor format: {text[:50]}...") from err

    # Honour an explicit dtype (e.g. the empty tensor prints as
    # "tensor([], dtype=torch.int64)"); unknown names fall back to inference.
    dtype_name = suffix.partition("dtype=torch.")[2].split(",")[0].strip()
    dtype = getattr(torch, dtype_name, None) if dtype_name else None
    if not isinstance(dtype, torch.dtype):
        dtype = None

    return torch.tensor(values, dtype=dtype)

In [79]:
# Convert any id cell that is still text back into a tensor; cells that are
# not strings pass through `parse_tensor_string` unchanged.
for ids_col in ("prompt_ids", "response_ids"):
    result[ids_col] = result[ids_col].apply(parse_tensor_string)

In [83]:
# Replace every tensor in the id columns with a plain Python list; values of
# any other type are left untouched.
for ids_col in ("prompt_ids", "response_ids"):
    result[ids_col] = result[ids_col].apply(
        lambda ids: ids.tolist() if isinstance(ids, torch.Tensor) else ids
    )

In [85]:
# Write the combined samples to disk. With the default settings the row index is written
# too, as an unnamed first column - NOTE(review): pass index=False if readers of this
# file should not see it.
result.to_csv("ranking_samples.csv")