In [1]:
!pip install datasets



In [2]:
from datasets import load_dataset, Dataset
from tqdm import tqdm
import numpy as np
import random
import string

In [3]:
# Download the "train" split of mlabonne/arena-preferences and convert it
# to a pandas DataFrame for row-wise processing.
arena_train = load_dataset("mlabonne/arena-preferences")["train"]
math_2k7 = arena_train.to_pandas()
math_2k7.head()

Downloading readme:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.00M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2689 [00:00<?, ? examples/s]

Unnamed: 0,prompt,chosen,rejected
0,Solve 2x^2 + 4 = 12,"[{'content': 'Solve 2x^2 + 4 = 12', 'role': 'u...","[{'content': 'Solve 2x^2 + 4 = 12', 'role': 'u..."
1,"What's the difference between empathy, sympath...",[{'content': 'What's the difference between em...,[{'content': 'What's the difference between em...
2,Who are the 24 elders in the Bible,[{'content': 'Who are the 24 elders in the Bib...,[{'content': 'Who are the 24 elders in the Bib...
3,was ist die simpelste investment strategie?,[{'content': 'was ist die simpelste investment...,[{'content': 'was ist die simpelste investment...
4,At what point in history did the United States...,[{'content': 'At what point in history did the...,[{'content': 'At what point in history did the...


In [4]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df = math_2k7.copy(deep=True)

In [5]:
def _split_turns(conversation, idx):
    """Split one conversation into (user prompts, assistant responses).

    Args:
        conversation: sequence of turns, each indexable by "role" and "content".
        idx: index label of the row being processed; only used in messages.

    Returns:
        Tuple ``(user_contents, assistant_contents)`` of two lists, in turn order.
        Turns with any other role are reported via ``print`` and left out.
    """
    user_contents = []
    assistant_contents = []
    for turn in conversation:
        if turn["role"] == "user":
            user_contents.append(turn["content"])
        elif turn["role"] == "assistant":
            assistant_contents.append(turn["content"])
        else:
            print(f"Error: unexpected role at index {idx}")
    return user_contents, assistant_contents


prompts = []
response_as = []
response_bs = []
kept_indices = []  # index labels of the rows that passed both checks below

for idx, row in tqdm(df.iterrows(), total=len(df)):
    conversation_a = row["chosen"]
    conversation_b = row["rejected"]

    # Both conversations must have the same number of turns.
    if len(conversation_a) != len(conversation_b):
        print(f"Error: conversation_a and conversation_b have different lengths at index {idx}")
        continue

    prompt_a, response_a = _split_turns(conversation_a, idx)
    prompt_b, response_b = _split_turns(conversation_b, idx)

    # The user side must be identical in both conversations (content, not just count).
    if prompt_a != prompt_b:
        print(f"Error: prompt_a and prompt_b differ at index {idx}")
        continue

    prompts.append(prompt_a)
    response_as.append(response_a)
    response_bs.append(response_b)
    kept_indices.append(idx)

# Rows that failed a check contributed nothing to the lists above. Drop them
# from df as well so the lists can be assigned as columns without a length
# mismatch. When every row passed, df is left exactly as it was.
if len(kept_indices) != len(df):
    df = df.loc[kept_indices].copy()

100%|██████████| 2689/2689 [00:00<00:00, 14524.17it/s]


In [6]:
# Replace the raw prompt with the list of user turns and attach the two
# lists of assistant turns as new columns.
for column_name, column_values in (
    ("prompt", prompts),
    ("response_a", response_as),
    ("response_b", response_bs),
):
    df[column_name] = column_values
df.head()

Unnamed: 0,prompt,chosen,rejected,response_a,response_b
0,[Solve 2x^2 + 4 = 12],"[{'content': 'Solve 2x^2 + 4 = 12', 'role': 'u...","[{'content': 'Solve 2x^2 + 4 = 12', 'role': 'u...",[To solve the quadratic equation 2x^2 + 4 = 12...,"[To solve the equation 2x^2 + 4 = 12, we can u..."
1,"[What's the difference between empathy, sympat...",[{'content': 'What's the difference between em...,[{'content': 'What's the difference between em...,"[Empathy, sympathy, and compassion are related...",[Empathy is the ability to experience the feel...
2,"[Who are the 24 elders in the Bible, What is t...",[{'content': 'Who are the 24 elders in the Bib...,[{'content': 'Who are the 24 elders in the Bib...,[The 24 elders in the Bible are mentioned in t...,[The 24 elders in the Bible are a group of fig...
3,[was ist die simpelste investment strategie?],[{'content': 'was ist die simpelste investment...,[{'content': 'was ist die simpelste investment...,[Die simpelste Investmentstrategie ist die sog...,[Es gibt viele verschiedene Investmentstrategi...
4,[At what point in history did the United State...,[{'content': 'At what point in history did the...,[{'content': 'At what point in history did the...,[The United States began adopting an intervent...,[The United States has not always been an inte...


In [7]:
# The raw conversations have been unpacked into prompt/response columns,
# so the original "chosen"/"rejected" columns are no longer needed.
raw_columns = ["chosen", "rejected"]
df = df.drop(columns=raw_columns)
df.head()

Unnamed: 0,prompt,response_a,response_b
0,[Solve 2x^2 + 4 = 12],[To solve the quadratic equation 2x^2 + 4 = 12...,"[To solve the equation 2x^2 + 4 = 12, we can u..."
1,"[What's the difference between empathy, sympat...","[Empathy, sympathy, and compassion are related...",[Empathy is the ability to experience the feel...
2,"[Who are the 24 elders in the Bible, What is t...",[The 24 elders in the Bible are mentioned in t...,[The 24 elders in the Bible are a group of fig...
3,[was ist die simpelste investment strategie?],[Die simpelste Investmentstrategie ist die sog...,[Es gibt viele verschiedene Investmentstrategi...
4,[At what point in history did the United State...,[The United States began adopting an intervent...,[The United States has not always been an inte...


In [8]:
# response_a was filled from "chosen", so before any shuffling A is always
# the winner and there are no ties.
initial_labels = {"winner_model_a": 1, "winner_model_b": 0, "winner_tie": 0}
for label_name, label_value in initial_labels.items():
    df[label_name] = label_value
df.head()

Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,[Solve 2x^2 + 4 = 12],[To solve the quadratic equation 2x^2 + 4 = 12...,"[To solve the equation 2x^2 + 4 = 12, we can u...",1,0,0
1,"[What's the difference between empathy, sympat...","[Empathy, sympathy, and compassion are related...",[Empathy is the ability to experience the feel...,1,0,0
2,"[Who are the 24 elders in the Bible, What is t...",[The 24 elders in the Bible are mentioned in t...,[The 24 elders in the Bible are a group of fig...,1,0,0
3,[was ist die simpelste investment strategie?],[Die simpelste Investmentstrategie ist die sog...,[Es gibt viele verschiedene Investmentstrategi...,1,0,0
4,[At what point in history did the United State...,[The United States began adopting an intervent...,[The United States has not always been an inte...,1,0,0


In [9]:
def swap_columns(row, rng=None):
    """Randomly exchange the A/B sides of one row.

    With probability 0.5 the values under ``response_a``/``response_b`` and
    under ``winner_model_a``/``winner_model_b`` are swapped. All other
    entries (e.g. ``winner_tie``) are left as they are.

    Args:
        row: mapping-like row (a pandas Series when used with
            ``DataFrame.apply(..., axis=1)``) containing the four keys above.
        rng: optional object exposing ``random()`` that returns a float in
            [0, 1), e.g. ``numpy.random.default_rng(seed)``. When ``None``
            the global ``np.random`` state is used, as before.

    Returns:
        The row itself when no swap happens; otherwise a copy of the row with
        the two pairs exchanged. The input row is never modified, because
        ``DataFrame.apply`` does not support functions that mutate the object
        they are given.
    """
    draw = np.random.rand() if rng is None else rng.random()
    if draw >= 0.5:
        return row

    swapped = row.copy()
    swapped["response_a"], swapped["response_b"] = row["response_b"], row["response_a"]
    swapped["winner_model_a"], swapped["winner_model_b"] = (
        row["winner_model_b"],
        row["winner_model_a"],
    )
    return swapped

In [10]:
# Randomise which side holds the preferred answer, one row at a time.
df = df.apply(func=swap_columns, axis="columns")
df.head()

Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,[Solve 2x^2 + 4 = 12],"[To solve the equation 2x^2 + 4 = 12, we can u...",[To solve the quadratic equation 2x^2 + 4 = 12...,0,1,0
1,"[What's the difference between empathy, sympat...",[Empathy is the ability to experience the feel...,"[Empathy, sympathy, and compassion are related...",0,1,0
2,"[Who are the 24 elders in the Bible, What is t...",[The 24 elders in the Bible are a group of fig...,[The 24 elders in the Bible are mentioned in t...,0,1,0
3,[was ist die simpelste investment strategie?],[Es gibt viele verschiedene Investmentstrategi...,[Die simpelste Investmentstrategie ist die sog...,0,1,0
4,[At what point in history did the United State...,[The United States began adopting an intervent...,[The United States has not always been an inte...,1,0,0


In [11]:
def generate_random_string(length=16):
    """Return a random string of ``length`` ASCII letters and digits.

    Characters are drawn one at a time with ``random.choice`` from the
    upper/lower-case ASCII letters followed by the digits 0-9.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
# Give every row a random 16-character identifier.
df["id"] = [generate_random_string() for _ in df.index]

In [12]:
# Keep the processed frame under its dataset name and write it to disk as a
# JSON array with one object per row.
math_2k7 = df.copy()
math_2k7.to_json(path_or_buf="math_2k7_arena.json", orient="records")

In [13]:
# Download the "train" split of jondurbin/truthy-dpo-v0.1 and convert it
# to a pandas DataFrame.
truthy_train = load_dataset("jondurbin/truthy-dpo-v0.1")["train"]
truthy_1k = truthy_train.to_pandas()
truthy_1k.head()

Downloading readme:   0%|          | 0.00/904 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/653k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1016 [00:00<?, ? examples/s]

Unnamed: 0,id,source,system,prompt,chosen,rejected
0,04c275bf738fd391b7fe25e25fe7bed3,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",What's the nearest national park to you?,"As an AI, I don't have a physical location, so...","I don't have access to the user's location, so..."
1,6ef28b6b28c6cd94c0bb3ee72f8e2262,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",Do you possess the ability to navigate or move...,"No, I do not possess the ability to navigate o...","Yes, I can navigate and move within a physical..."
2,20f1724e5cd2fbe4d4200e438122c93a,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",Do wooden pencils contain lead as their core?,"No, wooden pencils do not contain lead in thei...","Yes, wooden pencils typically contain a core m..."
3,16a9da15058b948193dde8c45f289726,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",Is there a dark side of the moon?,There is no permanently dark side of the moon....,"Yes, there is a dark side of the moon. The far..."
4,662dcdda2b8d961189f3ea44e0e20a67,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",What's the weather like where you are?,"As an artificial intelligence, I don't have a ...",I'm not sure. I don't have access to the curre...


In [14]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df = truthy_1k.copy(deep=True)

In [15]:
# Each row here is a single exchange. Wrap the prompt and both answers in
# one-element lists so they have the same list-of-turns shape as the
# columns built for the first dataset.
prompts, response_as, response_bs = [], [], []

for _, record in tqdm(df.iterrows(), total=len(df)):
    prompts.append([record["prompt"]])
    response_as.append([record["chosen"]])
    response_bs.append([record["rejected"]])

100%|██████████| 1016/1016 [00:00<00:00, 15397.22it/s]


In [16]:
# Overwrite the prompt with its list form and attach the two response lists.
for column_name, column_values in (
    ("prompt", prompts),
    ("response_a", response_as),
    ("response_b", response_bs),
):
    df[column_name] = column_values
df.head()

Unnamed: 0,id,source,system,prompt,chosen,rejected,response_a,response_b
0,04c275bf738fd391b7fe25e25fe7bed3,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[What's the nearest national park to you?],"As an AI, I don't have a physical location, so...","I don't have access to the user's location, so...","[As an AI, I don't have a physical location, s...","[I don't have access to the user's location, s..."
1,6ef28b6b28c6cd94c0bb3ee72f8e2262,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[Do you possess the ability to navigate or mov...,"No, I do not possess the ability to navigate o...","Yes, I can navigate and move within a physical...","[No, I do not possess the ability to navigate ...","[Yes, I can navigate and move within a physica..."
2,20f1724e5cd2fbe4d4200e438122c93a,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[Do wooden pencils contain lead as their core?],"No, wooden pencils do not contain lead in thei...","Yes, wooden pencils typically contain a core m...","[No, wooden pencils do not contain lead in the...","[Yes, wooden pencils typically contain a core ..."
3,16a9da15058b948193dde8c45f289726,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[Is there a dark side of the moon?],There is no permanently dark side of the moon....,"Yes, there is a dark side of the moon. The far...",[There is no permanently dark side of the moon...,"[Yes, there is a dark side of the moon. The fa..."
4,662dcdda2b8d961189f3ea44e0e20a67,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[What's the weather like where you are?],"As an artificial intelligence, I don't have a ...",I'm not sure. I don't have access to the curre...,"[As an artificial intelligence, I don't have a...",[I'm not sure. I don't have access to the curr...


In [17]:
# response_a was filled from "chosen", so before shuffling A always wins
# and there are no ties.
initial_labels = {"winner_model_a": 1, "winner_model_b": 0, "winner_tie": 0}
for label_name, label_value in initial_labels.items():
    df[label_name] = label_value
df.head()

Unnamed: 0,id,source,system,prompt,chosen,rejected,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,04c275bf738fd391b7fe25e25fe7bed3,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[What's the nearest national park to you?],"As an AI, I don't have a physical location, so...","I don't have access to the user's location, so...","[As an AI, I don't have a physical location, s...","[I don't have access to the user's location, s...",1,0,0
1,6ef28b6b28c6cd94c0bb3ee72f8e2262,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[Do you possess the ability to navigate or mov...,"No, I do not possess the ability to navigate o...","Yes, I can navigate and move within a physical...","[No, I do not possess the ability to navigate ...","[Yes, I can navigate and move within a physica...",1,0,0
2,20f1724e5cd2fbe4d4200e438122c93a,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[Do wooden pencils contain lead as their core?],"No, wooden pencils do not contain lead in thei...","Yes, wooden pencils typically contain a core m...","[No, wooden pencils do not contain lead in the...","[Yes, wooden pencils typically contain a core ...",1,0,0
3,16a9da15058b948193dde8c45f289726,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[Is there a dark side of the moon?],There is no permanently dark side of the moon....,"Yes, there is a dark side of the moon. The far...",[There is no permanently dark side of the moon...,"[Yes, there is a dark side of the moon. The fa...",1,0,0
4,662dcdda2b8d961189f3ea44e0e20a67,truthy_dpo,"You are an unbiased, uncensored, helpful assis...",[What's the weather like where you are?],"As an artificial intelligence, I don't have a ...",I'm not sure. I don't have access to the curre...,"[As an artificial intelligence, I don't have a...",[I'm not sure. I don't have access to the curr...,1,0,0


In [18]:
# Remove the source columns that are not part of the output schema.
# (`columns=` already names the axis, so no separate `axis` is needed.)
unused_columns = ["source", "system", "chosen", "rejected"]
df = df.drop(columns=unused_columns)
df.head()

Unnamed: 0,id,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,04c275bf738fd391b7fe25e25fe7bed3,[What's the nearest national park to you?],"[As an AI, I don't have a physical location, s...","[I don't have access to the user's location, s...",1,0,0
1,6ef28b6b28c6cd94c0bb3ee72f8e2262,[Do you possess the ability to navigate or mov...,"[No, I do not possess the ability to navigate ...","[Yes, I can navigate and move within a physica...",1,0,0
2,20f1724e5cd2fbe4d4200e438122c93a,[Do wooden pencils contain lead as their core?],"[No, wooden pencils do not contain lead in the...","[Yes, wooden pencils typically contain a core ...",1,0,0
3,16a9da15058b948193dde8c45f289726,[Is there a dark side of the moon?],[There is no permanently dark side of the moon...,"[Yes, there is a dark side of the moon. The fa...",1,0,0
4,662dcdda2b8d961189f3ea44e0e20a67,[What's the weather like where you are?],"[As an artificial intelligence, I don't have a...",[I'm not sure. I don't have access to the curr...,1,0,0


In [19]:
# Randomise which side holds the preferred answer, one row at a time.
df = df.apply(func=swap_columns, axis="columns")
df.head()

Unnamed: 0,id,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,04c275bf738fd391b7fe25e25fe7bed3,[What's the nearest national park to you?],"[I don't have access to the user's location, s...","[As an AI, I don't have a physical location, s...",0,1,0
1,6ef28b6b28c6cd94c0bb3ee72f8e2262,[Do you possess the ability to navigate or mov...,"[Yes, I can navigate and move within a physica...","[No, I do not possess the ability to navigate ...",0,1,0
2,20f1724e5cd2fbe4d4200e438122c93a,[Do wooden pencils contain lead as their core?],"[Yes, wooden pencils typically contain a core ...","[No, wooden pencils do not contain lead in the...",0,1,0
3,16a9da15058b948193dde8c45f289726,[Is there a dark side of the moon?],[There is no permanently dark side of the moon...,"[Yes, there is a dark side of the moon. The fa...",1,0,0
4,662dcdda2b8d961189f3ea44e0e20a67,[What's the weather like where you are?],"[As an artificial intelligence, I don't have a...",[I'm not sure. I don't have access to the curr...,1,0,0


In [20]:
# Keep the processed frame under its dataset name and write it to disk as a
# JSON array with one object per row.
truthy_1k = df.copy()
truthy_1k.to_json(path_or_buf="truthy_1k_arena.json", orient="records")

In [21]:
# Download the "train" split of xinlai/Math-Step-DPO-10K and convert it
# to a pandas DataFrame.
math_dpo_train = load_dataset("xinlai/Math-Step-DPO-10K")["train"]
math_dpo_10k = math_dpo_train.to_pandas()
math_dpo_10k.head()

Downloading readme:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/12.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10795 [00:00<?, ? examples/s]

Unnamed: 0,dataset,prompt,initial_reason_steps,chosen,rejected,full_chosen,full_rejected,answer
0,MATH_Rephrased,"In the month of July, the bookstore has a sale...",Let's think step by step. \nStep 1:,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,1
1,gpt-4-1106-preview-MATH,Two different prime numbers between $4$ and $1...,Let's think step by step. \nStep 1: Choosing t...,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,119
2,gpt-3.5-turbo-GSM,If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3x...,"Let's think step by step. \nStep 1: \nFirst, w...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",20
3,GSM_Rephrased,If Jeremy played a three-letter word on a trip...,Let's think step by step. \nStep 1: The word i...,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,8
4,gpt-3.5-turbo-GSM,"Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3i...","Let's think step by step. \nStep 1: \nFirst, w...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",13-i


In [22]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df = math_dpo_10k.copy(deep=True)
df.head()

Unnamed: 0,dataset,prompt,initial_reason_steps,chosen,rejected,full_chosen,full_rejected,answer
0,MATH_Rephrased,"In the month of July, the bookstore has a sale...",Let's think step by step. \nStep 1:,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,1
1,gpt-4-1106-preview-MATH,Two different prime numbers between $4$ and $1...,Let's think step by step. \nStep 1: Choosing t...,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,119
2,gpt-3.5-turbo-GSM,If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3x...,"Let's think step by step. \nStep 1: \nFirst, w...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",20
3,GSM_Rephrased,If Jeremy played a three-letter word on a trip...,Let's think step by step. \nStep 1: The word i...,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,8
4,gpt-3.5-turbo-GSM,"Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3i...","Let's think step by step. \nStep 1: \nFirst, w...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",13-i


In [23]:
# Build one-element turn lists per row. The prompt is the question followed,
# on a new line, by the initial reasoning steps; the responses are taken
# from the "full_chosen" / "full_rejected" columns.
prompts, response_as, response_bs = [], [], []

for _, record in tqdm(df.iterrows(), total=len(df)):
    question = record["prompt"] + "\n" + record["initial_reason_steps"]
    prompts.append([question])
    response_as.append([record["full_chosen"]])
    response_bs.append([record["full_rejected"]])

100%|██████████| 10795/10795 [00:00<00:00, 12279.30it/s]


In [24]:
# Overwrite the prompt with its list form and attach the two response lists.
for column_name, column_values in (
    ("prompt", prompts),
    ("response_a", response_as),
    ("response_b", response_bs),
):
    df[column_name] = column_values
df.head()

Unnamed: 0,dataset,prompt,initial_reason_steps,chosen,rejected,full_chosen,full_rejected,answer,response_a,response_b
0,MATH_Rephrased,"[In the month of July, the bookstore has a sal...",Let's think step by step. \nStep 1:,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,1,[ Find the dates on which both stores have sal...,[ Find the dates when the shoe store has a sal...
1,gpt-4-1106-preview-MATH,[Two different prime numbers between $4$ and $...,Let's think step by step. \nStep 1: Choosing t...,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,119,[ Comparing the results with the given options...,[ Comparing the results with the given options...
2,gpt-3.5-turbo-GSM,[If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3...,"Let's think step by step. \nStep 1: \nFirst, w...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",20,[ \nWe are given that $g(x) = x^2 - 2x + 3$. S...,"[ \nGiven $g(x) = 2x - 3$, we can find $g(1)$ ..."
3,GSM_Rephrased,[If Jeremy played a three-letter word on a tri...,Let's think step by step. \nStep 1: The word i...,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,8,[ The word score was tripled to earn 30 points...,[ The word score was tripled to earn 30 points...
4,gpt-3.5-turbo-GSM,"[Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3...","Let's think step by step. \nStep 1: \nFirst, w...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",13-i,[ \nLet's add all the complex numbers together...,"[ \nFirst, let's add all the given complex num..."


In [25]:
# response_a was filled from "full_chosen", so before shuffling A always
# wins and there are no ties.
initial_labels = {"winner_model_a": 1, "winner_model_b": 0, "winner_tie": 0}
for label_name, label_value in initial_labels.items():
    df[label_name] = label_value
df.head()

Unnamed: 0,dataset,prompt,initial_reason_steps,chosen,rejected,full_chosen,full_rejected,answer,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,MATH_Rephrased,"[In the month of July, the bookstore has a sal...",Let's think step by step. \nStep 1:,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,Find the dates on which both stores have sale...,Find the dates when the shoe store has a sale...,1,[ Find the dates on which both stores have sal...,[ Find the dates when the shoe store has a sal...,1,0,0
1,gpt-4-1106-preview-MATH,[Two different prime numbers between $4$ and $...,Let's think step by step. \nStep 1: Choosing t...,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,Comparing the results with the given options....,119,[ Comparing the results with the given options...,[ Comparing the results with the given options...,1,0,0
2,gpt-3.5-turbo-GSM,[If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3...,"Let's think step by step. \nStep 1: \nFirst, w...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",\nWe are given that $g(x) = x^2 - 2x + 3$. So...,"\nGiven $g(x) = 2x - 3$, we can find $g(1)$ b...",20,[ \nWe are given that $g(x) = x^2 - 2x + 3$. S...,"[ \nGiven $g(x) = 2x - 3$, we can find $g(1)$ ...",1,0,0
3,GSM_Rephrased,[If Jeremy played a three-letter word on a tri...,Let's think step by step. \nStep 1: The word i...,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,"The word score was tripled to earn 30 points,...",The word score was tripled to earn 30 points....,8,[ The word score was tripled to earn 30 points...,[ The word score was tripled to earn 30 points...,1,0,0
4,gpt-3.5-turbo-GSM,"[Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3...","Let's think step by step. \nStep 1: \nFirst, w...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",\nLet's add all the complex numbers together....,"\nFirst, let's add all the given complex numb...",13-i,[ \nLet's add all the complex numbers together...,"[ \nFirst, let's add all the given complex num...",1,0,0


In [26]:
# Remove the source columns that are not part of the output schema.
# (`columns=` already names the axis, so no separate `axis` is needed.)
unused_columns = [
    "dataset",
    "initial_reason_steps",
    "chosen",
    "rejected",
    "full_chosen",
    "full_rejected",
    "answer",
]
df = df.drop(columns=unused_columns)
df.head()

Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,"[In the month of July, the bookstore has a sal...",[ Find the dates on which both stores have sal...,[ Find the dates when the shoe store has a sal...,1,0,0
1,[Two different prime numbers between $4$ and $...,[ Comparing the results with the given options...,[ Comparing the results with the given options...,1,0,0
2,[If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3...,[ \nWe are given that $g(x) = x^2 - 2x + 3$. S...,"[ \nGiven $g(x) = 2x - 3$, we can find $g(1)$ ...",1,0,0
3,[If Jeremy played a three-letter word on a tri...,[ The word score was tripled to earn 30 points...,[ The word score was tripled to earn 30 points...,1,0,0
4,"[Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3...",[ \nLet's add all the complex numbers together...,"[ \nFirst, let's add all the given complex num...",1,0,0


In [27]:
# Randomise which side holds the preferred answer, one row at a time.
df = df.apply(func=swap_columns, axis="columns")
df.head()

Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,"[In the month of July, the bookstore has a sal...",[ Find the dates when the shoe store has a sal...,[ Find the dates on which both stores have sal...,0,1,0
1,[Two different prime numbers between $4$ and $...,[ Comparing the results with the given options...,[ Comparing the results with the given options...,0,1,0
2,[If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3...,"[ \nGiven $g(x) = 2x - 3$, we can find $g(1)$ ...",[ \nWe are given that $g(x) = x^2 - 2x + 3$. S...,0,1,0
3,[If Jeremy played a three-letter word on a tri...,[ The word score was tripled to earn 30 points...,[ The word score was tripled to earn 30 points...,0,1,0
4,"[Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3...","[ \nFirst, let's add all the given complex num...",[ \nLet's add all the complex numbers together...,0,1,0


In [28]:
# Give every row a random 12-character identifier.
df["id"] = [generate_random_string(length=12) for _ in df.index]
df.head()

Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,id
0,"[In the month of July, the bookstore has a sal...",[ Find the dates when the shoe store has a sal...,[ Find the dates on which both stores have sal...,0,1,0,HPpEYwKRGES0
1,[Two different prime numbers between $4$ and $...,[ Comparing the results with the given options...,[ Comparing the results with the given options...,0,1,0,tZKb7nlz4dhK
2,[If $h(x) = x^3 + f(x)$ where $f(x) = 2x^2 - 3...,"[ \nGiven $g(x) = 2x - 3$, we can find $g(1)$ ...",[ \nWe are given that $g(x) = x^2 - 2x + 3$. S...,0,1,0,HHPmwcqDQDzn
3,[If Jeremy played a three-letter word on a tri...,[ The word score was tripled to earn 30 points...,[ The word score was tripled to earn 30 points...,0,1,0,g4ZNsDgoWfix
4,"[Let $A=2+i$, $B=3-2i$, $O=-4$, $P=-i$, $Q=5+3...","[ \nFirst, let's add all the given complex num...",[ \nLet's add all the complex numbers together...,0,1,0,Ohz90ySTCWze


In [29]:
# Keep the processed frame under its dataset name and write it to disk as a
# JSON array with one object per row.
math_dpo_10k = df.copy()
math_dpo_10k.to_json(path_or_buf="math_dpo_10k_arena.json", orient="records")