In [1]:
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [2]:
import torch
from transformers import pipeline
import json
from itertools import combinations

In [None]:
def load_tot_results(file_path):
    with open(file_path, "r") as f:
        return json.load(f)


def save_training_data(preference_pairs, output_path):
    """Save the preference training data in JSON format."""
    with open(output_path, "w") as f:
        json.dump(preference_pairs, f, indent=4)

In [3]:
def reconstruct_paths(tot_data):
    # Key -> Each y
    # Value -> (Single Preferred Next Step (new_y \setminus y), Dis-Preferred Next Step (new_y \setminus y))

    # select_new_ys[-1] = spns
    # select_new_ys[0] = dis-pns

    paths = {}

    for step in tot_data:
        x = step["question"]
        ys = step["ys"]
        new_ys = step["new_ys"]
        values = step["values"]
        selected_new_ys = step["selected_new_ys"]

        # TODO
        # 1. Remove prefix_y from new_y
        # 2. Single preferred next step

        for i, new_y in enumerate(new_ys):
            score = values[i]
            for y in ys:
                if new_y.startswith(y):
                    full_path = y + " -> " + new_y
                    paths[full_path] = score

    return paths


def generate_preference_pairs(paths):
    sorted_paths = sorted(paths.items(), key=lambda x: x[1], reverse=True)
    ranked_paths = [p[0] for p in sorted_paths]

    preference_pairs = []
    for p1, p2 in combinations(ranked_paths, 2):
        preference_pairs.append({"better": p1, "worse": p2})

    return preference_pairs


In [5]:
import json
from tqdm import tqdm, trange


# train_tot = json.load(open('../data/SVAMP/train_tot.json', 'r'))

print(f"Loading ToT data......")
tot_data = json.load(open('/content/drive/Shareddrives/517 nlp project/data/SVAMP/tot_result.json', 'r'))



dpo_data = []
# each dpo data contains: x + y, preferred_new_y, dis-preferred_new_y
# from bottom up, if preferred_thought is None, then the preferred_thought is the last element of select_new_ys
# if new_y is the prefix of the preferred_thought, then it is the preferred_thought
# if new_y is not the prefix of the preferred_thought, then it is the dis-preferred_thought
for tot in tqdm(tot_data):
    # from bottom up, for each layer, get the idx of the preferred_thought
    disprefered_list = []
    for i in range(len(tot['info']['steps']) - 1, -1, -1):
        if i == len(tot['info']['steps']) - 1:
            preferred_thought = tot['info']['steps'][i]['select_new_ys'][0]
            preferred_thought_idx = [0]
        else:
            new_ys = tot['info']['steps'][i]['new_ys']
            preferred_thought_idx = []
            for j, new_y in enumerate(new_ys):
                if preferred_thought.startswith(new_y):
                    preferred_thought_idx.append(j)
                    preferred_thought = new_y
                else:
                    disprefered_list.append(new_y)
            ys = tot['info']['steps'][i]['ys']
            x = tot['info']['steps'][i]['x']
            # we only care about the parent y that's the prefix of the preferred_thought
            for j in range(len(ys)):
                if preferred_thought.startswith(ys[j]):
                    # find all the dis-preferred thoughts that starts with ys[j]
                    for disprefered in disprefered_list:
                        if disprefered.startswith(ys[j]):
                            # we have a pair here
                            user_content = x + ys[j]
                            # remove the prefix
                            rejected_content = disprefered[len(ys[j]):]
                            chosen_content = preferred_thought[len(ys[j]):]
                            dpo_data.append({
                                "rejected": [
                                    {
                                        "content": user_content,
                                        "role": "user"
                                    },
                                    {
                                        "content": rejected_content,
                                        "role": "assistant"
                                    }
                                ],
                                "rejected_score": 0.0,
                                "chosen": [
                                    {
                                        "content": user_content,
                                        "role": "user"
                                    },
                                    {
                                        "content": chosen_content,
                                        "role": "assistant"
                                    }
                                ],
                                "chosen_score": 1.0

                            })


output_path = "/content/drive/Shareddrives/517 nlp project/data/SVAMP/test_preference_pairs_1.json"

print(dpo_data[0])
print(len(dpo_data))
json.dump(dpo_data, open(output_path, 'w'), indent=4)


print(f"Training data saved to {output_path}")

Loading ToT data......


100%|██████████| 5/5 [00:00<00:00, 4433.73it/s]

{'rejected': [{'content': 'Solve a math word problem step by step. Finally, state the answer in the format: [ANSWER]<number>[/ANSWER]\nHere are some examples:\nQ: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\nA: Step 1, There are originally 3 cars, and  2 more cars arrive. Step 2, 3 + 2 = 5. The answer is [ANSWER]<5>[/ANSWER].\n\nQ: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\nA: Step 1, There are 15 trees originally, Then there were 21 trees after some more were planted. Step 2, So there must have been 21 - 15 = 6. The answer is [ANSWER]<6>[/ANSWER].\n\nQ: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\nA: Step 1, Originally, Leah had 32 chocolates, and her sister had 42. Step 2, So in total they had 32 + 42 = 74. Step 3, After eating 35, 




In [6]:
import json
from tqdm import tqdm


print(f"Loading ToT data......")
tot_data = json.load(open('/content/drive/Shareddrives/517 nlp project/data/SVAMP/tot_result.json', 'r'))

dpo_data = []

for tot in tqdm(tot_data):
    # First, identify the selected/preferred path
    preferred_thoughts = []
    # Assuming the final selected path is stored somewhere in tot['info']
    # This part depends on your exact data structure
    if 'final_path' in tot['info']:
        preferred_thoughts = tot['info']['final_path']
    else:
        # If not explicitly stored, we need to reconstruct it
        # This assumes the last step contains the final answer
        final_step = tot['info']['steps'][-1]
        selected_idx = final_step.get('selected_idx', 0)  # Default to first if not specified
        preferred_thoughts = [final_step['select_new_ys'][selected_idx]]

        # Work backwards to construct the full path
        for i in range(len(tot['info']['steps'])-2, -1, -1):
            step = tot['info']['steps'][i]
            for j, new_y in enumerate(step['new_ys']):
                # Check if this thought leads to the preferred thought at next step
                if preferred_thoughts[0].startswith(new_y):
                    preferred_thoughts.insert(0, new_y)
                    break

    # Now create preference pairs at each step
    for i in range(len(tot['info']['steps'])):
        step = tot['info']['steps'][i]

        # Find the preferred thought at this step
        preferred_thought = None
        for thought in preferred_thoughts:
            if thought in step['new_ys']:
                preferred_thought = thought
                break

        if preferred_thought is None:
            continue  # Skip if we can't identify the preferred thought

        preferred_idx = step['new_ys'].index(preferred_thought)

        # Get the parent state (previous thoughts)
        parent_state = step['x']
        if i > 0 and len(step['ys']) > 0:
            parent_y = step['ys'][0]  # Assuming there's just one parent state
        else:
            parent_y = ""

        # Create preference pairs with sibling thoughts
        for j, dispreferred_thought in enumerate(step['new_ys']):
            if j != preferred_idx:  # Skip the preferred thought
                user_content = parent_state + parent_y
                chosen_content = preferred_thought[len(parent_y):]
                rejected_content = dispreferred_thought[len(parent_y):]

                dpo_data.append({
                    "rejected": [
                        {
                            "content": user_content,
                            "role": "user"
                        },
                        {
                            "content": rejected_content,
                            "role": "assistant"
                        }
                    ],
                    "rejected_score": 0.0,
                    "chosen": [
                        {
                            "content": user_content,
                            "role": "user"
                        },
                        {
                            "content": chosen_content,
                            "role": "assistant"
                        }
                    ],
                    "chosen_score": 1.0
                })

print(len(dpo_data))
if dpo_data:
    print(dpo_data[0])

output_path = "/content/drive/Shareddrives/517 nlp project/data/SVAMP/test_preference_pairs_2.json"

print(dpo_data[0])
print(len(dpo_data))
json.dump(dpo_data, open(output_path, 'w'), indent=4)


print(f"Training data saved to {output_path}")

Loading ToT data......


100%|██████████| 5/5 [00:00<00:00, 1095.86it/s]

195
{'rejected': [{'content': 'Solve a math word problem step by step. Finally, state the answer in the format: [ANSWER]<number>[/ANSWER]\nHere are some examples:\nQ: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\nA: Step 1, There are originally 3 cars, and  2 more cars arrive. Step 2, 3 + 2 = 5. The answer is [ANSWER]<5>[/ANSWER].\n\nQ: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\nA: Step 1, There are 15 trees originally, Then there were 21 trees after some more were planted. Step 2, So there must have been 21 - 15 = 6. The answer is [ANSWER]<6>[/ANSWER].\n\nQ: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\nA: Step 1, Originally, Leah had 32 chocolates, and her sister had 42. Step 2, So in total they had 32 + 42 = 74. Step 3, After eating 




In [None]:
# --- Run the pipeline ---

print(f"Loading ToT data......")
tot_data = load_tot_results('/content/drive/Shareddrives/517 nlp project/data/SVAMP/tot_result.json')


paths = reconstruct_paths(tot_data)
preference_pairs = generate_preference_pairs(paths)


output_path = "/content/drive/Shareddrives/517 nlp project/data/SVAMP/preference_pairs.json"
save_training_data(preference_pairs, output_path)
print(f"Training data saved to {output_path}")
