In [4]:
import json
import re
from pprint import pprint

from datasets import load_dataset
from datasets import load_from_disk
from tqdm.autonotebook import tqdm

from src.pdl.optimize.parse_number import parse_number

In [4]:
# Load the "main" configuration of the openai/gsm8k dataset (train and test splits).
gsm8k = load_dataset("openai/gsm8k", "main")

Generating train split: 100%|██████████| 7473/7473 [00:00<00:00, 183715.29 examples/s]
Generating test split: 100%|██████████| 1319/1319 [00:00<00:00, 777771.26 examples/s]


In [5]:
def parse_answers(row):
    """Split a raw GSM8K row into question, reasoning and parsed final answer.

    The raw ``answer`` text is cut at the ``####`` marker: everything before the
    last marker is kept as the reasoning, the text after it is handed to
    ``parse_number``. Question and reasoning are stripped, have the typographic
    apostrophe replaced by ``'`` and double spaces replaced by single ones.

    Args:
        row: Mapping with the string fields ``"question"`` and ``"answer"``.

    Returns:
        dict with the keys ``question``, ``answer``, ``reasoning``,
        ``raw_answer`` (the untouched input answer) and ``answer_part`` (the raw
        text after the last ``####``).
    """

    def _tidy(text):
        # Same clean-up chain for both the question and the reasoning.
        return text.strip().replace("’", "'").replace("  ", " ")

    cleaned_question = _tidy(row["question"])
    *reasoning_chunks, final_chunk = row["answer"].split("####")
    parsed_answer = parse_number(final_chunk)
    cleaned_reasoning = _tidy("####".join(reasoning_chunks))

    return dict(
        question=cleaned_question,
        answer=parsed_answer,
        reasoning=cleaned_reasoning,
        raw_answer=row["answer"],
        answer_part=final_chunk,
    )


# Run parse_answers over every row of every split, adding the parsed columns.
gsm8k = gsm8k.map(parse_answers)

Map: 100%|██████████| 7473/7473 [00:00<00:00, 32790.56 examples/s]
Map: 100%|██████████| 1319/1319 [00:00<00:00, 46052.88 examples/s]


In [6]:
def react_trajectory(row):
    """Build a ReAct-style trajectory from one parsed GSM8K row.

    Each line of ``row["reasoning"]`` becomes a ``thought`` step. Every
    ``<<expression=result>>`` calculator annotation found on a line additionally
    yields a ``Calculator[expression]`` action followed by an ``observation``
    holding the annotated result. The trajectory always ends with a
    ``Finish[answer]`` action, preceded by a "The answer is ..." thought when
    the last step would otherwise be an observation.

    Args:
        row: Mapping with the keys ``"question"`` (str), ``"answer"`` and
            ``"reasoning"`` (str, one reasoning step per line).

    Returns:
        dict with two parallel lists: ``"traj_keys"`` (the step type of every
        step: ``question``/``thought``/``action``/``observation``) and
        ``"traj_values"`` (the text of every step).
    """
    # Compiled once per call instead of being rebuilt for every reasoning line.
    pattern = re.compile(
        r"(?P<pre>(=(\ )?|equals(\ )?)?(\$)?)<<(?P<exp>.*?)=(?P<res>.*?)>>([^\s]*)"
    )
    question = row["question"]
    answer = row["answer"]
    trajectory = [{"question": question.strip()}]

    for line in row["reasoning"].splitlines():
        matches = list(pattern.finditer(line))
        if not matches:
            trajectory.append({"thought": line.strip().replace("  ", " ")})
            continue

        # The annotations are stripped from the sentence; what remains is the thought.
        thought = pattern.sub("", line).rstrip(".").rstrip(",")
        thought = thought.strip().replace("  ", " ")

        # Emit one calculator call per annotation so that lines carrying several
        # calculations do not silently lose all but the first one.
        for index, match in enumerate(matches):
            exp = match.group("exp").strip()
            res = match.group("res").strip()
            # Only the first call carries the sentence; skip the prefix when the
            # line held nothing but the annotation to avoid a leading ". ".
            prefix = f"{thought}. " if index == 0 and thought else ""
            trajectory += [
                {"thought": f"{prefix}I need to calculate {exp}"},
                {"action": f"Calculator[{exp}]"},
                {"observation": res},
            ]

    if next(iter(trajectory[-1])) == "observation":
        trajectory.append({"thought": f"The answer is {answer}"})

    trajectory.append({"action": f"Finish[{answer}]"})

    return {
        "traj_keys": [next(iter(step)) for step in trajectory],
        "traj_values": [next(iter(step.values())) for step in trajectory],
    }


# Only the train split receives the ReAct trajectory columns (traj_keys / traj_values).
gsm8k["train"] = gsm8k["train"].map(react_trajectory)

Map: 100%|██████████| 7473/7473 [00:00<00:00, 14437.73 examples/s]


In [7]:
def rewoo_trajectory(row):
    """Build a ReWOO-style trajectory from one parsed GSM8K row.

    Each line of ``row["reasoning"]`` becomes a ``thought`` step. Every
    ``<<expression=result>>`` calculator annotation found on a line additionally
    yields a ``Calculator[expression]`` action and an ``observation`` with the
    annotated result. Afterwards, each observed result that reappears in a
    *later* thought or action is replaced by the placeholder ``#E<n>``, where
    ``n`` is the running number of the action that produced it.

    Args:
        row: Mapping with the keys ``"question"`` (str) and ``"reasoning"``
            (str, one reasoning step per line).

    Returns:
        dict with two parallel lists: ``"rewoo_traj_keys"`` (step types) and
        ``"rewoo_traj_values"`` (step texts).
    """
    # Compiled once per call instead of being rebuilt for every reasoning line.
    pattern = re.compile(
        r"(?P<pre>(=(\ )?|equals(\ )?)?(\$)?)<<(?P<exp>.*?)=(?P<res>.*?)>>([^\s]*)"
    )
    trajectory = [{"question": row["question"].strip().replace("  ", " ")}]

    for line in row["reasoning"].splitlines():
        matches = list(pattern.finditer(line))
        if not matches:
            trajectory.append({"thought": line.strip().replace("  ", " ")})
            continue

        # The annotations are stripped from the sentence; what remains is the thought.
        thought = pattern.sub("", line).rstrip(".").rstrip(",")
        thought = thought.strip().replace("  ", " ")

        # Emit one calculator call per annotation so that lines carrying several
        # calculations do not silently lose all but the first one.
        for index, match in enumerate(matches):
            exp = match.group("exp").strip()
            res = match.group("res").strip()
            # Only the first call carries the sentence; skip the prefix when the
            # line held nothing but the annotation to avoid a leading ". ".
            prefix = f"{thought}. " if index == 0 and thought else ""
            trajectory += [
                {"thought": f"{prefix}Calculate {exp}"},
                {"action": f"Calculator[{exp}]"},
                {"observation": res},
            ]

    evidence_counter = 0
    for position, step in enumerate(trajectory):
        kind, value = next(iter(step.items()))
        if kind == "action":
            evidence_counter += 1
            continue
        # An empty result would match between every character, so skip it.
        if kind != "observation" or not value:
            continue

        placeholder = f"#E{evidence_counter}"
        # Match the result only as a stand-alone token: not inside a longer
        # number ("2" in "20", "1,300" or "2.5") and not inside an earlier
        # placeholder ("1" in "#E1").
        standalone = re.compile(
            r"(?<![\w.])(?<!\d,)" + re.escape(value) + r"(?!\d)(?![.,]\d)"
        )
        for later in trajectory[position + 1 :]:
            for key in ("action", "thought"):
                if key in later:
                    later[key] = standalone.sub(lambda _match: placeholder, later[key])

    return {
        "rewoo_traj_keys": [next(iter(step)) for step in trajectory],
        "rewoo_traj_values": [next(iter(step.values())) for step in trajectory],
    }


# Only the train split receives the ReWOO trajectory columns (rewoo_traj_keys / rewoo_traj_values).
gsm8k["train"] = gsm8k["train"].map(rewoo_trajectory)

Map: 100%|██████████| 7473/7473 [00:00<00:00, 11128.45 examples/s]


In [8]:
# Persist the processed DatasetDict (all splits) under var/gsm8k_proc.
gsm8k.save_to_disk("var/gsm8k_proc")

Saving the dataset (1/1 shards): 100%|██████████| 7473/7473 [00:00<00:00, 341762.17 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 1319/1319 [00:00<00:00, 329518.55 examples/s]


In [22]:
# Remove the directory written by the save_to_disk("var/gsm8k_proc") cell.
# NOTE(review): the cell after this one loads var/gsm8k_proc, so on a top-to-bottom
# run that load presumably fails unless the dataset is saved again first — confirm order.
!rm -rf var/gsm8k_proc

In [24]:
# Reload the saved dataset and display it to check the splits and their columns.
load_from_disk("var/gsm8k_proc")

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'reasoning', 'raw_answer', 'answer_part', 'traj_keys', 'traj_values', 'rewoo_traj_keys', 'rewoo_traj_values'],
        num_rows: 7473
    })
    test: Dataset({
        features: ['question', 'answer', 'reasoning', 'raw_answer', 'answer_part'],
        num_rows: 1319
    })
})

Sympy tool

Example:
Let x be the cost of the pencil.
If the pen costs 2 times the cost of the pencil, then it costs 2x.
Adding the cost of the pen and pencil we get 2x + x = 3x
Since the total cost is $6 then 3x = $6 therefore x = $6 / 3 = $2
One pen is equal to 2 * x which is 2 * $2 = $4

Use symbolic calculator?

Question: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?
Tho: To make a robe, you need 2 bolts of blue fiber. I need to write 2
Act: Write[2]
Obs: Invalid action. Valid actions are Calculator[<expression>] and Finish[<answer>].
Tho: You also need half as many bolts of white fiber. I need to calculate 2 / 4
Act: Calculator[2 / 4]
Obs: 0.5
Tho: Thus, you need 0.5 bolts of white fiber. I need to write 0.5
Act: Write[0.5]
Obs: Invalid action. Valid actions are Calculator[<expression>] and Finish[<answer>].
Tho: The answer is 2.5
Act: Finish[2.5]

In [176]:
# Pretty-print every processed training row whose question mentions "The moon".
for row in gsm8k["train"]:
    if "The moon" in row["question"]:
        pprint(row)

{'answer': 250,
 'answer_part': ' 250',
 'question': 'The moon is made of 50% iron, 20% carbon, and the remainder is '
             'other elements. Mars weighs twice as much as the moon, but has '
             'the exact same composition. If Mars is 150 tons of other '
             'elements, how many tons does the moon weigh?',
 'raw_answer': '30% of Mars is made up of other elements because 100 - 50 - 20 '
               '= <<100-50-20=30>>30\n'
               'Mars weighs 500 tons because 150 / .3 = <<150/.3=500>>500\n'
               'The moon weighs 250 tons because 500 / 2 = <<500/2=250>>250\n'
               '#### 250',
 'reasoning': '30% of Mars is made up of other elements because 100 - 50 - 20 '
              '= <<100-50-20=30>>30\n'
              'Mars weighs 500 tons because 150 / .3 = <<150/.3=500>>500\n'
              'The moon weighs 250 tons because 500 / 2 = <<500/2=250>>250',
 'traj_keys': ['question',
               'thought',
               'action',
              

# trajectory bootstrapping

manual conversion of ~5 examples

CoT: question/reasoning/answer
ReAct: question/thoughts/observations/answer

CoT: question/reasoning/answer query
Get the ReAct trajectory; if its answer matches the ground truth, use it, otherwise resample.
- can add more examples to the prompt to improve results

# json

In [11]:
# Carve a 1024-example validation split out of the original train split and save
# the resulting train/validation/test DatasetDict under var/gsm8k_split.
gsm8k_orig = load_dataset("openai/gsm8k", "main")
# A fixed seed keeps the train/validation membership identical across re-runs.
new_split = gsm8k_orig["train"].train_test_split(test_size=1024, seed=42)
gsm8k_orig["validation"] = new_split["test"]
gsm8k_orig["train"] = new_split["train"]
gsm8k_orig.save_to_disk("var/gsm8k_split")

Saving the dataset (0/1 shards):   0%|          | 0/6449 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1319 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1024 [00:00<?, ? examples/s]

In [12]:
# Continue from the saved train/validation/test split; this rebinds the name gsm8k.
gsm8k = load_from_disk("var/gsm8k_split")

In [13]:
def parse_answers(row):
    """Split a raw GSM8K row into question, reasoning and parsed final answer.

    The raw ``answer`` text is cut at the ``####`` marker: everything before the
    last marker is kept as the reasoning, the text after it is handed to
    ``parse_number``. Question and reasoning are stripped, have the typographic
    apostrophe replaced by ``'`` and double spaces replaced by single ones.

    Args:
        row: Mapping with the string fields ``"question"`` and ``"answer"``.

    Returns:
        dict with the keys ``question``, ``answer``, ``reasoning``,
        ``raw_answer`` (the untouched input answer) and ``answer_part`` (the raw
        text after the last ``####``).
    """

    def _tidy(text):
        # Same clean-up chain for both the question and the reasoning.
        return text.strip().replace("’", "'").replace("  ", " ")

    cleaned_question = _tidy(row["question"])
    *reasoning_chunks, final_chunk = row["answer"].split("####")
    parsed_answer = parse_number(final_chunk)
    cleaned_reasoning = _tidy("####".join(reasoning_chunks))

    return dict(
        question=cleaned_question,
        answer=parsed_answer,
        reasoning=cleaned_reasoning,
        raw_answer=row["answer"],
        answer_part=final_chunk,
    )


# Run parse_answers over every row of the train, test and validation splits.
gsm8k = gsm8k.map(parse_answers)

Map:   0%|          | 0/6449 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

Map:   0%|          | 0/1024 [00:00<?, ? examples/s]

In [14]:
def react_trajectory(row):
    """Build a ReAct-style trajectory whose actions are JSON tool calls.

    Each line of ``row["reasoning"]`` becomes a ``thought`` step. Every
    ``<<expression=result>>`` calculator annotation found on a line additionally
    yields a ``Calculator`` action and an ``observation`` holding the annotated
    result. The trajectory always ends with a ``Finish`` action, preceded by a
    "The answer is ..." thought when the last step would otherwise be an
    observation. Actions are JSON objects of the form
    ``{"name": ..., "arguments": {...}}`` serialized to strings.

    Args:
        row: Mapping with the keys ``"question"`` (str), ``"answer"`` and
            ``"reasoning"`` (str, one reasoning step per line).

    Returns:
        dict with two parallel lists: ``"traj_keys"`` (the step type of every
        step: ``question``/``thought``/``action``/``observation``) and
        ``"traj_values"`` (the text of every step).
    """
    # Compiled once per call instead of being rebuilt for every reasoning line.
    pattern = re.compile(
        r"(?P<pre>(=(\ )?|equals(\ )?)?(\$)?)<<(?P<exp>.*?)=(?P<res>.*?)>>([^\s]*)"
    )
    question = row["question"]
    answer = row["answer"]
    trajectory = [{"question": question.strip()}]

    def tool_call(name, arguments):
        # json.dumps escapes quotes/backslashes, so the action is always valid
        # JSON; ensure_ascii=False keeps non-ASCII text verbatim.
        return json.dumps({"name": name, "arguments": arguments}, ensure_ascii=False)

    for line in row["reasoning"].splitlines():
        matches = list(pattern.finditer(line))
        if not matches:
            trajectory.append({"thought": line.strip().replace("  ", " ")})
            continue

        # The annotations are stripped from the sentence; what remains is the thought.
        thought = pattern.sub("", line).rstrip(".").rstrip(",")
        thought = thought.strip().replace("  ", " ")

        # Emit one calculator call per annotation so that lines carrying several
        # calculations do not silently lose all but the first one.
        for index, match in enumerate(matches):
            exp = match.group("exp").strip()
            res = match.group("res").strip()
            # Only the first call carries the sentence; skip the prefix when the
            # line held nothing but the annotation to avoid a leading ". ".
            prefix = f"{thought}. " if index == 0 and thought else ""
            trajectory += [
                {"thought": f"{prefix}I need to calculate {exp}"},
                {"action": tool_call("Calculator", {"expr": exp})},
                {"observation": res},
            ]

    if next(iter(trajectory[-1])) == "observation":
        trajectory.append({"thought": f"The answer is {answer}"})

    # The answer is stringified, matching the quoted "topic" value of the format.
    trajectory.append({"action": tool_call("Finish", {"topic": f"{answer}"})})

    return {
        "traj_keys": [next(iter(step)) for step in trajectory],
        "traj_values": [next(iter(step.values())) for step in trajectory],
    }


# Only the train split receives the ReAct trajectory columns (traj_keys / traj_values).
gsm8k["train"] = gsm8k["train"].map(react_trajectory)

Map:   0%|          | 0/6449 [00:00<?, ? examples/s]

In [15]:
def rewoo_trajectory(row):
    """Build a ReWOO-style trajectory whose actions are JSON tool calls.

    Each line of ``row["reasoning"]`` becomes a ``thought`` step. Every
    ``<<expression=result>>`` calculator annotation found on a line additionally
    yields a ``Calculator`` action (a JSON object of the form
    ``{"name": "Calculator", "arguments": {"expr": ...}}`` serialized to a
    string) and an ``observation`` with the annotated result. Afterwards, each
    observed result that reappears in a *later* thought or action is replaced by
    the placeholder ``#E<n>``, where ``n`` is the running number of the action
    that produced it.

    Args:
        row: Mapping with the keys ``"question"`` (str) and ``"reasoning"``
            (str, one reasoning step per line).

    Returns:
        dict with two parallel lists: ``"rewoo_traj_keys"`` (step types) and
        ``"rewoo_traj_values"`` (step texts).
    """
    # Compiled once per call instead of being rebuilt for every reasoning line.
    pattern = re.compile(
        r"(?P<pre>(=(\ )?|equals(\ )?)?(\$)?)<<(?P<exp>.*?)=(?P<res>.*?)>>([^\s]*)"
    )
    trajectory = [{"question": row["question"].strip().replace("  ", " ")}]

    for line in row["reasoning"].splitlines():
        matches = list(pattern.finditer(line))
        if not matches:
            trajectory.append({"thought": line.strip().replace("  ", " ")})
            continue

        # The annotations are stripped from the sentence; what remains is the thought.
        thought = pattern.sub("", line).rstrip(".").rstrip(",")
        thought = thought.strip().replace("  ", " ")

        # Emit one calculator call per annotation so that lines carrying several
        # calculations do not silently lose all but the first one.
        for index, match in enumerate(matches):
            exp = match.group("exp").strip()
            res = match.group("res").strip()
            # Only the first call carries the sentence; skip the prefix when the
            # line held nothing but the annotation to avoid a leading ". ".
            prefix = f"{thought}. " if index == 0 and thought else ""
            # json.dumps escapes quotes/backslashes, so the action is always
            # valid JSON; ensure_ascii=False keeps non-ASCII text verbatim.
            call = json.dumps(
                {"name": "Calculator", "arguments": {"expr": exp}}, ensure_ascii=False
            )
            trajectory += [
                {"thought": f"{prefix}Calculate {exp}"},
                {"action": call},
                {"observation": res},
            ]

    evidence_counter = 0
    for position, step in enumerate(trajectory):
        kind, value = next(iter(step.items()))
        if kind == "action":
            evidence_counter += 1
            continue
        # An empty result would match between every character, so skip it.
        if kind != "observation" or not value:
            continue

        placeholder = f"#E{evidence_counter}"
        # Match the result only as a stand-alone token: not inside a longer
        # number ("2" in "20", "1,300" or "2.5") and not inside an earlier
        # placeholder ("1" in "#E1").
        standalone = re.compile(
            r"(?<![\w.])(?<!\d,)" + re.escape(value) + r"(?!\d)(?![.,]\d)"
        )
        for later in trajectory[position + 1 :]:
            for key in ("action", "thought"):
                if key in later:
                    later[key] = standalone.sub(lambda _match: placeholder, later[key])

    return {
        "rewoo_traj_keys": [next(iter(step)) for step in trajectory],
        "rewoo_traj_values": [next(iter(step.values())) for step in trajectory],
    }


# Only the train split receives the ReWOO trajectory columns (rewoo_traj_keys / rewoo_traj_values).
gsm8k["train"] = gsm8k["train"].map(rewoo_trajectory)

Map:   0%|          | 0/6449 [00:00<?, ? examples/s]

In [16]:
# Persist the processed train/test/validation DatasetDict under var/gsm8k_proc_json.
gsm8k.save_to_disk("var/gsm8k_proc_json")

Saving the dataset (0/1 shards):   0%|          | 0/6449 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1319 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1024 [00:00<?, ? examples/s]

In [1]:
# Reload the saved dataset and display it to check the splits and their columns.
from datasets import load_from_disk
ds = load_from_disk("var/gsm8k_proc_json")
ds

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'reasoning', 'raw_answer', 'answer_part', 'traj_keys', 'traj_values', 'rewoo_traj_keys', 'rewoo_traj_values'],
        num_rows: 6449
    })
    test: Dataset({
        features: ['question', 'answer', 'reasoning', 'raw_answer', 'answer_part'],
        num_rows: 1319
    })
    validation: Dataset({
        features: ['question', 'answer', 'reasoning', 'raw_answer', 'answer_part'],
        num_rows: 1024
    })
})