Load the number templates (only the first 20 rows are kept)

In [126]:
import pandas as pd
# Read the JSON-lines template file and keep only its first 20 rows.
number_templates = pd.read_json('templates/number_templates.jsonl', lines=True).head(20)

Helper function to query OpenAI

In [102]:
import os
from openai import OpenAI
import pandas as pd
import json

# System prompt: the full contents of the prompt file, read verbatim.
with open('fill_in_numbers_prompt.txt', 'r') as prompt_file:
    system_prompt = prompt_file.read()

# Text describing the required reply format (a JSON object of variable -> value).
suffix = "\n\nYour output should strictly be a json object mapping the inputs to outputs. It should be in the format of {variable1: value, variable2: value, ...}"

# OpenAI API key, read from a local file with surrounding whitespace removed.
with open('openai_key.txt', 'r') as key_file:
    API_KEY = key_file.read().strip()

def get_variables(target_question, API_KEY = API_KEY, prompt = system_prompt, suffix = suffix):
  """Send one template to the chat model and return its raw text reply.

  Args:
    target_question: Text for the user message; ``suffix`` is appended to it.
    API_KEY: OpenAI API key used to authenticate this request.
    prompt: Content of the system message.
    suffix: Text appended to ``target_question`` in the user message.

  Returns:
    The ``content`` of the first choice's message, unmodified.
  """
  # Pass the key to the client directly instead of writing it into os.environ:
  # the secret then does not linger in the process environment, where any
  # subprocess started from the notebook would inherit it.
  client = OpenAI(api_key=API_KEY)
  response = client.chat.completions.create(
      model="gpt-4o-mini",
      messages=[
          {"role": "system", "content": prompt},
          {"role": "user", "content": target_question + suffix},
      ]
  )
  return response.choices[0].message.content

In [130]:
def valid_output(constraints, vars):
    """Check that the proposed variable values satisfy every constraint.

    Each constraint is a Python expression over the variable names (for
    example ``'f == a * b'``). The expression is evaluated with the values
    bound as names instead of being pasted into the expression text, so that:

    * a key that is a substring of another token (``a`` inside ``abs``) is
      not rewritten;
    * negative values keep their meaning (``a ** 2`` with ``a = -3`` is 9,
      whereas the text ``-3 ** 2`` evaluates to -9);
    * the values themselves are never parsed as code.

    Args:
        constraints: Iterable of expression strings.
        vars: Mapping of variable name to value. String values that parse as
            numbers are converted to numbers before evaluation.

    Returns:
        True if every constraint evaluates truthy. False (after printing the
        offending constraint) as soon as one evaluates falsy or cannot be
        evaluated with the given values.
    """
    import re  # local import: only needed for the diagnostic message

    def _as_number(value):
        # Quoted numbers such as "10" are treated as numeric.
        if isinstance(value, str):
            for cast in (int, float):
                try:
                    return cast(value)
                except ValueError:
                    pass
        return value

    def _show(match):
        # Substitute whole identifiers only; used just for the printed message.
        name = match.group(0)
        return str(vars[name]) if name in vars else name

    namespace = {key: _as_number(value) for key, value in vars.items()}

    for constraint in constraints:
        substituted = re.sub(r'\b[A-Za-z_]\w*', _show, constraint)
        try:
            # NOTE: eval still executes the constraint text itself, so the
            # constraints must come from a trusted source. A fresh copy of the
            # namespace is used because eval adds __builtins__ to it.
            satisfied = eval(constraint, dict(namespace))
        except (NameError, TypeError, ZeroDivisionError) as error:
            # Missing variable, non-numeric value, or division by zero: the
            # proposed values are unusable, which callers treat as "invalid".
            print("Failed constraint: ", constraint, "with", substituted, "-", repr(error))
            return False
        if not satisfied:
            print("Failed constraint: ", constraint, "with", substituted)
            return False
    return True

In [70]:
def replace_vars(query, answer, vars):
    """Fill ``{name}`` placeholders in a question/answer pair.

    For every key of ``vars``, each occurrence of ``{key}`` in both strings
    is replaced by ``str(value)``. Names that appear without braces are left
    untouched.

    Returns:
        A ``(query, answer)`` tuple with the substitutions applied.
    """
    for name, value in vars.items():
        placeholder = '{' + name + '}'
        rendered = str(value)
        query, answer = (
            query.replace(placeholder, rendered),
            answer.replace(placeholder, rendered),
        )
    return query, answer

## Toy Example

In [55]:
# Serialise the first template row to a string and use it as the model query.
query = str(number_templates.iloc[0].to_dict())
# Raw text reply from the model ...
vars = get_variables(query)
# ... decoded into a dict; raises if the reply is not plain JSON.
vars = json.loads(vars)
print(vars)

{'a': 10, 'b': 3, 'c': 2, 'd': 5, 'e': 5, 'f': 25, 'x': 10, 'y': 3, 'z': 2}


In [56]:
# Check the current `vars` against the constraints of the first template.
valid_output(number_templates.iloc[0]['constraints'], vars)

True

In [57]:
# Preview the first template's question and answer with `vars` filled in.
replace_vars(number_templates.iloc[0]['question'], number_templates.iloc[0]['answer'], vars)

("Janet’s ducks lay 10 eggs per day. Janet eats 3 for breakfast every morning and bakes muffins for her friends every day with 2. Janet sells the remainder at the farmers' market daily for $5 per fresh duck egg. How much in dollars does Janet make every day at the farmers' market?",
 'Janet sells 10 - 3 - 2 = <<10 - 3 - 2 = 5>>e duck eggs a day.\nShe makes 5 * 5 = $<<5 * 5 = 25>>f every day at the farmer’s market.\n#### 25')

## Execute on all samples

In [133]:
# NOTE(review): debugging cell - shows whatever `vars` was last assigned by another cell (hidden state).
vars

{'a': 2,
 'b': 60,
 'c': 1,
 'd': 4,
 'e': 1,
 'f': 120,
 'g': 3,
 'h': 180,
 'i': 0,
 'j': 180,
 'k': -60}

In [123]:
# NOTE(review): debugging cell - `i` is only bound by the fill-in loop cell, so that cell must have run first.
number_templates.iloc[i]['constraints']

['a > 0',
 'b > 0',
 'c > 0',
 'd > 0',
 'e > 0',
 'f == a * b',
 'g == d - e',
 'h == c * g',
 'i == e * d',
 'j == h + i',
 'k == f - j']

In [114]:
# NOTE(review): debugging cell - uses `i` and `vars` left behind by the fill-in loop cell.
valid_output(number_templates.iloc[i]['constraints'], vars)

False

In [124]:
# NOTE(review): debugging cell - `i` is only bound by the fill-in loop cell, so that cell must have run first.
number_templates.iloc[i]['answer']

'When John turned around, they were {a} * {b} = <<{a}*{b}={f}>>{f} miles from home.\nThey were only able to drive {d} - {e} = <<{d}-{e}={g}>>{g} hours in the first {d} hours.\nIn the next {c} hour(s), they drove {g} * {c} = <<{g}*{c}={h}>>{h} miles.\nThey then drove another {g}-{c}=<<{g}-{c}=remaining_time>>remaining_time hour(s).\nIn that time, they went {h} * remaining_time = <<{h}*remaining_time={i}>>{i} miles.\nSo they drove a total of {h}+{i}=<<{h}+{i}=j>>{j} miles.\nThus, they are still {f}-{j}=<<{f}-{j}=k>>{k} miles away from home.\n#### {k}'

In [132]:
# NOTE(review): debugging cell - uses `i` and `vars` left behind by the fill-in loop cell.
replace_vars(number_templates.iloc[i]['question'], number_templates.iloc[i]['answer'], vars)

('John drives for 2 hours at a speed of 60 mph and then turns around because John realizes John forgot something very important at home. John tries to get home in 4 hours but spends the first 1 hours in standstill traffic. John spends the next 1 hour(s) driving at a speed of 3 mph, before being able to drive the remaining time of the 4 hours going at 180 mph. How far is John from home at the end of those 4 hours?',
 'When John turned around, they were 2 * 60 = <<2*60=120>>120 miles from home.\nThey were only able to drive 4 - 1 = <<4-1=3>>3 hours in the first 4 hours.\nIn the next 1 hour(s), they drove 3 * 1 = <<3*1=180>>180 miles.\nThey then drove another 3-1=<<3-1={remaining_time}>>{remaining_time} hour(s).\nIn that time, they went 180 * {remaining_time} = <<180*{remaining_time}=0>>0 miles.\nSo they drove a total of 180+0=<<180+0=j>>180 miles.\nThus, they are still 120-180=<<120-180=-60>>-60 miles away from home.\n#### -60')

In [119]:
# Row positions already filled in; the fill-in loop skips these, so it can be re-run after an interruption.
completed_cache = set() # useful for debugging

In [135]:
# Fill every template with model-proposed numbers. Each template is retried up
# to `max_retries` times until the reply parses as a JSON object and satisfies
# the template's constraints.
max_retries = 5
# Column positions for writing results back with one positional
# .iloc[row, col] assignment. The chained form `.iloc[i]['question'] = q`
# assigns to a temporary row object and is not guaranteed to reach the frame.
question_col = number_templates.columns.get_loc('question')
answer_col = number_templates.columns.get_loc('answer')
for i in range(len(number_templates)):
    if i in completed_cache:
        continue
    print(i)
    query = str(number_templates.iloc[i].to_dict())
    num_retries = 0
    while num_retries < max_retries:
        vars = get_variables(query)
        try:
            # Remove a surrounding ``` code fence and a leading "json" language
            # tag only; "json" occurring inside the payload is left alone.
            reply = vars.strip().strip('`').strip()
            if reply[:4].lower() == 'json':
                reply = reply[4:]
            vars = json.loads(reply)
            if not isinstance(vars, dict):
                raise ValueError("expected a JSON object")
        except (ValueError, AttributeError):
            # ValueError includes json.JSONDecodeError; AttributeError covers a
            # reply that is not a string (e.g. None). KeyboardInterrupt is
            # deliberately not caught so the loop can be stopped.
            print("invalid json. retrying")
            num_retries += 1
            continue
        if valid_output(number_templates.iloc[i]['constraints'], vars):
            q, ans = replace_vars(number_templates.iloc[i]['question'], number_templates.iloc[i]['answer'], vars)
            number_templates.iloc[i, question_col] = q
            number_templates.iloc[i, answer_col] = ans
            completed_cache.add(i)
            break
        else:
            print("didn't meet constraints. retrying")
            num_retries += 1
    if num_retries == max_retries:
        print("Failed to generate valid output for template " + str(i))
# NOTE: every row is written, including templates that exhausted their retries
# (those rows still contain the unfilled placeholders).
number_templates.to_json('datasets/gsm_numbers.jsonl', orient='records', lines=True)

8
Failed constraint:  e == b * a with 100 == 20 * 10
didn't meet constraints. retrying
Failed constraint:  g == (d - a) * f + e with 1525 == (50 - 40) * 22.5 + 600
didn't meet constraints. retrying
Failed constraint:  g == (d - a) * f + e with 1125 == (45 - 40) * 22.5 + 600
didn't meet constraints. retrying
Failed constraint:  g == (d - a) * f + e with 1050 == (50 - 40) * 22.5 + 600
didn't meet constraints. retrying
Failed constraint:  e == b * a with 15 == 15 * 40
didn't meet constraints. retrying
Failed to generate valid output for template 8
9


KeyboardInterrupt: 