In [43]:
import datasets
import more_itertools as mit
import sys
sys.path.append("open-instruct")
from eval.MATH.utilities import last_boxed_only_string, remove_boxed
from eval.MATH.minerva_utils import normalize_final_answer, get_unnormalized_answer, is_equiv

In [37]:
train = datasets.load_dataset("ai2-adapt-dev/math_ground_truth", split="train")

In [57]:
train["messages"][0]

[{'content': 'Question: Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.}\nAnswer:The expressions inside each square root must be non-negative.\nTherefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$.\nAlso, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$.\nTherefore, the domain of the expression is $\\boxed{[2,5)}$.\n\nQuestion: If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$\nAnswer:We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$\n\nQuestion: Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?\nAnswer:If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight.  If he lifts two 15-pound weights instead for $n$ times, he will lift a total of

In [39]:
few_shot, question = mit.one(train["messages"][0])["content"].rsplit("\n\n", 1)

In [40]:
few_shot

'Question: Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.}\nAnswer:The expressions inside each square root must be non-negative.\nTherefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$.\nAlso, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$.\nTherefore, the domain of the expression is $\\boxed{[2,5)}$.\n\nQuestion: If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$\nAnswer:We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$\n\nQuestion: Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?\nAnswer:If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight.  If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\

In [41]:
train["ground_truth"]

['8',
 '2x-8',
 '1110_4',
 '408',
 '1293',
 '\\frac{1}{3}',
 '25',
 '15',
 '20',
 '3',
 '70',
 '\\text{(C)}',
 '432',
 '576',
 '-\\frac{11}{2}',
 '100^\\circ',
 '-\\frac{3}{2}',
 '22.21',
 '8',
 '\\frac{7}{11}',
 '0',
 '16',
 '5',
 '40',
 '\\frac{1}{16}',
 '\\frac{1}{7}\\triangle ABC',
 '18',
 '-27',
 '21',
 '2',
 '486 \\sqrt{3}',
 '\\frac{1}{2}',
 '\\text{(A)}',
 '\\left( \\frac{1}{4}, \\frac{1}{8}, \\frac{5}{64} \\right)',
 '\\text{even}',
 '17',
 '2',
 '3',
 '-1',
 '27',
 '3',
 '4',
 '12',
 '40',
 '60^\\circ',
 '1,\\!000,\\!000',
 '\\dfrac{40}{81}',
 '10',
 '41',
 '\\frac{1}{7}',
 '25',
 '8',
 '145',
 '499477',
 '21',
 '\\frac{1 + \\sqrt{5}}{2}',
 ' \\$ 5',
 '\\frac{7}{33}',
 '3',
 '1',
 '8.8',
 '3',
 '\\text{B}',
 '383',
 '-45',
 '32',
 '8',
 '252',
 '4\\sqrt{3}',
 '6',
 '(0,-5)',
 '12',
 '-4',
 '57',
 '-8',
 '9',
 '112',
 '\\text{Sunday}',
 '999',
 '8',
 '\\frac{7}{12}',
 '17',
 '\\begin{pmatrix} -1 & -5 \\\\ 1 & 4 \\end{pmatrix}',
 '79',
 '6',
 '26',
 '7',
 '90',
 '\\frac{1033}{4

In [42]:
ds = datasets.load_dataset("hendrycks/competition_math")

In [44]:
def normalize(solution):
    return normalize_final_answer(remove_boxed(last_boxed_only_string((solution))))

In [64]:
tests = []
for example in ds["test"]:
    tests.append({
        "messages": [{
            "content": few_shot.strip() + "\n\n" + example["problem"].strip(), 
            "role": "user"
        }],
        "ground_truth": normalize(example["solution"]),
        "dataset": "MATH",
    })

In [67]:
test = datasets.Dataset.from_list(tests)

In [71]:
test["ground_truth"]

['2',
 '10',
 '\\dfrac{9}{7}',
 'i',
 '4',
 '402',
 'x\\in[-2,7]',
 '7',
 '461415',
 '-\\frac{1}{8}',
 '\\frac{x+2}{7}',
 '-15',
 '10',
 '8',
 '75',
 '\\frac{11}{2}',
 '-25',
 '8',
 '3',
 '187.5',
 '18',
 '40',
 '5',
 '8',
 '3125',
 '[0,\\iny)',
 '.5',
 '12106',
 '5',
 '16',
 '2300',
 '5',
 '105',
 '-13.5',
 '\\frac{243}{625}',
 '2',
 '(-\\sqrt{3},\\sqrt{3})',
 '23',
 '49',
 '2x^9-8x^7+9x^6-16x^5-12x^4+9x^3-24x^2',
 '(-\\iny,-8)\\cup(8,\\iny)',
 '0',
 '2',
 '16',
 '\\frac{1}{12}',
 '6+9i',
 '2',
 '20',
 '7(x+3)(x-3)',
 'y^4-2y^3+7y^2+y-5',
 '4',
 '0',
 '12',
 '\\frac{7}{2}',
 '69',
 '5',
 '\\le(-\\iny,-\\frac{1}{2}\\right)\\cup\\le(-\\frac{1}{2},\\iny\\right)',
 '\\frac{2}{5}',
 '7',
 '20',
 '7',
 '\\sqrt{x}',
 '78',
 '9',
 '4',
 '6',
 '17',
 '\\le(1,\\frac{9}{2}\\right)',
 '\\frac{15}{2}',
 '-2',
 '8',
 '20',
 '24',
 '\\frac{19}{4}',
 '5',
 '-55',
 '60',
 '-7',
 '0.43',
 '108',
 '4950',
 '50',
 '14',
 '8',
 '26',
 '129',
 '0',
 '-5',
 '2',
 '4',
 '30',
 '161',
 '1',
 '5',
 '3s^2',
 '1

In [72]:
train["ground_truth"]

['8',
 '2x-8',
 '1110_4',
 '408',
 '1293',
 '\\frac{1}{3}',
 '25',
 '15',
 '20',
 '3',
 '70',
 '\\text{(C)}',
 '432',
 '576',
 '-\\frac{11}{2}',
 '100^\\circ',
 '-\\frac{3}{2}',
 '22.21',
 '8',
 '\\frac{7}{11}',
 '0',
 '16',
 '5',
 '40',
 '\\frac{1}{16}',
 '\\frac{1}{7}\\triangle ABC',
 '18',
 '-27',
 '21',
 '2',
 '486 \\sqrt{3}',
 '\\frac{1}{2}',
 '\\text{(A)}',
 '\\left( \\frac{1}{4}, \\frac{1}{8}, \\frac{5}{64} \\right)',
 '\\text{even}',
 '17',
 '2',
 '3',
 '-1',
 '27',
 '3',
 '4',
 '12',
 '40',
 '60^\\circ',
 '1,\\!000,\\!000',
 '\\dfrac{40}{81}',
 '10',
 '41',
 '\\frac{1}{7}',
 '25',
 '8',
 '145',
 '499477',
 '21',
 '\\frac{1 + \\sqrt{5}}{2}',
 ' \\$ 5',
 '\\frac{7}{33}',
 '3',
 '1',
 '8.8',
 '3',
 '\\text{B}',
 '383',
 '-45',
 '32',
 '8',
 '252',
 '4\\sqrt{3}',
 '6',
 '(0,-5)',
 '12',
 '-4',
 '57',
 '-8',
 '9',
 '112',
 '\\text{Sunday}',
 '999',
 '8',
 '\\frac{7}{12}',
 '17',
 '\\begin{pmatrix} -1 & -5 \\\\ 1 & 4 \\end{pmatrix}',
 '79',
 '6',
 '26',
 '7',
 '90',
 '\\frac{1033}{4

In [78]:
test["messages"][22][0]["content"]

'Question: Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.}\nAnswer:The expressions inside each square root must be non-negative.\nTherefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$.\nAlso, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$.\nTherefore, the domain of the expression is $\\boxed{[2,5)}$.\n\nQuestion: If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$\nAnswer:We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$\n\nQuestion: Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?\nAnswer:If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight.  If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\

In [79]:
train["messages"][22][0]["content"]

'Question: Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.}\nAnswer:The expressions inside each square root must be non-negative.\nTherefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$.\nAlso, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$.\nTherefore, the domain of the expression is $\\boxed{[2,5)}$.\n\nQuestion: If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$\nAnswer:We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$\n\nQuestion: Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?\nAnswer:If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight.  If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\

In [None]:
datasets.DatasetDict(dict(train=train, test=test)).push_to_hub("JulesGM/math_ground_truth_with_test")

HfHubHTTPError: (Request ID: Root=1-6774bb2a-7a5645c73e79af954861c895;33b71e7f-ba74-4220-a3f9-5ebee4bd0dca)

403 Forbidden: You don't have the rights to create a dataset under the namespace "JulesGM".
Cannot access content at: https://huggingface.co/api/repos/create.
Make sure your token has the correct permissions.