In [1]:
import os
import json
import pandas as pd

# Root folder that holds one sub-folder per category, each containing `<number>.json` files.
test_root = "data/test"

data_records = []
# os.listdir() returns entries in arbitrary, platform-dependent order. The row
# order decides the `id` column assigned below, so sort both levels to make the
# ids identical on every machine and every run.
for kind in sorted(os.listdir(test_root)):
    path = os.path.join(test_root, kind)
    if not os.path.isdir(path):
        # A stray file next to the category folders would make os.listdir(path) raise.
        print(f"warning: skipping non-directory entry: {path}")
        continue

    for f in sorted(os.listdir(path)):
        if not f.endswith('.json'):
            print(f"warning: has non-json file: {os.path.join(path, f)}")
            continue

        # JSON is UTF-8 by specification; do not depend on the platform default encoding.
        with open(os.path.join(path, f), "r", encoding="utf-8") as j:
            data = json.load(j)

        # json_number is the numeric stem of the file name, e.g. 1 for "1.json".
        data["json_number"] = int(f.split('.')[0])
        data_records.append(data)

df = pd.DataFrame(data_records)
# Stable integer id for every problem; equal to the row position in load order.
df['id'] = df.index

In [2]:
# Keep only the problems of one difficulty tier; the `level` column stores
# labels of the form "Level <n>".
level = 3
df3 = df.loc[df['level'].eq(f"Level {level}")]
df3.head()

Unnamed: 0,problem,level,type,solution,json_number,id
0,How many vertical asymptotes does the graph of...,Level 3,Algebra,The denominator of the rational function facto...,1,0
14,Kite $ABCD$ (a quadrilateral with two pairs of...,Level 3,Algebra,"As the problem suggests, we need to compute th...",1026,14
17,"If $A$, $B$ and $C$ are positive integers such...",Level 3,Algebra,Multiplying the numerator and denominator of t...,1034,17
18,Ten treeks weigh as much as three squigs and o...,Level 3,Algebra,"Let $t,s,g$ be the weight of one treek, the we...",1035,18
23,"At constant temperature, the pressure of a sam...",Level 3,Algebra,Since the pressure $p$ of the hydrogen and the...,1049,23


In [3]:
from datasets import MATH
# Add the answer extracted from each reference solution as a new `actual` column.
# NOTE(review): extract_answer presumably yields NaN/None when it finds no usable
# boxed answer - confirm against datasets.MATH.
df['actual'] = df['solution'].apply(MATH.extract_answer)

# Keep only the rows for which an answer was extracted.
df_int = df[df['actual'].notna()]
df_int

Unnamed: 0,problem,level,type,solution,json_number,id,actual
0,How many vertical asymptotes does the graph of...,Level 3,Algebra,The denominator of the rational function facto...,1,0,2.0
1,What is the positive difference between $120\%...,Level 1,Algebra,One hundred twenty percent of 30 is $120\cdot3...,10,1,10.0
4,"If $2^8=4^x$, what is the value of $x$?",Level 1,Algebra,Rewrite $4$ as $2^2$ to find $4^x=2^{2x}$. Si...,1004,4,4.0
5,What is the 100th term of the arithmetic seque...,Level 2,Algebra,"The common difference is $10 - 6 = 4$, so the ...",1009,5,402.0
7,Mr. Madoff invests 1000 dollars in a fund that...,Level 4,Algebra,Let $r$ be the annual interest rate. Then aft...,1014,7,7.0
...,...,...,...,...,...,...,...
4987,Given $\|\mathbf{v}\| = 5$ and $\|\mathbf{w}\|...,Level 3,Precalculus,Note that\n\begin{align*}\n\operatorname{proj}...,98,4987,5.0
4989,If $0^\circ < x < 180^\circ$ and $\cos x + \si...,Level 5,Precalculus,"From the given equation, $\cos x = \frac{1}{2}...",984,4989,14.0
4990,"Let $x_1,$ $x_2,$ $x_3,$ $y_1,$ $y_2,$ and $y_...",Level 5,Precalculus,"In general,\n\[\frac{1}{2} \begin{vmatrix} x_1...",986,4990,144.0
4992,Compute\n\[\frac{1}{\cos^2 10^\circ} + \frac{1...,Level 4,Precalculus,We can write\n\begin{align*}\n\frac{1}{\cos^2 ...,989,4992,12.0


In [59]:
# Show the full reference solution of the row labelled 0 (the table preview truncates it).
df_int.loc[0, 'solution']

'The denominator of the rational function factors into $x^2+x-6=(x-2)(x+3)$. Since the numerator is always nonzero, there is a vertical asymptote whenever the denominator is $0$, which occurs for $x = 2$ and $x = -3$.  Therefore, the graph has $\\boxed{2}$ vertical asymptotes.'

In [41]:
# Show the full reference solution of the row labelled 1.
df_int.loc[1, 'solution']

'One hundred twenty percent of 30 is $120\\cdot30\\cdot\\frac{1}{100}=36$, and $130\\%$ of 20 is $ 130\\cdot 20\\cdot\\frac{1}{100}=26$.  The difference between 36 and 26 is $\\boxed{10}$.'

In [176]:
import re
from sympy import symbols, simplify, Eq, solve
from latex2sympy2 import latex2sympy

def get_equations(text: str):
    """Extract inline math from ``text`` and check or solve every equation found.

    Inline math is anything between a pair of single dollar signs. An
    expression counts as an equation when it contains ``=``; for chained
    equalities (``a = b = c``) only the first two terms are used.

    Args:
        text: Prose that may contain dollar-delimited LaTeX snippets.

    Returns:
        A 5-tuple ``(expressions, equations, equalities, solutions, parse_exceptions)``:

        * ``expressions``: list of ``[latex, start_pos, end_pos]`` in order of
          appearance; the positions are those of the full dollar-delimited match.
        * ``equations``: parallel to ``expressions``; ``[lhs, rhs]`` as LaTeX
          strings, or ``None`` when the expression is not an equation.
        * ``equalities``: ``{equation index: Eq(lhs, rhs).simplify()}`` for
          equations whose two sides have the same free symbols.
        * ``solutions``: ``{equation index: solve(Eq(lhs, rhs))}`` for equations
          whose two sides have different free symbols.
        * ``parse_exceptions``: set of equation indices that could not be
          parsed, simplified or solved.
    """
    expr_pattern = r'\$([^$]+)\$'
    # Applied to each expression; for chained equalities (more than two terms)
    # only the first two terms are captured.
    eqn_pattern = r'^(.*?)\s*=\s*([^=]+).*$'

    # finditer scans left to right, so expressions keep their order of appearance.
    expressions = [
        [match.group(1).strip(), match.start(), match.end()]
        for match in re.finditer(expr_pattern, text)
    ]

    equations = []
    for expr, _start, _end in expressions:
        match = re.match(eqn_pattern, expr)
        equations.append(
            [match.group(1).strip(), match.group(2).strip()] if match else None
        )

    equalities = {}           # equation index -> result of the identity check
    solutions = {}            # equation index -> list of solutions
    parse_exceptions = set()  # indices of equations that failed at any stage

    for i, eqn in enumerate(equations):
        if eqn is None:
            continue

        # Parsing must be guarded as well: malformed LaTeX, or a parse result
        # without `free_symbols`, is recorded as a failure instead of aborting
        # the whole extraction.
        try:
            lhs = latex2sympy(eqn[0])
            rhs = latex2sympy(eqn[1])
            same_symbols = lhs.free_symbols == rhs.free_symbols
        except Exception:
            parse_exceptions.add(i)
            continue

        try:
            if same_symbols:
                # Same variables on both sides: check whether the equality holds.
                equalities[i] = Eq(lhs, rhs).simplify()
            else:
                # Different variables: solve the equation.
                solutions[i] = solve(Eq(lhs, rhs))
        except Exception:
            parse_exceptions.add(i)

    return expressions, equations, equalities, solutions, parse_exceptions



In [173]:
# Run the extractor on the reference solution of the row labelled 0 and keep all
# five results for the inspection cells that follow.
expressions, equations, equalities, solutions, parse_exceptions = get_equations(df_int.loc[0, 'solution'])

[['x^2+x-6', '(x-2)(x+3)'], None, ['x', '2'], ['x', '-3'], None]


In [174]:
# Identity-check results, keyed by the equation's index in `equations`.
print(equalities)

{0: True}


In [93]:
# Print each solved equation: `key` indexes `expressions`, whose first element
# is the LaTeX text; `val` is the list of solutions.
for key, val in solutions.items():
    print(f"expression {expressions[key][0]}: {val}")

expression x = 2: [2]
expression x = -3: [-3]


In [95]:
from openai import OpenAI
import openai
# Check the key BEFORE building the client: the client constructor needs a key
# itself, so checking afterwards never reaches the explicit error below. An
# empty value is treated the same as a missing one.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("openai api key not set")

# Kept for code that still reads the module-level setting.
openai.api_key = api_key
client = OpenAI(api_key=api_key)

Detecting equations:

In [96]:
# System prompt asking the model to wrap equation sentences in <equation> tags.
python_caller_prompt = '''
    You are given a paragraph written in English that explains how to solve a math problem. Many sentences will include
    math equations. Please detect which sentences are math equations and wrap them within the tags <equation> and </equation>
'''

# NOTE(review): only the system message is sent; no user message carries a
# paragraph to tag - TODO confirm whether this request is still a stub.
response = client.chat.completions.create(
    messages=[dict(role="system", content=python_caller_prompt)],
    model="gpt-3.5-turbo",
)
    

Converting equations to python code for verification:

In [None]:
# System prompt asking the model to turn a LaTeX equation into checkable Python.
python_caller_prompt = '''
    You are given a math equation written in latex. Convert it into Python and verify its correctness.
'''

# Worked examples of the intended conversion, as (LaTeX input, structured output)
# pairs. They used to be dangling literals, one of them incomplete, which made
# the cell a syntax error.
conversion_examples = [
    (
        "$x^2+x-6=(x-2)(x+3)$",
        {
            "symbols": "x",
            "LHS": "x**2 + x - 6",
            "RHS": "(x - 2) * (x + 3)",
        },
    ),
    (
        "$120\\cdot30\\cdot\\frac{1}{100}=36$",
        {
            "symbols": "",
            "LHS": "120 * 30 * (1 / 100)",
            "RHS": "36",
        },
    ),
]

# NOTE(review): the request still sends only the system prompt; neither the
# examples above nor an equation to convert are included - TODO wire them in.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": python_caller_prompt},
    ],
)

## Testing GPT responses with vs. without appended hints

In [97]:
# System prompt shared by every request made through ai_response().
ai_prompt = "You are an assistant that is very good at mathematics. Given a mathematics problem, determine the answer, which is an integer, a float, or a fraction. Put your answer in the box \\boxed{}"


def ai_response(question: str):
    """Ask gpt-3.5-turbo a math question, primed with eight few-shot examples.

    Args:
        question: The problem statement, sent as the final user message.

    Returns:
        The text content of the first choice in the completion.
    """
    # (user question, assistant answer) pairs replayed as prior conversation turns.
    few_shot_pairs = [
        ("What is $(\\frac{7}{8})^3 \\cdot (\\frac{7}{8})^{-3}$?",
         "$\\boxed{1}$."),
        ("In how many ways can 4 books be selected from a shelf of 6 books if the order in which the books are selected does not matter?",
         "$\\boxed{15}$."),
        ("Find the distance between the points $(2,1,-4)$ and $(5,8,-3).$",
         "$\\boxed{\\sqrt{59}}$."),
        ("The faces of an octahedral die are labeled with digits $1$ through $8$. What is the probability, expressed as a common fraction, of rolling a sum of $15$ with a pair of such octahedral dice?",
         "$\\boxed{\\frac{1}{32}}$."),
        ("The first three terms of an arithmetic sequence are 1, 10 and 19, respectively. What is the value of the 21st term?",
         "$\\boxed{181}$."),
        ("Calculate $6 \\cdot 8\\frac{1}{3}$.",
         "$\\boxed{50}$."),
        ("When the binary number $100101110010_2$ is divided by 4, what is the remainder (give your answer in base 10)?",
         "$\\boxed{2}$."),
        ("How many zeros are at the end of the product 25 $\\times$ 240?",
         "$\\boxed{3}$."),
    ]

    conversation = [{"role": "system", "content": ai_prompt}]
    for shot_question, shot_answer in few_shot_pairs:
        conversation.append({"role": "user", "content": shot_question})
        conversation.append({"role": "assistant", "content": shot_answer})
    conversation.append({"role": "user", "content": question})

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    return completion.choices[0].message.content



In [98]:
# Raw string: the LaTeX backslashes (\sqrt) are not valid Python escape
# sequences and trigger a warning in a regular string literal. The value sent
# to the model is unchanged.
ans = ai_response(r"What is the value of $x$ which satisfies $\sqrt[3]{x\sqrt{x}}=7$?")

In [99]:
# Display the model's reply to the un-hinted question.
ans

'To solve this equation, we first cube both sides to get rid of the cube root.\n\\begin{align*}\n\\sqrt[3]{x\\sqrt{x}} &= 7 \\\\\n(\\sqrt[3]{x\\sqrt{x}})^3 &= 7^3 \\\\\nx\\sqrt{x} &= 343\n\\end{align*}Next, we square both sides.\n\\begin{align*}\n(x\\sqrt{x})^2 &= 343^2 \\\\\nx^2 \\cdot x &= 117649 \\\\\nx^3 &= 117649 \\\\\nx &= \\sqrt[3]{117649} \\\\\nx &= \\boxed{49}\n\\end{align*}'

In [108]:
# The LaTeX parts are raw strings because \sqrt is not a valid Python escape
# sequence; the leading "\n" of the hint stays in a regular literal so it is
# still a newline. The resulting text is unchanged.
question = r"What is the value of $x$ which satisfies $\sqrt[3]{x\sqrt{x}}=7$?"
question_hinted = question + (
    "\nYou MUST use the following facts: the solution to the equation "
    r"$\sqrt[3]{x\sqrt{x}}=7$ is $x=49$"
)
ans1 = ai_response(question_hinted)

In [109]:
# Display the model's reply to the hinted question.
ans1 # good!

'Given that $\\sqrt[3]{x\\sqrt{x}}=7$ and the solution is $x=49$, we have $\\boxed{x=49}$.'

## -----------------------------------------
### Use sympy to solve the equations that appear in the question (or in GPT's first generated answer) and append the solutions to the prompt as hints

In [161]:
# Build a hint paragraph from the equations sympy could solve in one problem statement.
problem = df_int.iloc[2, :]['problem']
expressions, equations, equalities, solutions, parse_exceptions = get_equations(problem)

hint_title = "You MUST consider the following facts, which will help you solve the question a lot:\n"
hint = ""
for idx, sol_list in solutions.items():
    # This contest only has integer solutions, so keep only solutions whose
    # value really is an integer. int() alone is not a filter: it silently
    # truncates non-integer reals (1/2 -> 0), so the converted value is
    # compared with the original one.
    sol_cleaned = []
    for sol in sol_list:
        try:
            sol_int = int(sol)
            is_whole = sol == sol_int or abs(float(sol) - sol_int) < 1e-9
        except (TypeError, ValueError, ArithmeticError):
            # Complex numbers, symbolic results, dict-style solutions, infinities.
            continue
        if is_whole:
            sol_cleaned.append(str(sol_int))

    if not sol_cleaned:
        # Nothing usable for this equation; do not emit an empty "has solutions" line.
        continue

    s = '' if len(sol_cleaned) == 1 else 's'

    hint += f"The equation {expressions[idx][0]} has solution{s} {', '.join(sol_cleaned)}\n"

if len(hint) > 0:
    hint = hint_title + hint

print(hint)




In [122]:
# Extracted math snippets as [latex, start_pos, end_pos].
expressions

[['2^8=4^x', 3, 12], ['x', 35, 38]]

In [123]:
# [lhs, rhs] per expression, or None when the expression contains no "=".
equations

[['2^8', '4^x'], None]

In [120]:
# Raw sympy solutions keyed by equation index (these can include complex roots).
solutions

{0: [4, (log(16) + I*pi)/log(2)]}

### Test on gpt3 output answers

In [182]:
# NOTE(review): in the recorded run this import raised ImportError (see the cell
# output), so the read_csv line below did not run as part of that execution.
# get_equations is also defined earlier in this notebook - TODO decide which
# definition is meant to be used.
from models.gpt3 import get_equations, get_hint


# Load the predictions written by the `pred.to_csv("gpt3_predictions.csv")` cell.
# NOTE(review): that Series was built from df1['problem'], so the CSV column
# named 'problem' appears to hold model outputs rather than problem text - confirm.
gpt_outputs = pd.read_csv("gpt3_predictions.csv")

ImportError: cannot import name 'get_equations' from 'models.gpt3' (c:\Users\a0306\ai-math\models\gpt3.py)

In [149]:
# Inspect the text stored in the 'problem' column for row 3.
gpt_outputs.loc[3, 'problem'] # This is so wrong...

'The prime numbers with squares between 100 and 300 are 11, 13, 17, and 19. \n\nTherefore, there are $\\boxed{4}$ prime numbers with squares between 100 and 300.'

In [151]:
# Inspect the text stored in the 'problem' column for row 5.
gpt_outputs.loc[5, 'problem']

"The greatest perimeter of the triangle occurs when the two shorter sides are the base of the triangle and the longest side is the height (forming a right-angled triangle with the altitude to the base). Using Pythagoras' theorem, the length of the longest side is $\\sqrt{15^2 - \\left(\\frac{10}{2}\\right)^2} = \\sqrt{225 - 25} = \\sqrt{200} = 10\\sqrt{2}$. \n\nTherefore, the greatest possible perimeter of the isosceles triangle is $2 \\times 15 + 10\\sqrt{2}$. Simplifying, we get $30 + 10\\sqrt{2} \\approx 44.14$.\n\nTherefore, the greatest possible perimeter of this isosceles triangle is $\\boxed{44.14}$ cm."

In [177]:
# Run the equation extractor on the row-5 text and list every math snippet it
# found as [latex, start_pos, end_pos].
output = gpt_outputs.loc[5, 'problem']
expressions, equations, equalities, solutions, parse_exceptions = get_equations(output)
for expr in expressions:
    print(expr)

['\\sqrt{15^2 - \\left(\\frac{10}{2}\\right)^2} = \\sqrt{225 - 25} = \\sqrt{200} = 10\\sqrt{2}', 263, 350]
['2 \\times 15 + 10\\sqrt{2}', 426, 452]
['30 + 10\\sqrt{2} \\approx 44.14', 474, 505]
['\\boxed{44.14}', 581, 596]


In [155]:
from latex2sympy2 import latex2sympy
# Probe: feed a chained equality (a = b = c) straight to latex2sympy.
# In the recorded run this raised the TypeError shown in the cell output.
latex2sympy("\\sqrt{15^2 - \\left(\\frac{10}{2}\\right)^2} = \\sqrt{225 - 25} = \\sqrt{200}")
# latex2sympy does not support chained equalities

TypeError: unsupported operand type(s) for -: 'Equality' and 'Pow'

In [157]:
# A single term (no "=") parses fine and simplifies to a closed form.
simplify(latex2sympy("\\sqrt{15^2 - \\left(\\frac{10}{2}\\right)^2}"))

10*sqrt(2)

In [143]:
from latex2sympy2 import latex2sympy
# Parse a purely numeric LaTeX product and check that it simplifies to 36.
expr = latex2sympy("120*30\\cdot\\frac{1}{100}")
simplify(expr) == 36

True

In [49]:
!pip install sympy
!pip install latex2sympy2

Collecting latex2sympy2
  Downloading latex2sympy2-1.9.1-py3-none-any.whl (89 kB)
                                              0.0/89.8 kB ? eta -:--:--
     ----                                     10.2/89.8 kB ? eta -:--:--
     ---------------------------------------- 89.8/89.8 kB 1.0 MB/s eta 0:00:00
Collecting antlr4-python3-runtime==4.7.2 (from latex2sympy2)
  Downloading antlr4-python3-runtime-4.7.2.tar.gz (112 kB)
                                              0.0/112.3 kB ? eta -:--:--
     -------------------------------------- 112.3/112.3 kB 3.2 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py): started
  Building wheel for antlr4-python3-runtime (setup.py): finished with status 'done'
  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.7.2-py3-none-any.whl size=140

In [48]:
from sympy import symbols, Eq, solve, simplify

# Symbolic sanity check: x^2 + x - 6 and its factored form are the same polynomial.
x = symbols('x')

lhs, rhs = x**2 + x - 6, (x - 2) * (x + 3)

# The two sides are identical when their difference simplifies to zero.
simplify(lhs - rhs) == 0

True

In [65]:
# Raw string: \sqrt is not a valid Python escape sequence, so a regular literal
# triggers an invalid-escape warning. The text passed to the parser is unchanged.
latex2sympy(r"\sqrt[3]{x\sqrt{x}}=7")

[Eq(x, 49)]

In [72]:
# Parse one side of the earlier identity from LaTeX (this rebinds `lhs`).
lhs = latex2sympy('x^2 + x - 6')

# -----------------------------------

In [7]:
# Draw the same number of problems from every (level, type) group.
group_sample_size = 2
# Fixed seed so that the sample, and every prediction file derived from it, can
# be regenerated. The same seed is used for every group.
sample_seed = 0

df_int_gp = df_int.groupby(['level', 'type'])

# Sampling without replacement needs every group to have at least
# `group_sample_size` rows; fail early with a clear message otherwise.
min_group_size = df_int_gp.size().min()
if group_sample_size > min_group_size:
    raise ValueError("group sample size is larger than the size of a group")

# apply() (rather than GroupBy.sample) keeps the (level, type, original index)
# MultiIndex shown in the outputs of later cells.
df1 = df_int_gp.apply(
    lambda group: group.sample(group_sample_size, replace=False, random_state=sample_seed)
)

In [16]:
# NOTE(review): this cell raised "ValueError: If using all scalar values, you must
# pass an index" in the recorded run, so df2 was never created. The intended result
# is not clear from the code - TODO fix or remove.
df2 = df1.apply(lambda group: pd.concat([group, df1['id'] * 10], axis=1))

ValueError: If using all scalar values, you must pass an index

In [9]:
from models.gpt3 import GPT3

# Model wrapper with its default settings.
my_model = GPT3()

# Run the model on every sampled problem statement, one call per row.
# NOTE(review): this calls the underscore-prefixed `_predict`; a later cell uses
# `my_model.predict` on a DataFrame instead - confirm which entry point is intended.
pred = df1['problem'].apply(my_model._predict)

In [14]:
# Persist the raw model outputs to a CSV file in the working directory.
pred.to_csv("gpt3_predictions.csv")

In [35]:
# Apply the same answer extractor that produced the `actual` column to each model output.
pred.apply(MATH.extract_answer)

level    type                        
Level 1  Algebra                 1008       4.0
                                 497       27.0
         Counting & Probability  1552       3.0
                                 1418       4.0
         Geometry                2008      16.0
                                          ...  
Level 5  Number Theory           3167       1.0
         Prealgebra              3838      36.0
                                 4238    1999.0
         Precalculus             4642       1.0
                                 4880       1.0
Name: problem, Length: 70, dtype: float64

In [37]:
# Side-by-side view of the extracted predictions and the reference answers,
# aligned on the shared index.
pd.concat([pred.apply(MATH.extract_answer), df1['actual']], axis=1)

# TODO: review how the pandas groupby/apply steps were done;
# read https://pandas.pydata.org/docs/user_guide/groupby.html

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,problem,actual
level,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Level 1,Algebra,1008,4.0,1.0
Level 1,Algebra,497,27.0,27.0
Level 1,Counting & Probability,1552,3.0,3.0
Level 1,Counting & Probability,1418,4.0,3.0
Level 1,Geometry,2008,16.0,24.0
...,...,...,...,...
Level 5,Number Theory,3167,1.0,24.0
Level 5,Prealgebra,3838,36.0,36.0
Level 5,Prealgebra,4238,1999.0,10090.0
Level 5,Precalculus,4642,1.0,12.0


In [7]:
from datasets import MATH
# Try the extractor on one stored long-form output.
# NOTE(review): `pred` is indexed here with an 'output_lengthy' column, so it appears
# to be a DataFrame, not the Series built from df1['problem'] - confirm which cell
# produced it.
MATH.extract_answer(pred.loc[2803, 'output_lengthy'])

answer in boxed not found


In [4]:
# Attach the model's `output` column to the df3 subset by joining on `id`
# (pd.merge defaults to an inner join).
merged_df = pd.merge(df3, pred[['id', 'output']], on='id')
merged_df

Unnamed: 0,problem,level,type,solution,json_number,id,output
0,"Let $a,$ $b,$ $c$ be positive real numbers. F...",Level 3,Intermediate Algebra,"By QM-AM,\n\[\sqrt{\frac{x^2 + y^2 + z^2}{3}} ...",437,2803,
1,Given $\mathbf{a} = \begin{pmatrix} 2 \\ 1 \\ ...,Level 3,Precalculus,We have that\n\begin{align*}\n(\mathbf{a} \tim...,1004,4459,


In [7]:
pip install git+https://github.com/hendrycks/math.git

Collecting git+https://github.com/hendrycks/math.git
  Cloning https://github.com/hendrycks/math.git to c:\users\a0306\appdata\local\temp\pip-req-build-ky0t90fv
  Resolved https://github.com/hendrycks/math.git to commit 357963a7f5501a6c1708cf3f3fb0cdf525642761
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: math-equivalence
  Building wheel for math-equivalence (setup.py): started
  Building wheel for math-equivalence (setup.py): finished with status 'done'
  Created wheel for math-equivalence: filename=math_equivalence-0.0.0-py3-none-any.whl size=3509 sha256=072d83a6d524bb41728c6a4a3b239173b2f0894e646470e7778cc2685ae3150a
  Stored in directory: C:\Users\a0306\AppData\Local\Temp\pip-ephem-wheel-cache-yz7idzj1\wheels\b7\16\f0\4a69d4d9b720086e22842cbd2d896b66298e6424b8f289f37c
Successfully built math-equivalence
Installing collected packages: math-equivalence
Successfully installed math-equivale

  Running command git clone --filter=blob:none --quiet https://github.com/hendrycks/math.git 'C:\Users\a0306\AppData\Local\Temp\pip-req-build-ky0t90fv'


In [2]:
import math_equivalence


In [8]:
# Compare the extracted reference answer with the extracted model answer for the first merged row.
# NOTE(review): bare `extract_answer` is not defined or imported in any visible cell
# (other cells call MATH.extract_answer) - TODO restore the import this relied on.
y_true = extract_answer(merged_df.loc[0, 'solution'])
y_pred = extract_answer(merged_df.loc[0, 'output'])
print(y_pred, y_true)

47 49


In [10]:
# Print the problem statement and the model output of the first merged row,
# separated by a divider line.
print(merged_df.loc[0, 'problem'])
print("-----------------")
print(merged_df.loc[0, 'output'])

What is the value of $x$ which satisfies $\sqrt[3]{x\sqrt{x}}=7$?
We start by cubing both sides to get rid of the cube root:
$$\sqrt[3]{x\sqrt{x}}=7$$
$$(\sqrt[3]{x\sqrt{x}})^3=7^3$$
$$x\sqrt{x}=343$$
Now, we can square both sides to eliminate the square root:
$$x^2\cdot x=343^2$$
$$x^3=117649$$
Taking the cube root of both sides, we find that $x= \boxed{47}$.


In [12]:
# Show the reference solution stored for the first merged row.
merged_df.loc[0, 'solution']

'Each of the five marked angles measures $360/5=72$ degrees, so $\\boxed{72}$ degrees is the minimum angle through which the pentagon may be rotated so that it coincides with its original position.\n\n[asy]\nsize(150);\ndefaultpen(linewidth(0.7));\nint i;\nfor(i=0;i<=4;++i)\n\n{\ndraw(origin--dir(18+72*i)--dir(18+72*(i+1)));\ndraw(anglemark(dir(18+72*i),origin,dir(18+72*(i+1)),3+fmod(i,3)));\n}\n[/asy]'

# Testing contest train data

In [7]:
from models.gpt3 import GPT3

# Contest variant of the system prompt: it tells the model the answer is always an integer.
system_prompt = "You are an assistant that is very good at mathematics. Given a mathematics problem, determine the answer, which is always an integer. Put your answer in the box \\boxed{}"

# Rebinds `my_model` to a wrapper configured with the contest prompt.
my_model = GPT3(system_prompt=system_prompt)



In [5]:
# Load the contest training problems from train.csv in the working directory.
contest_df = pd.read_csv("train.csv")
contest_df

Unnamed: 0,id,problem,answer
0,229ee8,"Let $k, l > 0$ be parameters. The parabola $y ...",52
1,246d26,Each of the three-digits numbers $111$ to $999...,250
2,2fc4ad,Let the `sparkle' operation on positive intege...,702
3,430b63,What is the minimum value of $5x^2+5y^2-8xy$ w...,800
4,5277ed,There exists a unique increasing geometric seq...,211
5,739bc9,For how many positive integers $m$ does the eq...,199
6,82e2a0,Suppose that we roll four 6-sided fair dice wi...,185
7,8ee6f3,"The points $\left(x, y\right)$ satisfying $((\...",320
8,bedda4,Let $ABCD$ be a unit square. Let $P$ be the po...,480
9,d7e9c9,A function $f: \mathbb N \to \mathbb N$ satisf...,199


In [8]:
# Run the model on the contest problems, passing only the id and problem columns
# (this rebinds `pred`).
pred = my_model.predict(contest_df[['id', 'problem']]) # 24 seconds

In [9]:
# Display the prediction table returned by my_model.predict.
pred

Unnamed: 0,id,problem,output_lengthy,output
0,229ee8,"Let $k, l > 0$ be parameters. The parabola $y ...",Let's find the $x$-coordinates of points $A$ a...,45.0
1,246d26,Each of the three-digits numbers $111$ to $999...,$\boxed{26}$.,26.0
2,2fc4ad,Let the `sparkle' operation on positive intege...,$\boxed{8}$.,8.0
3,430b63,What is the minimum value of $5x^2+5y^2-8xy$ w...,The minimum value of $5x^2+5y^2-8xy$ is $\boxe...,3200.0
4,5277ed,There exists a unique increasing geometric seq...,"The geometric sequence is $10, 20, 40, 80, 160...",310.0
5,739bc9,For how many positive integers $m$ does the eq...,$\boxed{5}$.,5.0
6,82e2a0,Suppose that we roll four 6-sided fair dice wi...,The total number of outcomes when rolling four...,329.0
7,8ee6f3,"The points $\left(x, y\right)$ satisfying $((\...",The given equation $(\vert x + y \vert - 10)^2...,200.0
8,bedda4,Let $ABCD$ be a unit square. Let $P$ be the po...,The largest region is the triangle formed by t...,
9,d7e9c9,A function $f: \mathbb N \to \mathbb N$ satisf...,We can deduce that $f(n)$ is an increasing fun...,201.0
