### Part 1: read in data and convert to jsonl

In [18]:
import json
import os
import re
import openai
from dotenv import load_dotenv
import time

The code snippet below applies only to the MATH dataset.

In [5]:
def concatenate_json_to_jsonl(folder_path, output_file):
    """
    Combine every ``.json`` file in a folder into a single JSONL file.

    Each input file is parsed and written back as one line of the output.
    Files are processed in a deterministic order (numeric file names by value,
    then the remaining names alphabetically), so the line order of the output
    is reproducible between runs.

    Parameters:
        folder_path (str): Directory containing the ``.json`` files.
        output_file (str): Path of the JSONL file to create; an existing file
            is overwritten.

    Returns:
        None

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If an input file does not contain valid JSON.
    """
    def _natural_key(file_name):
        # "2.json" must come before "10.json": compare numeric stems by value
        # and place non-numeric names after them in plain name order.
        stem = os.path.splitext(file_name)[0]
        if stem.isdecimal():
            return (0, int(stem), file_name)
        return (1, 0, file_name)

    # os.listdir gives no ordering guarantee, so sort explicitly.
    json_files = sorted(
        (f for f in os.listdir(folder_path) if f.endswith('.json')),
        key=_natural_key,
    )

    with open(output_file, 'w', encoding='utf-8') as outfile:
        for file_name in json_files:
            file_path = os.path.join(folder_path, file_name)

            with open(file_path, 'r', encoding='utf-8') as infile:
                data = json.load(infile)

            # One JSON document per output line.
            json.dump(data, outfile)
            outfile.write('\n')

# Usage: set folder_path to the directory holding the per-problem .json files
# and output_file to the .jsonl file that should be created.
folder_path = 'MATH/test/algebra/'
output_file = 'algebra.jsonl'
concatenate_json_to_jsonl(folder_path=folder_path, output_file=output_file)

### Part 2: OpenAI setup and example test (taken from the website)

In [7]:


# Load environment variables through python-dotenv before reading the key.
load_dotenv()

openai.api_key = os.getenv('OPENAI_API_KEY')
client = openai.Client()

# Example request: a single chat completion whose reply message is printed below.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        dict(
            role="system",
            content="You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.",
        ),
        dict(
            role="user",
            content="Compose a poem that explains the concept of recursion in programming.",
        ),
    ],
)

print(completion.choices[0].message)


ChatCompletionMessage(content='In the realm of code, a concept profound,\nWhere elegance and logic doth resound,\nLies recursion, a mystical enchantment,\nA dance of functions, a loop in ascent.\n\nLike a mirror reflecting its own reflection,\nRecursion calls itself without hesitation,\nA nesting of steps, in which we embark,\nTo solve tasks grand, and choices mark.\n\nPicture a tale of infinite unfolding,\nA story rewriting, constantly molding,\nWhere a hero within calls out once more,\nSeeking the answers, a quest to restore.\n\nA function, a hero, armoured with might,\nSteps into darkness, with code as his light,\nHe encounters a problem, complex and vast,\nIn search of a solution, his journey does cast.\n\nYet in this journey, a riddle he finds,\nTo solve it, he muses, he must rewind,\nBack to the beginning, where his tale did start,\nTo challenge the problem with all of his heart.\n\nWith each step retraced, the hero does see,\nA smaller version of what used to be,\nA smaller prob

### Part 3: test GPT-3.5 on the algebra dataset

In [12]:
def test_math_problems(n):
    for i in range(1, n+1):
        file_path = f'./MATH/test/algebra/{i}.json'
        if not os.path.isfile(file_path):
            print(f"File {file_path} not found.")
            continue

        with open(file_path, 'r') as f:
            data = json.load(f)
            problem = data['problem']
            solution = data['solution']



            response = client.chat.completions.create(
                model="gpt-3.5-turbo-1106",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant, skilled in solving common algebraic problems."},
                    {"role": "user", "content": f"Solve this math problem and return the answer (a number) in latex box format, e.g. $\boxed$: {problem}"},
                ],
                temperature=1,
                max_tokens=256,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0
            )
            ai_solution = response.choices[0].message.content
            print(f"Problem: {problem}")
            print("--------------------------------------------------")
            print(f"AI Solution: {ai_solution}")
            print(f"Actual Solution: {solution}")
            print("--------------------------------------------------")
    return ai_solution, solution

# Run problem 1 only; the returned pair is the model's answer and the reference
# solution of the last problem processed.
ai_solution, target_solution = test_math_problems(1)  # Raise the argument to try more problems

Problem: How many vertical asymptotes does the graph of $y=\frac{2}{x^2+x-6}$ have?
AI Solution: To find the vertical asymptotes of the function \( y = \frac{2}{x^2+x-6} \), we need to determine the values of \( x \) that make the denominator equal to zero. Factorizing the denominator, we have:

\[ x^2 + x - 6 = (x+3)(x-2) \]

Setting the denominator equal to zero gives us the values of \( x \) where the function has vertical asymptotes:

\[ x+3=0 \quad \text{and} \quad x-2=0 \]

So the function has two vertical asymptotes: \( x = -3 \) and \( x = 2 \). Therefore, the graph of the function has $\boxed{2}$ vertical asymptotes.
Actual Solution: The denominator of the rational function factors into $x^2+x-6=(x-2)(x+3)$. Since the numerator is always nonzero, there is a vertical asymptote whenever the denominator is $0$, which occurs for $x = 2$ and $x = -3$.  Therefore, the graph has $\boxed{2}$ vertical asymptotes.
--------------------------------------------------


In [10]:
# function for extracting boxed value
def extract_number_from_latex(paragraph):
    r"""
    Extract the first integer wrapped in a LaTeX ``\boxed{...}`` command.

    The surrounding math delimiters are not required, so ``$\boxed{2}$``,
    ``\(\boxed{2}\)`` and a bare ``\boxed{2}`` are all recognised. An optional
    sign and whitespace inside the braces are accepted.

    Parameters:
        paragraph (str): Text that may contain a ``\boxed{...}`` answer.

    Returns:
        int or None: The boxed integer, or None when ``paragraph`` is not a
        string or contains no boxed integer.
    """
    # A missing model reply arrives as None; treat it as "no answer found".
    if not isinstance(paragraph, str):
        return None

    # Only integer answers are supported; extend the group for other formats.
    match = re.search(r'\\boxed\{\s*([+-]?\d+)\s*\}', paragraph)
    if match is None:
        return None
    return int(match.group(1))

In [15]:
# Sanity check: extract the boxed answer from the reference solution returned above.
extract_number_from_latex(target_solution)

2

In [16]:
# function for comparing values
def compared_boxed_val(paragraph1, paragraph2):
    """
    Score whether two texts contain the same boxed number.

    Parameters:
        paragraph1 (str): First text (e.g. the reference solution).
        paragraph2 (str): Second text (e.g. the model's answer).

    Returns:
        int: 1 if a boxed number is found in both texts and the two numbers
        are equal, otherwise 0.
    """
    num1 = extract_number_from_latex(paragraph1)
    num2 = extract_number_from_latex(paragraph2)
    # A failed extraction yields None on either side; None == None must not
    # be counted as a correct answer.
    if num1 is None or num2 is None:
        return 0
    return 1 if num1 == num2 else 0

In [17]:
# Compare the boxed values of the reference solution and the model's answer (1 = match).
compared_boxed_val(target_solution, ai_solution)

1

### Part 4: produce test scores in batch

In [26]:
def test_math_problems_batch(jsonl_path, model, num_lines_to_parse,output_jsonl_path):
    """
    Test a batch of math problems using the given model.

    Parameters:
    - jsonl_path (str): The path to the JSONL file containing the math problems.
    - model (str): The name of the model to use for solving the problems.
    - num_lines_to_parse (int): The number of lines to parse from the JSONL file.
    - output_jsonl_path (str): The path to the output JSONL file to store the results.

    Returns:
    - test_score (int): The total score obtained from solving the math problems.
    - average_score (float): The average score obtained from solving the math problems.
    """
    test_score = 0
    total_count = 0
    with open(jsonl_path, 'r') as file:
        for i, line in enumerate(file):
            # Find which line we are at with output_jsonl_path:
            checkpoint = 0
            with open(output_jsonl_path, 'r') as outfile:
                for j, line in enumerate(outfile):
                    checkpoint += 1
            if i < num_lines_to_parse and i >= checkpoint:
                json_data = json.loads(line)
                problem = json_data['problem']
                solution = json_data['solution']
                response = client.chat.completions.create(
                    model=model,
                    messages=[
                        {"role": "system", "content": "You are an assistant skilled in solving common algebraic problems. Solve this math problem given by the user and return the answer (a number) in latex box format, e.g. $\boxed$:"},
                        {"role": "user", "content": f" {problem}"},
                    ],
                    temperature=1,
                    max_tokens=256,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0
                )
                time.sleep(5)
                ai_solution = response.choices[0].message.content
                score = compared_boxed_val(solution, ai_solution)
                test_score += score
                total_count += 1
                result = {
                    "problem": problem,
                    "target_solution": solution,
                    "gpt_solution": ai_solution,
                    "score": score
                }
                with open(output_jsonl_path, 'a') as outfile:
                    json.dump(result, outfile)
                    outfile.write('\n')
            else:
                break
    return test_score, test_score/total_count

In [27]:
# Evaluate up to the first 5 lines of algebra.jsonl with gpt-3.5-turbo-1106, appending
# each result to algebra_test_sol.jsonl; the cell shows the (total, average) score pair.
test_math_problems_batch('algebra.jsonl', "gpt-3.5-turbo-1106", 5, 'algebra_test_sol.jsonl')

(4, 0.8)