In [50]:
from agents import Executor, Supervisor
import json
import os

In [51]:
class CONFIG:
    num_iteration: int = 20,

In [52]:
executor = Executor(config=CONFIG, model_name='gpt-3.5-turbo-1106')
supervisor = Supervisor(config=CONFIG, model_name='gpt-4')

In [53]:
# Load the math dataset
def load_math_dataset(file_path):
    json_data = []
    if not os.path.exists(file_path):
        pass
    else:
        with open(file_path, 'r') as file:
            json_data = json.load(file)
            
    return json_data

def answer_math_questions(questions, model_name="text-davinci-003"):
    answers = []

    for question in questions:
        try:
            answer = executor.answer_question(question, model_name=model_name)
            answers.append(answer)
        except Exception as e:
            print(f"Error in processing question: {e}")
            answers.append(None)

    return answers

# Loop the /data/MATH/train/ directory and answer all the questions
file_names = []
length_of_file_names = 0
subjects = []

# Find the subjects in the directory, subjects are the name of the folders under /data/MATH/train/
with os.scandir('../data/MATH/train/') as entries:
    for entry in entries:
        if entry.is_dir():
            subjects.append(entry.name)

    
print(f"Found {len(subjects)} subjects in the directory")

# Find the numbers of files in the directory
for subject in subjects:
    with os.scandir(f'../data/MATH/train/{subject}') as entries:
        for entry in entries:
            if entry.is_file():
                file_names.append(entry.name)
                length_of_file_names += 1
    
print(f"Found {length_of_file_names} files in the directory")

training_subject = 'algebra'

# Load the dataset
questions = []
answers = []

# Load all file names in the directory
with os.scandir(f'../data/MATH/train/{training_subject}') as entries:
    for entry in entries:
        if entry.is_file():
            file_names.append(entry.name)
            length_of_file_names += 1

print(f"Found {length_of_file_names} files in the {subject} directory")

# Load the train/algebra/ directory
data = []

for file_name in file_names:
    json_data = load_math_dataset(f'../data/MATH/train/{training_subject}/{file_name}')
    if len(json_data) > 0:
        if json_data['problem'] is not None:
            questions.append(json_data['problem'])
            answers.append(json_data['solution'])
    
print(f"Found {len(questions)} questions in the {subject} directory")
print(f"Found {len(answers)} answers in the {subject} directory")

Found 7 subjects in the directory
Found 7500 files in the directory
Found 9244 files in the algebra directory
Found 6670 questions in the algebra directory
Found 6670 answers in the algebra directory


In [54]:
for i in range(2):
    print(f"Question: {questions[i]}")
    print(f"Answer: {answers[i]}")
    print("")

Question: The ratio of the areas of two squares is $\frac{192}{80}$. After rationalizing the denominator, the ratio of their side lengths can be expressed in the simplified form $\frac{a\sqrt{b}}{c}$ where $a$, $b$, and $c$ are integers. What is the value of the sum $a+b+c$?
Answer: We start off by simplifying the ratio $\frac{192}{80}$ to $\frac{12}{5}$. The area of a square equals the side length squared, so we can get the ratio of sidelengths by taking the square root of the ratio of areas: $$\sqrt{\frac{12}{5}}=\frac{\sqrt{12}}{\sqrt{5}}=\frac{2\sqrt{3}}{\sqrt{5}}=\frac{2\sqrt{3}}{\sqrt{5}}\cdot\frac{\sqrt{5}}{\sqrt{5}}=\frac{2\sqrt{15}}{5}.$$So, our answer is $2+15+5=\boxed{22}$. If you started off by taking the square root of $\frac{192}{80}$ directly without simplifying it first, you still get the same answer. $$\sqrt{\frac{192}{80}}=\frac{\sqrt{192}}{\sqrt{80}}=\frac{8\sqrt{3}}{4\sqrt{5}}=\frac{2\sqrt{3}}{\sqrt{5}}=\frac{2\sqrt{15}}{5}.$$

Question: If $\sqrt{2\sqrt{t-2}} = \sq

In [55]:
num_epochs = len(questions)
print(f"Number of epochs: {num_epochs}")
executor_answers = []
supervisor_outputs = []

learning_prompts = []
correctness_scores = []


def training(num_epochs, num_iterations):
    for i in range(num_epochs):
        for j in range(num_iterations):
            print(f"Epoch: {i}, Iteration: {j}")
            print(f"Question: {questions[i]}")
            question = questions[i]
            if len(learning_prompts) > 0:
                print(f"Prompt: {learning_prompts[i]}")
                print("")
                executor_answer = executor.answer_question(question=learning_prompts[i])
                executor_answers.append(executor_answer)
            else:
                print(f"Prompt: {question}")
                print("")
                executor_answer = executor.answer_question(question=question)
                executor_answers.append(executor_answer)

            print(f"Executor Answer: {executor_answers[i]}")

            # Evaluate the answer
            supervisor_outputs.append(
                supervisor.evaluate_answer(
                    question=question,
                    executor_answer=executor_answer,
                    correct_answer=answers[i],
                )
            )
            print(f"Supervisor Output: {supervisor_outputs[i]}")
            print("")

            # Take the supervisor output and update the prompt for the executor
            updated_prompt = executor.update_prompt(
                supervisor_output=supervisor_outputs[i],
                executor_answer=executor_answers[i],
                correct_answer=answers[i],
            )

            learning_prompts.append(updated_prompt)

            print(f"Updated Prompt: {updated_prompt}")
            print("")

            with open(f"../outputs/{subject}/{i}.json", "w") as file:
                json.dump(
                    {
                        "messages": [
                            {"role": "system", "content": questions[i]},
                            {"role": "user", "content": executor_answers[i]},
                            {"role": "assistant", "content": supervisor_outputs[i]},
                            {"role": "assistant", "content": updated_prompt},
                        ]
                    },
                    file,
                )


training(num_epochs=10, num_iterations=2)

Number of epochs: 6670
Epoch: 0, Iteration: 0
Question: The ratio of the areas of two squares is $\frac{192}{80}$. After rationalizing the denominator, the ratio of their side lengths can be expressed in the simplified form $\frac{a\sqrt{b}}{c}$ where $a$, $b$, and $c$ are integers. What is the value of the sum $a+b+c$?
Prompt: The ratio of the areas of two squares is $\frac{192}{80}$. After rationalizing the denominator, the ratio of their side lengths can be expressed in the simplified form $\frac{a\sqrt{b}}{c}$ where $a$, $b$, and $c$ are integers. What is the value of the sum $a+b+c$?

Executor Answer: First, let's find the ratio of the side lengths of the two squares:

Let's call the side lengths of the squares x and y. Then the ratio of their areas is equal to the square of the ratio of their side lengths. Therefore,
$\frac{x^2}{y^2} = \frac{192}{80}$.

Simplifying the ratio of the areas, we get:
$\frac{x}{y} = \sqrt{\frac{192}{80}} = \sqrt{\frac{24}{10}} = \frac{2\sqrt{6}}{\sqrt