In [40]:
import json
import os
import re
import time

import vertexai
from openai import OpenAI
from vertexai.generative_models import GenerativeModel

from math_equivalence import is_equiv

In [41]:
# Initialize the Vertex AI client.
# The project can be overridden with the GOOGLE_CLOUD_PROJECT environment
# variable; when it is unset the original project id is used.
project_id = os.environ.get("GOOGLE_CLOUD_PROJECT", "licenta-425710")
vertexai.init(project=project_id)

In [42]:
# Initialize the clients.
# SECURITY: API keys must never be hardcoded in a notebook. They are read from
# the environment instead (a missing variable fails fast with KeyError). Any
# key that was previously committed in this cell must be treated as leaked and
# revoked with the provider.
client_gpt = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
client_nvidia = OpenAI(base_url='https://integrate.api.nvidia.com/v1',
                       api_key=os.environ['NVIDIA_API_KEY'])
gemini_model_15 = GenerativeModel(model_name="gemini-1.5-flash")
gemini_model_15_pro = GenerativeModel(model_name="gemini-1.5-pro")

In [43]:
# Get dataset directory: the 'problems_dataset' folder inside the current
# working directory (so the notebook must be started from the project root)
directory = os.getcwd()
dataset_directory = os.path.join(directory, 'problems_dataset')

# Threshold for the normalized Levenshtein distance computed by
# levenshtein_distance (a fraction of the longer string's length:
# 0 means the strings are identical, 1 means every position differs)
threshold = 0.7

In [44]:
# Function to get all the filenames of dataset problems
def get_problem_filenames():
    """Return the names of all ``.json`` files found in ``dataset_directory``.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    to make the processing order (and any progress counter based on it)
    identical from one run to the next.

    Returns:
        list[str]: bare file names (not full paths), sorted alphabetically.
    """
    return sorted(
        filename
        for filename in os.listdir(dataset_directory)
        if filename.endswith('.json')
    )

In [45]:
# Function to load a problem file from dataset directory
def load_problem(problem_filename):
    """Load one problem from ``dataset_directory`` and return the parsed JSON.

    Args:
        problem_filename: bare file name of the problem inside
            ``dataset_directory``.

    Returns:
        The object decoded by ``json.load`` from that file.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = os.path.join(dataset_directory, problem_filename)
    # JSON is UTF-8 by specification; without an explicit encoding open()
    # falls back to the locale default and can mis-decode non-ASCII text.
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [46]:
# Function to calculate the Levenshtein distance between two strings as a fraction
def levenshtein_distance(s1, s2):
    """Return the Levenshtein edit distance between two strings, normalized.

    The raw edit distance (insertions, deletions and substitutions, each with
    cost 1) is divided by the length of the longer string, so the result lies
    in ``[0, 1]``: ``0`` for identical strings, ``1`` when every position of
    the longer string has to be edited.

    Args:
        s1: first string.
        s2: second string.

    Returns:
        float: normalized distance; ``0.0`` when both strings are empty.
    """
    m = len(s1)
    n = len(s2)
    # Two empty strings are identical; without this guard the final division
    # would be 0 / 0 and raise ZeroDivisionError.
    if m == 0 and n == 0:
        return 0.0

    # Only two rows of the DP table are needed at any time.
    previous_row = list(range(n + 1))
    current_row = [0] * (n + 1)

    for i in range(m):
        current_row[0] = i + 1
        for j in range(n):
            deletion_cost = previous_row[j + 1] + 1
            insertion_cost = current_row[j] + 1
            substitution_cost = previous_row[j] + (0 if s1[i] == s2[j] else 1)
            current_row[j + 1] = min(deletion_cost, insertion_cost, substitution_cost)
        # The row just filled becomes the "previous" row of the next iteration.
        previous_row, current_row = current_row, previous_row

    return previous_row[n] / max(m, n)

In [47]:
# Function to test if a solution is correct using an AI quorum
def _verdict_is_yes(answer):
    """Return True when a judge's free-text answer contains the word YES.

    The match is case-insensitive and on whole words, so "Yes." counts as a
    positive verdict while a word that merely contains the letters does not.
    A missing answer (``None`` or empty string) counts as a negative verdict.
    """
    return bool(answer) and re.search(r'\bYES\b', answer, re.IGNORECASE) is not None


def is_solution_correct_cvorum(correct_solution, generated_solution):
    """Ask three LLM judges whether a generated solution matches the reference.

    The same instructions and question are sent to ``client_gpt``
    (gpt-3.5-turbo), ``client_nvidia`` (meta/llama3-70b-instruct) and
    ``gemini_model_15_pro``. Each reply counts as one vote.

    Args:
        correct_solution: the reference solution text.
        generated_solution: the solution text to evaluate.

    Returns:
        bool: True when at least two of the three judges answered YES.
    """
    rules = f'You are a math assistant and you are given two solutions to a math problem. The FIRST SOLUTION is the correct one,\
            and the SECOND SOLUTION is the one you need to evaluate based on the first.\
            You MUST check if the SECOND SOLUTION has the final result as the FIRST SOLUTION, if it exists.'
    text = f'The correct FIRST SOLUTION is: {correct_solution}.\n The SECOND SOLUTION is: {generated_solution}.\
            \n\nBased on the FIRST SOLUTION, is the SECOND SOLUTION correct? Please answer YES or NO.'

    chat_messages = [{"role": "system", "content": rules},
                     {"role": "user", "content": text}]

    response1 = client_gpt.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=chat_messages)
    response2 = client_nvidia.chat.completions.create(
                model="meta/llama3-70b-instruct",
                messages=chat_messages)
    # The Gemini call takes a single prompt, so rules and question are joined.
    response3 = gemini_model_15_pro.generate_content(rules + '\n' + text)

    answers = (
        response1.choices[0].message.content,
        response2.choices[0].message.content,
        response3.text,
    )
    number_of_yes = sum(1 for answer in answers if _verdict_is_yes(answer))

    # if at least 2 of the responses contain YES, the solution is correct
    return number_of_yes >= 2

In [48]:
# Aggregated list of the model identifiers available for solution generation
models = [
    'gpt-3.5-turbo',
    'meta/llama3-70b-instruct',
    'gemini-1.5-flash',
    'gemini-1.5-pro',
]

In [49]:
# Function to generate a solution for a problem using one of the supported models
def generate_solution(problem, category, difficulty, model):
    """Generate a step-by-step solution to a math problem with the given model.

    Args:
        problem: the problem statement.
        category: the problem category, interpolated into the prompt.
        difficulty: the difficulty label, interpolated into the prompt.
        model: one of 'gpt-3.5-turbo', 'meta/llama3-70b-instruct',
            'gemini-1.5-flash' or 'gemini-1.5-pro'.

    Returns:
        The text of the model's reply.

    Raises:
        ValueError: if ``model`` is not one of the supported identifiers.
            (Previously an empty string was returned silently, which would
            then be scored and saved as if it were a real answer.)
    """
    role = f'You are a math assistant and you are given a math problem.'
    base_prompt = f'You are given an {category} problem of dificulty {difficulty} on a scale from Level 1 to Level 5.\
                    \nThe problem is: {problem}.'

    # Divide and conquer
    strategy = f'Please solve the problem by trying to split the problem into smaller parts and solve them in logical order.\
                        \nExplain the details of each parts of the solution.'
    question = f'What is the solution to the problem?'

    user_prompt = base_prompt + '\n' + strategy + '\n' + question

    # Gemini models take one flat prompt, so the role is prepended to it.
    if model == 'gemini-1.5-flash' or model == 'gemini-1.5-pro':
        api = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        return api.generate_content(role + '\n' + user_prompt).text

    # Both remaining models are served through the OpenAI-compatible chat API.
    if model == 'gpt-3.5-turbo' or model == 'meta/llama3-70b-instruct':
        api = client_gpt if model == 'gpt-3.5-turbo' else client_nvidia
        response = api.chat.completions.create(
            model=model,
            messages=[{"role": "system", "content": role},
                      {"role": "user", "content": user_prompt}])
        return response.choices[0].message.content

    raise ValueError(f'Unsupported model: {model!r}')

In [50]:
# Names of the supported correctness-checking strategies
test_correctitude_strategies = ['Cvorum', 'Levenshtein distance']
def is_solution_correct_with_strategy(correct_solution, generated_solution, strategy):
    """Judge a generated solution against the reference with the given strategy.

    Args:
        correct_solution: the reference solution text.
        generated_solution: the solution text to evaluate.
        strategy: 'Cvorum' (vote of LLM judges) or 'Levenshtein distance'
            (normalized edit distance compared with ``threshold``).

    Returns:
        bool: the strategy's verdict; False for an unknown strategy name.
    """
    if strategy == 'Cvorum':
        return is_solution_correct_cvorum(correct_solution, generated_solution)
    elif strategy == 'Levenshtein distance':
        # levenshtein_distance is a *distance*: 0 means identical text and 1
        # means entirely different text. The solution is therefore accepted
        # when the distance is at most the threshold. The previous '>=' was
        # inverted: it rejected a solution identical to the reference.
        # NOTE(review): if `threshold` was meant as a minimum similarity
        # instead, the test should be `1 - distance >= threshold` - confirm.
        return levenshtein_distance(correct_solution, generated_solution) <= threshold
    return False

In [51]:
# # Mock function to test the solution generation
# problem = 'Solve the equation $27 = 3(9)^{x-1}$ for $x.$'
# category = 'Algebra'
# difficulty = 'Level 1'
# correct_solution = 'Dividing both sides by 3, we quickly note that $ 9 = 9^{x-1} \\rightarrow 1 = x-1 \\rightarrow x = \\boxed{2}$.'
# other_solution = 'To solve \(27 = 3(9)^{x-1}\), rewrite \(27\) as \(3^3\) and \(9\) as \(3^2\), yielding \(3^3 = 3(3^{2(x-1)})\); simplify to \(3^3 = 3^{2x-1}\); equate exponents to get \(3 = 2x-1\); solve for \(x\) to find \(x = 2\).'

# # print("Cvorum strategy")
# # print(is_solution_correct_with_strategy(correct_solution, other_solution, 'Cvorum'))

# print("Levenshtein distance strategy")
# print("levenshtein_distance: ", levenshtein_distance(correct_solution, other_solution))
# print(is_solution_correct_with_strategy(correct_solution, other_solution, 'Levenshtein distance'))

# print("Dataset function strategy")
# print(is_solution_correct_with_strategy(correct_solution, other_solution, 'dataset function'))

In [52]:
# Load the problems from the dataset
problems = get_problem_filenames()
model = 'meta/llama3-70b-instruct'
# NOTE: this model name contains a '/', so os.path.join yields a nested
# 'output-direct-meta/llama3-70b-instruct' directory. Kept unchanged so that
# outputs written by earlier runs are still found and skipped.
output_folder = os.path.join(directory, 'output-direct-' + model)

# Create the output folder (including missing parents); exist_ok avoids the
# check-then-create race of a separate os.path.exists test.
os.makedirs(output_folder, exist_ok=True)

# Iterate through the problems and generate solutions
for index, problem in enumerate(problems):
    # Strip only the trailing extension; str.replace would also remove any
    # '.json' occurring in the middle of a file name.
    output_path = os.path.join(output_folder, os.path.splitext(problem)[0] + '_out.json')

    # if output file exists skip problem (an interrupted run can be resumed)
    if os.path.exists(output_path):
        continue

    print(f'Processing problem {index + 1}/{len(problems)}')
    problem_data = load_problem(problem)
    category = problem_data['category']
    difficulty = problem_data['level']
    correct_solution = problem_data['solution']
    generated_solution = generate_solution(problem_data['problem'], category, difficulty, model)

    # One result record per problem, keyed by the problem's file name.
    output = {
        problem: {
            'category': category,
            'difficulty': difficulty,
            'problem': problem_data['problem'],
            'correct_solution': correct_solution,
            'generated_solutions': {model: generated_solution},
            'correctitude': {
                model: {
                    'Cvorum': is_solution_correct_with_strategy(
                        correct_solution, generated_solution, 'Cvorum'),
                    'Levenshtein distance': is_solution_correct_with_strategy(
                        correct_solution, generated_solution, 'Levenshtein distance'),
                    'Levenshtein distance value': levenshtein_distance(
                        correct_solution, generated_solution),
                },
            },
        },
    }

    # Save the output to a file
    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(output, json_file, indent=4)

    # Pause between problems - presumably to stay under the providers' rate
    # limits; TODO confirm the required delay.
    time.sleep(20)


Processing problem 1/105


TypeError: Missing required arguments; Expected either ('model' and 'prompt') or ('model', 'prompt' and 'stream') arguments to be given