In [182]:
from openai import OpenAI
import os
import time
import json
import vertexai
from vertexai.generative_models import GenerativeModel
import re

# import prompt from prompt.py
from prompts import *

In [183]:
# Initialize the Vertex AI SDK.
# The project can be overridden through the GOOGLE_CLOUD_PROJECT environment
# variable; when it is not set, the project used so far is kept as the default.
project_id = os.environ.get("GOOGLE_CLOUD_PROJECT", "licenta-425710")
vertexai.init(project=project_id)

In [184]:
# Initialize the LLM clients.
# API keys are read from the environment (OPENAI_API_KEY, NVIDIA_API_KEY) so that
# no credential is stored in the notebook; a missing variable raises KeyError here.
# NOTE(review): keys that were previously hard-coded in this cell must be treated
# as leaked and revoked/rotated with the respective providers.
client_gpt = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
client_nvidia = OpenAI(base_url='https://integrate.api.nvidia.com/v1',
                       api_key=os.environ['NVIDIA_API_KEY'])
gemini_model_15 = GenerativeModel(model_name="gemini-1.5-flash")
gemini_model_15_pro = GenerativeModel(model_name="gemini-1.5-pro")

In [185]:
# Threshold used by the 'Levenshtein distance' correctness strategy
threshold = 0.7

# The dataset lives in the 'problems_dataset' folder of the current working directory
directory = os.getcwd()
dataset_directory = os.path.join(directory, 'problems_dataset')

In [186]:
# Function to get all the filenames of dataset problems
def get_problem_filenames():
    """Return the names of all '.json' files found directly in ``dataset_directory``.

    Only file names are returned (not full paths), in the order produced by
    ``os.listdir``.
    """
    return [entry for entry in os.listdir(dataset_directory) if entry.endswith('.json')]

In [187]:
# Function to load a problem file from dataset directory
def load_problem(problem_filename):
    """Load one problem from the dataset directory and return the parsed JSON.

    Args:
        problem_filename: Name of a JSON file located in ``dataset_directory``.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    path = os.path.join(dataset_directory, problem_filename)
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding (problem statements may contain non-ASCII characters).
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [188]:
# Function to calculate the Levenshtein distance between two strings as a fraction
def levenshtein_distance(s1, s2):
    """Return the Levenshtein edit distance between ``s1`` and ``s2``, normalized.

    The raw edit distance (insertions, deletions and substitutions each cost 1)
    is divided by the length of the longer input, so the result lies in [0, 1]:
    0 means the inputs are identical and 1 means they are completely different.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The normalized distance. Two empty inputs are identical, so 0.0 is
        returned for them instead of dividing by zero.
    """
    m = len(s1)
    n = len(s2)
    longest = max(m, n)
    if longest == 0:
        # Both inputs are empty: nothing to edit, and max(m, n) would be 0.
        return 0.0

    # Only two rows of the dynamic-programming table are needed at any time.
    # previous[j] is the distance between the processed prefix of s1 and s2[:j].
    previous = list(range(n + 1))
    for i, char1 in enumerate(s1, start=1):
        current = [i]  # turning s1[:i] into the empty prefix takes i deletions
        for j, char2 in enumerate(s2, start=1):
            deletion_cost = previous[j] + 1
            insertion_cost = current[j - 1] + 1
            substitution_cost = previous[j - 1] + (0 if char1 == char2 else 1)
            current.append(min(deletion_cost, insertion_cost, substitution_cost))
        previous = current

    return previous[n] / longest

In [189]:

# Function to test if a solution is correct using an AI quorum
def is_solution_correct_cvorum(correct_solution, generated_solution):
    """Ask three models whether the generated solution matches the reference one.

    The same instructions are sent to gpt-3.5-turbo, meta/llama3-70b-instruct and
    gemini-1.5-pro. A reply counts as a positive vote when it contains the
    substring 'YES'.

    Returns:
        True when at least two of the three replies contain 'YES', else False.
    """
    system_prompt = f'You are a math assistant and you are given two solutions to a math problem. The FIRST SOLUTION is the correct one,\
            and the SECOND SOLUTION is the one you need to evaluate based on the first.\
            You MUST check if the SECOND SOLUTION has the final result as the FIRST SOLUTION, if it exists.'
    user_prompt = f'The correct FIRST SOLUTION is: {correct_solution}.\n The SECOND SOLUTION is: {generated_solution}.\
            \n\nBased on the FIRST SOLUTION, is the SECOND SOLUTION correct? Please answer YES or NO.'

    # Query the three judges in a fixed order: OpenAI, NVIDIA-hosted Llama, Gemini.
    gpt_reply = client_gpt.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": system_prompt},
                  {"role": "user", "content": user_prompt}])
    llama_reply = client_nvidia.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=[{"role": "system", "content": system_prompt},
                  {"role": "user", "content": user_prompt}])
    gemini_reply = gemini_model_15_pro.generate_content(system_prompt + '\n' + user_prompt)

    # Each membership test yields a bool; summing them counts the positive votes.
    yes_votes = sum((
        'YES' in gpt_reply.choices[0].message.content,
        'YES' in llama_reply.choices[0].message.content,
        'YES' in gemini_reply.text,
    ))

    # A majority (two out of three) is enough to accept the solution.
    return yes_votes >= 2


In [190]:
# Check if both solutions have the same result, which is marked by \boxed{}
def check_solutions_same_result(correct_solution, generated_solution):
    """Compare the final results of two solutions, each marked with ``\\boxed{...}``.

    The content of the first ``\\boxed{...}`` in each solution is extracted by
    matching braces, so nested braces (e.g. ``\\boxed{\\frac{1}{2}}``) are kept
    whole and text following the box is not captured.

    Args:
        correct_solution: Reference solution text.
        generated_solution: Solution text to check.

    Returns:
        True when both solutions contain a boxed result and the two boxed
        contents are equal; False otherwise, including when either solution has
        no (or an unterminated) ``\\boxed{...}``.
    """
    def _boxed_content(solution):
        # Return the text inside the first \boxed{...}, or None if absent/unbalanced.
        marker = '\\boxed{'
        marker_start = solution.find(marker)
        if marker_start == -1:
            return None
        content_start = marker_start + len(marker)
        depth = 1  # the opening brace of \boxed{ is already consumed
        for position in range(content_start, len(solution)):
            char = solution[position]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return solution[content_start:position]
        return None

    result_correct = _boxed_content(correct_solution)
    result_generated = _boxed_content(generated_solution)
    if result_correct is None or result_generated is None:
        return False
    return result_correct == result_generated


In [191]:
test_correctitude_strategies = ['Cvorum', 'Levenshtein distance', 'Same result']
def is_solution_correct_with_strategy(correct_solution, generated_solution, strategy):
    """Judge a generated solution against the reference using the named strategy.

    Args:
        correct_solution: Reference solution text.
        generated_solution: Solution text to evaluate.
        strategy: One of the names in ``test_correctitude_strategies``.

    Returns:
        The verdict of the selected strategy; False for an unknown strategy name.
    """
    if strategy == 'Cvorum':
        return is_solution_correct_cvorum(correct_solution, generated_solution)
    if strategy == 'Levenshtein distance':
        # levenshtein_distance returns a normalized *distance* (0 for identical
        # texts), so it is converted to a similarity before being compared with
        # the threshold. Comparing the raw distance with ">=" accepted texts that
        # were mostly different and rejected identical ones.
        similarity = 1 - levenshtein_distance(correct_solution, generated_solution)
        return similarity >= threshold
    if strategy == 'Same result':
        return check_solutions_same_result(correct_solution, generated_solution)
    return False

In [192]:
# Aggregated list of the models that can be used for solution generation
models = [
    'gpt-3.5-turbo',
    'meta/llama3-70b-instruct',
    'gemini-1.5-flash',
    'gemini-1.5-pro',
    'gpt-4o',
]

In [193]:
# Extract the constraints and the goal from the problem
def extract_constraints_goals(problem, model):
    """Ask ``model`` for the main constraints and the goal of ``problem``.

    The reply is expected to contain a 'Main constraints:' section whose lines
    use the '*# <constraint>' bullet format, followed by a 'Goal:' section.

    Args:
        problem: Problem statement inserted into ``extract_constraints_goal_prompt``.
        model: One of 'gpt-3.5-turbo', 'gpt-4o', 'meta/llama3-70b-instruct',
            'gemini-1.5-flash' or 'gemini-1.5-pro'.

    Returns:
        A ``(conditions, goal)`` tuple: a list of constraint strings and the
        goal text (everything after the first 'Goal:' label, stripped).

    Raises:
        ValueError: If ``model`` is not supported or the reply has no 'Goal:'
            section.
    """
    prompt = extract_constraints_goal_prompt.format(Problem = problem)

    if model in ('gpt-3.5-turbo', 'gpt-4o', 'meta/llama3-70b-instruct'):
        # Both providers are reached through the OpenAI-compatible chat API.
        api = client_nvidia if model == 'meta/llama3-70b-instruct' else client_gpt
        completion = api.chat.completions.create(model=model,
                                                 messages=[{"role": "user", "content": prompt}])
        response = completion.choices[0].message.content
    elif model in ('gemini-1.5-flash', 'gemini-1.5-pro'):
        api = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        response = api.generate_content(prompt).text
        # Drop '##' sequences from the Gemini reply before looking for the labels.
        response = response.replace("##", "")
    else:
        raise ValueError(f'Unsupported model: {model!r}')

    # Split once, so a goal that itself mentions 'Goal:' is not truncated.
    constraints_part, goal_label, goal_part = response.partition('Goal:')
    if not goal_label:
        raise ValueError("Model response does not contain a 'Goal:' section")

    # Accept both capitalizations of the section label; the second one wins if
    # both are present.
    constraints_text = ""
    for label in ("Main constraints:", "Main Constraints:"):
        if label in constraints_part:
            constraints_text = constraints_part.split(label)[1]

    conditions = []
    for line in constraints_text.split('\n'):
        if not line.strip():
            continue  # skip empty lines
        pieces = line.split('*# ')
        if len(pieces) > 1:
            constraint = pieces[1].strip()
        else:
            # The line lacks the '*# ' marker: keep its text, minus any leading
            # bullet/markup characters, instead of failing with an IndexError.
            constraint = line.strip().lstrip('*#- \t').strip()
        if constraint:
            conditions.append(constraint)

    goal = goal_part.strip()

    return conditions, goal


In [194]:
# Function to deduce new constraints from the problem
def deduce_new_constraints(problem, known_contraints, goal, model):
    """Ask ``model`` to deduce a new constraint from the problem and known ones.

    The known constraints are rendered one per line in the '*# <constraint>'
    format and inserted into ``deduce_constraint_prompt`` together with the
    problem and the goal.

    Returns:
        "NONE" when the reply contains the substring 'NONE'; otherwise the
        stripped reply. An unrecognized ``model`` yields an empty string.
    """
    constraints = "".join(f'*# {item}\n' for item in known_contraints)

    chat_models = ('gpt-3.5-turbo', 'meta/llama3-70b-instruct', 'gpt-4o')
    gemini_models = ('gemini-1.5-flash', 'gemini-1.5-pro')

    if model in chat_models:
        # OpenAI-compatible chat endpoint (OpenAI itself or the NVIDIA gateway).
        backend = client_nvidia if model == 'meta/llama3-70b-instruct' else client_gpt
        user_message = {
            "role": "user",
            "content": deduce_constraint_prompt.format(Problem = problem, Constraints = constraints, Goal = goal)
        }
        completion = backend.chat.completions.create(model=model, messages=[user_message])
        reply = completion.choices[0].message.content
    elif model in gemini_models:
        backend = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        prompt = deduce_constraint_prompt.format(Problem = problem, Constraints = constraints, Goal = goal)
        reply = backend.generate_content(prompt).text
    else:
        reply = ""

    return "NONE" if "NONE" in reply else reply.strip()

In [195]:
# Function to test if a constraint is valid using an AI quorum
def is_constraint_valid(problem, known_contraints, new_constraint, goal):
    """Ask three models whether ``new_constraint`` is valid for the problem.

    ``test_constraint_prompt`` is filled in and sent to gpt-3.5-turbo,
    gemini-1.5-pro and meta/llama3-70b-instruct. A reply counts as a positive
    vote when it contains the substring 'YES'.

    Returns:
        True when at least two of the three replies contain 'YES', else False.
    """
    prompt = test_constraint_prompt.format(Problem = problem, Constraints = known_contraints, Constraint = new_constraint, Goal = goal)

    # Query the judges in a fixed order: OpenAI, Gemini, NVIDIA-hosted Llama.
    gpt_reply = client_gpt.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}])
    gemini_reply = gemini_model_15_pro.generate_content(prompt)
    llama_reply = client_nvidia.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=[{"role": "user", "content": prompt}])

    # Each membership test yields a bool; summing them counts the positive votes.
    yes_votes = sum((
        'YES' in gpt_reply.choices[0].message.content,
        'YES' in gemini_reply.text,
        'YES' in llama_reply.choices[0].message.content,
    ))

    # A majority (two out of three) validates the constraint.
    return yes_votes >= 2

In [196]:
# Function to correct a constraint for a given model
def correct_constraint(problem, constraints, constraint, goal, model):
    """Ask ``model`` to correct a constraint that was judged incorrect.

    ``correct_constraints_prompt`` is filled in with the problem, the incorrect
    constraint, the known constraints and the goal.

    Returns:
        "NONE" when the reply contains the substring 'NONE'; otherwise the
        stripped reply. An unrecognized ``model`` yields an empty string.
    """
    prompt = correct_constraints_prompt.format(Problem = problem, Incorrect_constraint = constraint, Constraints = constraints, Goal = goal)

    chat_models = ('gpt-3.5-turbo', 'meta/llama3-70b-instruct', 'gpt-4o')
    gemini_models = ('gemini-1.5-flash', 'gemini-1.5-pro')

    if model in chat_models:
        # OpenAI-compatible chat endpoint (OpenAI itself or the NVIDIA gateway).
        backend = client_nvidia if model == 'meta/llama3-70b-instruct' else client_gpt
        completion = backend.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}])
        reply = completion.choices[0].message.content
    elif model in gemini_models:
        backend = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        reply = backend.generate_content(prompt).text
    else:
        reply = ""

    return "NONE" if "NONE" in reply else reply.strip()

In [197]:
# Function to test if the constraints are sufficient to reach the goal using an AI quorum
def are_constraints_sufficient(problem, constraints, goal):
    """Ask three models whether ``constraints`` suffice to reach ``goal``.

    ``test_sufficiency_prompt`` is filled in and sent to gpt-3.5-turbo,
    gemini-1.5-pro and meta/llama3-70b-instruct. A reply counts as a positive
    vote when it contains the substring 'YES'.

    Returns:
        True when at least two of the three replies contain 'YES', else False.
    """
    prompt = test_sufficiency_prompt.format(Problem = problem, Constraints = constraints, Goal = goal)

    # Query the judges in a fixed order: OpenAI, Gemini, NVIDIA-hosted Llama.
    gpt_reply = client_gpt.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}])
    gemini_reply = gemini_model_15_pro.generate_content(prompt)
    llama_reply = client_nvidia.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=[{"role": "user", "content": prompt}])

    # Each membership test yields a bool; summing them counts the positive votes.
    yes_votes = sum((
        'YES' in gpt_reply.choices[0].message.content,
        'YES' in gemini_reply.text,
        'YES' in llama_reply.choices[0].message.content,
    ))

    # A majority (two out of three) means the constraints are sufficient.
    return yes_votes >= 2


In [198]:
# Function to generate a solution for a problem
def generate_solution(problem, constraints, goal, model):
    """Ask ``model`` to solve the problem given its constraints and goal.

    ``generate_solution_prompt`` is filled in with the three inputs and sent to
    the backend that serves ``model``.

    Returns:
        The stripped reply text. An unrecognized ``model`` yields an empty string.
    """
    prompt = generate_solution_prompt.format(Problem = problem,
                                            Constraints = constraints,
                                            Goal = goal)

    chat_models = ('gpt-3.5-turbo', 'meta/llama3-70b-instruct', 'gpt-4o')
    gemini_models = ('gemini-1.5-flash', 'gemini-1.5-pro')

    if model in chat_models:
        # OpenAI-compatible chat endpoint (OpenAI itself or the NVIDIA gateway).
        backend = client_nvidia if model == 'meta/llama3-70b-instruct' else client_gpt
        completion = backend.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}])
        reply = completion.choices[0].message.content
    elif model in gemini_models:
        backend = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        reply = backend.generate_content(prompt).text
    else:
        reply = ""

    return reply.strip()

In [199]:
# Function to correct a solution
def correct_solution_if_needed(problem, constraints, goal, generated_solution, model):
    """Ask ``model`` to review a generated solution and return its reply.

    ``correct_solution_prompt`` is filled in with the problem, constraints, goal
    and the generated solution, then sent to the backend that serves ``model``.

    Returns:
        The stripped reply text. An unrecognized ``model`` yields an empty string.
    """
    prompt = correct_solution_prompt.format(Problem = problem,
                                            Constraints = constraints,
                                            Goal = goal,
                                            Solution = generated_solution)

    chat_models = ('gpt-3.5-turbo', 'meta/llama3-70b-instruct', 'gpt-4o')
    gemini_models = ('gemini-1.5-flash', 'gemini-1.5-pro')

    if model in chat_models:
        # OpenAI-compatible chat endpoint (OpenAI itself or the NVIDIA gateway).
        backend = client_nvidia if model == 'meta/llama3-70b-instruct' else client_gpt
        completion = backend.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}])
        reply = completion.choices[0].message.content
    elif model in gemini_models:
        backend = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        reply = backend.generate_content(prompt).text
    else:
        reply = ""

    return reply.strip()

In [200]:
# Flow engineering for math problems
def flow_engienering_math_solver(problem, model, max_iterations=6):
    """Solve ``problem`` with ``model`` through an iterative constraint flow.

    Steps:
      1. Extract the initial constraints and the goal from the problem.
      2. Up to ``max_iterations`` times: deduce a new constraint, validate it
         with the model quorum, ask the model to correct it when it is judged
         invalid, add the resulting constraint to the known ones, and stop early
         once the quorum considers the constraints sufficient for the goal.
      3. Generate a solution from the collected constraints.
      4. Pass the solution through a final correction step.

    Args:
        problem: Problem statement.
        model: Name of the model used for extraction, deduction, correction and
            solution generation.
        max_iterations: Maximum number of deduction rounds (default 6).

    Returns:
        The text returned by the final correction step.
    """
    # Extract the constraints and the goal from the problem.
    contraints, goal = extract_constraints_goals(problem, model)

    for _ in range(max_iterations):
        # Pause between rounds (presumably to stay within provider rate limits).
        time.sleep(20)

        # Deduce a new constraint from the problem.
        new_constraint = deduce_new_constraints(problem, contraints, goal, model)

        # "NONE" means nothing new could be deduced: there is nothing to
        # validate or correct in that case.
        if new_constraint and new_constraint != "NONE":
            if not is_constraint_valid(problem, contraints, new_constraint, goal):
                new_constraint = correct_constraint(problem, contraints, new_constraint, goal, model)
            # Keep the constraint whether it was valid from the start or has
            # just been corrected (previously valid constraints were dropped).
            if new_constraint and new_constraint != "NONE":
                contraints.append(new_constraint)

        # Stop as soon as the constraints are sufficient to reach the goal.
        if are_constraints_sufficient(problem, contraints, goal):
            break

    # Generate a solution for the problem.
    time.sleep(10)
    solution = generate_solution(problem, contraints, goal, model)

    # Correct the solution if needed.
    time.sleep(20)
    corrected_solution = correct_solution_if_needed(problem, contraints, goal, solution, model)

    return corrected_solution

In [201]:
# Load the problems from the dataset
problems = get_problem_filenames()
model = 'gemini-1.5-pro'
output_folder = os.path.join(directory, 'output-flow-' + model)

# Create the output folder if it does not exist (no check-then-create race).
os.makedirs(output_folder, exist_ok=True)

# Number of not-yet-processed problems to skip at the start of this run.
skip = 1

# Iterate through the problems and generate solutions
for index, problem in enumerate(problems):
    output_path = os.path.join(output_folder, str(problem).replace(".json", "") + '_out.json')

    # A problem whose output file already exists was handled by an earlier run,
    # which makes the loop resumable after a crash.
    if os.path.exists(output_path):
        continue
    if skip != 0:
        skip -= 1
        continue

    print(f'Processing problem {index + 1}/{len(problems)}')
    problem_data = load_problem(problem)
    category = problem_data['category']
    difficulty = problem_data['level']
    correct_solution = problem_data['solution']
    problem_statement = problem_data['problem']

    # Generate a solution using the flow engineering approach
    problem_text = f'This is a {category} math problem of dificulty {difficulty} on a scale from Level 1 to Level 5:\n{problem_statement}'
    generated_solution = flow_engienering_math_solver(problem_text, model)

    # Evaluate the generated solution with every strategy
    verdicts = {}
    verdicts['Cvorum'] = is_solution_correct_with_strategy(correct_solution, generated_solution, 'Cvorum')
    verdicts['Levenshtein distance'] = is_solution_correct_with_strategy(correct_solution, generated_solution, 'Levenshtein distance')
    try:
        verdicts['Same result'] = is_solution_correct_with_strategy(correct_solution, generated_solution, 'Same result')
    except Exception:
        # A solution without a usable \boxed{} result counts as "not the same".
        # Only ordinary errors are caught, so Ctrl-C still interrupts the run.
        verdicts['Same result'] = False
    verdicts['Levenshtein distance value'] = levenshtein_distance(correct_solution, generated_solution)

    output = {
        problem: {
            'category': category,
            'difficulty': difficulty,
            'problem': problem_statement,
            'correct_solution': correct_solution,
            'generated_solutions': {model: generated_solution},
            'correctitude': {model: verdicts},
        }
    }

    # Save the output to a file
    with open(output_path, 'w') as json_file:
        json.dump(output, json_file, indent=4)

    # Pause before the next problem (presumably to stay within provider rate limits).
    time.sleep(20)

Processing problem 38/105
Processing problem 39/105
Processing problem 40/105
Processing problem 41/105
Processing problem 42/105
Processing problem 43/105
Processing problem 44/105
Processing problem 45/105
Processing problem 46/105
Processing problem 47/105
Processing problem 48/105
Processing problem 49/105
Processing problem 50/105
Processing problem 51/105
Processing problem 52/105
Processing problem 53/105
Processing problem 54/105
Processing problem 55/105
Processing problem 56/105


APIStatusError: Error code: 402 - {'type': 'urn:kaizen:problem-details:payment-required', 'title': 'Payment Required', 'status': 402, 'detail': "Account '4-5k2HDVck50EPgzRy3ybYK1q8gRouvpvslP7gIF3Kc': Cloud credits expired - Please contact NVIDIA representatives", 'instance': '/v2/nvcf/pexec/functions/a88f115a-4a47-4381-ad62-ca25dc33dc1b'}