In [88]:
from openai import OpenAI
import os
import time
import json
import vertexai
from vertexai.generative_models import GenerativeModel
import re

# import prompt from prompt.py
from prompts import *

In [89]:
# Initialize the Vertex AI SDK with the Google Cloud project used by this notebook.
# The GenerativeModel objects further down are created after this call.
project_id = "licenta-425710"
vertexai.init(project=project_id)

In [90]:
# Initialize the model clients.
# Credentials are read from the environment so that no secret is stored in the
# notebook (or in its version-control history). A missing variable fails fast
# with a KeyError naming the variable.
# NOTE(security): the API keys that used to be hardcoded in this cell have been
# exposed and should be revoked and re-issued.
client_gpt = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
client_nvidia = OpenAI(base_url='https://integrate.api.nvidia.com/v1',
                       api_key=os.environ["NVIDIA_API_KEY"])
gemini_model_15 = GenerativeModel(model_name="gemini-1.5-flash")
gemini_model_15_pro = GenerativeModel(model_name="gemini-1.5-pro")

In [91]:
# Dataset location: the 'problems_dataset' folder inside the current working
# directory (so the result depends on where the kernel was started).
directory = os.getcwd()
dataset_directory = os.path.join(directory, 'problems_dataset')

# Threshold compared against the value returned by levenshtein_distance() when
# the 'Levenshtein distance' strategy is used in is_solution_correct_with_strategy().
threshold = 0.7

In [92]:
# List the problem files available in the dataset directory
def get_problem_filenames():
    """Return the names of all entries in `dataset_directory` ending in '.json'.

    Only bare names are returned (not full paths), in the order produced by
    `os.listdir`.
    """
    return [entry for entry in os.listdir(dataset_directory) if entry.endswith('.json')]

In [93]:
# Load a single problem file from the dataset directory
def load_problem(problem_filename):
    """Read and parse one JSON problem file from `dataset_directory`.

    Args:
        problem_filename: Name of the file, relative to `dataset_directory`.

    Returns:
        The object produced by `json.load` for that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    path = os.path.join(dataset_directory, problem_filename)
    # JSON is UTF-8 by specification; without an explicit encoding `open` falls
    # back to the platform locale, which can mis-decode non-ASCII math symbols.
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [94]:
# Function to calculate the Levenshtein distance between two strings as a fraction
def levenshtein_distance(s1, s2):
    """Return the Levenshtein edit distance between `s1` and `s2`, normalized.

    The raw edit distance (insertions, deletions, substitutions, each of
    cost 1) is divided by the length of the longer input, so the result lies
    in [0.0, 1.0]: 0.0 means the inputs are identical and 1.0 means every
    position has to be edited.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The normalized distance as a float. Two empty inputs are identical,
        so their distance is 0.0.
    """
    m = len(s1)
    n = len(s2)
    longest = max(m, n)
    if longest == 0:
        # Both inputs are empty: avoid dividing by zero below.
        return 0.0

    # Two-row dynamic programming: `previous` holds the distances for the
    # prefix s1[:i], `current` is filled in for s1[:i + 1].
    previous = list(range(n + 1))
    current = [0] * (n + 1)

    for i in range(m):
        current[0] = i + 1
        for j in range(n):
            deletion_cost = previous[j + 1] + 1
            insertion_cost = current[j] + 1
            substitution_cost = previous[j] if s1[i] == s2[j] else previous[j] + 1
            current[j + 1] = min(deletion_cost, insertion_cost, substitution_cost)
        # The freshly computed row becomes the reference row for the next character.
        previous, current = current, previous

    return previous[n] / longest

In [95]:

# Function to test whether a solution is correct using an AI quorum (majority vote)
def is_solution_correct_cvorum(correct_solution, generated_solution):
    """Ask three models whether `generated_solution` matches `correct_solution`.

    The same instructions are sent to gpt-3.5-turbo, meta/llama3-70b-instruct
    and the Gemini 1.5 Pro model. A reply counts as a positive vote when it
    contains the substring 'YES'.

    Returns:
        True when at least two of the three replies contain 'YES', else False.
    """
    rules = f'You are a math assistant and you are given two solutions to a math problem. The FIRST SOLUTION is the correct one,\
            and the SECOND SOLUTION is the one you need to evaluate based on the first.\
            You MUST check if the SECOND SOLUTION has the final result as the FIRST SOLUTION, if it exists.'
    text = f'The correct FIRST SOLUTION is: {correct_solution}.\n The SECOND SOLUTION is: {generated_solution}.\
            \n\nBased on the FIRST SOLUTION, is the SECOND SOLUTION correct? Please answer YES or NO.'

    def build_messages():
        # A fresh message list per request, as each chat call gets its own payload.
        return [{"role": "system", "content": rules},
                {"role": "user", "content": text}]

    gpt_reply = client_gpt.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=build_messages())
    llama_reply = client_nvidia.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=build_messages())
    # The Gemini call takes a single text prompt, so rules and question are joined.
    gemini_reply = gemini_model_15_pro.generate_content(rules + '\n' + text)

    votes = [
        'YES' in gpt_reply.choices[0].message.content,
        'YES' in llama_reply.choices[0].message.content,
        'YES' in gemini_reply.text,
    ]

    # Majority rule: two positive votes out of three are enough.
    return sum(votes) >= 2


In [96]:
test_correctitude_strategies = ['Cvorum', 'Levenshtein distance']
def is_solution_correct_with_strategy(correct_solution, generated_solution, strategy):
    """Judge `generated_solution` against `correct_solution` with a named strategy.

    Args:
        correct_solution: Reference solution text.
        generated_solution: Solution text to evaluate.
        strategy: One of the names in `test_correctitude_strategies`.

    Returns:
        True if the chosen strategy accepts the generated solution, False
        otherwise. An unrecognized strategy name also yields False.
    """
    if strategy == 'Cvorum':
        return is_solution_correct_cvorum(correct_solution, generated_solution)
    if strategy == 'Levenshtein distance':
        # levenshtein_distance returns a normalized *distance* (0.0 means the
        # texts are identical), so it is converted into a similarity before it
        # is compared with the threshold. Comparing the raw distance with `>=`
        # accepted very different texts and rejected identical ones.
        similarity = 1 - levenshtein_distance(correct_solution, generated_solution)
        return similarity >= threshold
    # elif strategy == 'dataset function':
    #     return is_equiv(correct_solution, generated_solution)
    return False

In [97]:
# Aggregated list of model identifiers to use for solution generation.
# Each entry must be the exact id string that the helper functions dispatch on;
# the last entry previously held the Python variable name 'gemini_model_15_pro',
# which none of them recognise.
models = ['gpt-3.5-turbo', 'meta/llama3-70b-instruct', 'gemini-1.5-flash', 'gemini-1.5-pro']

In [98]:
# Extract the conditions (constraints) and the goal from the problem
def extract_conditions_goals(problem, model):
    """Ask `model` for the main constraints and the goal of `problem`.

    The prompt `extract_constraints_goal_prompt` is sent to the selected model
    and the reply is parsed. The constraints are read from the text between a
    'Main constraints:' / 'Main Constraints:' header and the 'Goal:' marker,
    one per non-empty line; the text after the '*# ' bullet marker is kept.
    The goal is the text that follows 'Goal:'.

    Args:
        problem: Problem statement inserted into the prompt.
        model: One of 'gpt-3.5-turbo', 'meta/llama3-70b-instruct',
            'gemini-1.5-flash' or 'gemini-1.5-pro'.

    Returns:
        A `(conditions, goal)` tuple where `conditions` is a list of strings
        and `goal` is a string.

    Raises:
        ValueError: If `model` is not supported or the reply has no 'Goal:'
            section.
    """
    if model in ('gpt-3.5-turbo', 'meta/llama3-70b-instruct'):
        api = client_gpt if model == 'gpt-3.5-turbo' else client_nvidia
        message = {
            "role": "user",
            "content": extract_constraints_goal_prompt.format(Problem = problem)
        }
        completion = api.chat.completions.create(model=model, messages=[message])
        response = completion.choices[0].message.content
    elif model in ('gemini-1.5-flash', 'gemini-1.5-pro'):
        api = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        message = extract_constraints_goal_prompt.format(Problem = problem)
        response = api.generate_content(message).text
        # Strip markdown heading markers from the Gemini reply before parsing.
        response = response.replace("##", "")
    else:
        # Previously an unknown model fell through with an empty reply and
        # failed later with an unrelated IndexError.
        raise ValueError(f'Unsupported model: {model!r}')

    # A chat completion may carry no text content at all.
    response = response or ""
    if 'Goal:' not in response:
        raise ValueError(f'Model reply has no "Goal:" section: {response!r}')

    response_parts = response.split('Goal:')

    # Locate the constraints section; both header capitalizations are accepted.
    constraints_text = ""
    for header in ("Main constraints:", "Main Constraints:"):
        if header in response_parts[0]:
            constraints_text = response_parts[0].split(header)[1]

    conditions = []
    for line in constraints_text.split('\n'):
        if not line.strip():
            continue  # skip empty lines
        if '*# ' in line:
            conditions.append(line.split('*# ')[1].strip())
        else:
            # Keep lines that lack the bullet marker instead of crashing on them.
            conditions.append(line.strip())

    goal = response_parts[1].strip()

    return conditions, goal


In [99]:
# Ask a model to deduce a new constraint from the problem and what is already known
def deduce_new_constraints(problem, known_contraints, goal, model):
    """Query `model` with `deduce_constraint_prompt` and return its reply.

    The known constraints are rendered one per line, each prefixed with '*# ',
    before being inserted into the prompt.

    Returns:
        'NONE' if the reply contains the substring 'NONE', otherwise the reply
        with surrounding whitespace removed. A model name that matches none of
        the supported ids yields an empty string.
    """
    constraints = "".join(f'*# {item}\n' for item in known_contraints)

    if model in ('gpt-3.5-turbo', 'meta/llama3-70b-instruct'):
        api = client_gpt if model == 'gpt-3.5-turbo' else client_nvidia
        prompt = deduce_constraint_prompt.format(Problem = problem, Constraints = constraints, Goal = goal)
        completion = api.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}])
        response = completion.choices[0].message.content
    elif model in ('gemini-1.5-flash', 'gemini-1.5-pro'):
        api = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        prompt = deduce_constraint_prompt.format(Problem = problem, Constraints = constraints, Goal = goal)
        response = api.generate_content(prompt).text
    else:
        # No matching client: nothing is sent and the reply stays empty.
        response = ""

    return "NONE" if "NONE" in response else response.strip()

In [100]:
# Function to test whether a constraint is valid using an AI quorum (majority vote)
def is_constraint_valid(problem, known_contraints, new_constraint, goal):
    """Ask three models whether `new_constraint` is valid for `problem`.

    The prompt built from `test_constraint_prompt` is sent to gpt-3.5-turbo,
    the Gemini 1.5 Pro model and meta/llama3-70b-instruct, in that order. A
    reply counts as a positive vote when it contains the substring 'YES'.

    Returns:
        True when at least two of the three replies contain 'YES', else False.
    """
    prompt = test_constraint_prompt.format(Problem = problem, Constraints = known_contraints, Constraint = new_constraint, Goal = goal)

    gpt_reply = client_gpt.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}])
    gemini_reply = gemini_model_15_pro.generate_content(prompt)
    llama_reply = client_nvidia.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=[{"role": "user", "content": prompt}])

    votes = [
        'YES' in gpt_reply.choices[0].message.content,
        'YES' in gemini_reply.text,
        'YES' in llama_reply.choices[0].message.content,
    ]

    # Majority rule: two positive votes out of three are enough.
    return sum(votes) >= 2

In [101]:
# Function to correct a constraint with a given model
def correct_constraint(problem, constraints, constraint, goal, model):
    """Ask `model` to correct `constraint` using `correct_constraints_prompt`.

    Args:
        problem: Problem statement inserted into the prompt.
        constraints: Already known constraints, inserted into the prompt as given.
        constraint: The constraint to be corrected.
        goal: Goal of the problem, inserted into the prompt.
        model: One of 'gpt-3.5-turbo', 'meta/llama3-70b-instruct',
            'gemini-1.5-flash' or 'gemini-1.5-pro'.

    Returns:
        'NONE' if the reply contains the substring 'NONE', otherwise the reply
        with surrounding whitespace removed. A model name that matches none of
        the supported ids yields an empty string.
    """
    prompt = correct_constraints_prompt.format(Problem = problem, Incorrect_constraint = constraint, Constraints = constraints, Goal = goal)

    response = ""
    if model in ('gpt-3.5-turbo', 'meta/llama3-70b-instruct'):
        api = client_gpt if model == 'gpt-3.5-turbo' else client_nvidia
        message = {
            "role": "user",
            "content": prompt
        }
        completion = api.chat.completions.create(model=model, messages=[message])
        response = completion.choices[0].message.content
    elif model in ('gemini-1.5-flash', 'gemini-1.5-pro'):
        # 'gemini-1.5-pro' used to be selected as a client but never queried,
        # so it silently produced an empty correction.
        api = gemini_model_15 if model == 'gemini-1.5-flash' else gemini_model_15_pro
        response = api.generate_content(prompt).text

    if "NONE" in response:
        return "NONE"

    return response.strip()

In [102]:
# Function to test whether the constraints are sufficient to reach the goal, using an AI quorum
def are_constraints_sufficient(problem, constraints, goal):
    """Ask three models whether `constraints` are enough to reach `goal`.

    The prompt built from `test_sufficiency_prompt` is sent to gpt-3.5-turbo,
    the Gemini 1.5 Pro model and meta/llama3-70b-instruct, in that order. A
    reply counts as a positive vote when it contains the substring 'YES'.

    Returns:
        True when at least two of the three replies contain 'YES', else False.
    """
    prompt = test_sufficiency_prompt.format(Problem = problem, Constraints = constraints, Goal = goal)

    gpt_reply = client_gpt.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}])
    gemini_reply = gemini_model_15_pro.generate_content(prompt)
    llama_reply = client_nvidia.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=[{"role": "user", "content": prompt}])

    votes = [
        'YES' in gpt_reply.choices[0].message.content,
        'YES' in gemini_reply.text,
        'YES' in llama_reply.choices[0].message.content,
    ]

    # Majority rule: two positive votes out of three are enough.
    return sum(votes) >= 2


In [103]:
# Flow engineering demo: runs each helper once on a small sample problem.
# TODO: wrap these steps in a function, e.g. flow_engineering_math_solver(problem, model).
problem = "The sum of two numbers is 10. The difference between the two numbers is 2. What are the two numbers?"
# Step 1: extract the known constraints (a list of strings) and the goal.
constraints, goal = extract_conditions_goals(problem, 'gemini-1.5-flash')

# Step 2: ask the model for one additional constraint ("NONE" if it finds nothing new).
new_constraint =  deduce_new_constraints(problem, constraints, goal, 'gemini-1.5-flash')
print(new_constraint)
# Step 3: ask the model to correct that constraint. This is called unconditionally
# here, i.e. before the validity check below has judged the constraint.
r = correct_constraint(problem, constraints, new_constraint, goal, 'gemini-1.5-flash')
print(r)

# Step 4: majority vote of three models on whether the new constraint is valid.
# NOTE: `r` is reused for each step's result, so only the last value survives.
r = is_constraint_valid(problem, constraints, new_constraint, goal)
print(r)

# Step 5: majority vote on whether the known constraints suffice to reach the goal.
r = are_constraints_sufficient(problem, constraints, goal)
print(r)



One number is larger than the other.
The difference between the two numbers is 2.
True
True
