In [1]:
import os
import re
import json
import random
import sys
import asyncio
import pickle
import datetime
import gurobipy as gp
sys.path.append('../')

from openai import OpenAI, AsyncClient
from json import JSONDecodeError
from enum import Enum
from tqdm.auto import tqdm
from utils import *
from pydantic import BaseModel
from colorama import Fore, Style
from llama_index.program.openai import OpenAIPydanticProgram
from llama_index.llms.openai import OpenAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
cfg = json.load(open('../configs./configs.json', 'r'))
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
asyncclient = AsyncClient(api_key=os.environ["OPENAI_API_KEY"])

dt = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')

In [3]:
DATA_DIR = '../data'
DATASET_NAME = 'LPWP' 
OUTPUT_DIR = '../output'  

nl4opt_data = read_txt_file(os.path.join(DATA_DIR, DATASET_NAME, 'lpwp.txt'))
questions, answers = get_nl4opt_qas(nl4opt_data)
assert len(questions) == len(answers)

qa_pairs = list(zip(questions, answers))
# demo_samples, test_samples = get_demo_and_test_samples(qa_pairs)

questions = [q for q, _ in qa_pairs]
answers = [a for _, a in qa_pairs]

[32m2024-09-19 19:28:28.688[0m | [34m[1mDEBUG   [0m | [36mutils[0m:[36mread_txt_file[0m:[36m15[0m - [34m[1mReading file: ../data\LPWP\lpwp.txt[0m
[32m2024-09-19 19:28:28.690[0m | [34m[1mDEBUG   [0m | [36mutils[0m:[36mread_txt_file[0m:[36m17[0m - [34m[1mFile read successfully: ../data\LPWP\lpwp.txt[0m
[32m2024-09-19 19:28:28.691[0m | [1mINFO    [0m | [36mutils[0m:[36mget_nl4opt_qas[0m:[36m36[0m - [1mNumber of questions: 288[0m
[32m2024-09-19 19:28:28.691[0m | [1mINFO    [0m | [36mutils[0m:[36mget_nl4opt_qas[0m:[36m37[0m - [1mNumber of answers: 288[0m


In [4]:
# class VarType(Enum):
#     INTEGER = "INTEGER"
#     CONTINUOUS = "CONTINUOUS"

class CodeScheme(BaseModel):
    """The variable type in the problem and the code string"""
    
    # var_type: VarType
    code: str     

In [5]:
sys_prompt = """You are an expert in optimization problems and domain specific language generation. 
Your task is to convert the textual optimization text into lines of code.
You should also analyze whether the variable in the optimization problem should be INTEGER or CONTINUOUS.
DO NOT ADD ANY COMMENTS OR EXPLANATION TO THE CODE. JUST OUTPUT THE CODE.
Here are some examples that you should refer to:\n"""

example = """
QUESTION:
A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?
CODE:
x = m.addVar(name="Oil Max", vtype=gp.GRB.INTEGER)
y = m.addVar(name="Oil Max Pro", vtype=gp.GRB.INTEGER)
m.setObjective(10 * x + 15 * y, gp.GRB.MAXIMIZE)
m.addConstr(46 * x + 13 * y <= 1345)
m.addConstr(43 * x + 4 * y <= 346)
m.addConstr(56 * x + 45 * y <= 1643)

QUESTION:
Ben is growing apples and pears on his orchard. He has 50 acres available on which he must grow a minimum of 5 acres of apples and a minimum of 10 acres of pears to meet demands. The profit per apple is $2 and the profit per pear is $4. He prefers to grow more pears than apples but limitations in his workforce allow him to grow at most twice the amount of pears as apples. How many of each fruit should Ben grow in order to maximize his profit? What is that profit?
CODE:
x = m.addVar(name="apples", vtype=gp.GRB.INTEGER)
y = m.addVar(name="pears", vtype=gp.GRB.INTEGER)
m.setObjective(2 * x + 4 * y, gp.GRB.MAXIMIZE)
m.addConstr(x + y <= 50)
m.addConstr(x >= 5)
m.addConstr(y >= 10)
m.addConstr(y <= 2 * x)
"""

prompt_template_str = sys_prompt + example + "\nPlease finish the task think step by step.\nQUESTION:{q}"
print(prompt_template_str)

You are an expert in optimization problems and domain specific language generation. 
Your task is to convert the textual optimization text into lines of code.
You should also analyze whether the variable in the optimization problem should be INTEGER or CONTINUOUS.
DO NOT ADD ANY COMMENTS OR EXPLANATION TO THE CODE. JUST OUTPUT THE CODE.
Here are some examples that you should refer to:

QUESTION:
A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?
CODE

In [6]:
program = OpenAIPydanticProgram.from_defaults(
    output_cls=CodeScheme, 
    llm=OpenAI("gpt-3.5-turbo"),
    prompt_template_str=prompt_template_str.format(q=questions[2]), 
    verbose=True
)

In [7]:
code = program()

Function call: CodeScheme with args: {"code":"x = m.addVar(name=\"fertilizer A\", vtype=gp.GRB.CONTINUOUS)\ny = m.addVar(name=\"fertilizer B\", vtype=gp.GRB.CONTINUOUS)\nm.setObjective(5 * x + 9 * y, gp.GRB.MINIMIZE)\nm.addConstr(13 * x + 8 * y >= 220)\nm.addConstr(5 * x + 14 * y >= 160)\nm.addConstr(6 * x + 6 * y <= 350)"}


In [8]:
execute_code(complement_code(clean_code(code.code)))

125.49295774647888

In [9]:
# batch_size = 8
# lp_reasoning_list = []
# for idx in tqdm(range(0, len(questions), batch_size)):
#     batch = questions[idx:idx+batch_size]
    
#     tasks = [asyncclient.beta.chat.completions.parse(
#         model="gpt-4o-2024-08-06",
#         temperature=0,
#         response_format=Code,
#         messages=[
#             {"role": "system", "content": sys_prompt},
#             {"role": "user", "content": f"QUESTION: {q}"}
#         ]) for q in batch
#     ]

#     combined_responses = await asyncio.gather(*tasks)
#     lp_reasoning_list.extend([r.choices[0].message.parsed for r in combined_responses])

codes = []
for i in tqdm(range(len(questions))):
    program = OpenAIPydanticProgram.from_defaults(
        output_cls=CodeScheme, 
        llm=OpenAI("gpt-3.5-turbo"),
        prompt_template_str=prompt_template_str.format(q=questions[i]), 
        verbose=False
    )
    code = program()
    codes.append(code)

100%|██████████| 288/288 [12:14<00:00,  2.55s/it]


In [10]:
filename = 'e2e_codegen_gpt-3.5-turbo_lpwp_gurobi_' + dt + '.pkl'
with open(os.path.join(OUTPUT_DIR, filename), 'wb') as f:
    pickle.dump(codes, f)

In [19]:
filename = 'e2e_codegen_gpt_3.5_turbo_lpwp_gurobi_2024-09-19-14-35-37.pkl'
with open(os.path.join(OUTPUT_DIR, filename), 'rb') as f:
    codes = pickle.load(f)

In [20]:
codes = [codes[i].code for i in range(len(codes))]

In [21]:
prefix = """
import gurobipy as gp
env = gp.Env(empty=True)
env.setParam("OutputFlag",0)
env.start()
m = gp.Model(env=env)
"""
                
suffix = """
m.optimize()
"""

def complement_code(code: str) -> float:
    return prefix + code + suffix

In [22]:
def clean_code(code: str) -> str:
    cleand_code = []
    for line in code.split('\n'):
        line = line.strip()
        if line.startswith('m.addConstr') and not re.findall(r'<=|>=', line):
            line = re.sub(r'<', r'<=', line)
            line = re.sub(r'>', r'>=', line)
        cleand_code.append(line)
    cleand_code = '\n'.join(cleand_code)
    cleand_code = cleand_code.replace(')m', ')\nm')
    return cleand_code

In [23]:
def execute_code(code: str) -> float:
    ex_locals = {}
    exec(code, None, ex_locals)
    
    try:
        return ex_locals["m"].objVal
    except Exception as e:
        # print(e)
        return np.inf

In [24]:
pred_answers = []
for i, code_str in enumerate(codes):
    try:
        cleaned_code = clean_code(code_str)
        code = complement_code(cleaned_code)
        ans = execute_code(code)
        loguru.logger.info(f"question {i} obtain answer")
        pred_answers.append(ans)
    except Exception as e:
        loguru.logger.error(f"Error for question {i}: {e}")
        pred_answers.append("Error")

[32m2024-09-19 19:41:46.183[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [31m[1mError for question 0: name 'cost_sled_dogs' is not defined[0m
[32m2024-09-19 19:41:46.185[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 1 obtain answer[0m
[32m2024-09-19 19:41:46.186[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 2 obtain answer[0m
[32m2024-09-19 19:41:46.189[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 3 obtain answer[0m
[32m2024-09-19 19:41:46.191[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 4 obtain answer[0m
[32m2024-09-19 19:41:46.193[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 5 obtain answer[0m
[32m2024-09-19 19:41:46.195[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 6 obtain answer[0m
[3

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (win64 - Windows 10.0 (19045.2))

CPU model: AMD Ryzen 7 5700X 8-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 3 rows, 2 columns and 5 nonzeros
Model fingerprint: 0xfd346e33
Coefficient statistics:
  Matrix range     [2e-01, 1e+00]
  Objective range  [5e-01, 1e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [2e+04, 8e+05]
Presolve removed 3 rows and 2 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    6.8400000e+05   0.000000e+00   0.000000e+00      0s

Solved in 0 iterations and 0.00 seconds (0.00 work units)
Optimal objective  6.840000000e+05


[32m2024-09-19 19:41:46.213[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 13 obtain answer[0m
[32m2024-09-19 19:41:46.215[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 14 obtain answer[0m
[32m2024-09-19 19:41:46.216[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 15 obtain answer[0m
[32m2024-09-19 19:41:46.218[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 16 obtain answer[0m
[32m2024-09-19 19:41:46.219[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 17 obtain answer[0m
[32m2024-09-19 19:41:46.220[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 18 obtain answer[0m
[32m2024-09-19 19:41:46.222[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 19 obtain answer[0m
[32m2024-09-19 19:41:46.223[0m | [1mIN

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (win64 - Windows 10.0 (19045.2))

CPU model: AMD Ryzen 7 5700X 8-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 2 rows, 2 columns and 4 nonzeros
Model fingerprint: 0x567311ae
Coefficient statistics:
  Matrix range     [3e-01, 1e+00]
  Objective range  [3e-01, 5e-01]
  Bounds range     [0e+00, 0e+00]
  RHS range        [4e+00, 8e+00]
Presolve time: 0.00s
Presolved: 2 rows, 2 columns, 4 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   4.000000e+00   0.000000e+00      0s
       1    1.5000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.00 seconds (0.00 work units)
Optimal objective  1.500000000e+00


[32m2024-09-19 19:41:46.238[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 25 obtain answer[0m
[32m2024-09-19 19:41:46.239[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 26 obtain answer[0m
[32m2024-09-19 19:41:46.241[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 27 obtain answer[0m
[32m2024-09-19 19:41:46.242[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 28 obtain answer[0m
[32m2024-09-19 19:41:46.244[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 29 obtain answer[0m
[32m2024-09-19 19:41:46.248[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 30 obtain answer[0m
[32m2024-09-19 19:41:46.250[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 31 obtain answer[0m
[32m2024-09-19 19:41:46.252[0m | [1mIN

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (win64 - Windows 10.0 (19045.2))

CPU model: AMD Ryzen 7 5700X 8-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 4 rows, 2 columns and 7 nonzeros
Model fingerprint: 0x5eedd26c
Coefficient statistics:
  Matrix range     [3e-01, 8e+02]
  Objective range  [5e+02, 8e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+01, 3e+04]
Presolve removed 1 rows and 0 columns
Presolve time: 0.01s
Presolved: 3 rows, 2 columns, 6 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    2.6238500e+04   4.834347e+02   0.000000e+00      0s
       1    2.8125000e+04   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds (0.00 work units)
Optimal objective  2.812500000e+04


[32m2024-09-19 19:41:46.308[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 57 obtain answer[0m
[32m2024-09-19 19:41:46.310[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 58 obtain answer[0m
[32m2024-09-19 19:41:46.312[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 59 obtain answer[0m
[32m2024-09-19 19:41:46.313[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 60 obtain answer[0m
[32m2024-09-19 19:41:46.315[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 61 obtain answer[0m
[32m2024-09-19 19:41:46.317[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 62 obtain answer[0m
[32m2024-09-19 19:41:46.318[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 63 obtain answer[0m
[32m2024-09-19 19:41:46.319[0m | [1mIN

In [25]:
answers = [a if a != 'None' else str(np.inf) for a in answers]

In [26]:
def mark(pred, real, error: float) -> List[bool]:    
    correct = []
    for p, r in zip(pred, real):
        if p == 'Error':
            continue
        if float(r) != 0:
            if (float(p) == np.inf and float(r) == np.inf) or (abs(float(p) - float(r)) / float(r) < error):
                correct.append(True)
            else:
                correct.append(False)
        else:
            if float(p) < error:
                correct.append(True)
            else:
                correct.append(False)
    return correct

In [27]:
error1, error2 = 1e-2, 1e-4
print(f"Under {error1} error rate: the accuracy is {sum(mark(pred_answers, answers, error=error1)) / len(answers)}")
print(f"Under {error2} error rate: the accuracy is {sum(mark(pred_answers, answers, error=error2)) / len(answers)}")
print(f"Under {1e-6} error rate: the accuracy is {sum(mark(pred_answers, answers, error=1e-6)) / len(answers)}")

Under 0.01 error rate: the accuracy is 0.6076388888888888
Under 0.0001 error rate: the accuracy is 0.5902777777777778
Under 1e-06 error rate: the accuracy is 0.5902777777777778
