In [1]:
import os
import re
import json
import random
import sys
import asyncio
import pickle
import datetime
sys.path.append('../')

from openai import OpenAI, AsyncClient
from json import JSONDecodeError
from tqdm.auto import tqdm
from utils import *
from pydantic import BaseModel
from colorama import Fore, Style

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the project configuration. The context manager closes the file handle
# deterministically (the previous bare open() left it to the garbage collector).
# The path used to read '../configs./configs.json'; the stray dot only resolves
# on Windows, which silently drops trailing dots in path components.
with open('../configs/configs.json', 'r') as cfg_file:
    cfg = json.load(cfg_file)

# Synchronous and asynchronous OpenAI clients; the API key is read from the
# environment so it never appears in the notebook.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
asyncclient = AsyncClient(api_key=os.environ["OPENAI_API_KEY"])

# Timestamp of this run, used later to build unique output file names.
dt = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')

In [3]:
# Locations of the input dataset and of the generated artifacts.
DATA_DIR = '../data'
DATASET_NAME = 'NL4OPT'
OUTPUT_DIR = '../output'

# Read the raw NL4OPT text file and split it into parallel question/answer lists.
nl4opt_path = os.path.join(DATA_DIR, DATASET_NAME, 'nl4opt.txt')
nl4opt_data = read_txt_file(nl4opt_path)
questions, answers = get_nl4opt_qas(nl4opt_data)
assert len(questions) == len(answers)

# Pair every question with its answer, then separate demo and test samples.
qa_pairs = list(zip(questions, answers))
demo_samples, test_samples = get_demo_and_test_samples(qa_pairs)

# From here on `questions` / `answers` refer to the test split only.
questions = [question for question, _answer in test_samples]
answers = [answer for _question, answer in test_samples]

[32m2024-09-17 14:29:31.070[0m | [34m[1mDEBUG   [0m | [36mutils[0m:[36mread_txt_file[0m:[36m14[0m - [34m[1mReading file: ../data\NL4OPT\nl4opt.txt[0m
[32m2024-09-17 14:29:31.071[0m | [34m[1mDEBUG   [0m | [36mutils[0m:[36mread_txt_file[0m:[36m16[0m - [34m[1mFile read successfully: ../data\NL4OPT\nl4opt.txt[0m
[32m2024-09-17 14:29:31.072[0m | [1mINFO    [0m | [36mutils[0m:[36mget_nl4opt_qas[0m:[36m35[0m - [1mNumber of questions: 245[0m
[32m2024-09-17 14:29:31.073[0m | [1mINFO    [0m | [36mutils[0m:[36mget_nl4opt_qas[0m:[36m36[0m - [1mNumber of answers: 245[0m
[32m2024-09-17 14:29:31.073[0m | [1mINFO    [0m | [36mutils[0m:[36mget_demo_and_test_samples[0m:[36m47[0m - [1mNumber of demo samples: 20[0m
[32m2024-09-17 14:29:31.074[0m | [1mINFO    [0m | [36mutils[0m:[36mget_demo_and_test_samples[0m:[36m48[0m - [1mNumber of test samples: 225[0m


### GPT-4o-mini + 2-shot CoT + Structured Output (225 test samples, relative error < 1%): accuracy = 70.22%

### GPT-4o-mini + 2-shot CoT + Structured Output (225 test samples, relative error < 1.5%): accuracy = 75.56%


In [4]:
# class VarRecStep(BaseModel):
#     reasoning: str

# Pydantic model with a single string field. In the visible cells it is only
# used as the type of MultiStepInOnePrompt.var_recg.
# NOTE(review): presumably holds the output of a variable-recognition stage --
# confirm against the prompt that consumes it.
class VarRecg(BaseModel):
    # steps: list[VarRecStep]
    var_recg: str
    
# class ConObjStep(BaseModel):
#     reasoning: str

# Pydantic model with two lists of strings. In the visible cells it is only
# used as the type of MultiStepInOnePrompt.con_obj.
# NOTE(review): `List` is not imported in the visible import cell; presumably it
# arrives through `from utils import *` -- confirm.
class ConObj(BaseModel):
    # steps: List[ConObjStep]
    # NOTE(review): presumably text passages supporting each expression -- confirm.
    evidence: List[str]
    # NOTE(review): presumably constraint/objective expressions -- confirm.
    math_expression: List[str]

# class CodeStep(BaseModel):
#     reasoning: str

# Structured-output schema passed as `response_format` to the chat-completion
# request; each parsed response is an instance of this model.
# NOTE(review): documented with `#` comments rather than a docstring on purpose;
# a class docstring may be emitted into the generated JSON schema and would then
# change the request sent to the model -- confirm before adding one.
class Code(BaseModel):
    # steps: List[CodeStep]
    # Declared before `code`; presumably so the model writes its reasoning
    # first -- confirm.
    reasoning: List[str]
    # Generated solver snippet; extracted into `codes` and executed later.
    code: str     

# Aggregate model bundling the three schemas above into one response object.
# It is not referenced by any other visible cell (the request uses `Code`).
class MultiStepInOnePrompt(BaseModel):
    var_recg: VarRecg
    con_obj: ConObj
    code: Code

In [5]:
# System prompt header: states the task; the worked examples are appended below.
sys_prompt = """You are an expert in optimization problems and domain specific language generation. Your task is to convert the textual optimization text into a piece of code.
Here are some examples that you should refer to:\n"""

# Two few-shot QUESTION/CODE demonstrations. The CODE parts use a `solver`
# object without creating it and never call Solve(); that boilerplate is added
# around the generated snippet before execution (see `prefix` / `suffix`).
example = """
QUESTION:
A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?
CODE:
x1 = solver.IntVar(0, solver.infinity(), 'x1')
x2 = solver.IntVar(0, solver.infinity(), 'x2')
solver.Add(46 * x1 + 13 * x2 <= 1345)
solver.Add(43 * x1 + 4 * x2 <= 346)
solver.Add(56 * x1 + 45 * x2 <= 1643)
objective = solver.Objective()
objective.SetCoefficient(x1, 10.0)
objective.SetCoefficient(x2, 15.0)
objective.SetMaximization()

QUESTION:
Ben is growing apples and pears on his orchard. He has 50 acres available on which he must grow a minimum of 5 acres of apples and a minimum of 10 acres of pears to meet demands. The profit per apple is $2 and the profit per pear is $4. He prefers to grow more pears than apples but limitations in his workforce allow him to grow at most twice the amount of pears as apples. How many of each fruit should Ben grow in order to maximize his profit? What is that profit?
CODE:
x1 = solver.IntVar(5, solver.infinity(), 'x1')
x2 = solver.IntVar(10, solver.infinity(), 'x2')
solver.Add(x1 + x2 <= 50)
solver.Add(x1 >= 5)
solver.Add(x2 >= 10)
solver.Add(x2 <= 2 * x1)
objective = solver.Objective()
objective.SetCoefficient(x1, 2.0)   
objective.SetCoefficient(x2, 4.0)
objective.SetMaximization()
"""

# Assemble the final system prompt (header + examples + step-by-step
# instruction) and print it so the exact text sent to the model is visible.
# Changing any character of these literals changes the request, and therefore
# possibly the reported accuracy.
sys_prompt = sys_prompt + example + "\nPlease finish the task think step by step."
print(sys_prompt)

You are an expert in optimization problems and domain specific language generation. Your task is to convert the textual optimization text into a piece of code.
Here are some examples that you should refer to:

QUESTION:
A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?
CODE:
x1 = solver.IntVar(0, solver.infinity(), 'x1')
x2 = solver.IntVar(0, solver.infinity(), 'x2')
solver.Add(46 * x1 + 13 * x2 <= 1345)
solver.Add(43 * x1 + 4 * x2 <= 346)
solver.Ad

In [8]:
batch_size = 8
lp_reasoning_list = []
for idx in tqdm(range(0, len(questions), batch_size)):
    batch = questions[idx:idx+batch_size]
    
    tasks = [asyncclient.beta.chat.completions.parse(
        model="gpt-4o-mini",
        temperature=0,
        response_format=Code,
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": f"QUESTION: {q}"}
        ]) for q in batch
    ]

    combined_responses = await asyncio.gather(*tasks)
    lp_reasoning_list.extend([r.choices[0].message.parsed for r in combined_responses])

100%|██████████| 29/29 [02:24<00:00,  4.97s/it]


In [9]:
# Persist the parsed model outputs so the expensive API run need not be
# repeated. The output directory is created first: without it, a missing
# directory raises FileNotFoundError only after all requests have completed.
os.makedirs(OUTPUT_DIR, exist_ok=True)
filename = f'code_gen_large-{dt}.pkl'
with open(os.path.join(OUTPUT_DIR, filename), 'wb') as f:
    pickle.dump(lp_reasoning_list, f)

In [10]:
# Keep only the generated code string of every parsed response, in order.
codes = [parsed.code for parsed in lp_reasoning_list]

In [11]:
# Boilerplate placed before every generated snippet: imports OR-Tools and
# creates the `solver` object the snippets refer to. The guard raises an
# explicit RuntimeError; the previous bare `raise` had no active exception and
# only produced "No active exception to reraise".
# NOTE(review): GLOP is OR-Tools' linear-programming backend, while the few-shot
# examples declare variables with IntVar. Presumably integrality is relaxed
# here -- confirm whether a MIP backend (e.g. 'SCIP') is intended.
prefix = """
from ortools.linear_solver import pywraplp
solver = pywraplp.Solver.CreateSolver('GLOP') 
if not solver: raise RuntimeError('GLOP solver is unavailable')
"""

# Boilerplate placed after every generated snippet: runs the solver and stores
# the result status in `status`.
suffix = """
status = solver.Solve()
"""

def complement_code(code: str) -> str:
    """Wrap a generated snippet into a complete, runnable program.

    Args:
        code: Solver statements (variables, constraints, objective) that use a
            pre-existing `solver` object.

    Returns:
        The source text `prefix + code + suffix`.
    """
    return prefix + code + suffix

In [12]:
def clean_code(code: str) -> str:
    """Normalise a generated snippet line by line.

    Every line is stripped of leading and trailing whitespace. In lines that
    start with ``solver.Add`` and contain neither ``<=`` nor ``>=``, each ``<``
    becomes ``<=`` and each ``>`` becomes ``>=``.

    Note that indentation is discarded, so indented blocks in the input come
    out flattened.

    Args:
        code: Generated source text, lines separated by ``\\n``.

    Returns:
        The normalised source text, lines joined with ``\\n``.
    """
    def _normalise(raw_line: str) -> str:
        stmt = raw_line.strip()
        if not stmt.startswith('solver.Add'):
            return stmt
        if re.search(r'<=|>=', stmt):
            # Already a non-strict inequality; leave untouched.
            return stmt
        stmt = re.sub(r'<', r'<=', stmt)
        return re.sub(r'>', r'>=', stmt)

    return '\n'.join(_normalise(raw_line) for raw_line in code.split('\n'))

In [13]:
def execute_code(code: str) -> float:
    """Execute a complete solver program and return its objective value.

    The program runs in a single fresh namespace. Passing separate globals and
    locals to ``exec`` executes the code as if it were a class body, so
    functions, classes and generator expressions defined by the program could
    not see the program's own variables (NameError). With one namespace those
    lookups succeed. The program must therefore be self-contained; it cannot
    read names from the notebook.

    Args:
        code: Source text that defines ``pywraplp``, ``solver`` and ``status``.

    Returns:
        ``solver.Objective().Value()`` when ``status`` equals
        ``pywraplp.Solver.OPTIMAL``, otherwise positive infinity.

    Raises:
        KeyError: If the program does not define ``solver``, ``status`` or
            ``pywraplp``.
        Exception: Anything raised by the executed program itself.
    """
    # SECURITY: `code` is model-generated text and exec() runs it with full
    # interpreter privileges. Only run this in a disposable environment.
    namespace = {}
    exec(code, namespace)
    solver = namespace["solver"]

    if namespace['status'] == namespace['pywraplp'].Solver.OPTIMAL:
        return solver.Objective().Value()
    # Same value as np.inf, without relying on `np` arriving via a star import.
    return float('inf')

In [14]:
# Run every generated snippet: clean it, wrap it with the solver boilerplate,
# execute it, and record the objective value. A failing snippet is logged and
# recorded as the string "Error" so `pred_answers` stays aligned with `codes`.
pred_answers = []
for i, code_str in enumerate(codes):
    try:
        ans = execute_code(complement_code(clean_code(code_str)))
        loguru.logger.info(f"question {i} obtain answer")
        pred_answers.append(ans)
    except Exception as e:
        loguru.logger.error(f"Error for question {i}: {e}")
        pred_answers.append("Error")

[32m2024-09-16 18:28:09.931[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 0 obtain answer[0m
[32m2024-09-16 18:28:09.933[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 1 obtain answer[0m
[32m2024-09-16 18:28:09.934[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 2 obtain answer[0m
[32m2024-09-16 18:28:09.934[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 3 obtain answer[0m
[32m2024-09-16 18:28:09.935[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 4 obtain answer[0m
[32m2024-09-16 18:28:09.936[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 5 obtain answer[0m
[32m2024-09-16 18:28:09.936[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 6 obtain answer[0m
[32m2024-09-16 18:28:09.937[0m | [1mINFO    

[32m2024-09-16 18:28:09.945[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 20 obtain answer[0m
[32m2024-09-16 18:28:09.945[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 21 obtain answer[0m
[32m2024-09-16 18:28:09.946[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 22 obtain answer[0m
[32m2024-09-16 18:28:09.947[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 23 obtain answer[0m
[32m2024-09-16 18:28:09.947[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 24 obtain answer[0m
[32m2024-09-16 18:28:09.948[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 25 obtain answer[0m
[32m2024-09-16 18:28:09.948[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 26 obtain answer[0m
[32m2024-09-16 18:28:09.949[0m | [1mIN

In [19]:
# Relative-error tolerance below which a prediction counts as correct.
REL_TOL = 1e-2

# Compare each prediction with its reference answer. Failed runs ("Error") are
# skipped, so `correct` can be shorter than `answers`.
correct = []
for p, r in zip(pred_answers, answers):
    if p == 'Error':
        continue
    pred, ref = float(p), float(r)
    if pred == float('inf') and ref == float('inf'):
        # Both sides report "no optimal solution".
        correct.append(True)
    elif ref == 0:
        # Relative error is undefined for a zero reference (the old code
        # raised ZeroDivisionError); fall back to the absolute error.
        correct.append(abs(pred) < REL_TOL)
    else:
        # Divide by |ref|: dividing by a negative reference made the ratio
        # negative, so any prediction passed the tolerance check.
        correct.append(abs(pred - ref) / abs(ref) < REL_TOL)

In [20]:
# Accuracy in percent. The denominator is the number of reference answers, so
# predictions skipped as "Error" (absent from `correct`) count as wrong.
(sum(correct) / len(answers)) * 100

70.22222222222221