In [1]:
import os
import re
import json
import random
import sys
import pickle
import datetime
import asyncio
import nest_asyncio

import gurobipy as gp

from openai import OpenAI, AsyncClient
from json import JSONDecodeError
from tqdm.auto import tqdm
from colorama import Fore, Style
from pydantic import BaseModel
from typing import List
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.llms.lmstudio import LMStudio

sys.path.append('../')
from utils import *

nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm



In [3]:
# Paths and dataset selection for this run.
DATA_DIR = '../data'
DATASET_NAME = 'NL4OPT'
OUTPUT_DIR = '../output'

# Timestamp captured once per run; used later to tag the output file name.
dt = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')

nl4opt_data = read_txt_file(os.path.join(DATA_DIR, DATASET_NAME, 'nl4opt.txt'))

# The full dataset gets its own names so that `questions` / `answers` only ever
# refer to one thing (the demo split) and never hold a stale full-dataset value.
all_questions, all_answers = get_nl4opt_qas(nl4opt_data)
assert len(all_questions) == len(all_answers)

qa_pairs = list(zip(all_questions, all_answers))
demo_samples, test_samples = get_demo_and_test_samples(qa_pairs)

# Downstream cells prompt with `questions` and score against `answers`,
# both taken from the demo split.
questions = [question for question, _ in demo_samples]
answers = [answer for _, answer in demo_samples]

[32m2024-09-18 17:18:43.814[0m | [34m[1mDEBUG   [0m | [36mutils[0m:[36mread_txt_file[0m:[36m15[0m - [34m[1mReading file: ../data\NL4OPT\nl4opt.txt[0m
[32m2024-09-18 17:18:43.815[0m | [34m[1mDEBUG   [0m | [36mutils[0m:[36mread_txt_file[0m:[36m17[0m - [34m[1mFile read successfully: ../data\NL4OPT\nl4opt.txt[0m
[32m2024-09-18 17:18:43.816[0m | [1mINFO    [0m | [36mutils[0m:[36mget_nl4opt_qas[0m:[36m36[0m - [1mNumber of questions: 245[0m
[32m2024-09-18 17:18:43.817[0m | [1mINFO    [0m | [36mutils[0m:[36mget_nl4opt_qas[0m:[36m37[0m - [1mNumber of answers: 245[0m
[32m2024-09-18 17:18:43.817[0m | [1mINFO    [0m | [36mutils[0m:[36mget_demo_and_test_samples[0m:[36m48[0m - [1mNumber of demo samples: 20[0m
[32m2024-09-18 17:18:43.817[0m | [1mINFO    [0m | [36mutils[0m:[36mget_demo_and_test_samples[0m:[36m49[0m - [1mNumber of test samples: 225[0m


In [4]:
# Display the demo-split questions that are later passed to the LLM program.
questions

['A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?',
 'Ben is growing apples and pears on his orchard. He has 50 acres available on which he must grow a minimum of 5 acres of apples and a minimum of 10 acres of pears to meet demands. The profit per apple is $2 and the profit per pear is $4. He prefers to grow more pears than apples but limitations in his workforce allow him to grow at most twice the amount of pears as apples. How many of each fruit

In [5]:
# Model identifier and HTTP endpoint of the LM Studio server used for generation.
LMSTUDIO_MODEL_NAME = "bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF"
LMSTUDIO_BASE_URL = "http://localhost:1234/v1"

# Sampling temperature is pinned to 0.0 for this experiment.
llm = LMStudio(
    model_name=LMSTUDIO_MODEL_NAME,
    base_url=LMSTUDIO_BASE_URL,
    temperature=0.0,
)

In [6]:
# Structured result expected back from the LLM for a single question.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose — pydantic copies a model's docstring into its JSON schema description,
# and that schema may be embedded in the prompt sent to the model; confirm before
# adding one.
class Code(BaseModel):
    # Reasoning steps emitted by the model; no cell shown here reads this field.
    reasoning: List[str]
    # Generated modelling code; read back later through `.code`.
    code: str

In [17]:
# Instruction header of the prompt. The few-shot examples and the question slot
# are appended below.
prompt_template_str = """You are an expert in optimization problems and domain specific language generation. 
Your task is to convert the textual optimization text into a piece of code.
DO NOT ADD ANY COMMENTS OR EXPLANATION TO THE CODE. JUST OUTPUT THE CODE.
Here are some examples that you should refer to:\n"""

# Two worked QUESTION/CODE pairs. The code assumes a Gurobi model named `m` and
# the `gp` module alias already exist.
example = """
QUESTION:
A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?
CODE:
x = m.addVar(name="Oil Max", vtype=gp.GRB.INTEGER)
y = m.addVar(name="Oil Max Pro", vtype=gp.GRB.INTEGER)
m.setObjective(10 * x + 15 * y, gp.GRB.MAXIMIZE)
m.addConstr(46 * x + 13 * y <= 1345)
m.addConstr(43 * x + 4 * y <= 346)
m.addConstr(56 * x + 45 * y <= 1643)

QUESTION:
Ben is growing apples and pears on his orchard. He has 50 acres available on which he must grow a minimum of 5 acres of apples and a minimum of 10 acres of pears to meet demands. The profit per apple is $2 and the profit per pear is $4. He prefers to grow more pears than apples but limitations in his workforce allow him to grow at most twice the amount of pears as apples. How many of each fruit should Ben grow in order to maximize his profit? What is that profit?
CODE:
x = m.addVar(name="apples", vtype=gp.GRB.INTEGER)
y = m.addVar(name="pears", vtype=gp.GRB.INTEGER)
m.setObjective(2 * x + 4 * y, gp.GRB.MAXIMIZE)
m.addConstr(x + y <= 50)
m.addConstr(x >= 5)
m.addConstr(y >= 10)
m.addConstr(y <= 2 * x)
"""

# `{q}` is the placeholder filled with the question text at call time. The label
# is spelled "QUESTION" so it matches the label used in the examples above
# (it previously read "QUESTIOM").
prompt_template_str = prompt_template_str + example + "\nQUESTION: {q}"
print(prompt_template_str)

You are an expert in optimization problems and domain specific language generation. Your task is to convert the textual optimization text into a piece of code.
DO NOT ADD ANY COMMENTS OR EXPLANATION TO THE CODE. JUST OUTPUT THE CODE.
Here are some examples that you should refer to:

QUESTION:
A car manufacturer makes two types of car oils: Oil Max and Oil Max Pro. A container of Oil Max contains 46 grams of substance A, 43 grams of substance B and 56 grams of substance C. A container of Oil Max Pro contains 13 grams of substance A, 4 grams of substance B and 45 grams of substance C. The car manufacturer has 1345 grams of substance A, 346 grams of substance B, 1643 grams of substance C. In addition, the profit per container of Oil Max is $10 and the profit per container of Oil Max Pro is $15. How many containers of each of oil should the car manufacturer make to maximize profit?
CODE:
x = m.addVar(name="Oil Max", vtype=gp.GRB.INTEGER)
y = m.addVar(name="Oil Max Pro", vtype=gp.GRB.INTEGE

In [8]:
# Bind the prompt template, the local LLM and the `Code` output schema into one
# callable. Later cells invoke it as `program(q=...)` and read `.code` from the result.
program = LLMTextCompletionProgram.from_defaults(
    output_cls=Code,
    prompt_template_str=prompt_template_str,
    llm=llm,
    verbose=True,
)

In [18]:
# NOTE(review): disabled batched/async variant of the generation loop that follows.
# As written it would not run: `program(q=q)` is called synchronously elsewhere in
# this notebook, so the list handed to `asyncio.gather` would hold results rather
# than awaitables (an async entry point such as `program.acall(q=q)` would be
# needed — confirm against the installed llama_index version). It also collects
# `r.code` strings, whereas the active loop stores the whole program result.
# batch_size = 8
# codes = []
# for i in tqdm(range(0, len(questions), batch_size)):
#     batch = questions[i:i+batch_size]

#     tasks = [program(q=q) for q in batch]

#     combined_responses = await asyncio.gather(*tasks)
#     codes.extend([r.code for r in combined_responses])

In [20]:
# Run the LLM program once per demo question and keep every structured result.
# tqdm wraps the list itself (not `enumerate(...)`), so it can read the length and
# show a real progress bar with an ETA instead of a bare iteration counter.
codes = []
for question in tqdm(questions, desc="Generating code"):
    codes.append(program(q=question))

20it [05:21, 16.05s/it]


In [21]:
# Persist the raw program results of this run; the timestamp `dt` keeps runs apart.
filename = 'e2e_codegen_localllm_nl4opt_gurobi_' + dt + '.pkl'
# Create the output directory on first use so a fresh checkout does not fail here.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# NOTE(review): the pickled objects are instances of the notebook-defined `Code`
# class, so loading the file elsewhere presumably needs that class available — confirm.
with open(os.path.join(OUTPUT_DIR, filename), 'wb') as f:
    pickle.dump(codes, f)

In [22]:
# Pull the generated source text out of each structured result, in question order.
code_strs = [generated.code for generated in codes]

In [None]:
# Boilerplate placed before a generated snippet: creates a Gurobi environment with
# OutputFlag set to 0 and the model `m` that the snippet adds to.
prefix = """
import gurobipy as gp
env = gp.Env(empty=True)
env.setParam("OutputFlag",0)
env.start()
m = gp.Model(env=env)
"""

# Boilerplate placed after a generated snippet: solves the model.
suffix = """
m.optimize()
"""


def complement_code(code: str) -> str:
    """Wrap a generated snippet into a complete, runnable program.

    Args:
        code: Model-building statements that operate on a model named ``m``.

    Returns:
        The source text ``prefix + code + suffix``. The return annotation used
        to say ``float``, although a string has always been returned.
    """
    return prefix + code + suffix

In [40]:
def clean_code(code: str) -> str:
    """Normalise a generated snippet before it is executed.

    Three repairs are applied:

    * statements glued together as ``...)m...`` are split onto separate lines;
    * every line is stripped of leading and trailing whitespace;
    * in ``m.addConstr`` lines, strict ``<`` / ``>`` become ``<=`` / ``>=``.

    The split runs first and the operator fix is applied per operator, so a strict
    comparison is repaired even when the same original line also contains a
    ``<=`` or ``>=`` (previously such lines were skipped entirely).

    NOTE(review): stripping every line also removes indentation, so snippets with
    indented blocks (loops, conditionals) will not survive this step.

    Args:
        code: Raw snippet text as produced by the model.

    Returns:
        The cleaned snippet, one statement per line.
    """
    # Split glued statements first so each one gets its own per-line treatment.
    separated = code.replace(')m', ')\nm')

    cleaned_lines = []
    for line in separated.split('\n'):
        line = line.strip()
        if line.startswith('m.addConstr'):
            # The negative lookahead leaves existing `<=` / `>=` untouched.
            line = re.sub(r'<(?!=)', '<=', line)
            line = re.sub(r'>(?!=)', '>=', line)
        cleaned_lines.append(line)
    return '\n'.join(cleaned_lines)

In [41]:
def execute_code(code: str) -> float:
    """Execute a generated program and return the objective value of its model ``m``.

    Args:
        code: Complete Python source expected to bind a solved model to ``m``.

    Returns:
        ``m.objVal`` from the executed program, or ``inf`` when ``m`` was not
        defined or its objective value could not be read.

    Raises:
        Exception: Anything raised while the program itself runs is propagated;
            only the objective lookup is guarded.
    """
    # SECURITY: `code` is model-generated text and is executed without sandboxing.
    #
    # One dict serves as both globals and locals. With separate mappings, `exec`
    # runs the code like a class body, so comprehensions and generator expressions
    # in the generated code cannot see variables that the code itself defined.
    # A copy of this module's globals keeps the same names visible as before,
    # without letting the executed code write back into them.
    namespace = dict(globals())
    # Drop any pre-existing `m` so only a model created by `code` is reported.
    namespace.pop("m", None)
    exec(code, namespace)

    try:
        return namespace["m"].objVal
    except Exception:
        # Best effort: no model, or no objective value available.
        return float("inf")

In [42]:
# Clean, complete and execute every generated snippet. Each question contributes
# exactly one entry: the value returned by `execute_code`, or the string "Error"
# when anything in the pipeline raises.
pred_answers = []
for idx, raw_snippet in enumerate(code_strs):
    try:
        program_text = complement_code(clean_code(raw_snippet))
        objective = execute_code(program_text)
        loguru.logger.info(f"question {idx} obtain answer")
        pred_answers.append(objective)
    except Exception as err:
        loguru.logger.error(f"Error for question {idx}: {err}")
        pred_answers.append("Error")

[32m2024-09-17 21:46:22.188[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 0 obtain answer[0m
[32m2024-09-17 21:46:22.189[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 1 obtain answer[0m
[32m2024-09-17 21:46:22.191[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 2 obtain answer[0m
[32m2024-09-17 21:46:22.193[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 3 obtain answer[0m
[32m2024-09-17 21:46:22.195[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 4 obtain answer[0m
[32m2024-09-17 21:46:22.196[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 5 obtain answer[0m


Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (win64 - Windows 10.0 (19045.2))

CPU model: AMD Ryzen 7 5700X 8-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 3 rows, 2 columns and 5 nonzeros
Model fingerprint: 0x0bcfab34
Variable types: 0 continuous, 2 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e+00, 2e+01]
  Objective range  [2e+00, 5e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [2e+00, 4e+02]
Presolve removed 1 rows and 0 columns
Presolve time: 0.00s
Presolved: 2 rows, 2 columns, 4 nonzeros
Variable types: 0 continuous, 2 integer (0 binary)
Found heuristic solution: objective 101.0000000
Found heuristic solution: objective 100.0000000

Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 16 (of 16 available processors)

Solution count 2: 100 101 

Optimal solution found (tolerance 1.00e-04)
Best objective 1.00

[32m2024-09-17 21:46:22.206[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 6 obtain answer[0m
[32m2024-09-17 21:46:22.208[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 7 obtain answer[0m
[32m2024-09-17 21:46:22.210[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 8 obtain answer[0m
[32m2024-09-17 21:46:22.213[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 9 obtain answer[0m
[32m2024-09-17 21:46:22.215[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 10 obtain answer[0m
[32m2024-09-17 21:46:22.217[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 11 obtain answer[0m
[32m2024-09-17 21:46:22.219[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 12 obtain answer[0m
[32m2024-09-17 21:46:22.220[0m | [1mINFO  

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (win64 - Windows 10.0 (19045.2))

CPU model: AMD Ryzen 7 5700X 8-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 3 rows, 2 columns and 6 nonzeros
Model fingerprint: 0xd6f98ba5
Variable types: 0 continuous, 2 integer (0 binary)
Coefficient statistics:
  Matrix range     [4e+00, 6e+01]
  Objective range  [1e+01, 2e+01]
  Bounds range     [0e+00, 0e+00]
  RHS range        [3e+02, 2e+03]
Found heuristic solution: objective 80.0000000
Presolve removed 3 rows and 2 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 1 (of 16 available processors)

Solution count 2: 540 80 

Optimal solution found (tolerance 1.00e-04)
Best objective 5.400000000000e+02, best bound 5.400000000000e+02, gap 0.0000%


[32m2024-09-17 21:46:22.237[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 17 obtain answer[0m
[32m2024-09-17 21:46:22.241[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 18 obtain answer[0m
[32m2024-09-17 21:46:22.243[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mquestion 19 obtain answer[0m


In [52]:
# Inspect demo question 5; in the saved outputs its prediction (2500.0) differs
# from the reference answer ('7000.0').
questions[5]

'A suspicious factory has 100 sq. feet of space. It makes bootleg phones and laptops. Phones require 2 hours of labor and cost $12 for each sq. foot of space allocated for phone production (cost of electricity and equipment). Laptops require 3 hours of labor and cost $15 for each sq. foot of space allocated for laptop production. Phones produce a net revenue of $50 per sq. foot while laptops produce a net revenue of $70 per sq. foot. The factory wants to spend at most $5000 and 2000 hours of labor. What is the optimal factory layout to maximize revenue?'

In [51]:
# Show the full program (prefix + cleaned snippet + suffix) built for question 5.
print(complement_code(clean_code(code_strs[5])))


import gurobipy as gp
env = gp.Env(empty=True)
env.setParam("OutputFlag",0)
env.start()
m = gp.Model(env=env)
x = m.addVar(name="phones", vtype=gp.GRB.INTEGER)
y = m.addVar(name="laptops", vtype=gp.GRB.INTEGER)
m.setObjective(50 * x + 70 * y, gp.GRB.MAXIMIZE)
m.addConstr(2 * x + 3 * y <= 100) # space constraint
m.addConstr(2 * x * 12 + 3 * y * 15 <= 5000) # cost constraint
m.addConstr(2 * x + 3 * y <= 2000) # labor hours constraint
m.optimize()



In [43]:
# Predicted objective values in question order; `inf` and "Error" mark runs where
# no objective could be read or the pipeline raised.
print(pred_answers)

[540.0, 166.0, 950.0, 36900.0, 342750.0, 2500.0, 100.0, 11980.0, 480.0, 142.0, 465.0, inf, 67.0, inf, 1500.0, 509.0, 1060.0, 540.0, 20.0, 50.0]


In [44]:
# Reference answers for the demo split, for side-by-side comparison with the predictions.
print(answers)

['540.0', '166.66666666666669', '950.0', '36900.0', '342750.0', '7000.0', '100.0', '11980.0', '480.0', '142.0', '465.0', inf, '67.0', inf, '1500.0', '511.42857142857133', '1060.0', '2500.0', '20.0', '-99999']


In [54]:
# Report accuracy as a percentage at each tolerance. `mark` comes from `utils`;
# its per-question results are summed and divided by the number of reference answers.
for tolerance in (1e-2, 1e-4):
    marks_total = sum(mark(pred_answers, answers, tolerance))
    print(f"Accuracy under error {tolerance}: {marks_total / len(answers) * 100}")

Accuracy under error 0.01: 90.0
Accuracy under error 0.0001: 80.0
