# Environment Configuration

In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_classic.chains import RetrievalQA
from langchain_classic.agents import initialize_agent, AgentType, Tool
import re
from datetime import time
from langchain_core.messages import HumanMessage
import openai
import requests
import time
import numpy as np
import os
import pandas as pd
import sys
import time
from io import StringIO
import contextlib

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

# OpenAI API key passed to the embedding and chat clients created below.
# The OPENAI_API_KEY environment variable is preferred so the secret does not
# have to be saved inside the notebook; when it is not set, the placeholder
# below is used and must be replaced with your own OpenAI API key.
user_api_key = os.environ.get("OPENAI_API_KEY", "Your OpenAI API Key")

# Main Model

## Classification

In [None]:
# Chat model used by both the retrieval QA chain and the classification agent;
# temperature 0.0 keeps the sampling as deterministic as the API allows.
llm1 = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.0,
    openai_api_key=user_api_key,
)

# Load the reference CSV and index its documents in a FAISS vector store
# built on OpenAI embeddings.
loader = CSVLoader(
    file_path="Large_Scale_Or_Files/RefData.csv",
    encoding="utf-8",
)
documents = data = loader.load()
embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
vectors = FAISS.from_documents(documents, embeddings)

# Retrieval QA: the 5 most similar reference documents are "stuffed" into one
# prompt, and the source documents are returned alongside the answer.
retriever = vectors.as_retriever(search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm1,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

# Expose the QA chain to the agent under the tool name "FileQA".
qa_tool = Tool(
    func=qa_chain.invoke,
    name="FileQA",
    description="Use this tool to answer questions about the problem type of the text. ",
)

# ReAct-style worked examples for inputs that reference CSV data. Both examples
# call the FileQA tool and then check the observed type against the allowed
# list. The allowed list in the first example previously contained an empty
# entry where "Others with CSV" belongs; it now matches the second example.
few_shot_examples_csv = """

Query: What is the problem type in operation of the text? Please give the answer directly. Text:There are three best-selling items (P1, P2, P3) on Amazon with the profit w_1,w_2,w_3.There is an independent demand stream for each of the products. The objective of the company is to decide which demands to be fufilled over a ﬁnite sales horizon [0,10] to maximize the total expected revenue from ﬁxed initial inventories. The on-hand inventories for the three items are c_1,c_2,c_3 respectively. During the sales horizon, replenishment is not allowed and there is no any in-transit inventories. Customers who want to purchase P1,P2,P3 arrive at each period accoring to a Poisson process with a_1,a_2,a_3 the arrival rates respectively. Decision variables y_1,y_2,y_3 correspond to the number of requests that the firm plans to fulfill for product 1,2,3. These variables are all positive integers.

Thought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type in operation of the text? text:There are three best-selling items (P1, P2, P3) on Amazon with the profit w_1, w_2, w_3. ..."

Observation: The problem type of the text is Network Revenue Management.

Thought: The problem type Network Revenue Management is in the allowed list [Network Revenue Management, Resource Allocation, Transportation, Facility Location Problem, Assignment Problem, Others with CSV, Others without CSV]. I could get the final answer and finish.

Final Answer: Network Revenue Management.

---
Query: What is the problem type in operation of the text? Please give the answer directly. Text:A supermarket needs to allocate various products, including high-demand items like the Sony Alpha Refrigerator, Sony Bravia XR, and Sony PlayStation 5, across different retail shelves. The product values and space requirements are provided in the "Products.csv" dataset. Additionally, the store has multiple shelves, each with a total space limit and specific space constraints for Sony and Apple products, as outlined in the "Capacity.csv" file. The goal is to determine the optimal number of units of each Sony product to place on each shelf to maximize total value while ensuring that the space used by Sony products on each shelf does not exceed the brand-specific limits. The decision variables x_ij represent the number of units of product i to be placed on shelf j.

Thought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type in operation of the text? Text:A supermarket needs to allocate various products, including high-demand items like the Sony Alpha Refrigerator, Sony Bravia XR, ...."

Observation: The problem type of the text is Inventory Management.

Thought: The problem type Inventory Management is not in the allowed list [Network Revenue Management, Resource Allocation, Transportation, Facility Location Problem, Assignment Problem, Others with CSV, Others without CSV]. Therefore, the problem type should be Others with CSV - Inventory management
Final Answer: Others with CSV - Inventory management

"""

# Worked example for inputs that mention neither '.csv' nor 'column': the
# answer is given directly, without calling any tool.
few_shot_examples_without_csv = """
Query: A book distributor needs to shuffle a bunch of books from two warehouses (supply points: W1, W2) to libraries (demand points: L1, L2), using a pair of sorting centers (transshipment points: C1, C2). W1 has a stash of up to p_1 books per day it can send out. W2 can send out up to p_2 books daily. Library L1 needs a solid d_1 books daily. L2 requires d_2 books daily. Storage at the sorting centers has no cap. Transportation costs: From W1 to C1 is t_11 dollars, to C2 is t_12 dollars. From W2 to C1 is t_21 dollars, and to C2 it__ t_22 dollars. From the centers to the libraries: From C1 to L1, it__l cost t_31 dollars, to L2 it__ t_32 dollars. From C2 to L1, it__ t_41 dollars, to L2 it__ t_42 dollars. The strategy here is all about minimizing transportation spend while making sure those libraries get their books on time. We__l use x_11 and x_12 to track shipments from W1 to C1 and C2, and x_21 and x_22 for shipments from W2. For the books going out to the libraries, y_11 and y_12 will handle the flow from C1 to L1 and L2, and y_21 and y_22 from C2. Variables are all positive integers.

Thought: I need to determine the problem type of the text. The Query doesn't contain any descriptions like '.csv' and 'column'. I'll direcrly classify the problem type as 'Others without CSV'.

Final Answer: Others without CSV

"""
# Agent prompt prefix: states the allowed categories and embeds both example
# blocks above. It is an f-string, so the examples are substituted here, once.
# NOTE(review): the category list in this prefix names "SBLP" while the
# few-shot examples name "Assignment Problem" — confirm which list is intended.
prefix = f"""I am a helpful assistant that can answer Querys about operation problems. My response must align with one of the following categories: Network Revenue Management, Resource Allocation, Transportation, Facility Location Problem, SBLP, Others with CSV, and Others without CSV. Firstly you need to identify whether the text contains any descriptions like '.csv' and 'column'.

Always remember! If the input does not contain any description like '.csv' and 'column', and the values for all the variables are given directly, I will directly classify the problem type as 'Others without CSV'. Like the example {few_shot_examples_without_csv}. 

However, if the text contains descriptions like '.csv' or 'column', and the values for all the variables are not given directly, I will use the following examples {few_shot_examples_csv} as a guide. And answer the Query by given the answer directly.

"""

# Tail of the agent prompt; {input} and {agent_scratchpad} are left as
# placeholders (this is a plain string, not an f-string).
suffix = """

Begin!

Query: {input}
{agent_scratchpad}"""

# Zero-shot ReAct agent for problem-type classification. FileQA is its only
# tool, and the custom prefix/suffix replace the default prompt text.
classification_agent = initialize_agent(
    [qa_tool],
    llm1,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    agent_kwargs=dict(prefix=prefix, suffix=suffix),
    handle_parsing_errors=True,
    verbose=True,
)
# NOTE(review): this only sets an attribute on the `openai` module; confirm the
# installed client actually reads it, otherwise the timeout has no effect.
openai.api_request_timeout = 60

## Large Scale OR

### NRM

In [None]:


def retrieve_similar_docs(query,retriever):
    """Run ``query`` through ``retriever`` and return the hits as plain dicts.

    Each hit is converted to ``{"content": page_content, "metadata": metadata}``
    and the list keeps the order in which ``retriever.invoke`` returned them.
    """
    return [
        {"content": hit.page_content, "metadata": hit.metadata}
        for hit in retriever.invoke(query)
    ]


def process_dataset_address(dataset_address: str) -> List[Document]:
    """Turn every row of the listed CSV files into a ``Document``.

    Args:
        dataset_address: Newline-separated CSV file paths. Whitespace around
            each path is stripped and blank lines are ignored.

    Returns:
        One ``Document`` per CSV row, in file order then row order. Each
        ``page_content`` is the row rendered as ``"col = value, col = value"``
        over all columns of that file.

    A file that cannot be read or processed is reported on stdout and skipped;
    the remaining files are still processed.
    """
    documents = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            # A blank line carries no path; skipping it avoids handing an empty
            # string to pandas and printing a misleading error.
            continue
        try:
            df = pd.read_csv(file_address)
            for _, row in df.iterrows():
                page_content = ", ".join([f"{col} = {row[col]}" for col in df.columns])
                documents.append(Document(page_content=page_content))
        except Exception as e:
            # Deliberate best effort: one bad file must not abort the others.
            print(f"Error processing file {file_address}: {e}")
            continue

    return documents

def get_NRM_response(query,dataset_address):
    """Ask a ReAct agent to formulate a Network Revenue Management model.

    The function
    1. retrieves the most similar worked example (k=1) from
       ``Large_Scale_Or_Files/RAG_Example_NRM2_MD.csv`` and renders it as a
       few-shot ReAct trace,
    2. indexes every row of the user's CSV files in FAISS and exposes them
       through a retrieval tool named ``CSVQA``,
    3. runs a zero-shot ReAct agent (``gpt-4.1``) on ``query``.

    Args:
        query: Problem description; used both to pick the few-shot example and
            as the agent input.
        dataset_address: Newline-separated paths of the user's CSV files.
            Whitespace around each path is stripped and blank lines ignored;
            unreadable files are reported on stdout and skipped.

    Returns:
        The agent's final answer (``result['output']``).
    """
    retrieve = 'product'

    # --- 1. Few-shot example retrieved from the reference library -----------
    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_NRM2_MD.csv", encoding="utf-8")
    example_docs = loader.load()
    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    example_index = FAISS.from_documents(example_docs, embeddings)
    example_retriever = example_index.as_retriever(search_kwargs={'k': 1})

    few_shot_examples = []
    for result in retrieve_similar_docs(query, example_retriever):
        content = result['content']
        # Each reference row is parsed as
        # "prompt: ... Data_address: ... Label: ... Related: ...".
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()
        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # Only the first 36 rows of the example data are shown to the model.
        information_head = pd.read_csv(data_address)[:36]

        example_data_description = "\nHere is the product data:\n"
        for row_idx, r in information_head.iterrows():
            example_data_description += f"Product {row_idx + 1}: {r['Product Name']}, revenue w_{row_idx + 1} = {r['Revenue']}, demand rate a_{row_idx + 1} = {r['Demand']}, initial inventory c_{row_idx + 1} = {r['Initial Inventory']}\n"

        # The prefix ends up inside a prompt template, so literal braces in the
        # label must be doubled to survive template formatting.
        label = label.replace("{", "{{").replace("}", "}}")
        # Backslashes are escaped so the LaTeX commands reach the model
        # verbatim; unescaped, "\b" and "\t" would become backspace and tab.
        few_shot_examples.append(f"""

Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order, row by row. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, construct the objective function and constraints using the retrieved data as parameters of the formula. Ensure to include any additional detailed constraints present in the problem description. Always pay attention to the variable type. If not mentioned, use nonnegative integer. Do NOT include any explanations, notes, or extra text. Format the expressions strictly in markdown ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
""")

    # --- 2. Index the user's CSV rows and wrap them in the CSVQA tool --------
    dfs = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            continue
        try:
            df = pd.read_csv(file_address)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading file {file_address}: {e}")
            continue
        dfs.append((file_address.split('/')[-1], df))

    # One text per file header plus one text per row ("col = value, ...").
    data = []
    for df_index, (file_name, df) in enumerate(dfs):
        data.append(f"\nDataFrame {df_index + 1} - {file_name}:\n")
        for _, r in df.iterrows():
            data.append(", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n")

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_texts(data, embeddings)
    retriever = vectors.as_retriever(search_kwargs={'k': 1000})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1,n = 1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order, row by row. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead. Only present final answer in details of row, instead of giving a sheet format."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent tool works on plain strings; unwrap the chain's dict.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- 3. Build and run the agent ------------------------------------------
    # Join the examples into text; interpolating the list itself would put its
    # Python repr (brackets, quotes, literal "\n") into the prompt.
    examples_text = "\n".join(few_shot_examples)

    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {examples_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    result = agent2.invoke(query)

    return result['output']


### RA

In [None]:
def get_RA_response(query,dataset_address):
    """Ask a ReAct agent to formulate a Resource Allocation model.

    The function
    1. retrieves the 3 most similar worked examples from
       ``Large_Scale_Or_Files/RAG_Example_RA2_MD.csv`` and renders each as a
       few-shot ReAct trace,
    2. indexes every row of the user's CSV files in FAISS and exposes them
       through a retrieval tool named ``CSVQA`` (rows of ``products.csv`` /
       ``Products.csv`` are prefixed with a 1-based ``Product id``),
    3. runs a zero-shot ReAct agent (``gpt-4.1``) on ``query``.

    Args:
        query: Problem description; used both to pick the few-shot examples
            and as the agent input.
        dataset_address: Newline-separated paths of the user's CSV files.
            Whitespace around each path is stripped and blank lines ignored;
            unreadable files are reported on stdout and skipped.

    Returns:
        The agent's final answer (``result['output']``).
    """
    retrieve = "product"

    # --- 1. Few-shot examples retrieved from the reference library ----------
    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_RA2_MD.csv", encoding="utf-8")
    example_docs = loader.load()
    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    example_index = FAISS.from_documents(example_docs, embeddings)
    example_retriever = example_index.as_retriever(search_kwargs={'k': 3})

    few_shot_examples = []
    for result in retrieve_similar_docs(query, example_retriever):
        content = result['content']

        # Each reference row is parsed as
        # "prompt: ... Data_address: ... Label: ... Related: ...".
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # The example's data address holds whitespace-separated CSV paths.
        information = [pd.read_csv(path) for path in data_address.split()]

        example_data_description = "\nHere is the data:\n"
        for df_index, df in enumerate(information):
            # Only the first two frames get a heading.
            if df_index == 0:
                example_data_description += f"\nDataFrame {df_index + 1} - Capacity\n"
            elif df_index == 1:
                example_data_description += f"\nDataFrame {df_index + 1} - Products\n"

            for _, r in df.iterrows():
                example_data_description += ", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n"

        # The prefix ends up inside a prompt template, so literal braces in the
        # label must be doubled to survive template formatting.
        label = label.replace("{", "{{").replace("}", "}}")
        # Backslashes are escaped so the LaTeX commands reach the model
        # verbatim; unescaped, "\b" and "\t" would become backspace and tab.
        few_shot_examples.append( f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula according to the retrieved 'product id'. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
""")

    # --- 2. Index the user's CSV rows and wrap them in the CSVQA tool --------
    dfs = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            continue
        try:
            df = pd.read_csv(file_address)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading file {file_address}: {e}")
            continue
        dfs.append((file_address.split('/')[-1], df))

    # One text per file header plus one text per row ("col = value, ...").
    data = []
    for df_index, (file_name, df) in enumerate(dfs):
        data.append(f"\nDataFrame {df_index + 1} - {file_name}:\n")

        # Product rows carry an explicit 1-based id, which the few-shot prompt
        # refers to as the retrieved 'product id'.
        is_products_file = file_name in ('products.csv', 'Products.csv')
        for row_idx, r in df.iterrows():
            description = f"Product id: {row_idx+1}; " if is_products_file else ""
            description += ", ".join([f"{col} = {r[col]}" for col in df.columns])
            data.append(description + "\n")

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_texts(data, embeddings)
    retriever = vectors.as_retriever(search_kwargs={'k': 220})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1',top_p=1,n = 1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent tool works on plain strings; unwrap the chain's dict.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- 3. Build and run the agent ------------------------------------------
    # Join the examples into text; interpolating the list itself would put its
    # Python repr (brackets, quotes, literal "\n") into the prompt.
    examples_text = "\n".join(few_shot_examples)

    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {examples_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    result = agent2.invoke(query)

    return result['output']



### TP

In [None]:
def get_TP_response(query,dataset_address):
    """Ask a ReAct agent to formulate a Transportation Problem model.

    The function
    1. retrieves the 3 most similar worked examples from
       ``Large_Scale_Or_Files/RAG_Example_TP2_MD.csv`` and renders each as a
       few-shot ReAct trace,
    2. indexes every row of the user's CSV files in FAISS and exposes them
       through a retrieval tool named ``CSVQA``,
    3. runs a zero-shot ReAct agent (``gpt-4.1``) on ``query``.

    Args:
        query: Problem description; used both to pick the few-shot examples
            and as the agent input.
        dataset_address: Newline-separated paths of the user's CSV files.
            Whitespace around each path is stripped and blank lines ignored;
            unreadable files are reported on stdout and skipped.

    Returns:
        The agent's final answer (``result['output']``).
    """
    # Leading part of every example's "Retrieve all the ..." request; the
    # column names of that example's own data files are appended to it.
    retrieve = "capacity data and products data, "

    # --- 1. Few-shot examples retrieved from the reference library ----------
    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_TP2_MD.csv", encoding="utf-8")
    example_docs = loader.load()
    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    example_index = FAISS.from_documents(example_docs, embeddings)
    example_retriever = example_index.as_retriever(search_kwargs={'k': 3})

    few_shot_examples = []
    for result in retrieve_similar_docs(query, example_retriever):
        content = result['content']
        # Each reference row is parsed as
        # "prompt: ... Data_address: ... Label: ... Related: ...".
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # The example's data address holds whitespace-separated CSV paths.
        information = [pd.read_csv(path) for path in data_address.split()]

        # Start from the base text for every example, so one example's request
        # does not accumulate the column names of the examples before it.
        example_retrieve = retrieve
        example_data_description = "\nHere is the data:\n"
        for df_index, df in enumerate(information):
            # Only the first three frames get a heading.
            if df_index == 0:
                example_data_description += f"\nDataFrame {df_index + 1} - Customer Demand\n"
            elif df_index == 1:
                example_data_description += f"\nDataFrame {df_index + 1} - Supply Capacity\n"
            elif df_index == 2:
                example_data_description += f"\nDataFrame {df_index + 1} - Transportation Cost\n"

            for _, r in df.iterrows():
                example_data_description += ", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n"
            example_retrieve += ', '.join(df.columns)+', '

        # The prefix ends up inside a prompt template, so literal braces in the
        # label must be doubled to survive template formatting.
        label = label.replace("{", "{{").replace("}", "}}")
        # Backslashes are escaped so the LaTeX commands reach the model
        # verbatim; unescaped, "\b" and "\t" would become backspace and tab.
        few_shot_examples.append( f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {example_retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
""")

    # --- 2. Index the user's CSV rows and wrap them in the CSVQA tool --------
    dfs = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            continue
        try:
            df = pd.read_csv(file_address)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading file {file_address}: {e}")
            continue
        dfs.append((file_address.split('/')[-1], df))

    # One text per file header plus one text per row ("col = value, ...").
    data = []
    for df_index, (file_name, df) in enumerate(dfs):
        data.append(f"\nDataFrame {df_index + 1} - {file_name}:\n")
        for _, r in df.iterrows():
            data.append(", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n")

    # Kept from the original: dumps the texts that are about to be indexed.
    print(data)

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_texts(data, embeddings)

    retriever = vectors.as_retriever(search_kwargs={'k': 300})

    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1,n = 1,openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent tool works on plain strings; unwrap the chain's dict.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- 3. Build and run the agent ------------------------------------------
    # Join the examples into text; interpolating the list itself would put its
    # Python repr (brackets, quotes, literal "\n") into the prompt.
    examples_text = "\n".join(few_shot_examples)

    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {examples_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    result = agent2.invoke(query)

    return result['output']


### AP

In [None]:
def _describe_ap_cost_files(addresses):
    """Render the CSV files listed in *addresses* as prompt text.

    Args:
        addresses: One CSV path per line.

    Returns:
        A string that starts with a single space and contains one
        ``C=<matrix>.`` fragment per file that could be read, where the
        matrix is every column of the file except the first.
    """
    description = " "
    for file_address in addresses.strip().split('\n'):
        try:
            frame = pd.read_csv(file_address)
            # The first column is dropped; presumably it holds row labels
            # rather than cost data -- TODO confirm against the datasets.
            matrix = frame.iloc[:, 1:].values
            # NOTE(review): np.array_str abbreviates arrays larger than
            # NumPy's print threshold (1000 elements by default) with "...";
            # confirm large matrices are not truncated in the prompt.
            description += "C=" + np.array_str(matrix) + "."
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading file {file_address}: {e}")
    return description


def get_AP_response(query,dataset_address):
    """Formulate a mathematical model for an "AP" query with a ReAct agent.

    The function
    1. retrieves the most similar reference example from
       ``RAG_Example_AP2_MD.csv`` and turns it into a ReAct-style few-shot
       trace,
    2. renders the CSV files named in *dataset_address* as text and indexes
       that text in FAISS behind a ``CSVQA`` tool, and
    3. runs a zero-shot ReAct agent whose prefix contains the few-shot trace.

    Args:
        query: Natural-language problem description.
        dataset_address: Newline-separated paths of the CSV files that hold
            the instance data.

    Returns:
        The agent's final answer (``result['output']``).

    Raises:
        IndexError: If a retrieved reference example lacks the
            ``Data_address:`` or ``Label:`` marker.
    """
    # Fill-ins for the few-shot "Action Input" sentence; both are empty here.
    retrieve = ''
    related = ''

    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_AP2_MD.csv", encoding="utf-8")
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)
    retriever = vectors.as_retriever(max_tokens_limit=400,search_kwargs={'k': 1})

    few_shot_examples = []
    for hit in retrieve_similar_docs(query,retriever):
        content = hit['content']
        # The retrieved text is split on the markers "Data_address:",
        # "Label:" and "Related:", in that order.
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()
        label = split_at_address[1].split("Related:", 1)[0].strip()

        example_data_description = _describe_ap_cost_files(data_address)

        # Backslashes are written doubled: in a non-raw literal "\b" and "\t"
        # would otherwise become backspace/tab characters inside the prompt.
        example = f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula according to the retrieved 'product id'. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The example ends up inside the agent's format-style prompt template,
        # so every brace in it (description, data and label alike) is doubled.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (quotes, brackets, escaped newlines) in the prompt.
    few_shot_block = "\n".join(few_shot_examples)

    data_description = _describe_ap_cost_files(dataset_address)
    vectors = FAISS.from_texts([data_description], embeddings)

    retriever = vectors.as_retriever(max_tokens_limit=400, search_kwargs={'k': 1})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1,n = 1,openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent only needs the answer text, not the whole chain output.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    prefix = f"""You are an assistant that generates a mathematical model based on the user's description and provided CSV data.

            Please refer to the following example and generate the answer in the same format:

            {few_shot_block}

            Note: Please retrieve all neccessary information from the CSV file to generate the answer. When you generate the answer, please output required parameters in a whole text, including all vectors and matrices.

            When you need to retrieve information from the CSV file, use the provided tool.

            """

    suffix = """

            Begin!

            User Description: {input}
            {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True
    )

    result = agent2.invoke(query)
    return result['output']


### FLP

In [None]:
def _describe_flp_files(addresses):
    """Render the CSV files listed in *addresses* as prompt text.

    Each readable file contributes one fragment, chosen in this order:

    * a ``demand`` column      -> ``d=<list>`` followed by a newline
    * a ``fixed_costs`` column -> ``c=<list>`` followed by a newline
    * the third readable file  -> ``A=<matrix>.`` (all columns but the first)
    * anything else            -> one ``col = value, ...`` line per row

    Args:
        addresses: One CSV path per line.

    Returns:
        The concatenated fragments, prefixed by a single space.
    """
    description = " "
    files_read = 0  # counts only the files that were processed successfully
    for file_address in addresses.strip().split('\n'):
        try:
            frame = pd.read_csv(file_address)
            if 'demand' in frame.columns:
                description += "d=" + str(frame['demand'].values.tolist()) + "\n"
            elif 'fixed_costs' in frame.columns:
                description += "c=" + str(frame['fixed_costs'].values.tolist()) + "\n"
            elif files_read == 2:
                # NOTE(review): np.array_str abbreviates arrays larger than
                # NumPy's print threshold (1000 elements by default) with
                # "..."; confirm large matrices are not truncated.
                description += "A=" + np.array_str(frame.iloc[:, 1:].values) + "."
            else:
                for _, row in frame.iterrows():
                    # One line per row; without the separator the last value
                    # of a row runs straight into the first column name of
                    # the next row.
                    description += ", ".join([f"{col} = {row[col]}" for col in frame.columns]) + "\n"
            files_read += 1
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading file {file_address}: {e}")
    return description


def get_FLP_response(query,dataset_address):
    """Formulate a mathematical model for an "FLP" query with a ReAct agent.

    The function
    1. retrieves the most similar reference example from
       ``RAG_Example_FLP2_MD.csv`` and turns it into a ReAct-style few-shot
       trace,
    2. renders the CSV files named in *dataset_address* as text and indexes
       that text in FAISS behind a ``CSVQA`` tool, and
    3. runs a zero-shot ReAct agent whose prefix contains the few-shot trace.

    Args:
        query: Natural-language problem description.
        dataset_address: Newline-separated paths of the CSV files that hold
            the instance data.

    Returns:
        The agent's final answer (``result['output']``).

    Raises:
        IndexError: If a retrieved reference example lacks the
            ``Data_address:`` or ``Label:`` marker.
    """
    # Fill-ins for the few-shot "Action Input" sentence.
    retrieve = 'supplier'
    related = ''

    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_FLP2_MD.csv", encoding="utf-8")
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)
    retriever = vectors.as_retriever(max_tokens_limit=400,search_kwargs={'k': 1})

    few_shot_examples = []
    for hit in retrieve_similar_docs(query,retriever):
        content = hit['content']
        # The retrieved text is split on the markers "Data_address:",
        # "Label:" and "Related:", in that order.
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()
        label = split_at_address[1].split("Related:", 1)[0].strip()

        example_data_description = _describe_flp_files(data_address)

        # Backslashes are written doubled: in a non-raw literal "\b" and "\t"
        # would otherwise become backspace/tab characters inside the prompt.
        example = f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The example ends up inside the agent's format-style prompt template,
        # so every brace in it (description, data and label alike) is doubled.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (quotes, brackets, escaped newlines) in the prompt.
    few_shot_block = "\n".join(few_shot_examples)

    data_description = _describe_flp_files(dataset_address)
    vectors = FAISS.from_texts([data_description], embeddings)

    retriever = vectors.as_retriever(max_tokens_limit=400, search_kwargs={'k': 1})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1,n = 1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent only needs the answer text, not the whole chain output.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    prefix = f"""You are an assistant that generates a mathematical model based on the user's description and provided CSV data.

            Please refer to the following example and generate the answer in the same format:

            {few_shot_block}

            Note: Please retrieve all neccessary information from the CSV file to generate the answer. When you generate the answer, please output required parameters in a whole text, including all vectors and matrices.

            When you need to retrieve information from the CSV file, use the provided tool.

            """

    suffix = """
            Begin!

            User Description: {input}
            {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True
    )

    result = agent2.invoke(query)
    return result['output']


### Others With CSV

In [None]:
import re
import pandas as pd
import numpy as np
import os
from typing import Union
from langchain_classic.chains import LLMChain
from langchain_core.prompts import PromptTemplate

def escape_braces(text: str) -> str:
    """Return *text* with every ``{`` and ``}`` doubled.

    Doubled braces are read as literal braces by ``str.format``-style
    templates, so the result can be embedded in such a template verbatim.
    """
    doubling_table = str.maketrans({"{": "{{", "}": "}}"})
    return text.translate(doubling_table)

# Module-level retrieval setup for the "Others With CSV" pipeline. It runs once
# when this cell executes: the reference examples are loaded and indexed in
# FAISS, and `Others_store` is the index that get_Others_response hands to
# build_few_shot_Other.
OTHERS_RAG_PATH = "Large_Scale_Or_Files/RAG_Example_Others.csv"
# NOTE: this (re)binds the notebook-wide name `embeddings`.
embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
Others_docs = CSVLoader(file_path=OTHERS_RAG_PATH, encoding="utf-8").load()
Others_store: FAISS = FAISS.from_documents(Others_docs, embeddings)


def retrieve_examples(store, query: str, k: int = 1):
    """Return what a top-*k* retriever built on *store* yields for *query*.

    Args:
        store: Object exposing ``as_retriever(search_kwargs=...)``.
        query: Text to search for.
        k: Number of results requested from the retriever.
    """
    search_options = {"k": k}
    top_k_retriever = store.as_retriever(search_kwargs=search_options)
    return top_k_retriever.invoke(query)

def build_few_shot_Other(store, user_query: str, k: int = 1, t='Model'):
    """Build a brace-escaped few-shot block from the most similar examples.

    The *k* reference examples closest to *user_query* are fetched from
    *store*. Each one is expected to carry the fields ``prompt``,
    ``Data_address``, ``Label`` and ``Label_Code`` (the columns of
    ``RAG_Example_Others.csv``). Examples that cannot be parsed are reported
    on stdout and skipped.

    Args:
        store: Vector store passed on to ``retrieve_examples``.
        user_query: Query used for the similarity search.
        k: Number of examples to retrieve.
        t: ``'Model'`` to show each example's ``Label`` (abstract model);
            any other value shows its ``Label_Code`` (Gurobi code).

    Returns:
        The ``<EXAMPLE>`` blocks joined by blank lines, with every brace
        doubled by ``escape_braces``. Empty string if nothing could be parsed.
    """
    if t == 'Model':
        thought_line = "Thought: I need to create an Abstract Model Plan based on the user's query and the CSV Schema (data structure)\n"
    else:
        thought_line = "Thought: I need to generate a single, complete, executable Gurobi Python code block.\n"

    examples = []

    # retrieve k most-similar reference examples
    for doc in retrieve_examples(store, user_query, k=k):
        txt = doc.page_content

        try:
            if "prompt:" not in txt:
                # The capturing group keeps the markers, so the field values
                # sit at the even indices 0, 2, 4 and 6.
                parts = re.split(r'(Data_address:|Label:|Label_Code:)', txt, flags=re.IGNORECASE)
                prompt_part = parts[0].strip()
                label_model = parts[4].strip()
                label_code = parts[6].strip()
            else:
                prompt_part = txt.split("Data_address:")[0].replace("prompt:", "").strip()
                # The data address itself is not shown in the example; the
                # lookup is kept so that a record without a "Data_address:"
                # field is still rejected instead of yielding a garbled prompt.
                _data_addr = txt.split("Data_address:")[1].split("Label:")[0].strip()
                label_model = txt.split("Label:")[1].split("Label_Code:")[0].strip()
                label_code = txt.split("Label_Code:")[1].split("Related:")[0].strip() # Assuming "Related:" is last

        except Exception as e:
            print(f"[build_few_shot_Other Error] Failed to parse example: {e}. Content: {txt[:50]}")
            continue

        label_part = label_model if t == 'Model' else label_code
        ex = (
            "<EXAMPLE>\n"
            f"Question: {prompt_part}\n"
            f"{thought_line}"
            "Final Answer:\n"
            f"{label_part}\n"
            "</EXAMPLE>"
        )
        escaped_example = escape_braces(ex)
        examples.append(escaped_example)
        print(f"[build_few_shot_Other] Added example for prompt: {escaped_example}...")

    return "\n\n".join(examples)



def get_csv_schema(file_paths_string: str) -> str:
    """Describe the columns and first rows of every CSV named in a string.

    Args:
        file_paths_string: One or more file paths separated by whitespace.

    Returns:
        One report per path, joined by ``---`` separator lines. A report
        lists the columns and up to ten rows, or explains why the file could
        not be read. If the string holds no path at all, a fixed error
        message is returned instead.
    """
    def _load_preview(csv_path):
        # UTF-8 first; GBK is tried only when the bytes are not valid UTF-8.
        try:
            return pd.read_csv(csv_path, nrows=10, encoding='utf-8')
        except UnicodeDecodeError:
            return pd.read_csv(csv_path, nrows=10, encoding='gbk')

    reports = []
    for csv_path in re.split(r'[\s\n]+', file_paths_string):
        if csv_path.strip() == "":
            continue
        try:
            preview = _load_preview(csv_path)
            report = (
                f"Successfully read first 10 rows from {csv_path}.\n"
                f"Columns: {preview.columns.to_list()}\n\n"
                f"Data Head:\n{preview.to_string()}"
            )
        except Exception as e:
            # An unreadable file still yields a report so the caller can
            # carry on with the remaining paths.
            report = (
                f"Error reading CSV at {csv_path}: {e}. "
                "You must formulate the model/code based on the file path and assumed column names."
            )
        reports.append(report)

    if reports:
        return "\n\n---\n\n".join(reports)
    return "Error: No valid file paths were provided."

def get_Others_response(user_query: str, dataset_address: str) -> str:
    """Generate Gurobi code for a CSV-backed query in three LLM-driven steps.

    1. Read the schema of the CSV file(s) with ``get_csv_schema``.
    2. Ask the LLM for an "Abstract Model Plan", guided by retrieved examples.
    3. Ask the LLM to turn query, schema and plan into Gurobi Python code.

    Args:
        user_query: Natural-language problem description.
        dataset_address: Path(s) of the CSV file(s) holding the instance data.

    Returns:
        The generated code wrapped in a ```python fence. If the LLM output is
        not code, or any step raises, a string starting with ``"Error:"`` is
        returned instead; the function itself does not raise.
    """
    llm2 = ChatOpenAI(
    temperature=0.0, model_name="gpt-4.1", openai_api_key=user_api_key
    )

    try:
        print("\n[Gurobi Pipeline Step 1/3]: Getting CSV Scheme...")

        schema = get_csv_schema(dataset_address)
        print("\n[Gurobi Pipeline Step 2/3]: Constructing Abstract Model...")

        few_shot_block_abstract = build_few_shot_Other(Others_store, user_query, k=3, t='Model')
        print(f"[Few-Shot Abstract Examples]:\n{few_shot_block_abstract}...")

        abstract_model_template = """
You are an expert optimization modeler.
Your task is to create an "Abstract Model Plan" based on the user's query and the CSV Schema (data structure).
This plan is *not* Gurobi code or mathematical formulas, but a clear, step-by-step reasoning process in English.

[Examples]
{few_shot_examples}

[Current Task]
User Query: {query}

CSV Schema:
{schema}

[Your Output]
You must strictly follow this format for your "Abstract Model Plan" output:

[Abstract Model Plan START]
1.  **Analyze Query:** The user wants to {{{{Your analysis}}}}.
2.  **Identify Model Type:** Based on the query, this is a {{{{e.g., LP, MIP, Fixed-Charge, Blending}}}} problem.
3.  **Define Index Sets:** The primary indices are {{{{e.g., Products, Workers, Sources, Destinations}}}}.
4.  **Define Decision Variables:**
    -   `x[i]` = {{{{Describe first variable, e.g., 'quantity of product i'}}}}. Type: {{{{GRB.CONTINUOUS / GRB.INTEGER}}}}.
    -   `y[i]` = {{{{Describe second variable, e.g., 'if product i is produced'}}}}. Type: {{{{GRB.BINARY}}}}.
5.  **Identify Parameters (from Schema):**
    -   Objective coefficients (e.g., profit) will come from column(s): {{{{e.g., 'Price', 'Production Cost'}}}}.
    -   Constraint coefficients (e.g., resource use) will come from: {{{{e.g., 'Resource 1', 'Available Time'}}}}.
    -   Constraint RHS (limits) will come from: {{{{e.g., 'Max Demand'}}}}.
6.  **Formulate Objective:** {{{{Describe objective, e.g., 'Maximize sum((schema['Price'] - schema['Cost']) * x[i] - schema['FixedCost'] * y[i])'}}}}.
7.  **Formulate Constraints:**
    -   Constraint 1 (e.g., Resource Limit): {{{{Describe constraint 1, e.g., 'sum(schema['Resource 1'][i] * x[i]) <= schema['Available']'}}}}.
    -   Constraint 2 (e.g., Linking): {{{{Describe constraint 2, e.g., 'x[i] <= M * y[i])'}}}}.
    -   ... (Other constraints) ...
[Abstract Model Plan END]
"""
        # build_few_shot_Other returns text whose braces are already doubled
        # for template use. Values substituted into a template are not parsed
        # again, so passing the block as a variable would show the LLM "{{"
        # and "}}" in every example. Splicing it into the template text lets
        # the formatter turn the doubled braces back into single ones.
        abstract_prompt = PromptTemplate(
            template=abstract_model_template.replace("{few_shot_examples}", few_shot_block_abstract),
            input_variables=["query", "schema"]
        )

        abstract_model_chain = LLMChain(llm=llm2, prompt=abstract_prompt)

        abstract_model_plan = abstract_model_chain.run(
            query=user_query,
            schema=schema
        )
        print(f"[Observation]:\n{abstract_model_plan}")

        print("\n[Gurobi Pipeline Step 3/3]: Generating Gurobi Code...")

        few_shot_block_code = build_few_shot_Other(Others_store, user_query, k=3, t='Code')

        code_gen_template = """
You are an expert Gurobi programmer.
Your task is to strictly follow the User Query, CSV Schema, and "Abstract Model Plan" to translate them into a single, complete, executable Gurobi Python code block.
The code must start with ```python and end with ```.
The code must include all necessary imports (`gurobipy`, `pandas`, `numpy`, `sys`, `re`).
The code must include a `try...except` block to handle errors.
The code must robustly read '{dataset_address}' (if it's multiple paths, read the first).
The code must *fully* implement all variables, objectives, and constraints from the "Abstract Model Plan".

[Examples of the Full Process]
{few_shot_examples}

[User Query]
{query}

[CSV Schema]
{schema}

[Abstract Model Plan]
{abstract_plan}

[Your Gurobi Code]
"""

        # Same splice as above: here the examples are Python code, where
        # doubled braces would change the meaning of dict/set literals.
        code_gen_prompt = PromptTemplate(
            template=code_gen_template.replace("{few_shot_examples}", few_shot_block_code),
            input_variables=["query", "schema", "abstract_plan", "dataset_address"]
        )

        code_gen_chain = LLMChain(llm=llm2, prompt=code_gen_prompt)

        final_code = code_gen_chain.run(
            query=user_query,
            schema=schema,
            abstract_plan=abstract_model_plan,
            dataset_address=dataset_address
        )

        if "```python" in final_code:
            # Keep only what lies between the first ```python fence and the
            # next closing fence (or the end of the text if it is missing).
            code_block = final_code.split("```python", 1)[1]
            if "```" in code_block:
                code_block = code_block.split("```", 1)[0]
            final_answer = "```python\n" + code_block.strip() + "\n```"
        else:
            if not final_code.strip().startswith("import"):
                print(f"[Warning] Step 3 output was not a valid code block. Output: {final_code[:200]}...")
                final_answer = f"Error: Code generation failed. LLM returned non-code output:\n{final_code}"
            else:
                print("[Warning] Step 3 output missed ```python tag, adding it.")
                final_answer = "```python\n" + final_code.strip() + "\n```"

        print(f"[Final Answer]:\n{final_answer}")
        return final_answer

    except Exception as e:
        # Pipeline boundary: report the failure as text instead of raising.
        print(f"[Gurobi Pipeline Error]: {e}")
        return f"Error: The Gurobi Code Generation pipeline failed with error: {e}"

## Others Without CSV

In [None]:
def get_others_without_CSV_response(query):
    """Formulate a LaTeX optimization model for a query that has no CSV data.

    The five reference examples most similar to *query* are retrieved from
    ``RAG_Example_Others_Without_CSV.csv`` and placed, together with three
    fixed "trigger" examples, in the prefix of a zero-shot ReAct agent whose
    only tool (``ORLM_QA``) answers questions over the same reference file.

    Args:
        query: Natural-language problem description.

    Returns:
        The agent's final answer.

    Raises:
        IndexError: If a retrieved reference example lacks the
            ``Data_address:`` or ``Label:`` marker.
    """
    llm = ChatOpenAI(
        temperature=0.0, model_name="gpt-4.1", top_p=1, n = 1, openai_api_key=user_api_key
    )

    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_Others_Without_CSV.csv", encoding="utf-8")
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)

    retriever = vectors.as_retriever(search_kwargs={'k': 5})

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )

    qa_tool = Tool(
        name="ORLM_QA",
        func=qa_chain.invoke,
        description=(
            "Use this tool to answer Querys."
            "Provide the Query as input, and the tool will retrieve the relevant information from the file and use it to answer the Query."
        ),
    )

    few_shot_examples = []
    for hit in retrieve_similar_docs(query,retriever):
        content = hit['content']

        # Only the problem text and the label are used; the remaining fields
        # of the record are not parsed.
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        label = split_at_address[1].split("Related:", 1)[0].strip()

        example = (
            "<EXAMPLE>\n"
            f"Question: {problem_description}\n\n"
            "Thought: Read the question and 1) identify the goal (minimize time/cost/crew or maximize throughput/value) and collect per-unit coefficients; 2) define decision variables and pick domains: counts of trips/units/vehicles are nonnegative integers, yes or no choices are binary, divisible flows or weights are nonnegative reals; 3) write the linear objective from the coefficients; 4) add constraints in this order: demand or target (exactly, at least, at most), capacity or supply upper bounds, flow conservation or stage linking across nodes or arcs, and any share or ratio limits rewritten as linear inequalities using the given per-unit rates, plus any minimum or maximum usage; 5) add nonnegativity and the chosen integrality or binary domains; 6) output only the LP: objective first, then constraints line by line with brief labels if needed, then a final line stating the variable domains."
            "Final Answer:\n"
            f"{label}\n"
            "</EXAMPLE>"
        )

        # Double the braces exactly once, on the finished example. Escaping
        # the label first and the whole example afterwards would leave "{{"
        # in the text the LLM finally sees.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (quotes, brackets, escaped newlines and backslashes).
    few_shot_block = "\n\n".join(few_shot_examples)

    # Raw literal: the LaTeX commands must reach the LLM unchanged. In a
    # non-raw literal "\textbf"/"\text" start with a TAB, "\forall" with a
    # form feed, and "\\" collapses to a single backslash. Braces stay doubled
    # because the text becomes part of a format-style template.
    trigger_guide = r""" 
    Use the following triggers to identify problem structures and apply the corresponding mathematical formulations.
    (1) INTEGER trigger:
    Question: A factory can run two machine types. How many of each machine should be installed given budget and space limits? Maximize output.
    Final Answer: 
    $\max\; p_1 x_1 + p_2 x_2$
    $\text{{s.t. }} a_1 x_1 + a_2 x_2 \le B,\; s_1 x_1 + s_2 x_2 \le S$
    $x_1, x_2 \in \mathbb{{Z}}_+.$
    (2) MULTI-PERIOD FLOW trigger:
    Question: Multi-period production with inventory and backorders. Costs for production/holding/backorder. Initial and terminal conditions given.
    Final Answer:
    \textbf{{Indices: }} t\in T=\{{1,\dots,n\}}. \\
    \textbf{{Given: }} d_t,\ I_0,\ B_0,\ \dots \\
    \textbf{{Vars: }} x_t\ge0,\ I_t\ge0,\ B_t\ge0. \\
    \min \sum_t (c x_t + h I_t + p B_t) \\
    \text{{s.t. }} I_t - B_t = I_{{t-1}} - B_{{t-1}} + x_t - d_t,\ \forall t \\
    I_n \ge I^{{\min}},\ B_n=0.
    (3) LOGIC+BINARY trigger:
    Question: Choose exactly one option from set P and at least K items from set V, with quantities and budget.
    Final Answer:
    \textbf{{Sets: }} i\in P,\ j\in V. \ \textbf{{Vars: }} q_i,q_j\ge0;\ y_i\in\{{0,1\}}, z_j\in\{{0,1\}}.\\
    \max \sum_j f_j q_j \\
    \text{{s.t. }} \sum_i y_i = 1,\ \sum_j z_j \ge K \\
    0\le q_i \le M_i y_i,\ \forall i;\ \ 0\le q_j \le M_j z_j,\ \forall j \\
    \text{{Budget: }} \sum_i c_i q_i + \sum_j c_j q_j \le B.
    """

    prefix = (
        few_shot_block + "\n\n"
        + trigger_guide
        + "USER QUESTION:\n{input}\n\n"
        "TASK:\n"
        "- Produce a complete LaTeX optimization model using ONLY information in the question.\n"
        "- Use a fixed structure INSIDE LaTeX: Indices/Sets; Given Parameters (convert all tables to arrays); Decision Variables (with domains); Objective; Constraints; Domain lines.\n"
        "- For multi-period problems, you MUST include state-balance recurrences and initial/terminal conditions explicitly.\n"
        "- Avoid nonlinear forms when possible: rewrite ratios/logic using linear constraints + binaries (big-M) with clearly defined M.\n\n"
        "You should decide VARIABLE–TYPE first!\n (If not mentioned or ambiguous, integer by default!)"
        "### FIRST RESPONSE FORMAT (exactly 3 lines) ###\n"
        "Thought: <brief>\n"
        "Action: ORLM_QA\n"
        "Action Input: {input}\n\n"
        "Begin."
    )

    suffix = (
        "\n### AFTER OBSERVATION ###\n"
        "Respond with exactly two lines:\n"
        "Thought: <variable types: integer/binary/continuous>\n"
        "Final Answer: <ONLY LaTeX model. Must include: (i) indices/sets, (ii) parameter definitions (tables->arrays), (iii) explicit domain lines for EVERY variable, (iv) initial/terminal conditions if any. No prose.>\n"
        "Do NOT output anything else."
    )

    agent = initialize_agent(
        tools=[qa_tool],
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
            "input_variables": ["input"]
        },
        verbose=True,
        handle_parsing_errors=True,  # Enable error handling
    )

    # NOTE(review): presumably meant as a 60 s request timeout; confirm that
    # the installed openai SDK actually honours this module attribute.
    openai.api_request_timeout = 60

    # The query is substituted into the prompt as a value and is not parsed as
    # a template, so its braces must not be doubled.
    result = agent.invoke({"input": query})
    return result["output"]

## Code Generation

In [None]:
def get_code(output, selected_problem):
    """Ask the code-generation LLM to turn a math model into Gurobi Python code.

    A base instruction containing ``output`` is extended with few-shot
    examples chosen by ``selected_problem`` and sent as a single human
    message. The reply text is printed and returned.

    Args:
        output: Text of the mathematical optimization model to implement.
        selected_problem: Problem-type label (or None). Labels that match no
            known problem family fall back to the generic example set.

    Returns:
        The ``content`` of the LLM response.

    Notes:
        * The few-shot examples are raw strings. As ordinary strings the
          LaTeX in them was corrupted by Python escape processing
          (``\\forall`` -> form feed, ``\\begin`` -> backspace, ``\\text`` ->
          tab, ``\\neq`` -> newline, ``\\\\`` -> single backslash).
        * The examples are never passed through ``str.format``, so braces are
          written singly (``{aligned}``, not ``{{aligned}}``).
        * In the first Resource Allocation example the coefficient lists are
          defined before they are used, so the example code is runnable.
    """
    llm_code = ChatOpenAI(
        temperature=0.0, model_name="gpt-4.1", top_p=1, n=1, openai_api_key=user_api_key
    )

    prompt = f"""
    You are an expert in mathematical optimization and Python programming. Your task is to write Python code to solve the provided mathematical optimization model using the Gurobi library. The code should include the definition of the objective function, constraints, and decision variables. Please don't add additional explanations. Please don't include ```python and ```.Below is the provided mathematical optimization model:

    Mathematical Optimization Model:
    {output}
    """

    if selected_problem in ("Network Revenue Management", "NRM", "Network Revenue Management Problem"):

        prompt += r"""
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \max \quad \sum_i A_i \cdot x_i$
Constraints
1. Inventory Constraints:
$\quad \quad x_i \leq I_i, \quad \forall i$
2. Demand Constraints:
$x_i \leq d_i, \quad \forall i$
3. Startup Constraint:
$\sum_i x_i \geq s$
Retrieved Information
$\small I = [7550, 6244]$
$\small A = [149, 389]$
$\small d = [15057, 12474]$
$\small s = 100$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Product_Optimization")

# Decision variables for the number of units of each product
x_1 = m.addVar(vtype=GRB.INTEGER, name="x_1") # Number of units of product 1
x_2 = m.addVar(vtype=GRB.INTEGER, name="x_2") # Number of units of product 2

# Objective function: Maximize 149 x_1 + 389 x_2
m.setObjective(149 * x_1 + 389 * x_2, GRB.MAXIMIZE)

# Constraints
m.addConstr(x_1 <= 7550, name="inventory_constraint_1")
m.addConstr(x_2 <= 6244, name="inventory_constraint_2")
m.addConstr(x_1 <= 15057, name="demand_constraint_1")
m.addConstr(x_2 <= 12474, name="demand_constraint_2")

# Non-negativity constraints are implicitly handled by the integer constraints (x_1, x_2 >= 0)

# Solve the model
m.optimize()

        """

    elif selected_problem in ("Facility Location Problem", "FLP", "Facility Location"):
        prompt += r"""
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \min \quad \sum_{i} \sum_{j} A_{ij} \cdot x_{ij} + \sum_{i} c_i \cdot y_i$

Constraints
1. Demand Constraint:
$\quad \quad \sum_i x_{ij} = d_j, \quad \forall j$
2. Capacity Constraint:
$\quad \quad \sum_j x_{ij} \leq M \cdot y_i, \quad \forall i$
3. Non-negativity:
$\quad \quad x_{ij} \geq 0, \quad \forall i,j$
4. Binary Requirement:
$\quad \quad y_i \in \{0,1\}, \quad \forall i$

Retrieved Information
$\small d = [1083, 776, 16214, 553, 17106, 594, 732]$
$\small c = [102.33, 94.92, 91.83, 98.71, 95.73, 99.96, 98.16]$
$\small A = \begin{bmatrix}
1506.22 & 70.90 & 8.44 & 260.27 & 197.47 & 71.71 & 61.19 \\
1732.65 & 1780.72 & 567.44 & 448.68 & 29.00 & 1484.91 & 963.92 \\
115.66 & 100.76 & 64.68 & 1324.53 & 64.99 & 134.88 & 2102.83 \\
1254.78 & 1115.63 & 52.31 & 1036.16 & 892.63 & 1464.04 & 1383.41 \\
42.90 & 891.01 & 1013.94 & 1128.72 & 58.91 & 42.89 & 1570.31 \\
0.70 & 139.46 & 70.03 & 79.15 & 1482.00 & 0.91 & 110.46 \\
1732.30 & 1780.44 & 486.50 & 523.74 & 522.08 & 82.48 & 826.41
\end{bmatrix}$
$\small M = \sum_j d_j = 1083 + 776 + 16214 + 553 + 17106 + 594 + 732 = 38058 $

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB
import numpy as np

# Data
d = np.array([1083, 776, 16214, 553, 17106, 594, 732])
c = np.array([102.33, 94.92, 91.83, 98.71, 95.73, 99.96, 98.16])
A = np.array([[1506.22, 70.90, 8.44, 260.27, 197.47, 71.71, 61.19],
[1732.65, 1780.72, 567.44, 448.68, 29.00, 1484.91, 963.92],
[115.66, 100.76, 64.68, 1324.53, 64.99, 134.88, 2102.83],
[1254.78, 1115.63, 52.31, 1036.16, 892.63, 1464.04, 1383.41],
[42.90, 891.01, 1013.94, 1128.72, 58.91, 42.89, 1570.31],
[0.70, 139.46, 70.03, 79.15, 1482.00, 0.91, 110.46],
[1732.30, 1780.44, 486.50, 523.74, 522.08, 82.48, 826.41]])

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(A.shape[0], A.shape[1], lb=0, name="x")
y = m.addVars(A.shape[0], vtype=GRB.BINARY, name="y")

# Objective function
m.setObjective(gp.quicksum(A[i, j]*x[i, j] for i in range(A.shape[0]) for j in range(A.shape[1])) + gp.quicksum(c[i]*y[i] for i in range(A.shape[0])), GRB.MINIMIZE)

# Constraints
for j in range(A.shape[1]):
    m.addConstr(gp.quicksum(x[i, j] for i in range(A.shape[0])) == d[j], name=f"demand_constraint_{j}")

M = 1000000  # large number
for i in range(A.shape[0]):
    m.addConstr(-M*y[i] + gp.quicksum(x[i, j] for j in range(A.shape[1])) <= 0, name=f"M_constraint_{i}")

# Solve the model
m.optimize()
        """

    elif selected_problem in ("Assignment Problem", "AP", "Assignment"):
        prompt += r"""
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \min \quad \sum_{i=1}^3 \sum_{j=1}^3 c_{ij} \cdot x_{ij}$

Constraints
1. Row Assignment Constraint:
$\quad \quad \sum_{j=1}^3 x_{ij} = 1, \quad \forall i \in \{1,2,3\}$
2. Column Assignment Constraint:
$\quad \quad \sum_{i=1}^3 x_{ij} = 1, \quad \forall j \in \{1,2,3\}$
3. Binary Constraint:
$\quad \quad x_{ij} \in \{0,1\}, \quad \forall i,j$

Retrieved Information
$\small c = \begin{bmatrix}
3000 & 3200 & 3100 \\
2800 & 3300 & 2900 \\
2900 & 3100 & 3000
\end{bmatrix}$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB
import numpy as np

# Data
c = np.array([
    [3000, 3200, 3100],
    [2800, 3300, 2900],
    [2900, 3100, 3000]
])

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(c.shape[0], c.shape[1], vtype=GRB.BINARY, name="x")

# Objective function
m.setObjective(gp.quicksum(c[i, j]*x[i, j] for i in range(c.shape[0]) for j in range(c.shape[1])), GRB.MINIMIZE)

# Constraints
for i in range(c.shape[0]):
    m.addConstr(gp.quicksum(x[i, j] for j in range(c.shape[1])) == 1, name=f"row_constraint_{i}")

for j in range(c.shape[1]):
    m.addConstr(gp.quicksum(x[i, j] for i in range(c.shape[0])) == 1, name=f"col_constraint_{j}")

# Solve the model
m.optimize()
"""

    elif selected_problem in ("Transportation Problem", "TP", "Transportation"):
        prompt += r"""
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \min \quad \sum_i \sum_j c_{ij} \cdot x_{ij}$

Constraints
1. Demand Constraint:
$\quad \quad \sum_i x_{ij} \geq d_j, \quad \forall j$
2. Capacity Constraint:
$\quad \quad \sum_j x_{ij} \leq s_i, \quad \forall i$

Retrieved Information
$\small d = [94, 39, 65, 435]$
$\small s = [2531, 20, 210, 241]$
$\small c = \begin{bmatrix}
883.91 & 0.04 & 0.03 & 44.45 \\
543.75 & 23.68 & 23.67 & 447.75 \\
537.34 & 23.76 & 498.95 & 440.60 \\
1791.49 & 68.21 & 1432.48 & 1527.76
\end{bmatrix}$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Optimization")

# Decision variables
x_S1_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C1")
x_S1_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C2")
x_S1_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C3")
x_S1_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C4")
x_S2_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C1")
x_S2_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C2")
x_S2_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C3")
x_S2_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C4")
x_S3_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C1")
x_S3_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C2")
x_S3_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C3")
x_S3_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C4")
x_S4_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C1")
x_S4_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C2")
x_S4_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C3")
x_S4_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C4")

# Objective function
m.setObjective(883.91 * x_S2_C1 + 0.04 * x_S2_C2 + 0.03 * x_S2_C3 + 44.45 * x_S2_C4 + 543.75 * x_S1_C1 + 23.68 * x_S1_C2 + 23.67 * x_S1_C3 + 447.75 * x_S1_C4 + 537.34 * x_S3_C1 + 23.76 * x_S3_C2 + 498.95 * x_S3_C3 + 440.60 * x_S3_C4 + 1791.49 * x_S4_C1 + 68.21 * x_S4_C2 + 1432.48 * x_S4_C3 + 1527.76 * x_S4_C4, GRB.MINIMIZE)

# Constraints
m.addConstr(x_S1_C1 + x_S2_C1 + x_S3_C1 + x_S4_C1 >= 94, name="demand_constraint1")
m.addConstr(x_S1_C2 + x_S2_C2 + x_S3_C2 + x_S4_C2 >= 39, name="demand_constraint2")
m.addConstr(x_S1_C3 + x_S2_C3 + x_S3_C3 + x_S4_C3 >= 65, name="demand_constraint3")
m.addConstr(x_S1_C4 + x_S2_C4 + x_S3_C4 + x_S4_C4 >= 435, name="demand_constraint4")
m.addConstr(x_S1_C1 + x_S1_C2 + x_S1_C3 + x_S1_C4 <= 2531, name="capacity_constraint1")
m.addConstr(x_S2_C1 + x_S2_C2 + x_S2_C3 + x_S2_C4 <= 20, name="capacity_constraint2")
m.addConstr(x_S3_C1 + x_S3_C2 + x_S3_C3 + x_S3_C4 <= 210, name="capacity_constraint3")
m.addConstr(x_S4_C1 + x_S4_C2 + x_S4_C3 + x_S4_C4 <= 241, name="capacity_constraint4")

# Solve the model
m.optimize()
        """

    elif selected_problem in ("Resource Allocation", "RA", "Resource Allocation Problem"):
        prompt += r"""
For example, here is a simple instance for reference:

Always remember: If not specified. All the variables are non-negative interger.

Mathematical Optimization Model:

Objective Function:
$\quad \quad \max \quad \sum_i \sum_j p_i \cdot x_{ij}$

Constraints
1. Capacity Constraint:
$\quad \quad \sum_i a_i \cdot x_{ij} \leq c_j, \quad \forall j$
2. Non-negativity Constraint:
$\quad \quad x_{ij} \geq 0, \quad \forall i,j$

Retrieved Information
$\small p = [321, 309, 767, 300, 763, 318, 871, 522, 300, 275, 858, 593, 126, 460, 685, 443, 700, 522, 940, 598]$
$\small a = [495, 123, 165, 483, 472, 258, 425, 368, 105, 305, 482, 387, 469, 341, 318, 104, 377, 213, 56, 131]$
$\small c = [4466]$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Optimization_Model")

# Coefficients for the objective function
c = [321, 309, 767, 300, 763, 318, 871, 522, 300, 275, 858, 593, 126, 460, 685, 443, 700, 522, 940, 598]

# Coefficients for the capacity constraint
w = [495, 123, 165, 483, 472, 258, 425, 368, 105, 305, 482, 387, 469, 341, 318, 104, 377, 213, 56, 131]

# Decision variables
x = m.addVars(20, vtype=GRB.INTEGER, name="x")

# Objective function
m.setObjective(sum(x[i]*c[i] for i in range(20)), GRB.MAXIMIZE)

# Constraints
m.addConstr(sum(x[i]*w[i] for i in range(20)) <= 4466, name="capacity_constraint")

# Solve the model
m.optimize()
```

-----
Here is another simple instance for reference:

Objective Function:
$\quad \quad \max \quad \sum_i p_i \cdot x_i$

Constraints
1. Capacity Constraint:
$\quad \quad \sum_i a_i \cdot x_i \leq 180$
2. Dependency Constraint:
$\quad \quad x_1 \leq x_3$
3. Non-negativity Constraint:
$\quad \quad x_i \geq 0, \quad \forall i$

Retrieved Information
$\small p = [888, 134, 129, 370, 921, 765, 154, 837, 584, 365]$
$\small a = [4, 2, 4, 3, 2, 1, 2, 1, 3, 3]$

The corresponding Python code for this instance is as follows:

import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(10, vtype=GRB.INTEGER, name="x")

# Objective function
p = [888, 134, 129, 370, 921, 765, 154, 837, 584, 365]
m.setObjective(sum(x[i]*p[i] for i in range(10)), GRB.MAXIMIZE)

# Constraints
a = [4, 2, 4, 3, 2, 1, 2, 1, 3, 3]
m.addConstr(sum(x[i]*a[i] for i in range(10)) <= 180, name="capacity_constraint")
m.addConstr(x[0] <= x[2], name="dependency_constraint")

# Solve the model
m.optimize()

        """
    else:
        # Generic fallback: small LP/IP examples followed by CFLP, TSP and
        # max-flow references.
        prompt += r"""
For example, here is a simple instance for reference:

Mathematical Optimization Model:
Maximize 5x_S + 8x_F
Subject to
    2x_S + 5x_F <= 200
    x_S <= 0.3(x_S + x_F)
    x_F >= 10
    x_S, x_F _ Z+

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Worker_Optimization")

# Decision variables for the number of seasonal (x_S) and full-time (x_F) workers
x_S = m.addVar(vtype=GRB.INTEGER, lb=0, name="x_S")  # Number of seasonal workers
x_F = m.addVar(vtype=GRB.INTEGER, lb=0, name="x_F")  # Number of full-time workers

# Objective function: Maximize Z = 5x_S + 8x_F
m.setObjective(5 * x_S + 8 * x_F, GRB.MAXIMIZE)

# Constraints
m.addConstr(2 * x_S + 5 * x_F <= 200, name="resource_constraint")
m.addConstr(x_S <= 0.3 * (x_S + x_F), name="seasonal_ratio_constraint")
m.addConstr(x_F >= 10, name="full_time_minimum_constraint")

# Non-negativity constraints are implicitly handled by the integer constraints (x_S, x_F >= 0)

# Solve the model
m.optimize()
```
The another example is:

Mathematical Optimization Model:
Minimize 919x_11 + 556x_12 + 951x_13 + 21x_21 + 640x_22 + 409x_23 + 59x_31 + 786x_32 + 304x_33
Subject to
    x_11 + x_12 + x_13 = 1
    x_21 + x_22 + x_23 = 1
    x_31 + x_32 + x_33 = 1
    x_11 + x_21 + x_31 = 1
    x_12 + x_22 + x_32 = 1
    x_13 + x_23 + x_33 = 1
    x_11, x_12, x_13, x_21, x_22, x_23, x_31, x_32, x_33 ∈ {0,1}

The corresponding Python code for this instance is as follows:

```python

import gurobipy as gp
from gurobipy import GRB
import numpy as np

# Data
c = np.array([
    [919, 556, 951],
    [21, 640, 409],
    [59, 786, 304]
])

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(c.shape[0], c.shape[1], vtype=GRB.BINARY, name="x")

# Objective function
m.setObjective(gp.quicksum(c[i, j]*x[i, j] for i in range(c.shape[0]) for j in range(c.shape[1])), GRB.MINIMIZE)

# Constraints
for i in range(c.shape[0]):
    m.addConstr(gp.quicksum(x[i, j] for j in range(c.shape[1])) == 1, name=f"row_constraint_{i}")

for j in range(c.shape[1]):
    m.addConstr(gp.quicksum(x[i, j] for i in range(c.shape[0])) == 1, name=f"col_constraint_{j}")

# Solve the model
m.optimize()
```
"""
        prompt += r"""

-----  Here is a Capacitated Facility Location (MIP) instance for reference:-----

Mathematical Optimization Model:
\[
\begin{aligned}
\min\;& 10000\,y_1 + 15010\,y_2 + 12000\,y_3 \\
& + 5x_{11} + 7x_{12} + 3x_{13} + 4x_{14} \\
& + 6x_{21} + 4x_{22} + 5x_{23} + 3x_{24} \\
& + 2x_{31} + 6x_{32} + 7x_{33} + 4x_{34} \\
\text{s.t.}\;& \sum_{i=1}^{3} x_{ij} \ge d_j && \forall j=1,\dots,4 \\
& \sum_{j=1}^{4} x_{ij} \le s_i\,y_i && \forall i=1,\dots,3 \\
& d = \{100, 150, 200, 120\} \\
& s = \{300, 400, 250\} \\
& y_i \in \{0,1\} && \forall i \\
& x_{ij} \ge 0 && \forall i,j
\end{aligned}
\]

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Data
facilities = [1, 2, 3]
customers = [1, 2, 3, 4]

# Fixed costs
fixed_costs = {1: 10000, 2: 15010, 3: 12000}

# Capacities
capacities = {1: 300, 2: 400, 3: 250}

# Demands
demands = {1: 100, 2: 150, 3: 200, 4: 120}

# Variable costs
var_costs = {
    (1, 1): 5, (1, 2): 7, (1, 3): 3, (1, 4): 4,
    (2, 1): 6, (2, 2): 4, (2, 3): 5, (2, 4): 3,
    (3, 1): 2, (3, 2): 6, (3, 3): 7, (3, 4): 4,
}
arcs = var_costs.keys()

m = gp.Model("CFLP_Example")

# Variables
y = m.addVars(facilities, vtype=GRB.BINARY, name="y")
x = m.addVars(arcs, name="x") # default lb=0

# Objective
obj_fixed = y.prod(fixed_costs)
obj_variable = x.prod(var_costs)
m.setObjective(obj_fixed + obj_variable, GRB.MINIMIZE)

# Demand Constraints
for j in customers:
    m.addConstr(x.sum('*', j) >= demands[j], name=f"demand_{j}")

# Capacity Constraints
for i in facilities:
    m.addConstr(x.sum(i, '*') <= capacities[i] * y[i], name=f"capacity_{i}")

m.optimize()
```

----- Here is a Traveling Salesperson Problem (TSP) instance for reference: -----

Mathematical Optimization Model:
\[
\begin{aligned}
\min \quad & 15x_{12}+25x_{13}+35x_{14}+18x_{21}+30x_{23}+40x_{24} \\
& + 28x_{31}+20x_{32}+38x_{34}+45x_{41}+50x_{42}+55x_{43} \\
\text{s.t.}\quad & \sum_{j\neq i} x_{ij}=1 && \forall i=1,\dots,4 \\
& \sum_{i\neq j} x_{ij}=1 && \forall j=1,\dots,4 \\
& u_i - u_j + 4x_{ij} \le 3 && \forall i,j \in \{1,\dots,4\}, i \neq j \\
& 1 \le u_i \le 4 && \forall i=1,\dots,4 \\
& x_{ij} \in \{0,1\} && \forall i \neq j \\
& u_i \in \mathbb{Z} && \forall i
\end{aligned}
\]

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

n = 4
nodes = range(1, n + 1)

costs = {
    (1, 2): 15, (1, 3): 25, (1, 4): 35,
    (2, 1): 18, (2, 3): 30, (2, 4): 40,
    (3, 1): 28, (3, 2): 20, (3, 4): 38,
    (4, 1): 45, (4, 2): 50, (4, 3): 55,
}
arcs = costs.keys()

m = gp.Model("TSP_Example")

# Variables
x = m.addVars(arcs, vtype=GRB.BINARY, name="x")
u = m.addVars(nodes, vtype=GRB.INTEGER, lb=1, ub=n, name="u")

# Objective
m.setObjective(x.prod(costs), GRB.MINIMIZE)

# Constraints
for i in nodes:
    m.addConstr(x.sum(i, '*') == 1, name=f"leave_{i}")

for j in nodes:
    m.addConstr(x.sum('*', j) == 1, name=f"enter_{j}")

# MTZ Subtour Elimination
for i, j in arcs:
    m.addConstr(u[i] - u[j] + n * x[i, j] <= n - 1, name=f"MTZ_{i}_{j}")

m.optimize()
```
----- Here is a Maximum Flow Problem (LP) instance for reference: -----

Mathematical Optimization Model:
\[
\begin{aligned}
\max\;& F \\
\text{s.t. }
& f_{01}+f_{02} = F && \text{(Source 0)} \\
& f_{12}+f_{13}+f_{14} = f_{01} && \text{(Node 1)} \\
& f_{23}+f_{24} = f_{02} + f_{12} && \text{(Node 2)} \\
& f_{34} = f_{13} + f_{23} && \text{(Node 3)} \\
& f_{14}+f_{24}+f_{34} = F && \text{(Sink 4)} \\
& f_{01} \le 25, f_{02} \le 35 \\
& f_{12} \le 12, f_{13} \le 18, f_{14} \le 8 \\
& f_{23} \le 12, f_{24} \le 22 \\
& f_{34} \le 30 \\
& f_{ij} \ge 0, F \ge 0
\end{aligned}
\]

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

capacities = {
    (0, 1): 25, (0, 2): 35,
    (1, 2): 12, (1, 3): 18, (1, 4): 8,
    (2, 3): 12, (2, 4): 22,
    (3, 4): 30
}
arcs = capacities.keys()
nodes = [0, 1, 2, 3, 4]
source = 0
sink = 4

m = gp.Model("MaxFlow_Example")

# Variables
f = m.addVars(arcs, name="f") # default lb=0
F = m.addVar(name="F", lb=0)

# Objective
m.setObjective(F, GRB.MAXIMIZE)

# Capacity Constraints
m.addConstrs((f[i, j] <= capacities[i, j] for i, j in arcs), name="cap")

# Balance Constraints
# Source
m.addConstr(f.sum(source, '*') - f.sum('*', source) == F, name="source_balance")

# Sink
m.addConstr(f.sum('*', sink) - f.sum(sink, '*') == F, name="sink_balance")

# Transshipment nodes
for i in [1, 2, 3]:
    m.addConstr(f.sum('*', i) - f.sum(i, '*') == 0, name=f"balance_{i}")

m.optimize()
```
"""

    messages = [
        HumanMessage(content=prompt)
    ]

    response = llm_code.invoke(messages)

    print(response.content)

    return response.content



## Run!

In [None]:
def run_test(test, agent):
    """Classify, model and generate code for every query row in ``test``.

    For each row the classification agent is asked for the problem type, the
    matching ``get_*_response`` builder produces the mathematical model, and
    ``get_code`` turns it into solver code.

    Args:
        test: DataFrame with a ``Query`` column. When it also has a
            ``Dataset_address`` column, the CSV-backed builders are used.
        agent: Classification agent exposing ``invoke`` and returning a
            mapping with an ``'output'`` entry.

    Returns:
        ``(output_model, output_code, classification)``: three lists with
        exactly one entry per input row, in row order. A row that hits a
        connection error keeps whatever was produced before the failure and
        holds ``None`` for the rest, so the lists always stay aligned with
        ``test`` and can be placed next to ``test['Query']`` in a DataFrame.
    """
    nrm_labels = ("Network Revenue Management", "NRM", "Network Revenue Management Problem")
    ra_labels = ("Resource Allocation", "RA", "Resource Allocation Problem")
    tp_labels = ("Transportation", "TP", "Transportation Problem")
    flp_labels = ("Facility Location Problem", "FLP", "Uncapacited Facility Location", "UFLP")
    ap_labels = ("Assignment Problem", "AP")

    def extract_problem_type(output_text):
        # Return the first known problem-type label found in the agent answer.
        pattern = r'(Network Revenue Management|Network Revenue Management Problem|Resource Allocation|Resource Allocation Problem|Transportation|Transportation Problem|Facility Location Problem|Assignment Problem|AP|Uncapacited Facility Location Problem|NRM|RA|TP|FLP|UFLP|Others without CSV|Sales-Based Linear Programming|SBLP|Others with CSV)'
        match = re.search(pattern, output_text, re.IGNORECASE)
        return match.group(0) if match else None

    def csv_detect(row):
        # A row is CSV-backed when the frame carries a Dataset_address column.
        return 1 if 'Dataset_address' in row.index else 0

    output_model = []
    output_code = []
    classification = []
    for index, row in test.iterrows():
        # Per-row results; they stay None if the row fails part-way through.
        label = None
        output = None
        code_response = None
        failed = False
        try:
            query = row['Query']
            response = agent.invoke(f"What is the problem type of the text? text:{query}")

            selected_problem = extract_problem_type(response['output'])
            label = selected_problem

            if csv_detect(row):
                dataset_address = row['Dataset_address']
                if selected_problem in nrm_labels:
                    print("----------Network Revenue Management-----------")
                    output = get_NRM_response(query, dataset_address)
                    code_response = get_code(output, selected_problem)

                elif selected_problem in ra_labels:
                    print("----------Resource Allocation-----------")
                    output = get_RA_response(query, dataset_address)
                    code_response = get_code(output, selected_problem)

                elif selected_problem in tp_labels:
                    print("----------Transportation-----------")
                    output = get_TP_response(query, dataset_address)
                    code_response = get_code(output, selected_problem)

                elif selected_problem in flp_labels:
                    print("----------Facility Location Problem-----------")
                    output = get_FLP_response(query, dataset_address)
                    code_response = get_code(output, selected_problem)

                elif selected_problem in ap_labels:
                    print("----------Assignment Problem-----------")
                    output = get_AP_response(query, dataset_address)
                    code_response = get_code(output, selected_problem)
                else:
                    print("----------Others with CSV-----------")
                    output = get_Others_response(query, dataset_address)

                    # The builder may already return code; only call the
                    # code-generation LLM when it returned a math model.
                    if output.strip().startswith("```python"):
                        print("[INFO] Output is already Gurobi code. Skipping get_code.")
                        code_response = output.strip().replace("```python", "").replace("```", "")
                    else:
                        print("[INFO] Output is a Math Model. Calling get_code to convert.")
                        selected_problem = 'Others with CSV'
                        code_response = get_code(output, selected_problem)

            else:
                print("----------Others without CSV-----------")
                output = get_others_without_CSV_response(query)
                code_response = get_code(output, selected_problem)
        except requests.exceptions.RequestException as e:
            print(f"Connection error: {e}")
            failed = True

        # Always record one entry per row so the three lists stay aligned.
        classification.append(label)
        output_model.append(output)
        output_code.append(code_response)

        if not failed:
            # Pause between successful rows (failed rows move on immediately).
            time.sleep(15)
    return output_model, output_code, classification

# Testing

## Preparation:

In [None]:
def read_and_combine_csvs(file_order):
    """Load the existing CSV files in ``file_order`` and stack them row-wise.

    Missing paths are reported and skipped. The combined frame gets a fresh
    0..n-1 index.

    Raises:
        ValueError: if none of the listed files exists.
    """
    frames = []
    for path in file_order:
        if not os.path.exists(path):
            print(f"File doesn't exist: {path}, already skipped")
            continue
        frame = pd.read_csv(path)
        frames.append(frame)
        print(f"Read file: {path} (Row length: {len(frame)})")

    if len(frames) == 0:
        raise ValueError("No effective files")

    combined = pd.concat(frames, ignore_index=True)
    return combined

def run_gurobi_code(code_str):
    """Execute generated solver code and return the optimal objective value.

    The code is run with ``exec`` in a fresh namespace while stdout/stderr are
    captured and discarded. A short epilogue is appended that stores
    ``m.ObjVal`` in ``__result__`` when the model named ``m`` has status
    ``GRB.OPTIMAL``.

    Args:
        code_str: Python source that is expected to build and optimize a
            model bound to the name ``m``.

    Returns:
        The objective value, or ``None`` when the model is not optimal or the
        code fails in any way (the error message is printed).

    WARNING: ``exec`` runs arbitrary code with full builtins. Only feed this
    function code from a source you are willing to execute on this machine.
    """
    env = {'__builtins__': __builtins__}
    # Pre-seed gurobipy handles only when the notebook has imported them;
    # generated code normally imports gurobipy itself, so a missing global
    # must not abort the run.
    for name in ('gp', 'GRB'):
        if name in globals():
            env[name] = globals()[name]

    try:
        with StringIO() as buf, contextlib.redirect_stdout(buf), contextlib.redirect_stderr(buf):
            code_str += "\n\n# Added by executor\n"
            code_str += "if hasattr(m, 'status') and m.status == GRB.OPTIMAL:\n"
            code_str += "    __result__ = m.ObjVal\n"
            code_str += "else:\n"
            code_str += "    __result__ = None\n"

            exec(code_str, env)
            return env.get('__result__', None)
    except (Exception, SystemExit) as e:
        # SystemExit is included so that generated code calling exit() cannot
        # terminate the whole evaluation run.
        print(f"Execution error: {str(e)}")
        return None
    finally:
        # Release the model even when execution failed after creating it.
        model = env.pop('m', None)
        if hasattr(model, 'dispose'):
            try:
                model.dispose()
            except Exception:
                # Best-effort cleanup; a failing dispose must not mask the result.
                pass


### Test Large Scale OR

In [None]:
# Load the large-scale benchmark and cut it into four row batches
# (30 / 30 / 20 / remainder) that are processed in separate cells.
test = pd.read_csv('Test_Dataset/Large-scale-or/Large-scale-or-101.csv')
test_1, test_2, test_3, test_4 = (
    test[:30],
    test[30:60],
    test[60:80],
    test[80:],
)

In [None]:
# Large-scale batch 1: run the pipeline and persist its outputs.
output_model, output_code, classification = run_test(test_1, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_1['Query'],
        'model_output': output_model,
        'code_output': output_code,
        'classification': classification,
    }
)
output_df.to_csv("Large-scale-or-Lean-4.1_1.csv", index=False)

In [None]:
# Large-scale batch 2: run the pipeline and persist its outputs.
output_model, output_code, classification = run_test(test_2, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_2['Query'],
        'model_output': output_model,
        'code_output': output_code,
        'classification': classification,
    }
)
output_df.to_csv("Large-scale-or-Lean-4.1_2.csv", index=False)

In [None]:
# Large-scale batch 3: run the pipeline and persist its outputs.
output_model, output_code, classification = run_test(test_3, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_3['Query'],
        'model_output': output_model,
        'code_output': output_code,
        'classification': classification,
    }
)
output_df.to_csv("Large-scale-or-Lean-4.1_3.csv", index=False)

In [None]:
# Large-scale batch 4: run the pipeline and persist its outputs.
output_model, output_code, classification = run_test(test_4, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_4['Query'],
        'model_output': output_model,
        'code_output': output_code,
        'classification': classification,
    }
)
output_df.to_csv("Large-scale-or-Lean-4.1_4.csv", index=False)

In [None]:
# Stitch the four large-scale batch files back together, in batch order.
file_order = [
    "Large-scale-or-Lean-4.1_1.csv",
    "Large-scale-or-Lean-4.1_2.csv",
    "Large-scale-or-Lean-4.1_3.csv",
    "Large-scale-or-Lean-4.1_4.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    # NOTE(review): unlike the other result files this one is written with
    # the DataFrame index as an extra leading column - confirm that is wanted.
    combined_df.to_csv('Large-scale-or-Lean-4.1_final.csv')
    print(f"Total rows: {len(combined_df)}")
except Exception as err:
    print(f"File processing failed: {str(err)}")
    sys.exit(1)

### Test NL4OPT

In [None]:
# Load NL4OPT and cut it into batches of 30 rows (the last batch takes the rest).
test_nl4opt = pd.read_excel('Test_Dataset/Small-scale/NL4OPT NEW.xlsx')
(
    test_nl4opt1,
    test_nl4opt2,
    test_nl4opt3,
    test_nl4opt4,
    test_nl4opt5,
    test_nl4opt6,
    test_nl4opt7,
    test_nl4opt8,
) = (
    test_nl4opt[:30],
    test_nl4opt[30:60],
    test_nl4opt[60:90],
    test_nl4opt[90:120],
    test_nl4opt[120:150],
    test_nl4opt[150:180],
    test_nl4opt[180:210],
    test_nl4opt[210:],
)
# Display the first batch as the cell output.
test_nl4opt1

In [None]:
# NL4OPT rows 1-30: run the pipeline and persist model/code outputs.
output_model_nl4opt1, output_code_nl4opt1, classification = run_test(
    test_nl4opt1, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt1['Query'],
        'model_output': output_model_nl4opt1,
        'code_output': output_code_nl4opt1,
    }
)
output_df.to_csv("NL4OPT_1-30.csv", index=False)

In [None]:
# NL4OPT rows 31-60: run the pipeline and persist model/code outputs.
output_model_nl4opt2, output_code_nl4opt2, classification = run_test(
    test_nl4opt2, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt2['Query'],
        'model_output': output_model_nl4opt2,
        'code_output': output_code_nl4opt2,
    }
)
output_df.to_csv("NL4OPT_31-60.csv", index=False)

In [None]:
# NL4OPT rows 61-90: run the pipeline and persist model/code outputs.
output_model_nl4opt3, output_code_nl4opt3, classification = run_test(
    test_nl4opt3, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt3['Query'],
        'model_output': output_model_nl4opt3,
        'code_output': output_code_nl4opt3,
    }
)
output_df.to_csv("NL4OPT_61-90.csv", index=False)

In [None]:
# NL4OPT rows 91-120: run the pipeline and persist model/code outputs.
output_model_nl4opt4, output_code_nl4opt4, classification = run_test(
    test_nl4opt4, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt4['Query'],
        'model_output': output_model_nl4opt4,
        'code_output': output_code_nl4opt4,
    }
)
output_df.to_csv("NL4OPT_91-120.csv", index=False)

In [None]:
# NL4OPT rows 121-150: run the pipeline and persist model/code outputs.
output_model_nl4opt5, output_code_nl4opt5, classification = run_test(
    test_nl4opt5, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt5['Query'],
        'model_output': output_model_nl4opt5,
        'code_output': output_code_nl4opt5,
    }
)
output_df.to_csv("NL4OPT_121-150.csv", index=False)

In [None]:
# NL4OPT rows 151-180: run the pipeline and persist model/code outputs.
output_model_nl4opt6, output_code_nl4opt6, classification = run_test(
    test_nl4opt6, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt6['Query'],
        'model_output': output_model_nl4opt6,
        'code_output': output_code_nl4opt6,
    }
)
output_df.to_csv("NL4OPT_151-180.csv", index=False)

In [None]:
# NL4OPT rows 181-210: run the pipeline and persist model/code outputs.
output_model_nl4opt7, output_code_nl4opt7, classification = run_test(
    test_nl4opt7, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt7['Query'],
        'model_output': output_model_nl4opt7,
        'code_output': output_code_nl4opt7,
    }
)
output_df.to_csv("NL4OPT_181-210.csv", index=False)

In [None]:
# NL4OPT remaining rows (211 onward): run the pipeline and persist outputs.
output_model_nl4opt8, output_code_nl4opt8, classification = run_test(
    test_nl4opt8, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt8['Query'],
        'model_output': output_model_nl4opt8,
        'code_output': output_code_nl4opt8,
    }
)
output_df.to_csv("NL4OPT_211-245.csv", index=False)

In [None]:
# Step 1: merge the NL4OPT batch files, in batch order.
file_order = [
    "NL4OPT_1-30.csv",
    "NL4OPT_31-60.csv",
    "NL4OPT_61-90.csv",
    "NL4OPT_91-120.csv",
    "NL4OPT_121-150.csv",
    "NL4OPT_151-180.csv",
    "NL4OPT_181-210.csv",
    "NL4OPT_211-245.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

# Step 2: execute every generated program and collect its objective value.
print("\n Running Gurobi Code...")
results = []
start_time = time.time()

for i, row in combined_df.iterrows():
    code = row['code_output']
    print(f"Processing row {i+1}/{len(combined_df)}...", end='\r')

    if isinstance(code, str):
        # Strip surrounding whitespace first so the ^/$ anchors actually see
        # the Markdown code fences.
        code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
        result = run_gurobi_code(code)
    else:
        # Empty cells come back from CSV as NaN; there is nothing to execute.
        result = None
    results.append(result)

# Add results column
combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
print(f"Success: {len([x for x in results if x is not None])}/{len(results)}")

# Step 3: Save results
output_file = "NL4OPT4.1_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test IndustryOR

In [None]:
# Load IndustryOR and cut it into batches of 25 rows (the last takes the rest).
test_industryOR = pd.read_csv('Test_Dataset/Small-scale/IndustryOR_fixedV2.csv')
test_industryOR1, test_industryOR2, test_industryOR3, test_industryOR4 = (
    test_industryOR[:25],
    test_industryOR[25:50],
    test_industryOR[50:75],
    test_industryOR[75:],
)
# Display the full dataset as the cell output.
test_industryOR

In [None]:
# IndustryOR rows 1-25: run the pipeline and persist model/code outputs.
output_model_industryOR1, output_code_industryOR1, classification = run_test(
    test_industryOR1, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR1['Query'],
        'model_output': output_model_industryOR1,
        'code_output': output_code_industryOR1,
    }
)
output_df.to_csv("IndustryOR_1-25.csv", index=False)

In [None]:
# IndustryOR rows 26-50: run the pipeline and persist model/code outputs.
output_model_industryOR2, output_code_industryOR2, classification = run_test(
    test_industryOR2, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR2['Query'],
        'model_output': output_model_industryOR2,
        'code_output': output_code_industryOR2,
    }
)
output_df.to_csv("IndustryOR_26-50.csv", index=False)

In [None]:
# IndustryOR batch 3: run the pipeline and save query / model output / code
# output to this batch's CSV.
output_model_industryOR3, output_code_industryOR3, classification = run_test(
    test_industryOR3, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR3['Query'],
        'model_output': output_model_industryOR3,
        'code_output': output_code_industryOR3,
    }
)
output_df.to_csv("IndustryOR_51-75.csv", index=False)

In [None]:
# IndustryOR batch 4 (the remaining rows): run the pipeline and save query /
# model output / code output to this batch's CSV.
output_model_industryOR4, output_code_industryOR4, classification = run_test(
    test_industryOR4, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR4['Query'],
        'model_output': output_model_industryOR4,
        'code_output': output_code_industryOR4,
    }
)
output_df.to_csv("IndustryOR_76-100.csv", index=False)

In [None]:
# Per-batch IndustryOR output files, listed in dataset order so the combined
# rows keep the ordering of the benchmark.
file_order = [
    "IndustryOR_1-25.csv",
    "IndustryOR_26-50.csv",
    "IndustryOR_51-75.csv",
    "IndustryOR_76-100.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    # Nothing below can run without the combined frame, so stop the cell here.
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

for position, code in enumerate(combined_df['code_output'], start=1):
    # Count by position rather than by index label so the progress display
    # stays correct even if the combined frame keeps per-file indices.
    print(f"Processing row {position}/{total_rows}...", end='\r')

    if not isinstance(code, str):
        # An empty code_output cell is read back from CSV as NaN (a float);
        # re.sub would raise TypeError on it and abort the whole batch.
        # Record None instead, which the success count below treats as failed.
        results.append(None)
        continue

    # Strip surrounding whitespace first: the ^/$ anchors only match at the
    # very start/end of the string, so a fence preceded or followed by blank
    # lines would otherwise be left in the code.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    results.append(run_gurobi_code(code))

# Attach one result per row (None marks a row that produced no value).
combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

# Persist the combined rows together with their objective values.
output_file = "IndustryOR_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test MAMO EasyLP (MAMOe)

In [None]:
# Load the MAMO EasyLP benchmark and cut it into batches: six of 100 rows
# followed by a final batch with whatever rows remain.
test_MAMOe = pd.read_csv('Test_Dataset/Small-scale/MAMO_EasyLP_fixed.csv')
(
    test_MAMOe1,
    test_MAMOe2,
    test_MAMOe3,
    test_MAMOe4,
    test_MAMOe5,
    test_MAMOe6,
) = (test_MAMOe[start:start + 100] for start in range(0, 600, 100))
test_MAMOe7 = test_MAMOe[600:]

In [None]:
# MAMOe batch 1: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOe1, output_code_MAMOe1, classification = run_test(
    test_MAMOe1, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe1['Query'],
        'model_output': output_model_MAMOe1,
        'code_output': output_code_MAMOe1,
    }
)
output_df.to_csv("4.1MAMOe1.csv", index=False)

In [None]:
# MAMOe batch 2: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOe2, output_code_MAMOe2, classification = run_test(
    test_MAMOe2, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe2['Query'],
        'model_output': output_model_MAMOe2,
        'code_output': output_code_MAMOe2,
    }
)
output_df.to_csv("4.1MAMOe2.csv", index=False)

In [None]:
# MAMOe batch 3: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOe3, output_code_MAMOe3, classification = run_test(
    test_MAMOe3, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe3['Query'],
        'model_output': output_model_MAMOe3,
        'code_output': output_code_MAMOe3,
    }
)
output_df.to_csv("4.1MAMOe3.csv", index=False)

In [None]:
# MAMOe batch 4: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOe4, output_code_MAMOe4, classification = run_test(
    test_MAMOe4, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe4['Query'],
        'model_output': output_model_MAMOe4,
        'code_output': output_code_MAMOe4,
    }
)
output_df.to_csv("4.1MAMOe4.csv", index=False)

In [None]:
# MAMOe batch 5: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOe5, output_code_MAMOe5, classification = run_test(
    test_MAMOe5, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe5['Query'],
        'model_output': output_model_MAMOe5,
        'code_output': output_code_MAMOe5,
    }
)
output_df.to_csv("4.1MAMOe5.csv", index=False)

In [None]:
# MAMOe batch 6: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOe6, output_code_MAMOe6, classification = run_test(
    test_MAMOe6, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe6['Query'],
        'model_output': output_model_MAMOe6,
        'code_output': output_code_MAMOe6,
    }
)
output_df.to_csv("4.1MAMOe6.csv", index=False)

In [None]:
# MAMOe batch 7 (the remaining rows): run the pipeline and save query / model
# output / code output to this batch's CSV.
output_model_MAMOe7, output_code_MAMOe7, classification = run_test(
    test_MAMOe7, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOe7['Query'],
        'model_output': output_model_MAMOe7,
        'code_output': output_code_MAMOe7,
    }
)
output_df.to_csv("4.1MAMOe7.csv", index=False)

In [None]:
# Per-batch MAMOe output files, listed in dataset order so the combined rows
# keep the ordering of the benchmark.
file_order = [
    "4.1MAMOe1.csv",
    "4.1MAMOe2.csv",
    "4.1MAMOe3.csv",
    "4.1MAMOe4.csv",
    "4.1MAMOe5.csv",
    "4.1MAMOe6.csv",
    "4.1MAMOe7.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    # Nothing below can run without the combined frame, so stop the cell here.
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

for position, code in enumerate(combined_df['code_output'], start=1):
    # Count by position rather than by index label so the progress display
    # stays correct even if the combined frame keeps per-file indices.
    print(f"Processing row {position}/{total_rows}...", end='\r')

    if not isinstance(code, str):
        # An empty code_output cell is read back from CSV as NaN (a float);
        # re.sub would raise TypeError on it and abort the whole batch.
        # Record None instead, which the success count below treats as failed.
        results.append(None)
        continue

    # Strip surrounding whitespace first: the ^/$ anchors only match at the
    # very start/end of the string, so a fence preceded or followed by blank
    # lines would otherwise be left in the code.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    results.append(run_gurobi_code(code))

# Attach one result per row (None marks a row that produced no value).
combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

# Persist the combined rows together with their objective values.
output_file = "MAMOe4.1_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test MAMO ComplexLP (MAMOc)

In [None]:
# Load the MAMO ComplexLP benchmark and cut it into batches: seven of 25 rows
# followed by a final batch with whatever rows remain.
test_MAMOc = pd.read_csv('Test_Dataset/Small-scale/MAMO_ComplexLP_fixed.csv')
(
    test_MAMOc1,
    test_MAMOc2,
    test_MAMOc3,
    test_MAMOc4,
    test_MAMOc5,
    test_MAMOc6,
    test_MAMOc7,
) = (test_MAMOc[start:start + 25] for start in range(0, 175, 25))
test_MAMOc8 = test_MAMOc[175:]
# Bare expression: shows the last batch as the cell output in a notebook.
test_MAMOc8

In [None]:
# MAMOc batch 1: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc1, output_code_MAMOc1, classification = run_test(
    test_MAMOc1, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc1['Query'],
        'model_output': output_model_MAMOc1,
        'code_output': output_code_MAMOc1,
    }
)
output_df.to_csv("4.1MAMOc_1-25.csv", index=False)

In [None]:
# MAMOc batch 2: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc2, output_code_MAMOc2, classification = run_test(
    test_MAMOc2, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc2['Query'],
        'model_output': output_model_MAMOc2,
        'code_output': output_code_MAMOc2,
    }
)
output_df.to_csv("4.1MAMOc_26-50.csv", index=False)

In [None]:
# MAMOc batch 3: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc3, output_code_MAMOc3, classification = run_test(
    test_MAMOc3, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc3['Query'],
        'model_output': output_model_MAMOc3,
        'code_output': output_code_MAMOc3,
    }
)
output_df.to_csv("4.1MAMOc_51-75.csv", index=False)

In [None]:
# MAMOc batch 4: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc4, output_code_MAMOc4, classification = run_test(
    test_MAMOc4, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc4['Query'],
        'model_output': output_model_MAMOc4,
        'code_output': output_code_MAMOc4,
    }
)
output_df.to_csv("4.1MAMOc_76-100.csv", index=False)

In [None]:
# MAMOc batch 5: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc5, output_code_MAMOc5, classification = run_test(
    test_MAMOc5, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc5['Query'],
        'model_output': output_model_MAMOc5,
        'code_output': output_code_MAMOc5,
    }
)
output_df.to_csv("4.1MAMOc_101-125.csv", index=False)

In [None]:
# MAMOc batch 6: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc6, output_code_MAMOc6, classification = run_test(
    test_MAMOc6, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc6['Query'],
        'model_output': output_model_MAMOc6,
        'code_output': output_code_MAMOc6,
    }
)
output_df.to_csv("4.1MAMOc_126-150.csv", index=False)

In [None]:
# MAMOc batch 7: run the pipeline and save query / model output / code output
# to this batch's CSV.
output_model_MAMOc7, output_code_MAMOc7, classification = run_test(
    test_MAMOc7, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc7['Query'],
        'model_output': output_model_MAMOc7,
        'code_output': output_code_MAMOc7,
    }
)
output_df.to_csv("4.1MAMOc_151-175.csv", index=False)

In [None]:
# MAMOc batch 8 (the remaining rows): run the pipeline and save query / model
# output / code output to this batch's CSV.
output_model_MAMOc8, output_code_MAMOc8, classification = run_test(
    test_MAMOc8, classification_agent
)
output_df = pd.DataFrame(
    {
        'Query': test_MAMOc8['Query'],
        'model_output': output_model_MAMOc8,
        'code_output': output_code_MAMOc8,
    }
)
output_df.to_csv("4.1MAMOc_176-202.csv", index=False)

In [None]:
# Per-batch MAMOc output files, listed in dataset order so the combined rows
# keep the ordering of the benchmark.
file_order = [
    "4.1MAMOc_1-25.csv",
    "4.1MAMOc_26-50.csv",
    "4.1MAMOc_51-75.csv",
    "4.1MAMOc_76-100.csv",
    "4.1MAMOc_101-125.csv",
    "4.1MAMOc_126-150.csv",
    "4.1MAMOc_151-175.csv",
    "4.1MAMOc_176-202.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    # Nothing below can run without the combined frame, so stop the cell here.
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

for position, code in enumerate(combined_df['code_output'], start=1):
    # Count by position rather than by index label so the progress display
    # stays correct even if the combined frame keeps per-file indices.
    print(f"Processing row {position}/{total_rows}...", end='\r')

    if not isinstance(code, str):
        # An empty code_output cell is read back from CSV as NaN (a float);
        # re.sub would raise TypeError on it and abort the whole batch.
        # Record None instead, which the success count below treats as failed.
        results.append(None)
        continue

    # Strip surrounding whitespace first: the ^/$ anchors only match at the
    # very start/end of the string, so a fence preceded or followed by blank
    # lines would otherwise be left in the code.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    results.append(run_gurobi_code(code))

# Attach one result per row (None marks a row that produced no value).
combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

# Persist the combined rows together with their objective values.
output_file = "MAMOc4.1_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")