# Environment Configuration

In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent, AgentType, Tool
import re
from datetime import time
from langchain.schema import HumanMessage
import openai
import requests
import time
import numpy as np
import os
import pandas as pd
import sys
import time
from io import StringIO
import contextlib
import gurobipy as gp
from gurobipy import GRB
from langchain.schema import Document
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# The OpenAI key is read from the environment instead of being committed to
# the notebook. A key that has ever been embedded in source should be treated
# as leaked and rotated. Export OPENAI_API_KEY before running this cell.
user_api_key = os.getenv("OPENAI_API_KEY")

In [2]:
# LangSmith tracing configuration. Values already present in the environment
# win; tracing flag and endpoint fall back to the defaults below.
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
langchain_tracing_v2 = os.getenv('LANGCHAIN_TRACING_V2', 'true')
langchain_endpoint = os.getenv('LANGCHAIN_ENDPOINT', 'https://api.smith.langchain.com')
os.environ['LANGCHAIN_TRACING_V2'] = langchain_tracing_v2
os.environ['LANGCHAIN_ENDPOINT'] = langchain_endpoint
# The API key must come from the environment: never hard-code it here. When it
# is absent nothing is written, so no placeholder secret ends up in os.environ.
if langchain_api_key:
    os.environ['LANGCHAIN_API_KEY'] = langchain_api_key

# Main Model

## Classification

In [3]:

# Chat model (temperature 0) shared by the retrieval QA chain and the
# classification agent below.
llm1 = ChatOpenAI(
    temperature=0.0, model_name="gpt-4", openai_api_key=user_api_key
)

# Reference data for problem-type lookup: every row of RefData_1.csv becomes
# one document in an in-memory FAISS index.
loader = CSVLoader(file_path="Large_Scale_Or_Files/RefData_1.csv", encoding="utf-8")
data = loader.load()
documents = data
embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
vectors = FAISS.from_documents(documents, embeddings)

# The QA chain "stuffs" the 5 nearest reference rows into a single prompt.
retriever = vectors.as_retriever(search_kwargs={'k': 5})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm1,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa_tool = Tool(
    name="FileQA",
    func=qa_chain.invoke,
    description=(
        "Use this tool to answer questions about the problem type of the text. "
    ),
)

# Few-shot ReAct traces for inputs that reference CSV files/columns. The
# allowed-category list is spelled out identically in both examples.
few_shot_examples_csv = """

Query: What is the problem type in operation of the text? Please give the answer directly. Text:There are three best-selling items (P1, P2, P3) on Amazon with the profit w_1,w_2,w_3.There is an independent demand stream for each of the products. The objective of the company is to decide which demands to be fufilled over a ﬁnite sales horizon [0,10] to maximize the total expected revenue from ﬁxed initial inventories. The on-hand inventories for the three items are c_1,c_2,c_3 respectively. During the sales horizon, replenishment is not allowed and there is no any in-transit inventories. Customers who want to purchase P1,P2,P3 arrive at each period accoring to a Poisson process with a_1,a_2,a_3 the arrival rates respectively. Decision variables y_1,y_2,y_3 correspond to the number of requests that the firm plans to fulfill for product 1,2,3. These variables are all positive integers.

Thought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type in operation of the text? text:There are three best-selling items (P1, P2, P3) on Amazon with the profit w_1, w_2, w_3. ..."

Observation: The problem type of the text is Network Revenue Management.

Thought: The problem type Network Revenue Management is in the allowed list [Network Revenue Management, Resource Allocation, Transportation, Facility Location Problem, Assignment Problem, Others with CSV, Others without CSV]. I could get the final answer and finish.

Final Answer: Network Revenue Management.

---
Query: What is the problem type in operation of the text? Please give the answer directly. Text:A supermarket needs to allocate various products, including high-demand items like the Sony Alpha Refrigerator, Sony Bravia XR, and Sony PlayStation 5, across different retail shelves. The product values and space requirements are provided in the "Products.csv" dataset. Additionally, the store has multiple shelves, each with a total space limit and specific space constraints for Sony and Apple products, as outlined in the "Capacity.csv" file. The goal is to determine the optimal number of units of each Sony product to place on each shelf to maximize total value while ensuring that the space used by Sony products on each shelf does not exceed the brand-specific limits. The decision variables x_ij represent the number of units of product i to be placed on shelf j.

Thought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type in operation of the text? Text:A supermarket needs to allocate various products, including high-demand items like the Sony Alpha Refrigerator, Sony Bravia XR, ...."

Observation: The problem type of the text is Inventory Management.

Thought: The problem type Inventory Management is not in the allowed list [Network Revenue Management, Resource Allocation, Transportation, Facility Location Problem, Assignment Problem, Others with CSV, Others without CSV]. Therefore, the problem type should be Others with CSV - Inventory management
Final Answer: Others with CSV - Inventory management

"""

# Few-shot trace for inputs that carry all values inline (no CSV reference):
# the agent answers directly without calling the tool.
few_shot_examples_without_csv = """
Query: A book distributor needs to shuffle a bunch of books from two warehouses (supply points: W1, W2) to libraries (demand points: L1, L2), using a pair of sorting centers (transshipment points: C1, C2). W1 has a stash of up to p_1 books per day it can send out. W2 can send out up to p_2 books daily. Library L1 needs a solid d_1 books daily. L2 requires d_2 books daily. Storage at the sorting centers has no cap. Transportation costs: From W1 to C1 is t_11 dollars, to C2 is t_12 dollars. From W2 to C1 is t_21 dollars, and to C2 it__ t_22 dollars. From the centers to the libraries: From C1 to L1, it__l cost t_31 dollars, to L2 it__ t_32 dollars. From C2 to L1, it__ t_41 dollars, to L2 it__ t_42 dollars. The strategy here is all about minimizing transportation spend while making sure those libraries get their books on time. We__l use x_11 and x_12 to track shipments from W1 to C1 and C2, and x_21 and x_22 for shipments from W2. For the books going out to the libraries, y_11 and y_12 will handle the flow from C1 to L1 and L2, and y_21 and y_22 from C2. Variables are all positive integers.

Thought: I need to determine the problem type of the text. The Query doesn't contain any descriptions like '.csv' and 'column'. I'll direcrly classify the problem type as 'Others without CSV'.

Final Answer: Others without CSV

"""
# The prefix is an f-string, so both few-shot blocks are inlined right here.
# The resulting text is later used as a prompt template, so the examples must
# stay free of literal curly braces.
# NOTE(review): the category list below names "SBLP" but not "Assignment
# Problem", while the few-shot examples list "Assignment Problem" but not
# "SBLP" - confirm which set the downstream dispatch expects.
prefix = f"""I am a helpful assistant that can answer Querys about operation problems. My response must align with one of the following categories: Network Revenue Management, Resource Allocation, Transportation, Facility Location Problem, SBLP, Others with CSV, and Others without CSV. Firstly you need to identify whether the text contains any descriptions like '.csv' and 'column'.

Always remember! If the input does not contain any description like '.csv' and 'column', and the values for all the variables are given directly, I will directly classify the problem type as 'Others without CSV'. Like the example {few_shot_examples_without_csv}. 

However, if the text contains descriptions like '.csv' or 'column', and the values for all the variables are not given directly, I will use the following examples {few_shot_examples_csv} as a guide. And answer the Query by given the answer directly.

"""

# {input} and {agent_scratchpad} are filled in by the agent at run time.
suffix = """

Begin!

Query: {input}
{agent_scratchpad}"""

classification_agent = initialize_agent(
    tools=[qa_tool],
    llm=llm1,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    agent_kwargs={
        "prefix": prefix,
        "suffix": suffix,
    },
    verbose=True,
    handle_parsing_errors=True,
)
# NOTE(review): this only sets an attribute on the `openai` module; it does not
# look like a documented client setting - confirm it has any effect. If a 60 s
# timeout is intended, passing a request timeout to ChatOpenAI is the likely
# supported route.
openai.api_request_timeout = 60

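# NOTE: the three lines below are stray, truncated copies of statements from
# the cell above (they look like notebook output residue - TODO confirm). They
# are not valid Python on their own, so they are kept only as comments.
#   llm1 = ChatOpenAI(
#   embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
#   classification_agent = initialize_agent(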


## Large Scale OR

### NRM

In [4]:


def retrieve_similar_docs(query, retriever):
    """Return the retriever's hits for ``query`` as plain dictionaries.

    Every hit is flattened to ``{"content": <page_content>,
    "metadata": <metadata>}``; the order is whatever the retriever returns.
    """
    hits = retriever.get_relevant_documents(query)
    return [
        {"content": hit.page_content, "metadata": hit.metadata}
        for hit in hits
    ]


def process_dataset_address(dataset_address: str) -> List[Document]:
    """Load every CSV listed in ``dataset_address`` as one document per row.

    Args:
        dataset_address: Newline-separated CSV paths. Whitespace around each
            path is stripped and blank lines are ignored.

    Returns:
        One ``Document`` per CSV row, in file order and then row order. Each
        ``page_content`` is the row rendered as ``"col = value"`` pairs joined
        by ``", "``. Files that cannot be read or parsed are reported on
        stdout and skipped.
    """
    documents = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            # A blank line is not a file; skip it instead of failing a read.
            continue
        try:
            # Only the read is guarded: missing/unreadable files surface as
            # OSError, malformed or empty CSVs as ValueError subclasses.
            df = pd.read_csv(file_address)
        except (OSError, ValueError) as e:
            print(f"Error processing file {file_address}: {e}")
            continue

        columns = list(df.columns)
        for _, row in df.iterrows():
            page_content = ", ".join(f"{col} = {row[col]}" for col in columns)
            documents.append(Document(page_content=page_content))

    return documents

def get_NRM_response(query, dataset_address):
    """Formulate a Network Revenue Management model for ``query`` via a ReAct agent.

    Steps:
      1. Retrieve the single most similar worked example from
         ``Large_Scale_Or_Files/RAG_Example_NRM2_MD.csv`` and render it as a
         few-shot demonstration (question, tool call, observation, answer).
      2. Render every row of the user's CSV files as text, index the rows in
         FAISS and expose them to the agent through the ``CSVQA`` tool.
      3. Run a zero-shot ReAct agent whose prefix contains the demonstration.

    Args:
        query: The user's problem description.
        dataset_address: Newline-separated paths of the user's CSV files.
            Unreadable files are reported on stdout and skipped.

    Returns:
        The agent's final answer (``result['output']``).
    """
    retrieve = 'product'

    # --- 1. Few-shot demonstration from the example bank -------------------
    example_loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_NRM2_MD.csv", encoding="utf-8")
    example_docs = example_loader.load()
    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    example_store = FAISS.from_documents(example_docs, embeddings)
    example_retriever = example_store.as_retriever(search_kwargs={'k': 1})

    few_shot_examples = []
    for result in retrieve_similar_docs(query, example_retriever):
        content = result['content']
        # Each record is laid out as
        # "prompt: ... Data_address: ... Label: ... Related: ...".
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()
        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # Only the first 36 rows of the example data are shown to the model.
        information_head = pd.read_csv(data_address)[:36]

        example_data_description = "\nHere is the product data:\n"
        for row_idx, r in information_head.iterrows():
            example_data_description += f"Product {row_idx + 1}: {r['Product Name']}, revenue w_{row_idx + 1} = {r['Revenue']}, demand rate a_{row_idx + 1} = {r['Demand']}, initial inventory c_{row_idx + 1} = {r['Initial Inventory']}\n"

        # Backslashes are doubled so the LaTeX tokens reach the model
        # verbatim; unescaped, "\b" is a backspace and "\t" is a tab.
        example = f"""

Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order, row by row. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, construct the objective function and constraints using the retrieved data as parameters of the formula. Ensure to include any additional detailed constraints present in the problem description. Always pay attention to the variable type. If not mentioned, use nonnegative integer. Do NOT include any explanations, notes, or extra text. Format the expressions strictly in markdown ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The agent prefix is later treated as a prompt template, so every
        # literal brace in the example (label, description, data) is doubled.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # --- 2. Index the user's data for the CSVQA tool -----------------------
    dfs = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            continue
        try:
            df = pd.read_csv(file_address)
        except Exception as e:
            print(f"Error reading file {file_address}: {e}")
            continue
        dfs.append((file_address.split('/')[-1], df))

    data = []
    for df_index, (file_name, df) in enumerate(dfs):
        data.append(f"\nDataFrame {df_index + 1} - {file_name}:\n")
        for _, r in df.iterrows():
            data.append(", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n")

    vectors = FAISS.from_texts(data, embeddings)
    # k is set high (1000) so the chain receives up to 1000 indexed rows.
    retriever = vectors.as_retriever(search_kwargs={'k': 1000})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1, n=1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order, row by row. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead. Only present final answer in details of row, instead of giving a sheet format."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent tool takes and returns plain text.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- 3. Agent ----------------------------------------------------------
    # Join the demonstrations as text; interpolating the list itself would
    # embed its Python repr (escaped newlines, quotes, doubled backslashes).
    few_shot_text = "\n".join(few_shot_examples)
    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {few_shot_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    result = agent2.invoke(query)

    return result['output']


### RA

In [5]:
def get_RA_response(query, dataset_address):
    """Formulate a Resource Allocation model for ``query`` via a ReAct agent.

    Steps:
      1. Retrieve the 3 most similar worked examples from
         ``Large_Scale_Or_Files/RAG_Example_RA2_MD.csv`` and render each as a
         few-shot demonstration (question, tool call, observation, answer).
      2. Render every row of the user's CSV files as text (rows of
         ``products.csv`` / ``Products.csv`` get a 1-based ``Product id``
         prefix), index them in FAISS and expose them through ``CSVQA``.
      3. Run a zero-shot ReAct agent whose prefix contains the demonstrations.

    Args:
        query: The user's problem description.
        dataset_address: Newline-separated paths of the user's CSV files.
            Unreadable files are reported on stdout and skipped.

    Returns:
        The agent's final answer (``result['output']``).
    """
    retrieve = "product"

    # --- 1. Few-shot demonstrations from the example bank ------------------
    example_loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_RA2_MD.csv", encoding="utf-8")
    example_docs = example_loader.load()
    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    example_store = FAISS.from_documents(example_docs, embeddings)
    example_retriever = example_store.as_retriever(search_kwargs={'k': 3})

    few_shot_examples = []
    for result in retrieve_similar_docs(query, example_retriever):
        content = result['content']

        # Each record is laid out as
        # "prompt: ... Data_address: ... Label: ... Related: ...".
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # Example data paths are whitespace-separated.
        information = [pd.read_csv(path) for path in data_address.split()]

        example_data_description = "\nHere is the data:\n"
        for df_index, df in enumerate(information):
            # Headers are positional: first file is labelled Capacity, second
            # Products; any further file gets no header.
            if df_index == 0:
                example_data_description += f"\nDataFrame {df_index + 1} - Capacity\n"
            elif df_index == 1:
                example_data_description += f"\nDataFrame {df_index + 1} - Products\n"

            for _, r in df.iterrows():
                example_data_description += ", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n"

        # Backslashes are doubled so the LaTeX tokens reach the model
        # verbatim; unescaped, "\b" is a backspace and "\t" is a tab.
        example = f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula according to the retrieved 'product id'. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The agent prefix is later treated as a prompt template, so every
        # literal brace in the example (label, description, data) is doubled.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # --- 2. Index the user's data for the CSVQA tool -----------------------
    dfs = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            continue
        try:
            df = pd.read_csv(file_address)
        except Exception as e:
            print(f"Error reading file {file_address}: {e}")
            continue
        dfs.append((file_address.split('/')[-1], df))

    data = []
    for df_index, (file_name, df) in enumerate(dfs):
        data.append(f"\nDataFrame {df_index + 1} - {file_name}:\n")

        # Product rows carry an explicit 1-based id that the prompt refers to
        # as 'product id'.
        is_products_file = file_name in ('products.csv', 'Products.csv')
        for i, r in df.iterrows():
            description = f"Product id: {i+1}; " if is_products_file else ""
            description += ", ".join([f"{col} = {r[col]}" for col in df.columns])
            data.append(description + "\n")

    vectors = FAISS.from_texts(data, embeddings)
    retriever = vectors.as_retriever(search_kwargs={'k': 220})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1, n=1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent tool takes and returns plain text.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- 3. Agent ----------------------------------------------------------
    # Join the demonstrations as text; interpolating the list itself would
    # embed its Python repr (escaped newlines, quotes, doubled backslashes).
    few_shot_text = "\n".join(few_shot_examples)
    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {few_shot_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    result = agent2.invoke(query)

    return result['output']



### TP

In [6]:
def get_TP_response(query, dataset_address):
    """Formulate a Transportation Problem model for ``query`` via a ReAct agent.

    Steps:
      1. Retrieve the 3 most similar worked examples from
         ``Large_Scale_Or_Files/RAG_Example_TP2_MD.csv`` and render each as a
         few-shot demonstration (question, tool call, observation, answer).
      2. Render every row of the user's CSV files as text, index the rows in
         FAISS and expose them to the agent through the ``CSVQA`` tool.
      3. Run a zero-shot ReAct agent whose prefix contains the demonstrations.

    Args:
        query: The user's problem description.
        dataset_address: Newline-separated paths of the user's CSV files.
            Unreadable files are reported on stdout and skipped.

    Returns:
        The agent's final answer (``result['output']``).
    """
    retrieve = "capacity data and products data, "

    # --- 1. Few-shot demonstrations from the example bank ------------------
    example_loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_TP2_MD.csv", encoding="utf-8")
    example_docs = example_loader.load()
    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    example_store = FAISS.from_documents(example_docs, embeddings)
    example_retriever = example_store.as_retriever(search_kwargs={'k': 3})

    few_shot_examples = []
    for result in retrieve_similar_docs(query, example_retriever):
        content = result['content']
        # Each record is laid out as
        # "prompt: ... Data_address: ... Label: ... Related: ...".
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # Example data paths are whitespace-separated.
        information = [pd.read_csv(path) for path in data_address.split()]

        # The column list is rebuilt for every example so that one example's
        # columns do not leak into the next example's Action Input.
        example_retrieve = retrieve
        example_data_description = "\nHere is the data:\n"
        for df_index, df in enumerate(information):
            # Headers are positional; files beyond the third get no header.
            if df_index == 0:
                example_data_description += f"\nDataFrame {df_index + 1} - Customer Demand\n"
            elif df_index == 1:
                example_data_description += f"\nDataFrame {df_index + 1} - Supply Capacity\n"
            elif df_index == 2:
                example_data_description += f"\nDataFrame {df_index + 1} - Transportation Cost\n"

            for _, r in df.iterrows():
                example_data_description += ", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n"
            example_retrieve += ', '.join(df.columns) + ', '

        # Backslashes are doubled so the LaTeX tokens reach the model
        # verbatim; unescaped, "\b" is a backspace and "\t" is a tab.
        example = f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {example_retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The agent prefix is later treated as a prompt template, so every
        # literal brace in the example (label, description, data) is doubled.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # --- 2. Index the user's data for the CSVQA tool -----------------------
    dfs = []
    for raw_address in dataset_address.strip().split('\n'):
        file_address = raw_address.strip()
        if not file_address:
            continue
        try:
            df = pd.read_csv(file_address)
        except Exception as e:
            print(f"Error reading file {file_address}: {e}")
            continue
        dfs.append((file_address.split('/')[-1], df))

    data = []
    for df_index, (file_name, df) in enumerate(dfs):
        data.append(f"\nDataFrame {df_index + 1} - {file_name}:\n")
        for _, r in df.iterrows():
            data.append(", ".join([f"{col} = {r[col]}" for col in df.columns]) + "\n")

    # Echo the rendered rows to stdout (kept from the original for inspection).
    print(data)

    vectors = FAISS.from_texts(data, embeddings)
    retriever = vectors.as_retriever(search_kwargs={'k': 300})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1, n=1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # The agent tool takes and returns plain text.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- 3. Agent ----------------------------------------------------------
    # Join the demonstrations as text; interpolating the list itself would
    # embed its Python repr (escaped newlines, quotes, doubled backslashes).
    few_shot_text = "\n".join(few_shot_examples)
    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {few_shot_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    result = agent2.invoke(query)

    return result['output']


### AP

In [7]:
def get_AP_response(query: str, dataset_address: str) -> str:
    """Run a ReAct agent that formulates a mathematical model for an AP-type query.

    The function works in three stages:

    1. Retrieve the most similar worked example from
       ``RAG_Example_AP2_MD.csv`` (via ``retrieve_similar_docs``) and turn it
       into a few-shot ReAct trace.  The retrieved text is split on the
       ``Data_address:``, ``Label:`` and ``Related:`` markers.
    2. Read the user's CSV files and render them as ``C=<matrix>.`` text,
       which is indexed in a FAISS store and exposed to the agent through
       the ``CSVQA`` tool.
    3. Invoke a zero-shot ReAct agent with the few-shot trace as prefix.

    Args:
        query: Natural-language problem description from the user.
        dataset_address: Newline-separated paths of the user's CSV files.

    Returns:
        The agent's final answer (the ``'output'`` entry of the agent result).

    Raises:
        IndexError: If a retrieved example lacks the ``Data_address:`` or
            ``Label:`` marker.
    """
    # Both placeholders are interpolated into the example's "Action Input"
    # line; they are empty strings for this problem type.
    retrieve = ''
    Related = ''

    def describe_matrices(addresses: str) -> str:
        """Render every readable CSV (one path per line) as ``C=<matrix>.``."""
        description = " "
        for file_address in addresses.strip().split('\n'):
            try:
                df = pd.read_csv(file_address)
                # The first column is dropped; the remaining ones form the matrix.
                matrix = df.iloc[:, 1:].values
                description += "C=" + np.array_str(matrix) + "."
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading file {file_address}: {e}")
        return description

    # --- Stage 1: few-shot example retrieved from the reference CSV ---------
    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_AP2_MD.csv", encoding="utf-8")
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)
    # NOTE(review): max_tokens_limit is forwarded to the retriever constructor;
    # confirm that the installed LangChain version actually honours it.
    retriever = vectors.as_retriever(max_tokens_limit=400, search_kwargs={'k': 1})

    few_shot_examples = []
    for similar in retrieve_similar_docs(query, retriever):
        content = similar['content']
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()
        example_data_description = describe_matrices(data_address)

        label = split_at_address[1].split("Related:", 1)[0].strip()

        # Backslashes are doubled in this (non-raw) f-string so that "\begin"
        # and "\text" reach the model literally instead of being interpreted
        # as the backspace and tab escape sequences.
        example = f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula according to the retrieved 'product id'. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The prefix becomes part of an f-string style prompt template, so every
        # literal brace (label, description, data) is escaped exactly once.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (brackets, quotes, escaped newlines) in the prompt.
    examples_text = "\n".join(few_shot_examples)

    # --- Stage 2: user data exposed through a retrieval QA tool -------------
    data_description = describe_matrices(dataset_address)

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_texts([data_description], embeddings)

    retriever = vectors.as_retriever(max_tokens_limit=400, search_kwargs={'k': 1})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1, n=1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # Hand only the generated answer text back to the agent.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- Stage 3: ReAct agent ------------------------------------------------
    prefix = f"""You are an assistant that generates a mathematical model based on the user's description and provided CSV data.

            Please refer to the following example and generate the answer in the same format:

            {examples_text}

            Note: Please retrieve all neccessary information from the CSV file to generate the answer. When you generate the answer, please output required parameters in a whole text, including all vectors and matrices.

            When you need to retrieve information from the CSV file, use the provided tool.

            """

    suffix = """

            Begin!

            User Description: {input}
            {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True
    )

    result = agent2.invoke(query)
    return result['output']


### FLP

In [8]:
def get_FLP_response(query: str, dataset_address: str) -> str:
    """Run a ReAct agent that formulates a mathematical model for an FLP-type query.

    The function works in three stages:

    1. Retrieve the most similar worked example from
       ``RAG_Example_FLP2_MD.csv`` (via ``retrieve_similar_docs``) and turn it
       into a few-shot ReAct trace.  The retrieved text is split on the
       ``Data_address:``, ``Label:`` and ``Related:`` markers.
    2. Read the user's CSV files and render them as text (``d=`` for a
       ``demand`` column, ``c=`` for a ``fixed_costs`` column, ``A=`` for the
       third successfully read file, row-wise ``col = value`` otherwise).
       The text is indexed in FAISS and exposed through the ``CSVQA`` tool.
    3. Invoke a zero-shot ReAct agent with the few-shot trace as prefix.

    Args:
        query: Natural-language problem description from the user.
        dataset_address: Newline-separated paths of the user's CSV files.

    Returns:
        The agent's final answer (the ``'output'`` entry of the agent result).

    Raises:
        IndexError: If a retrieved example lacks the ``Data_address:`` or
            ``Label:`` marker.
    """
    # Both placeholders are interpolated into the example's "Action Input" line.
    retrieve = 'supplier'
    Related = ''

    def describe_files(addresses: str) -> str:
        """Render every readable CSV (one path per line) as prompt text."""
        description = " "
        # Counts successfully processed files only; the file at position 2 is
        # treated as the matrix "A" unless it has a demand/fixed_costs column.
        df_index = 0
        for file_address in addresses.strip().split('\n'):
            try:
                df = pd.read_csv(file_address)
                if 'demand' in df.columns:
                    description += "d=" + str(df['demand'].values.tolist()) + "\n"
                elif 'fixed_costs' in df.columns:
                    description += "c=" + str(df['fixed_costs'].values.tolist()) + "\n"
                elif df_index == 2:
                    # The first column is dropped; the rest forms the matrix.
                    matrix = df.iloc[:, 1:].values
                    description += "A=" + np.array_str(matrix) + "."
                else:
                    # One line per row; without the newline the last value of a
                    # row would run into the first column name of the next row.
                    for _, row in df.iterrows():
                        description += ", ".join([f"{col} = {row[col]}" for col in df.columns]) + "\n"
                df_index += 1
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading file {file_address}: {e}")
        return description

    # --- Stage 1: few-shot example retrieved from the reference CSV ---------
    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_FLP2_MD.csv", encoding="utf-8")
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)
    # NOTE(review): max_tokens_limit is forwarded to the retriever constructor;
    # confirm that the installed LangChain version actually honours it.
    retriever = vectors.as_retriever(max_tokens_limit=400, search_kwargs={'k': 1})

    few_shot_examples = []
    for similar in retrieve_similar_docs(query, retriever):
        content = similar['content']
        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()
        example_data_description = describe_files(data_address)

        label = split_at_address[1].split("Related:", 1)[0].strip()

        # Backslashes are doubled in this (non-raw) f-string so that "\begin"
        # and "\text" reach the model literally instead of being interpreted
        # as the backspace and tab escape sequences.
        example = f"""
Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. Pay attention: 1. If the data to be retrieved is not specified, retrieve the whole dataset instead. 2. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. 3. The final expressions should not be simplified or abbreviated.

Action: CSVQA

Action Input: Retrieve all the {retrieve} data {Related} to formulate the mathematical model with no simplification or abbreviation. Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved. Only present final answer in details of row, instead of giving a sheet format.

Observation: {example_data_description}

Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. Do NOT include any explanations, notes, or extra text. Respond ONLY in this exact format: {label}. Following this example. The expressions should not be simplified or abbreviated. Besides, I need to use the $$ or $ to wrap the mathematical expressions instead of \\[, \\], \\( or \\). I also should avoid using align, align* and other latex environments. Besides, I should also avoid using \\begin, \\end, \\text.

Final Answer: 
{label}
"""
        # The prefix becomes part of an f-string style prompt template, so every
        # literal brace (label, description, data) is escaped exactly once.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (brackets, quotes, escaped newlines) in the prompt.
    examples_text = "\n".join(few_shot_examples)

    # --- Stage 2: user data exposed through a retrieval QA tool -------------
    data_description = describe_files(dataset_address)

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_texts([data_description], embeddings)

    retriever = vectors.as_retriever(max_tokens_limit=400, search_kwargs={'k': 1})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', top_p=1, n=1, openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order. Use the given context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, make sure that all the data is retrieved."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # Hand only the generated answer text back to the agent.
        return qa_chain.invoke({"input": query})['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- Stage 3: ReAct agent ------------------------------------------------
    prefix = f"""You are an assistant that generates a mathematical model based on the user's description and provided CSV data.

            Please refer to the following example and generate the answer in the same format:

            {examples_text}

            Note: Please retrieve all neccessary information from the CSV file to generate the answer. When you generate the answer, please output required parameters in a whole text, including all vectors and matrices.

            When you need to retrieve information from the CSV file, use the provided tool.

            """

    suffix = """
            Begin!

            User Description: {input}
            {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True
    )

    result = agent2.invoke(query)
    return result['output']


### Others With CSV

In [9]:
def get_Others_response(query: str, dataset_address: str) -> str:
    """Run a ReAct agent that formulates a model for a generic query with CSV data.

    The function works in three stages:

    1. Retrieve up to eight similar worked examples from
       ``RAG_example_Others.csv`` (via ``retrieve_similar_docs``) and turn
       each one whose description differs from ``query`` into a few-shot
       ReAct trace.  The retrieved text is split on the ``Data_address:``,
       ``Label:`` and ``Related:`` markers.
    2. Read the user's CSV files and render them as text (``d=`` for a
       ``demand`` column, ``c=`` for a ``fixed_costs`` column, row-wise
       ``col = value`` otherwise).  The text is indexed in FAISS and exposed
       through the ``CSVQA`` tool.
    3. Invoke a zero-shot ReAct agent with the few-shot traces as prefix.

    Args:
        query: Natural-language problem description from the user.
        dataset_address: Newline-separated paths of the user's CSV files.

    Returns:
        The agent's final answer (the ``'output'`` entry of the agent result).

    Raises:
        IndexError: If a retrieved example lacks one of the expected markers.
        Exception: Whatever ``pd.read_csv`` raises for an unreadable example
            data file (user data files are read on a best-effort basis).
    """
    retrieve = "all data"

    # --- Stage 1: few-shot examples retrieved from the reference CSV --------
    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_example_Others.csv", encoding="utf-8")
    # Each CSV row is one document.
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)
    retriever = vectors.as_retriever(search_kwargs={'k': 8})

    few_shot_examples = []
    for similar in retrieve_similar_docs(query, retriever):
        content = similar['content']

        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("Query:", "").strip()
        if problem_description == query:
            # Never show the agent the stored answer to the very same query.
            continue

        split_at_address = split_at_formulation[1].split("Label:", 1)
        data_address = split_at_address[0].strip()

        split_at_label = split_at_address[1].split("Related:", 1)
        label = split_at_label[0].strip()
        Related = split_at_label[1].strip()

        # Example data files are whitespace-separated paths.
        example_frames = [pd.read_csv(path) for path in data_address.split()]
        example_data_description = "\nHere is the data:\n"
        for frame in example_frames:
            for _, r in frame.iterrows():
                example_data_description += " ".join([f"{col} = {r[col]}" for col in frame.columns]) + "\n"

        example = f"""
    Question: Based on the following problem description and data, please formulate a complete mathematical model using real data from retrieval. {problem_description}

    Thought: I need to formulate the objective function and constraints of the linear programming model based on the user's description and the provided data. I should retrieve the relevant information from the CSV file. If the data to be retrieved is not specified, retrieve the whole dataset instead. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula. The final expressions should not be simplified or abbreviated.

    Action: CSVQA

    Action Input: Retrieve all the {retrieve} data related to {Related} to formulate the mathematical model with no simplification or abbreviation.

    Observation: {example_data_description}

    Thought: Now that I have the necessary data, I would construct the objective function and constraints using the retrieved data as parameters of the formula. I should pay attention if there is further detailed constraint in the problem description. If so, I should generate additional constraint formula according to the retrieved 'product id'.  Respond ONLY in this exact format: {label}. Do NOT include any explanations, notes, or extra text. The expressions should not be simplified or abbreviated. 

    Final Answer: 
    {label}
    """
        # The prefix becomes part of an f-string style prompt template, so
        # literal braces are escaped exactly once.  Escaping the label on its
        # own first and then the whole example again would leave doubled
        # braces in the rendered prompt.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (brackets, quotes, escaped newlines) in the prompt.
    examples_text = "\n".join(few_shot_examples)

    # --- Stage 2: user data exposed through a retrieval QA tool -------------
    data_description = " "
    for file_address in dataset_address.strip().split('\n'):
        try:
            df = pd.read_csv(file_address)
            if 'demand' in df.columns:
                data_description += "d=" + str(df['demand'].values.tolist()) + "\n"
            elif 'fixed_costs' in df.columns:
                data_description += "c=" + str(df['fixed_costs'].values.tolist()) + "\n"
            else:
                # One line per row; without the newline the last value of a
                # row would run into the first column name of the next row.
                for _, row in df.iterrows():
                    data_description += ", ".join([f"{col} = {row[col]}" for col in df.columns]) + "\n"
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading file {file_address}: {e}")

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_texts([data_description], embeddings)

    retriever = vectors.as_retriever(search_kwargs={'k': 400})
    llm2 = ChatOpenAI(temperature=0.0, model_name='gpt-4.1', openai_api_key=user_api_key)

    system_prompt = (
        "Retrieve the documents in order from top to bottom. Use the retrieved context to answer the question. If mention a certain kind of product, retrieve all the relavant product information detail judging by its product name. If not mention a certain kind of product, retrieve all the data instead."
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm2, prompt)
    qa_chain = create_retrieval_chain(retriever, question_answer_chain)

    def qa_wrapper(query: str):
        # Return only the LLM-generated 'answer' string, not the whole result dict.
        result = qa_chain.invoke({"input": query})
        return result['answer']

    qa_tool = Tool(
        name="CSVQA",
        func=qa_wrapper,
        description="Use this tool to answer Querys based on the provided CSV data and retrieve product data similar to the input query."
    )

    # --- Stage 3: ReAct agent ------------------------------------------------
    prefix = f"""You are an assistant that generates a mathematical models based on the user's description and provided CSV data.

    Please refer to the following example and generate the answer in the same format:

    {examples_text}

    When you need to retrieve information from the CSV file, use the provided tool.

    """

    suffix = """

    Begin!

    User Description: {input}
    {agent_scratchpad}"""

    agent2 = initialize_agent(
        tools=[qa_tool],
        llm=llm2,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,
    )

    # The query is substituted into the template as a value and is not parsed
    # for placeholders, so it is passed through unescaped; doubling its braces
    # would corrupt any LaTeX or JSON in the user's text.
    result = agent2.invoke(query)
    return result['output']


## Others Without CSV

In [10]:
def get_others_without_CSV_response(query: str) -> dict:
    """Run a ReAct agent that formulates a model for a query without CSV data.

    Similar worked examples are retrieved from
    ``RAG_Example_Others_Without_CSV.csv`` (via ``retrieve_similar_docs``),
    converted into few-shot ReAct traces and placed in the agent prefix.  The
    same vector store backs the ``ORLM_QA`` tool the agent can call.

    Args:
        query: Natural-language problem description from the user.

    Returns:
        The full result of ``agent.invoke`` (not just its ``'output'`` entry,
        unlike the sibling ``get_*_response`` functions); callers extract the
        fields they need.

    Raises:
        IndexError: If a retrieved example lacks the ``Data_address:`` or
            ``Label:`` marker.
    """
    llm = ChatOpenAI(
        temperature=0.0, model_name="gpt-4.1", top_p=1, n=1, openai_api_key=user_api_key
    )

    loader = CSVLoader(file_path="Large_Scale_Or_Files/RAG_Example_Others_Without_CSV.csv", encoding="utf-8")
    documents = loader.load()

    embeddings = OpenAIEmbeddings(openai_api_key=user_api_key)
    vectors = FAISS.from_documents(documents, embeddings)
    retriever = vectors.as_retriever(search_kwargs={'k': 5})

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )

    def qa_wrapper(query: str):
        # Hand only the answer text back to the agent; the raw chain result is
        # a dict that also echoes the query.
        return qa_chain.invoke({"query": query})["result"]

    qa_tool = Tool(
        name="ORLM_QA",
        func=qa_wrapper,
        description=(
            "Use this tool to answer Querys."
            "Provide the Query as input, and the tool will retrieve the relevant information from the file and use it to answer the Query."
        ),
    )

    few_shot_examples = []
    for similar in retrieve_similar_docs(query, retriever):
        content = similar['content']

        split_at_formulation = content.split("Data_address:", 1)
        problem_description = split_at_formulation[0].replace("prompt:", "").strip()

        # Only the label is needed; it sits between "Label:" and "Related:".
        after_label_marker = split_at_formulation[1].split("Label:", 1)[1]
        label = after_label_marker.split("Related:", 1)[0].strip()

        example = f"""

Query: {problem_description}

Thought: I need to formulate the mathematical model for this problem. I'll use the ORLM_QA tool to retrieve the most similar use case and learn the method and formulation for generating the answer (label) for user's query. Always note whether to add additional integer constraints (or real number) is decided according to the realistic significance of the problem and the characteristics of the variables.

Action: ORLM_QA

Action Input: {problem_description}

Observation: 

Thought: Respond ONLY in this exact format: {label}. Do NOT include any explanations, notes, or extra text. The expressions should not be simplified or abbreviated. Just give the formula, no need to generate final answer. Add default constraints that the variables are nonnegative integers, or nonnegative real numbers if specified in the query.

Final Answer: 
{label}

"""
        # The prefix becomes part of an f-string style prompt template, so
        # literal braces are escaped exactly once.  Escaping the label on its
        # own first and then the whole example again would leave doubled
        # braces in the rendered prompt.
        few_shot_examples.append(example.replace("{", "{{").replace("}", "}}"))

    # Join the examples as text; interpolating the list itself would embed its
    # Python repr (brackets, quotes, escaped newlines) in the prompt.
    examples_text = "\n".join(few_shot_examples)

    prefix = f"""You are a helpful assistant that can answer Querys about operation problems. 

    Use the following examples as a guide. Always use the ORLM_QA tool when you need to retrieve information from the file:

    {examples_text}

    When you need to find information from the file, use the provided tools.

    """

    suffix = """

    Begin!

    Query: {input}
    {agent_scratchpad}"""

    agent = initialize_agent(
        tools=[qa_tool],
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        agent_kwargs={
            "prefix": prefix,
            "suffix": suffix,
        },
        verbose=True,
        handle_parsing_errors=True,  # Enable error handling
    )

    # NOTE(review): this only sets an attribute on the openai module; confirm
    # that anything reads it, or configure the timeout on ChatOpenAI instead.
    openai.api_request_timeout = 60

    # The query is substituted into the template as a value and is not parsed
    # for placeholders, so it is passed through unescaped; doubling its braces
    # would corrupt any LaTeX or JSON in the user's text.
    output = agent.invoke(query)

    return output

## Code Generation

In [11]:
def get_code(output,selected_problem):
    llm_code = ChatOpenAI(
        temperature=0.0, model_name="gpt-4.1",top_p=1,n = 1, openai_api_key=user_api_key
    )

    prompt = f"""
    You are an expert in mathematical optimization and Python programming. Your task is to write Python code to solve the provided mathematical optimization model using the Gurobi library. The code should include the definition of the objective function, constraints, and decision variables. Please don't add additional explanations. Please don't include ```python and ```.Below is the provided mathematical optimization model:

    Mathematical Optimization Model:
    {output}
    """

    prompt += """
For example, here is a simple instance for reference:

Mathematical Optimization Model:

## Mathematical Model

**Decision Variables**

Let $x_i$ be the weekly production quantity of product $i$, where $i$ indexes all products listed in 41.csv (see below for mapping).

**Maximize**

$$
\sum_{i=1}^{n} \left[ (\text{Selling Price}_i - \text{Variable Cost}_i) \, x_i \right] - 4500
$$

where $n$ is the total number of products in 41.csv.

---

**subject to**

**Labor constraint:**
$$
\sum_{i=1}^{n} (\text{Labor per unit}_i) \, x_i \leq 1650
$$

**Material constraint:**
$$
\sum_{i=1}^{n} (\text{Material per unit}_i) \, x_i \leq 1850
$$

**Non-negativity:**
$$
x_i \geq 0 \quad \text{for all } i=1,2,\ldots,n
$$

---

**Where:**

For each product $i$ (in order as listed below):

| $i$ | Product Name                        | Labor per unit | Material per unit | Selling Price | Variable Cost |
|-----|-------------------------------------|----------------|-------------------|---------------|---------------|
| 1   | Herringbone Casual Shirt            | 3.0            | 4.1               | 123           | 62            |
| 2   | Textured Grid Shirt                 | 3.1            | 4.2               | 125           | 63            |
| 3   | Madras Plaid Shirt                  | 3.3            | 4.4               | 129           | 66            |
| 4   | Supersoft Modal Shirt               | 3.0            | 4.1               | 123           | 61            |
| 5   | Tropical Print Shirt                | 3.8            | 4.9               | 140           | 73            |
| 6   | Longline T-Shirt                    | 2.6            | 3.5               | 94            | 51            |
| 7   | Wool Cargo Trousers                 | 6.7            | 7.0               | 201           | 101           |
| 8   | Chambray Work Shirt                 | 3.0            | 4.1               | 122           | 61            |
| 9   | Brushed Cotton Shirt                | 3.2            | 4.3               | 127           | 65            |
| 10  | Muscle Fit T-Shirt                  | 2.1            | 3.0               | 83            | 41            |
| 11  | Textured Stripe Shirt               | 3.2            | 4.3               | 127           | 64            |
| 12  | Striped Rugby Shirt                 | 3.4            | 4.4               | 129           | 66            |
| 13  | Linen Safari Shirt                  | 3.4            | 4.5               | 131           | 68            |
| 14  | Printed Geometric Shirt             | 3.9            | 5.0               | 143           | 76            |
| 15  | Graphic Print Tee                   | 2.0            | 3.0               | 82            | 40            |
| 16  | Silk-Touch Poplin Shirt             | 3.3            | 4.4               | 129           | 66            |
| 17  | Slim Fit Chambray                   | 3.0            | 4.1               | 124           | 62            |
| 18  | Non-Iron Twill Shirt                | 3.1            | 4.2               | 126           | 64            |
| 19  | Classic Oxford Shirt                | 3.1            | 4.2               | 125           | 63            |
| 20  | Brushed Cotton Trousers             | 6.6            | 6.9               | 198           | 97            |
| 21  | Fine Poplin Check Shirt             | 3.5            | 4.6               | 133           | 68            |
| 22  | Melange Knit Shirt                  | 3.1            | 4.2               | 126           | 64            |
| 23  | Brushed Twill Shirt                 | 3.5            | 4.6               | 134           | 69            |
| 24  | Stretch Performance Shirt           | 3.5            | 4.6               | 133           | 68            |
| 25  | Minimalist Plain Tee                | 2.0            | 2.9               | 78            | 37            |
| 26  | Spread Collar Shirt                 | 3.8            | 4.9               | 140           | 73            |
| 27  | Heavyweight Denim Shirt             | 4.0            | 5.1               | 145           | 77            |
| 28  | Technical Cargo Vest                | 6.9            | 7.2               | 206           | 106           |
| 29  | Camp Collar Floral Shirt            | 2.9            | 4.0               | 121           | 61            |
| 30  | Printed Geometric Tee               | 2.5            | 3.4               | 91            | 48            |
| 31  | Microfiber Sport Shirt              | 2.3            | 3.3               | 88            | 45            |
| 32  | Graphic Back Print Tee              | 2.4            | 3.3               | 89            | 46            |
| 33  | Sun-Protective T-Shirt              | 2.1            | 3.0               | 81            | 40            |
| 34  | Performance Dry-Fit Tee             | 2.0            | 3.0               | 80            | 39            |
| 35  | Button-Down Poplin Shirt            | 3.6            | 4.7               | 135           | 70            |
| 36  | Water-Resistant Shell               | 7.1            | 7.4               | 213           | 113           |
| 37  | Brushed Flannel Shirt               | 3.6            | 4.7               | 135           | 70            |
| 38  | Micro-Dot Shirt                     | 3.0            | 4.1               | 123           | 62            |
| 39  | Twill Utility Shirt                 | 3.3            | 4.5               | 130           | 67            |
| 40  | Basic White T-Shirt                 | 2.3            | 3.2               | 87            | 44            |
| 41  | Cotton Crew Neck T-Shirt            | 2.1            | 3.1               | 83            | 41            |
| 42  | Silk Blend Dress Shirt              | 4.0            | 5.1               | 145           | 78            |
| 43  | Plaid Flannel Overshirt             | 3.0            | 4.1               | 124           | 62            |
| 44  | Western Style Shirt                 | 3.2            | 4.3               | 128           | 65            |
| 45  | Dobby Weave Shirt                   | 3.3            | 4.4               | 130           | 67            |
| 46  | Contrast Collar Shirt               | 3.8            | 4.9               | 141           | 74            |
| 47  | Slogan Print T-Shirt                | 2.6            | 3.5               | 93            | 50            |
| 48  | Heavy Knit Sweater                  | 7.3            | 7.6               | 218           | 117           |
| 49  | Mohair Blend Cardigan               | 7.0            | 7.3               | 211           | 110           |
| 50  | Reflective Detail Tee               | 2.0            | 2.9               | 80            | 39            |
| 51  | Non-Iron Travel Shirt               | 3.9            | 5.0               | 142           | 75            |
| 52  | Mohair Knit Sweater                 | 7.5            | 7.8               | 224           | 123           |
| 53  | Heavyweight Boxy Tee                | 2.4            | 3.3               | 89            | 46            |
| 54  | Lattice Weave Shirt                 | 3.2            | 4.3               | 128           | 65            |
| 55  | Micro-Waffle Knit Shirt             | 3.1            | 4.2               | 125           | 63            |
| 56  | Supersoft Modal T-Shirt             | 2.5            | 3.4               | 90            | 47            |
| 57  | Heavyweight Sweatshirt              | 6.2            | 6.4               | 190           | 92            |
| 58  | Technical Shell Jacket              | 7.6            | 7.9               | 230           | 130           |
| 59  | Shantung Silk Shirt                 | 4.1            | 5.2               | 148           | 80            |
| 60  | Boxy Cropped Tee                    | 2.2            | 3.1               | 84            | 42            |
| 61  | Cashmere Cable Knit                 | 7.6            | 7.9               | 230           | 129           |
| 62  | Waterproof Rain Jacket              | 7.1            | 7.4               | 212           | 112           |
| 63  | Pinpoint Oxford Shirt               | 3.6            | 4.7               | 136           | 71            |
| 64  | Canvas Chore Coat                   | 7.2            | 7.5               | 216           | 116           |
| 65  | Relaxed Fit Tee                     | 2.7            | 3.6               | 95            | 51            |
| 66  | Grandad Collar Shirt                | 3.8            | 4.9               | 140           | 74            |
| 67  | Linen/Cotton Blend Shirt            | 3.6            | 4.7               | 136           | 71            |
| 68  | Knit Cardigan Sweater               | 6.1            | 6.3               | 188           | 90            |
| 69  | Contrast Cuff Shirt                 | 3.3            | 4.4               | 129           | 66            |
| 70  | Flannel Plaid Shirt                 | 3.5            | 4.0               | 128           | 65            |
| 71  | End-on-End Fabric Shirt             | 3.5            | 4.6               | 134           | 69            |
| 72  | Slim Fit Linen T-Shirt              | 2.2            | 3.1               | 85            | 43            |
| 73  | Lightweight Jersey Tee              | 2.5            | 3.4               | 91            | 48            |
| 74  | Slim Fit Pique Polo                 | 2.7            | 3.6               | 94            | 50            |
| 75  | Cashmere Blend Sweater              | 7.5            | 7.8               | 225           | 125           |
| 76  | Washed Effect T-Shirt               | 2.6            | 3.5               | 92            | 49            |
| 77  | Cuban Collar Shirt                  | 3.9            | 5.0               | 142           | 76            |
| 78  | Two-Pocket Utility Shirt            | 3.7            | 4.8               | 139           | 72            |
| 79  | Band Collar Shirt                   | 3.1            | 4.2               | 126           | 64            |
| 80  | Heavy Cotton T-Shirt                | 2.5            | 3.4               | 90            | 48            |
| 81  | Woven Stripe Shirt                  | 3.2            | 4.3               | 128           | 65            |
| 82  | Seersucker Popover Shirt            | 3.4            | 4.5               | 131           | 67            |
| 83  | Ribbed Knit T-Shirt                 | 2.1            | 3.0               | 83            | 41            |
| 84  | Semi-Spread Collar Shirt            | 3.2            | 4.3               | 127           | 64            |
| 85  | Distressed Cotton Tee               | 2.3            | 3.2               | 88            | 45            |
| 86  | Pin Dot Print Shirt                 | 3.5            | 4.6               | 133           | 69            |
| 87  | Faux Fur Coat                       | 7.7            | 8.0               | 235           | 134           |
| 88  | Fitted Basic Tee                    | 2.4            | 3.3               | 88            | 46            |
| 89  | Casual Wool Overcoat                | 8.0            | 8.3               | 240           | 140           |
| 90  | Lightweight Windbreaker             | 5.8            | 6.0               | 180           | 85            |
| 91  | Performance Polo Shirt              | 2.9            | 3.9               | 118           | 57            |
| 92  | Twill Blazer (Unlined)              | 6.9            | 7.1               | 205           | 100           |
| 93  | Bamboo Fiber Tee                    | 2.4            | 3.3               | 89            | 46            |
| 94  | Sustainable Cotton Tee              | 2.6            | 3.5               | 93            | 50            |
| 95  | Cotton Slub Polo                    | 2.9            | 3.8               | 98            | 54            |
| 96  | Classic Barrel Cuff Shirt           | 3.7            | 4.8               | 139           | 73            |
| 97  | Basic Scoop Neck Tee                | 2.2            | 3.2               | 84            | 42            |
| 98  | Printed Art Graphic Tee             | 2.2            | 3.1               | 86            | 43            |
| 99  | Drop Shoulder T-Shirt               | 2.2            | 3.1               | 87            | 44            |
| 100 | Linen Blend Shirt                   | 3.2            | 4.3               | 127           | 64            |
| 101 | Two-Tone Oxford Shirt               | 3.3            | 4.4               | 130           | 67            |
| 102 | Casual Knit Blazer                  | 6.5            | 6.8               | 196           | 96            |
| 103 | Micro-check Business Shirt          | 3.7            | 4.8               | 138           | 72            |
| 104 | Fleece-Lined Track Suit             | 7.0            | 7.2               | 210           | 110           |
| 105 | Speckled Slub Shirt                 | 3.0            | 4.1               | 124           | 62            |
| 106 | Contrast Stitch Shirt               | 3.9            | 5.0               | 142           | 75            |
| 107 | Embroidered T-Shirt                 | 2.3            | 3.2               | 88            | 45            |
| 108 | Corduroy Shacket                    | 6.7            | 6.9               | 202           | 97            |
| 109 | Tailored Fit Dress Shirt            | 3.7            | 4.8               | 138           | 72            |
| 110 | Bamboo Viscose Tee                  | 2.0            | 2.9               | 79            | 38            |
| 111 | Half-Zip Pullover Shirt             | 3.6            | 4.7               | 137           | 71            |
| 112 | Convertible Zip-Off Trousers        | 6.5            | 6.8               | 195           | 94            |
| 113 | Corduroy Field Jacket               | 6.7            | 7.0               | 200           | 98            |
| 114 | Pocket T-Shirt                      | 2.4            | 3.3               | 87            | 44            |
| 115 | Embroidered Logo Tee                | 2.3            | 3.2               | 87            | 45            |
| 116 | Triblend Jersey Tee                 | 2.5            | 3.5               | 91            | 49            |
| 117 | Fine Wale Corduroy Shirt            | 3.7            | 4.8               | 138           | 72            |
| 118 | Supima Cotton Tee                   | 2.8            | 3.7               | 97            | 53            |
| 119 | Waffle Knit Pullover                | 6.1            | 6.3               | 187           | 91            |
| 120 | Heavyweight Denim Jacket            | 7.4            | 7.7               | 220           | 119           |
| 121 | Jersey V-Neck T-Shirt               | 2.5            | 3.4               | 90            | 48            |
| 122 | Wool Blend Trousers                 | 6.8            | 7.1               | 203           | 103           |
| 123 | Printed Patchwork Shirt             | 3.4            | 4.5               | 130           | 66            |
| 124 | Checked Seersucker Shirt            | 3.9            | 5.0               | 141           | 75            |
| 125 | Heather Grey T-Shirt                | 2.3            | 3.2               | 86            | 45            |
| 126 | Down-filled Parka                   | 7.8            | 8.1               | 235           | 135           |
| 127 | Organic Cotton Tee                  | 2.1            | 3.1               | 85            | 43            |
| 128 | Drawstring Cargo Pants              | 6.4            | 6.6               | 192           | 93            |
| 129 | Reversible Bomber Jacket            | 7.5            | 7.8               | 225           | 124           |
| 130 | Shearling Trench Coat               | 8.1            | 8.4               | 245           | 145           |
| 131 | Moisture-Wicking Polo               | 2.3            | 3.2               | 87            | 45            |
| 132 | Heavyweight Jersey T-Shirt          | 2.7            | 3.6               | 96            | 52            |
| 133 | Oversized Graphic Tee               | 2.8            | 3.8               | 98            | 54            |
| 134 | Performance Stretch Tee             | 2.6            | 3.5               | 93            | 50            |
| 135 | Slub Cotton T-Shirt                 | 2.6            | 3.5               | 92            | 49            |
| 136 | Long-sleeve Henley                  | 2.8            | 3.7               | 95            | 50            |
| 137 | Tie-Dye T-Shirt                     | 2.4            | 3.3               | 89            | 46            |
| 138 | Trench Coat (Lined)                 | 8.3            | 8.6               | 255           | 155           |
| 139 | Stretch Hybrid Shorts               | 6.0            | 6.2               | 186           | 89            |
| 140 | High-Tech Ski Jacket                | 8.5            | 8.8               | 260           | 160           |
| 141 | Denim Snap Shirt                    | 3.4            | 4.6               | 132           | 68            |
| 142 | Mock Neck Long-sleeve               | 2.2            | 3.1               | 86            | 43            |
| 143 | High-Waist Trousers                 | 6.2            | 6.4               | 189           | 91            |
| 144 | Raglan Sleeve Tee                   | 2.6            | 3.6               | 94            | 51            |
| 145 | Pima Cotton Dress Shirt             | 3.8            | 4.9               | 140           | 73            |
| 146 | Tencel Blend Casual Shirt           | 3.4            | 4.5               | 131           | 67            |
| 147 | Bio-Washed T-Shirt                  | 2.8            | 3.7               | 97            | 53            |
| 148 | Printed Stripe Tee                  | 2.7            | 3.6               | 97            | 53            |
| 149 | Washed Pigment Dye Tee              | 2.1            | 3.0               | 82            | 40            |
| 150 | Tailored Wool Trousers              | 6.6            | 6.9               | 200           | 99            |
| 151 | Water-Resistant Chinos              | 6.4            | 6.6               | 192           | 93            |
| 152 | Corduroy Carpenter Pants            | 6.2            | 6.4               | 188           | 90            |
| 153 | Seersucker Short-sleeve             | 2.7            | 3.6               | 93            | 47            |
| 154 | Hybrid Hiking Trousers              | 6.0            | 6.2               | 185           | 87            |
| 155 | Wide Leg Trousers                   | 6.1            | 6.3               | 187           | 89            |
| 156 | French Terry Hoodie                 | 6.5            | 6.8               | 195           | 95            |
| 157 | Contrast Stitch Polo                | 2.9            | 3.8               | 99            | 55            |
| 158 | Cutaway Collar Shirt                | 3.6            | 4.7               | 136           | 70            |
| 159 | Unisex Jogger Pants                 | 5.9            | 6.1               | 185           | 88            |
| 160 | Eco-Friendly Hemp Shirt             | 3.1            | 4.2               | 126           | 63            |
| 161 | Straight Leg Corduroys              | 6.7            | 7.0               | 200           | 99            |
| 162 | Distressed Denim Tee                | 2.0            | 2.9               | 79            | 38            |
| 163 | Patchwork Denim Shirt               | 3.5            | 4.6               | 133           | 69            |
| 164 | Sublimation Print Tee               | 2.4            | 3.3               | 89            | 46            |
| 165 | Printed Camp Collar Shirt           | 2.9            | 4.0               | 120           | 60            |
| 166 | Lycra Blend Active Tee              | 2.1            | 3.0               | 81            | 40            |
| 167 | Suede Bomber Jacket                 | 7.7            | 8.0               | 232           | 132           |
| 168 | Faux Leather Jacket                 | 7.0            | 7.3               | 210           | 111           |
| 169 | Slim Fit Chinos                     | 6.3            | 6.5               | 190           | 90            |
| 170 | Shearling Lined Coat                | 7.9            | 8.2               | 238           | 138           |
| 171 | Reversible Quilted Jacket           | 7.4            | 7.7               | 222           | 122           |
| 172 | Ripstop Utility Pants               | 6.3            | 6.5               | 190           | 91            |
| 173 | Tapered Denim Jeans                 | 6.6            | 6.9               | 198           | 96            |
| 174 | Recycled Material Tee               | 2.0            | 2.9               | 79            | 38            |
| 175 | Tech Fleece Pullover                | 6.6            | 6.9               | 196           | 95            |
| 176 | Slim Fit Ankle Grazer               | 6.0            | 6.2               | 186           | 88            |
| 177 | Slim Tapered Jeans                  | 6.8            | 7.1               | 203           | 102           |
| 178 | Wide Leg Culottes                   | 6.3            | 6.5               | 191           | 92            |
| 179 | Organic Hemp T-Shirt                | 2.8            | 3.7               | 96            | 52            |
| 180 | Down-Alternative Vest               | 6.9            | 7.2               | 205           | 104           |
| 181 | Woven Utility Vest                  | 6.4            | 6.6               | 193           | 93            |
| 182 | Wool Blend Peacoat                  | 7.2            | 7.5               | 215           | 115           |
| 183 | High Performance Hiking Pants       | 6.5            | 6.8               | 194           | 94            |
| 184 | Ribbed Crew Neck Tee                | 2.7            | 3.6               | 95            | 51            |
| 185 | Deep V-Neck Tee                     | 2.3            | 3.2               | 87            | 44            |
| 186 | Insulated Puffer Jacket             | 7.4            | 7.7               | 220           | 120           |
| 187 | Patchwork Denim Jacket              | 7.2            | 7.5               | 215           | 114           |
| 188 | Velour Track Pants                  | 5.8            | 6.0               | 182           | 86            |
| 189 | Garment Dyed Tee                    | 2.5            | 3.4               | 90            | 47            |
| 190 | Leather Biker Jacket                | 8.2            | 8.5               | 250           | 150           |
| 191 | Camo Print Hoodie                   | 6.1            | 6.3               | 187           | 89            |
| 192 | Velvet Bomber Jacket                | 7.3            | 7.6               | 218           | 118           |
| 193 | Microfiber V-Neck                   | 2.0            | 2.9               | 78            | 37            |
| 194 | Embroidered Pocket Tee              | 2.5            | 3.4               | 90            | 47            |
| 195 | Recycled Poly T-Shirt               | 2.7            | 3.7               | 96            | 52            |
| 196 | Sherpa Lined Pullover               | 6.9            | 7.1               | 208           | 108           |
| 197 | Faded Pigment Tee                   | 2.8            | 3.7               | 98            | 54            |
| 198 | Quilted Vest                        | 6.8            | 7.0               | 205           | 105           |

---

**All coefficients are taken directly from the provided 41.csv data, with no simplification or abbreviation.**

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB
import pandas as pd
df = pd.read_csv('/Users/cora/Documents/GitHub/lean-llm-opt/Large_Scale_Or_Files/Other_example/41.csv').set_index('Product Name')

df['CM'] = df['Selling Price'] - df['Variable Cost']

labor_capacity = 1650
material_capacity = 1850

total_fixed_cost = 4500

try:
    model = gp.Model("RedBeanClothingProfitMaximization")

    x = model.addVars(df.index, lb=0, vtype=GRB.INTEGER, name="x")

    obj = gp.quicksum(df.loc[p, 'CM'] * x[p] for p in df.index)
    model.setObjective(obj-4500, GRB.MAXIMIZE)

    model.addConstr(
        gp.quicksum(df.loc[p, 'Labor per unit'] * x[p] for p in df.index) <= labor_capacity,
        "Labor_Constraint"
    )

    model.addConstr(
        gp.quicksum(df.loc[p, 'Material per unit'] * x[p] for p in df.index) <= material_capacity,
        "Material_Constraint"
    )

    model.optimize()

    model.write('1.lp')
    if model.status == GRB.OPTIMAL:
        
        total_cm = model.objVal
        
        print(f" (Max Weekly Profit): ${total_cm:,.2f}\n")

        """

    if selected_problem == "Network Revenue Management" or selected_problem == "NRM" or selected_problem == "Network Revenue Management Problem":

        prompt += """
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \max \quad \sum_i A_i \cdot x_i$
Constraints
1. Inventory Constraints:
$\quad \quad x_i \leq I_i, \quad \forall i$
2. Demand Constraints:
$x_i \leq d_i, \quad \forall i$
3. Startup Constraint:
$\sum_i x_i \geq s$
Retrieved Information
$\small I = [7550, 6244]$
$\small A = [149, 389]$
$\small d = [15057, 12474]$
$\small s = 100$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Product_Optimization")

# Decision variables for the number of units of each product
x_1 = m.addVar(vtype=GRB.INTEGER, name="x_1") # Number of units of product 1
x_2 = m.addVar(vtype=GRB.INTEGER, name="x_2") # Number of units of product 2

# Objective function: Maximize 149 x_1 + 389 x_2
m.setObjective(149 * x_1 + 389 * x_2, GRB.MAXIMIZE)

# Constraints
m.addConstr(x_1 <= 7550, name="inventory_constraint_1")
m.addConstr(x_2 <= 6244, name="inventory_constraint_2")
m.addConstr(x_1 <= 15057, name="demand_constraint_1")
m.addConstr(x_2 <= 12474, name="demand_constraint_2")

# Non-negativity constraints are implicitly handled by the integer constraints (x_1, x_2 >= 0)

# Solve the model
m.optimize()

        """

    elif selected_problem == "Facility Location Problem" or selected_problem == "FLP" or selected_problem == "Facility Location":
        prompt += """
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \min \quad \sum_{i} \sum_{j} A_{ij} \cdot x_{ij} + \sum_{i} c_i \cdot y_i$

Constraints
1. Demand Constraint:
$\quad \quad \sum_i x_{ij} = d_j, \quad \forall j$
2. Capacity Constraint:
$\quad \quad \sum_j x_{ij} \leq M \cdot y_i, \quad \forall i$
3. Non-negativity:
$\quad \quad x_{ij} \geq 0, \quad \forall i,j$
4. Binary Requirement:
$\quad \quad y_i \in \{0,1\}, \quad \forall i$

Retrieved Information
$\small d = [1083, 776, 16214, 553, 17106, 594, 732]$
$\small c = [102.33, 94.92, 91.83, 98.71, 95.73, 99.96, 98.16]$
$\small A = \begin{bmatrix}
1506.22 & 70.90 & 8.44 & 260.27 & 197.47 & 71.71 & 61.19 \\  
1732.65 & 1780.72 & 567.44 & 448.68 & 29.00 & 1484.91 & 963.92 \\  
115.66 & 100.76 & 64.68 & 1324.53 & 64.99 & 134.88 & 2102.83 \\  
1254.78 & 1115.63 & 52.31 & 1036.16 & 892.63 & 1464.04 & 1383.41 \\  
42.90 & 891.01 & 1013.94 & 1128.72 & 58.91 & 42.89 & 1570.31 \\  
0.70 & 139.46 & 70.03 & 79.15 & 1482.00 & 0.91 & 110.46 \\  
1732.30 & 1780.44 & 486.50 & 523.74 & 522.08 & 82.48 & 826.41
\end{bmatrix}$
$\small M = \sum_j d_j = 1083 + 776 + 16214 + 553 + 17106 + 594 + 732 = 38058 $


The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB
import numpy as np

# Data
d = np.array([1083, 776, 16214, 553, 17106, 594, 732])
c = np.array([102.33, 94.92, 91.83, 98.71, 95.73, 99.96, 98.16])
A = np.array([[1506.22, 70.90, 8.44, 260.27, 197.47, 71.71, 61.19],  
[1732.65, 1780.72, 567.44, 448.68, 29.00, 1484.91, 963.92],  
[115.66, 100.76, 64.68, 1324.53, 64.99, 134.88, 2102.83],  
[1254.78, 1115.63, 52.31, 1036.16, 892.63, 1464.04, 1383.41],  
[42.90, 891.01, 1013.94, 1128.72, 58.91, 42.89, 1570.31],  
[0.70, 139.46, 70.03, 79.15, 1482.00, 0.91, 110.46],  
[1732.30, 1780.44, 486.50, 523.74, 522.08, 82.48, 826.41]])

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(A.shape[0], A.shape[1], lb=0, name="x")
y = m.addVars(A.shape[0], vtype=GRB.BINARY, name="y")

# Objective function
m.setObjective(gp.quicksum(A[i, j]*x[i, j] for i in range(A.shape[0]) for j in range(A.shape[1])) + gp.quicksum(c[i]*y[i] for i in range(A.shape[0])), GRB.MINIMIZE)

# Constraints
for j in range(A.shape[1]):
    m.addConstr(gp.quicksum(x[i, j] for i in range(A.shape[0])) == d[j], name=f"demand_constraint_{j}")

M = 1000000  # large number
for i in range(A.shape[0]):
    m.addConstr(-M*y[i] + gp.quicksum(x[i, j] for j in range(A.shape[1])) <= 0, name=f"M_constraint_{i}")

# Solve the model
m.optimize()
        """

    elif selected_problem == "Assignment Problem" or selected_problem == "AP" or selected_problem == "Assignment":
        prompt += """
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \min \quad \sum_{i=1}^3 \sum_{j=1}^3 c_{ij} \cdot x_{ij}$

Constraints
1. Row Assignment Constraint:
$\quad \quad \sum_{j=1}^3 x_{ij} = 1, \quad \forall i \in \{1,2,3\}$
2. Column Assignment Constraint:
$\quad \quad \sum_{i=1}^3 x_{ij} = 1, \quad \forall j \in \{1,2,3\}$
3. Binary Constraint:
$\quad \quad x_{ij} \in \{0,1\}, \quad \forall i,j$

Retrieved Information
$\small c = \begin{bmatrix}
3000 & 3200 & 3100 \\
2800 & 3300 & 2900 \\
2900 & 3100 & 3000 
\end{bmatrix}$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB
import numpy as np

# Data
c = np.array([
    [3000, 3200, 3100],
    [2800, 3300, 2900],
    [2900, 3100, 3000]
])

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(c.shape[0], c.shape[1], vtype=GRB.BINARY, name="x")

# Objective function
m.setObjective(gp.quicksum(c[i, j]*x[i, j] for i in range(c.shape[0]) for j in range(c.shape[1])), GRB.MINIMIZE)

# Constraints
for i in range(c.shape[0]):
    m.addConstr(gp.quicksum(x[i, j] for j in range(c.shape[1])) == 1, name=f"row_constraint_{i}")

for j in range(c.shape[1]):
    m.addConstr(gp.quicksum(x[i, j] for i in range(c.shape[0])) == 1, name=f"col_constraint_{j}")

# Solve the model
m.optimize()
"""

    
    elif selected_problem == "Transportation Problem" or selected_problem == "TP" or selected_problem == "Transportation":
        prompt += """
For example, here is a simple instance for reference:

Mathematical Optimization Model:

Objective Function:
$\quad \quad \min \quad \sum_i \sum_j c_{ij} \cdot x_{ij}$

Constraints
1. Demand Constraint:
$\quad \quad \sum_i x_{ij} \geq d_j, \quad \forall j$
2. Capacity Constraint:
$\quad \quad \sum_j x_{ij} \leq s_i, \quad \forall i$

Retrieved Information
$\small d = [94, 39, 65, 435]$
$\small s = [2531, 20, 210, 241]$
$\small c = \begin{bmatrix}
883.91 & 0.04 & 0.03 & 44.45 \\
543.75 & 23.68 & 23.67 & 447.75 \\
537.34 & 23.76 & 498.95 & 440.60 \\
1791.49 & 68.21 & 1432.48 & 1527.76
\end{bmatrix}$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Optimization")

# Decision variables
x_S1_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C1")
x_S1_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C2")
x_S1_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C3")
x_S1_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S1_C4")
x_S2_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C1")
x_S2_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C2")
x_S2_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C3")
x_S2_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S2_C4")
x_S3_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C1")
x_S3_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C2")
x_S3_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C3")
x_S3_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S3_C4")
x_S4_C1 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C1")
x_S4_C2 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C2")
x_S4_C3 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C3")
x_S4_C4 = m.addVar(vtype=GRB.INTEGER, name="x_S4_C4")

# Objective function
m.setObjective(883.91 * x_S2_C1 + 0.04 * x_S2_C2 + 0.03 * x_S2_C3 + 44.45 * x_S2_C4 + 543.75 * x_S1_C1 + 23.68 * x_S1_C2 + 23.67 * x_S1_C3 + 447.75 * x_S1_C4 + 537.34 * x_S3_C1 + 23.76 * x_S3_C2 + 498.95 * x_S3_C3 + 440.60 * x_S3_C4 + 1791.49 * x_S4_C1 + 68.21 * x_S4_C2 + 1432.48 * x_S4_C3 + 1527.76 * x_S4_C4, GRB.MINIMIZE)

# Constraints
m.addConstr(x_S1_C1 + x_S2_C1 + x_S3_C1 + x_S4_C1 >= 94, name="demand_constraint1")
m.addConstr(x_S1_C2 + x_S2_C2 + x_S3_C2 + x_S4_C2 >= 39, name="demand_constraint2")
m.addConstr(x_S1_C3 + x_S2_C3 + x_S3_C3 + x_S4_C3 >= 65, name="demand_constraint3")
m.addConstr(x_S1_C4 + x_S2_C4 + x_S3_C4 + x_S4_C4 >= 435, name="demand_constraint4")
m.addConstr(x_S1_C1 + x_S1_C2 + x_S1_C3 + x_S1_C4 <= 2531, name="capacity_constraint1")
m.addConstr(x_S2_C1 + x_S2_C2 + x_S2_C3 + x_S2_C4 <= 20, name="capacity_constraint2")
m.addConstr(x_S3_C1 + x_S3_C2 + x_S3_C3 + x_S3_C4 <= 210, name="capacity_constraint3")
m.addConstr(x_S4_C1 + x_S4_C2 + x_S4_C3 + x_S4_C4 <= 241, name="capacity_constraint4")

# Solve the model
m.optimize()
        """
    
    elif selected_problem == "Resource Allocation" or selected_problem == "RA" or selected_problem == "Resource Allocation Problem":
        prompt += """
For example, here is a simple instance for reference:

Always remember: If not specified. All the variables are non-negative interger.

Mathematical Optimization Model:

Objective Function:
$\quad \quad \max \quad \sum_i \sum_j p_i \cdot x_{ij}$

Constraints
1. Capacity Constraint:
$\quad \quad \sum_i a_i \cdot x_{ij} \leq c_j, \quad \forall j$
2. Non-negativity Constraint:
$\quad \quad x_{ij} \geq 0, \quad \forall i,j$

Retrieved Information
$\small p = [321, 309, 767, 300, 763, 318, 871, 522, 300, 275, 858, 593, 126, 460, 685, 443, 700, 522, 940, 598]$
$\small a = [495, 123, 165, 483, 472, 258, 425, 368, 105, 305, 482, 387, 469, 341, 318, 104, 377, 213, 56, 131]$
$\small c = [4466]$

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(20, vtype=GRB.INTEGER, name="x")

# Objective function
m.setObjective(sum(x[i]*c[i] for i in range(20)), GRB.MAXIMIZE)

# Constraints
m.addConstr(sum(x[i]*w[i] for i in range(20)) <= 4466, name="capacity_constraint")

# Coefficients for the objective function
c = [321, 309, 767, 300, 763, 318, 871, 522, 300, 275, 858, 593, 126, 460, 685, 443, 700, 522, 940, 598]

# Coefficients for the capacity constraint
w = [495, 123, 165, 483, 472, 258, 425, 368, 105, 305, 482, 387, 469, 341, 318, 104, 377, 213, 56, 131]

# Solve the model
m.optimize()
```

-----
Here is another simple instance for reference:

Objective Function:
$\quad \quad \max \quad \sum_i p_i \cdot x_i$

Constraints
1. Capacity Constraint:
$\quad \quad \sum_i a_i \cdot x_i \leq 180$
2. Dependency Constraint:
$\quad \quad x_1 \leq x_3$
3. Non-negativity Constraint:
$\quad \quad x_i \geq 0, \quad \forall i$

Retrieved Information
$\small p = [888, 134, 129, 370, 921, 765, 154, 837, 584, 365]$
$\small a = [4, 2, 4, 3, 2, 1, 2, 1, 3, 3]$

The corresponding Python code for this instance is as follows:

import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(10, vtype=GRB.INTEGER, name="x")

# Objective function
p = [888, 134, 129, 370, 921, 765, 154, 837, 584, 365]
m.setObjective(sum(x[i]*p[i] for i in range(10)), GRB.MAXIMIZE)

# Constraints
a = [4, 2, 4, 3, 2, 1, 2, 1, 3, 3]
m.addConstr(sum(x[i]*a[i] for i in range(10)) <= 180, name="capacity_constraint")
m.addConstr(x[0] <= x[2], name="dependency_constraint")

# Solve the model
m.optimize()
        
        """
    else:
        prompt += """
For example, here is a simple instance for reference:

Mathematical Optimization Model:
Maximize 5x_S + 8x_F
Subject to
    2x_S + 5x_F <= 200
    x_S <= 0.3(x_S + x_F)
    x_F >= 10
    x_S, x_F _ Z+

The corresponding Python code for this instance is as follows:

```python
import gurobipy as gp
from gurobipy import GRB

# Create the model
m = gp.Model("Worker_Optimization")

# Decision variables for the number of seasonal (x_S) and full-time (x_F) workers
x_S = m.addVar(vtype=GRB.INTEGER, lb=0, name="x_S")  # Number of seasonal workers
x_F = m.addVar(vtype=GRB.INTEGER, lb=0, name="x_F")  # Number of full-time workers

# Objective function: Maximize Z = 5x_S + 8x_F
m.setObjective(5 * x_S + 8 * x_F, GRB.MAXIMIZE)

# Constraints
m.addConstr(2 * x_S + 5 * x_F <= 200, name="resource_constraint")
m.addConstr(x_S <= 0.3 * (x_S + x_F), name="seasonal_ratio_constraint")
m.addConstr(x_F >= 10, name="full_time_minimum_constraint")

# Non-negativity constraints are implicitly handled by the integer constraints (x_S, x_F >= 0)

# Solve the model
m.optimize()
```
The another example is:

Mathematical Optimization Model:
Minimize 919x_11 + 556x_12 + 951x_13 + 21x_21 + 640x_22 + 409x_23 + 59x_31 + 786x_32 + 304x_33
Subject to
    x_11 + x_12 + x_13 = 1
    x_21 + x_22 + x_23 = 1
    x_31 + x_32 + x_33 = 1
    x_11 + x_21 + x_31 = 1
    x_12 + x_22 + x_32 = 1
    x_13 + x_23 + x_33 = 1
    x_11, x_12, x_13, x_21, x_22, x_23, x_31, x_32, x_33 ∈ {{0,1}}


The corresponding Python code for this instance is as follows:

```python

import gurobipy as gp
from gurobipy import GRB
import numpy as np

# Data
c = np.array([
    [919, 556, 951],
    [21, 640, 409],
    [59, 786, 304]
])

# Create the model
m = gp.Model("Optimization_Model")

# Decision variables
x = m.addVars(c.shape[0], c.shape[1], vtype=GRB.BINARY, name="x")

# Objective function
m.setObjective(gp.quicksum(c[i, j]*x[i, j] for i in range(c.shape[0]) for j in range(c.shape[1])), GRB.MINIMIZE)

# Constraints
for i in range(c.shape[0]):
    m.addConstr(gp.quicksum(x[i, j] for j in range(c.shape[1])) == 1, name=f"row_constraint_{i}")

for j in range(c.shape[1]):
    m.addConstr(gp.quicksum(x[i, j] for i in range(c.shape[0])) == 1, name=f"col_constraint_{j}")

# Solve the model
m.optimize() 
```
"""

    
    messages = [
        HumanMessage(content=prompt) 
    ]

    response = llm_code(messages)

    return response.content

## Run!

In [12]:
# Problem-type labels the classifier may emit, in match-priority order.  The
# spellings are exactly the ones the dispatch in run_test compares against.
_PROBLEM_TYPE_LABELS = (
    'Network Revenue Management', 'Network Revenue Management Problem',
    'Resource Allocation', 'Resource Allocation Problem',
    'Transportation', 'Transportation Problem',
    'Facility Location Problem', 'Assignment Problem', 'AP',
    'Uncapacited Facility Location Problem',
    'NRM', 'RA', 'TP', 'FLP', 'UFLP',
    'Others without CSV',
    'Sales-Based Linear Programming', 'SBLP',
    'Others with CSV',
)

# Word boundaries keep the short abbreviations (AP, RA, TP, ...) from matching
# inside ordinary words once IGNORECASE is on (e.g. the "ra" in "paragraph").
_PROBLEM_TYPE_RE = re.compile(
    r'\b(?:' + '|'.join(re.escape(label) for label in _PROBLEM_TYPE_LABELS) + r')\b',
    re.IGNORECASE,
)

# Lower-cased label -> canonical spelling, so a case-insensitive match still
# compares equal to the literals used for dispatching.
_CANONICAL_PROBLEM_TYPE = {label.lower(): label for label in _PROBLEM_TYPE_LABELS}


def _extract_problem_type(output_text):
    """Return the first known problem-type label found in *output_text*, or None."""
    match = _PROBLEM_TYPE_RE.search(output_text)
    if match is None:
        return None
    found = match.group(0)
    return _CANONICAL_PROBLEM_TYPE.get(found.lower(), found)


def run_test(test, agent):
    """Classify each query in *test*, build its model, then generate its code.

    For every row the agent is asked for the problem type, the matching
    ``get_*_response`` builder is called with the query (and the dataset
    address when the frame has one), and ``get_code`` is called on the result.

    Args:
        test: DataFrame with a 'Query' column and, optionally, a
            'Dataset_address' column.
        agent: object whose ``invoke(prompt)`` returns a mapping that has an
            'output' entry holding the classifier's text.

    Returns:
        ``(output_model, output_code, classification)``: three lists with
        exactly one entry per row of *test*, in row order.  For a row that hit
        a ``requests`` connection error, the stages that did not complete are
        recorded as ``None`` so the lists stay aligned with the input rows.

    Raises:
        Any exception other than ``requests.exceptions.RequestException``
        raised by the agent or the response builders propagates unchanged.
    """
    output_model = []
    output_code = []
    classification = []

    for _, row in test.iterrows():
        # Per-row results; whatever is still None after a failure is the
        # placeholder that keeps the three lists the same length.
        selected_problem = None
        output = None
        code_response = None
        failed = False
        try:
            query = row['Query']
            response = agent.invoke(f"What is the problem type of the text? text:{query}")
            selected_problem = _extract_problem_type(response['output'])

            # NOTE(review): this only checks that the column exists, not that
            # this row's value is non-empty -- confirm that is intended.
            if 'Dataset_address' in row.index:
                dataset_address = row['Dataset_address']
                if selected_problem in ("Network Revenue Management", "NRM",
                                        "Network Revenue Management Problem"):
                    print("----------Network Revenue Management-----------")
                    output = get_NRM_response(query, dataset_address)
                elif selected_problem in ("Resource Allocation", "RA",
                                          "Resource Allocation Problem"):
                    print("----------Resource Allocation-----------")
                    output = get_RA_response(query, dataset_address)
                elif selected_problem in ("Transportation", "TP",
                                          "Transportation Problem"):
                    print("----------Transportation-----------")
                    output = get_TP_response(query, dataset_address)
                elif selected_problem in ("Facility Location Problem", "FLP",
                                          "Uncapacited Facility Location",
                                          "Uncapacited Facility Location Problem",
                                          "UFLP"):
                    print("----------Facility Location Problem-----------")
                    output = get_FLP_response(query, dataset_address)
                elif selected_problem in ("Assignment Problem", "AP"):
                    print("----------Assignment Problem-----------")
                    output = get_AP_response(query, dataset_address)
                else:
                    print("----------Others with CSV-----------")
                    output = get_Others_response(query, dataset_address)
            else:
                print("----------Others without CSV-----------")
                output = get_others_without_CSV_response(query)

            code_response = get_code(output, selected_problem)
        except requests.exceptions.RequestException as e:
            print(f"Connection error: {e}")
            failed = True

        classification.append(selected_problem)
        output_model.append(output)
        output_code.append(code_response)

        if failed:
            # As before, a failed row moves straight on to the next one.
            continue
        # Fixed pause between rows (presumably to stay under API rate limits).
        time.sleep(15)
    return output_model, output_code, classification

# Testing

## Preparation:

In [13]:
def read_and_combine_csvs(file_order):
    """Load the CSV files named in *file_order* and stack them into one frame.

    Paths that do not exist are reported and skipped.  The frames that were
    read are concatenated in the order given, with a fresh 0..n-1 index.

    Raises:
        ValueError: if none of the listed files could be read.
    """
    frames = []
    for path in file_order:
        if not os.path.exists(path):
            print(f"File doesn't exist: {path}, already skipped")
            continue
        frame = pd.read_csv(path)
        frames.append(frame)
        print(f"Read file: {path} (Row length: {len(frame)})")

    if len(frames) == 0:
        raise ValueError("No effective files")

    combined = pd.concat(frames, ignore_index=True)
    return combined

def run_gurobi_code(code_str):
    """Execute a Gurobi script and return its optimal objective value.

    The script is run with ``exec`` in a fresh namespace pre-populated with
    ``gp`` and ``GRB``.  Everything it writes to stdout/stderr while running
    is captured and discarded.  The script is expected to leave its model in
    a global named ``m``; a short epilogue appended to the script reads the
    result from it.

    Args:
        code_str: Python source text to execute.

    Returns:
        ``m.ObjVal`` when ``m.status == GRB.OPTIMAL``, otherwise ``None``.
        ``None`` is also returned, after printing ``Execution error: ...``,
        when running the script raises any exception.
    """
    # SECURITY: exec() runs code_str with full builtins and no sandbox.  Only
    # pass code you are prepared to run with this process's privileges.
    epilogue = (
        "\n\n# Added by executor\n"
        "if hasattr(m, 'status') and m.status == GRB.OPTIMAL:\n"
        "    __result__ = m.ObjVal\n"
        "else:\n"
        "    __result__ = None\n"
    )

    try:
        with StringIO() as buf, contextlib.redirect_stdout(buf), contextlib.redirect_stderr(buf):
            env = {
                '__builtins__': __builtins__,
                'gp': gp,
                'GRB': GRB
            }
            try:
                exec(code_str + epilogue, env)
                return env.get('__result__', None)
            finally:
                # Dispose of the model on the failure path too, so a script
                # that raises after building `m` does not leave it undisposed.
                model = env.pop('m', None)
                dispose = getattr(model, 'dispose', None)
                if callable(dispose):
                    dispose()
    except Exception as e:
        # Printed after the redirect has ended, so the message is visible.
        print(f"Execution error: {str(e)}")
        return None


## Test Large Scale OR

In [None]:
# Load the query set that the cells below pass to run_test as `testnrm`.
testnrm = pd.read_csv('/Users/cora/Documents/GitHub/lean-llm-opt/final.csv')
import warnings
import logging
import os

# 1. Ignore deprecation and future warnings from LangChain and Python
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# 2. Suppress info and warning output from Hugging Face/Transformers
logging.getLogger("transformers").setLevel(logging.ERROR)

# ... (your other imports start here, e.g. pandas, torch, etc.)

In [None]:
# Run only the first 15 queries, and take 'Query' from that same slice so that
# every DataFrame column has the same number of rows (previously run_test was
# given the whole frame while 'Query' was cut to 15 rows).
first_batch = testnrm[:15]
output_model_nrm1, output_code_nrm1, classification = run_test(first_batch, classification_agent)
output_df = pd.DataFrame({'Query': first_batch['Query'], 'model_output': output_model_nrm1, 'code_output': output_code_nrm1, 'classification': classification})
output_df.to_csv("/Users/cora/Documents/GitHub/lean-llm-opt/final_15_answer.csv", index=False)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descriptions like '.csv'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type of the text? text:As a portfolio manager, you must allocate a $1,000,000 capital budget across 103 available stocks to maximize the total expected return. This allocation must adhere to specific constraints: you are limited to selecting between 3 and 8 unique stocks, and investment in any single sector must not exceed 27% of the total budget. All necessary data, including expected returns and sector membership, is available in stock.csv."
[0m
Observation: [36;1m[1;3m{'query': 'What is the problem type of the text? text:As a portfolio manager, you must allocate a $1,000,000 capital budget across 103 available stocks to maximize the total expected return. This allocation must adhere to specific constraint

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mI now know the final answer
Final Answer: 
## Mathematical Model

**Parameters:**

Let $n = 77$ (number of stocks, indexed $i=1,\ldots,77$).

Let $S$ be the set of all sectors present in the data (here, $S = \{1,2,3,4,5,6,7,8,9,10\}$).

For each stock $i$:
- $r_i$: expected return (%) of stock $i$
- $m_i$: maximum investment allowed in stock $i$ (from data)
- $s_i$: sector of stock $i$

Let $B = 1{,}000{,}000$ (total budget).

Let $b = 0.27 \times B = 270{,}000$ (sector allocation cap).

**Decision Variables:**
- $x_i$: amount invested in stock $i$ ($0 \leq x_i \leq m_i$)
- $y_i \in \{0,1\}$: 1 if stock $i$ is selected, 0 otherwise

---

**Objective:**

Maximize total expected return:
$$
\max \sum_{i=1}^{77} r_i \frac{x_i}{100}
$$

---

**Subject to:**

1. **Budget Constraint:**
$$
\sum_{i=1}^{77} x_i = 1{,}000{,}000
$$

2. **Stock Selection Cardinality:**
$$
3 \leq \sum_{i=1}^{77} y_i \leq 8
$$

3. **Linking Investment to Selection:**
$$
0 \leq x_i \leq m_i y_i \qquad \fo

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mI now know the final answer
Final Answer: 

### Decision Variables

Let $x_{ij}$ denote the number of exposures of product $i$ $(i=1,\ldots,10)$ to user group $j$ $(j=1,\ldots,12)$.

---

### 1. Conversion Rate Matrix $\mathbf{C} = [c_{ij}]$ (probability of conversion per exposure)
$$
\mathbf{C} =
\begin{bmatrix}
0.167 & 0.147 & 0.175 & 0.092 & 0.073 & 0.191 & 0.142 & 0.145 & 0.147 & 0.118 & 0.142 & 0.121 \\
0.174 & 0.141 & 0.189 & 0.169 & 0.066 & 0.127 & 0.175 & 0.111 & 0.152 & 0.195 & 0.199 & 0.093 \\
0.151 & 0.066 & 0.151 & 0.124 & 0.149 & 0.133 & 0.142 & 0.198 & 0.060 & 0.192 & 0.137 & 0.195 \\
0.192 & 0.142 & 0.166 & 0.054 & 0.172 & 0.145 & 0.085 & 0.170 & 0.079 & 0.197 & 0.096 & 0.081 \\
0.170 & 0.103 & 0.068 & 0.125 & 0.098 & 0.093 & 0.159 & 0.187 & 0.200 & 0.130 & 0.161 & 0.085 \\
0.123 & 0.071 & 0.062 & 0.126 & 0.183 & 0.055 & 0.052 & 0.159 & 0.170 & 0.123 & 0.129 & 0.105 \\
0.198 & 0.154 & 0.097 & 0.137 & 0.191 & 0.187 & 0.093 & 0.059 & 0.123 & 0.104 & 0.130 & 0.

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mThought: I need the actual per-bike service times (in minutes per bike) for check-out and check-in at each Zone-B station, not the operating hours. Since the CSV provides only the time windows, I will assume the per-bike service time for check-out at station i is s_i^out and for check-in is s_i^in (to be filled with actual values if available). The available time per station per day is 1,224 minutes. The decision variable x_i is the number of bikes processed at station i. The total service time used at station i is x_i × (s_i^out + s_i^in). The idle time at station i is 1,224 - x_i × (s_i^out + s_i^in), and the objective is to minimize the sum of idle times across all Zone-B stations, subject to non-negativity and time constraints.

Final Answer:
## Mathematical Model

**Minimize**

$$
\sum_{i \in \text{Zone-B}} \left[ 1224 - x_i \cdot (s_i^{\text{out}} + s_i^{\text{in}}) \right]
$$

**subject to**

$$
x_i \cdot (s_i^{\text{out}} + s_i^{\text{in}}) \leq 1224 \quad \forall 

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: A company operates two factories (Factory A and Factory B) which produce a set of products labeled P1, P2, … as listed in the data files. Each product requires three types of resources: labor hours, machine hours, and raw material units. The company must decide how many units of each product to produce at each factory next week to maximize profit.

For each product Pk in the dataset, define two nonnegative decision variables: the quantity produced at Factory A (x_A_Pk) and the quantity produced at Factory B (x_B_Pk). These variables represent units of product and are treated as continuous, nonnegative real numbers by default.

The resource requirements (per unit of product) for each product at each factory are given in Resource_Requirements.csv. The profit earned (in $) per unit of product at each factory is given in Profit_per_Unit.csv.

Each factory has a limited weekly availability of each resource, as follows:



                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mThought: I now know the final answer
Final Answer: 
##### Objective Function: 

$\quad \quad \max \left( \sum_{k=1}^{62} p_{A,k} \cdot x_{A,P_k} + \sum_{k=1}^{62} p_{B,k} \cdot x_{B,P_k} \right)$

##### Constraints

###### 1. Factory A Resource Constraints: 

$\quad \quad \sum_{k=1}^{62} l_{A,k} \cdot x_{A,P_k} \leq 100$

$\quad \quad \sum_{k=1}^{62} m_{A,k} \cdot x_{A,P_k} \leq 90$

$\quad \quad \sum_{k=1}^{62} r_{A,k} \cdot x_{A,P_k} \leq 80$

###### 2. Factory B Resource Constraints: 

$\quad \quad \sum_{k=1}^{62} l_{B,k} \cdot x_{B,P_k} \leq 80$

$\quad \quad \sum_{k=1}^{62} m_{B,k} \cdot x_{B,P_k} \leq 100$

$\quad \quad \sum_{k=1}^{62} r_{B,k} \cdot x_{B,P_k} \leq 90$

###### 3. Non-negativity Constraints: 

$\quad \quad x_{A,P_k} \geq 0, \quad \forall k$

$\quad \quad x_{B,P_k} \geq 0, \quad \forall k$

###### Retrieved Information

For $k=1$ to $62$:

- $l_{A,k}$ = Labor hours per unit for product $P_k$ at Factory A
- $m_{A,k}$ = Machine hours per unit for product 

In [None]:
# Run the queries from row 15 onward, and take 'Query' from that same slice so
# that every DataFrame column has the same number of rows (previously run_test
# was given the whole frame while 'Query' started at row 15).
second_batch = testnrm[15:]
output_model_nrm1, output_code_nrm1, classification = run_test(second_batch, classification_agent)
output_df = pd.DataFrame({'Query': second_batch['Query'], 'model_output': output_model_nrm1, 'code_output': output_code_nrm1, 'classification': classification})
output_df.to_csv("/Users/cora/Documents/GitHub/lean-llm-opt/final_30_answer.csv", index=False)

In [None]:
# NOTE(review): `q` is not defined anywhere visible, so this cell raises
# NameError (see the recorded output); presumably a manual stop that halts
# "Run All" at this point -- confirm before removing.
q

NameError: name 'q' is not defined

In [None]:
# Rebind `testnrm` to the othersV5 query set for the runs that follow.
testnrm = pd.read_csv('/Users/cora/Documents/GitHub/lean-llm-opt/othersV5.csv')
# NOTE(review): a later cell calls run_test(test_2, ...) and reads
# test_1['Query'], but those names are only defined by the commented-out
# slices below -- re-enable them before running that cell.
# test_1 = testnrm[:15]
# test_2 = testnrm[15:30]
# test_3 = testnrm[30:]


In [None]:
# Process every query in testnrm, then save each query next to its generated
# model, generated code and detected problem type.
output_model_nrm1, output_code_nrm1, classification = run_test(
    testnrm,
    classification_agent,
)
output_df = pd.DataFrame(
    {
        'Query': testnrm['Query'],
        'model_output': output_model_nrm1,
        'code_output': output_code_nrm1,
        'classification': classification,
    }
)
output_df.to_csv("others_total_V2.csv", index=False)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type of the text? text:A consumer electronics factory operates an assembly line with three workstations producing two premium radio models: HiFi-1 and HiFi-2. The assembly times per unit at each station are shown in workstation_times.csv. ..."
[0m
Observation: [36;1m[1;3m{'query': 'What is the problem type of the text? text:A consumer electronics factory operates an assembly line with three workstations producing two premium radio models: HiFi-1 and HiFi-2. The assembly times per unit at each station are shown in workstation_times.csv. ..."\n', 'result': 'The problem type is Others with CSV.', 'source_documents': [Document(id='97568ac3-827a-4ea8-80e7-f5348e96b7fc', metadata={'source': 'Large_Scal

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Decision Variables**

- $x_1$: Number of HiFi-1 radios produced per day
- $x_2$: Number of HiFi-2 radios produced per day
- $I_1$: Idle time (minutes) at Workstation 1 per day
- $I_2$: Idle time (minutes) at Workstation 2 per day
- $I_3$: Idle time (minutes) at Workstation 3 per day

---

**Minimize**

$$
I_1 + I_2 + I_3
$$

**subject to**

_Workstation time constraints (including maintenance):_

$$
6x_1 + 4x_2 + 0.10 \times 480 + I_1 = 480 \\
\quad (\text{Workstation 1: 6 min/HiFi-1, 4 min/HiFi-2, 10% maintenance})
$$

$$
5x_1 + 5x_2 + 0.14 \times 480 + I_2 = 480 \\
\quad (\text{Workstation 2: 5 min/HiFi-1, 5 min/HiFi-2, 14% maintenance})
$$

$$
4x_1 + 6x_2 + 0.12 \times 480 + I_3 = 480 \\
\quad (\text{Workstation 3: 4 min/HiFi-1, 6 min/HiFi-2, 12% maintenance})
$$

_Non-negativity:_

$$
x_1 \geq 0, \quad x_2 \geq 0 \\
I_1 \geq 0, \quad I_2 \geq 0, \quad I_3 \geq 0
$$

---

**where**

- $x_1, x_2$ are integers (if only whole radios c

KeyboardInterrupt: 

In [None]:
# NOTE(review): `q` is not defined anywhere visible, so this cell raises
# NameError (see the recorded output); presumably a manual stop that halts
# "Run All" at this point -- confirm before removing.
q

NameError: name 'q' is not defined

In [None]:
# Run the second batch and label the results with that same batch's queries
# (the 'Query' column previously came from test_1, pairing each output with
# the wrong question).
output_model_nrm1, output_code_nrm1, classification = run_test(test_2, classification_agent)
output_df = pd.DataFrame({'Query': test_2['Query'], 'model_output': output_model_nrm1, 'code_output': output_code_nrm1, 'classification': classification})
output_df.to_csv("others_2.csv", index=False)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type of the text? text:A company is planning a digital marketing campaign for 5 products (P1, P2, P3, P4, P5) targeted to 7 distinct user groups (U1, U2, U3, U4, U5, U6, U7). Given the following data (provided in CSV files): ..."
[0m
Observation: [36;1m[1;3m{'query': 'What is the problem type of the text? text:A company is planning a digital marketing campaign for 5 products (P1, P2, P3, P4, P5) targeted to 7 distinct user groups (U1, U2, U3, U4, U5, U6, U7). Given the following data (provided in CSV files): ..."\n', 'result': 'The problem type is not specified in the provided text.', 'source_documents': [Document(id='28a55979-cd82-4d01-9f2a-62d9ea9e5054', metadata={'source': 'Large_Scale_Or_File

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: the input question you must answer
Thought: I need to formulate the objective function and constraints of the integer programming model based on the user's description and the provided data. I should retrieve all relevant data from the CSV files: ConversionRate.csv, CostperExposure.csv, BrandFitScore.csv, ExposureCaps(per User Group).csv, and ProductMinExposures.csv. I must ensure that the model includes: (1) the objective to maximize expected conversions, (2) budget constraint, (3) per-pair exposure caps, (4) per-product minimum exposures, (5) per-user group total exposure caps, (6) average brand fit constraint, and (7) integrality of exposures. I should not simplify or abbreviate any expressions, and all mathematical expressions should be wrapped in $$. I should only present the final answer in the required format.
Action: CSVQA
Action Input: Retrieve all data from ConversionRate.csv, CostperExposure.csv, BrandFit

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mThought: I now know the final answer
Final Answer: 
##### Objective Function:

$\quad\quad \max \sum_{i=1}^{5} \sum_{j=1}^{7} r_{ij} \cdot x_{ij}$

##### Constraints

###### 1. Budget Constraint:

$\quad\quad \sum_{i=1}^{5} \sum_{j=1}^{7} c_{ij} \cdot x_{ij} \leq 2100$

###### 2. Exposure Cap Constraints (per Product-User Group Pair):

$\quad\quad x_{ij} \leq u_{ij}, \quad \forall i=1,\ldots,5;\ j=1,\ldots,7$

###### 3. Product Minimum Exposure Constraints:

$\quad\quad \sum_{j=1}^{7} x_{ij} \geq m_i, \quad \forall i=1,\ldots,5$

###### 4. User Group Total Exposure Cap Constraints:

$\quad\quad \sum_{i=1}^{5} x_{ij} \leq t_j, \quad \forall j=1,\ldots,7$

###### 5. Average Brand Fit Constraint:

$\quad\quad \frac{\sum_{i=1}^{5} \sum_{j=1}^{7} b_{ij} \cdot x_{ij}}{\sum_{i=1}^{5} \sum_{j=1}^{7} x_{ij}} \geq 0.75$

###### 6. Integrality Constraints:

$\quad\quad x_{ij} \in \mathbb{Z}_{\geq 0}, \quad \forall i=1,\ldots,5;\ j=1,\ldots,7$

##### Retrieved Information

$r_{ij}$ (C

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Maximize**

$$
175\, x_1 + 300\, x_2 + 120\, x_3
+ 0\, x_4 + 0\, x_5
- 400\, y_1 - 3\, y_2
+ 1.8\, z_1 + 2.1\, z_2
$$

**subject to**

$$
x_1 + x_2 + x_3 + 1.5\, y_1 \leq 100
\quad (\text{land constraint, hectares})
$$

$$
400\, y_1 + 3\, y_2 \leq 15{,}000
\quad (\text{capital constraint, yuan})
$$

$$
20\, x_1 + 35\, x_2 + 10\, x_3 + 100\, y_1 + 0.6\, y_2 + z_1 = 3{,}500
\quad (\text{autumn/winter labor, person-days})
$$

$$
50\, x_1 + 75\, x_2 + 40\, x_3 + 50\, y_1 + 0.3\, y_2 + z_2 = 4{,}000
\quad (\text{spring/summer labor, person-days})
$$

$$
y_1 \leq 32
\quad (\text{cow infrastructure limit})
$$

$$
y_2 \leq 3{,}000
\quad (\text{chicken infrastructure limit})
$$

$$
x_1, x_2, x_3 \geq 0
\quad (\text{soybeans, corn, wheat area in hectares})
$$

$$
y_1, y_2 \geq 0
\quad (\text{number of cows, chickens})
$$

$$
z_1, z_2 \geq 0
\quad (\text{external labor in autumn/winter, spring/summer})
$$[0m

[1m> Finished chain.[0m


[1m> E

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: A convenience store plans to open several chain stores in a new residential area in the northwest suburbs of the city. To facilitate shopping, the distance from any residential area to one of the chain stores should not exceed $800 \mathrm{~m}$. 5.csv gives the newly built residential areas and the various areas within a radius of $800 \mathrm{~m}$ of each residential area. How many chain stores should the supermarket build in the above-mentioned areas and in which areas should they be built?
Thought: I need to formulate a set covering model to minimize the number of chain stores, ensuring that each residential area is within 800 meters of at least one store. I need to retrieve all the residential areas and their coverage relationships from 5.csv to construct the model. Each area can potentially be a store location, and the coverage matrix will indicate which areas are covered by a store at each location.
Action: CS

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer: 
##### Objective Function:

$\quad \quad \min \sum_{j=1}^{4} x_j$

##### Constraints

###### 1. Coverage Constraints:

$\quad \quad x_1 + x_2 \geq 1$  (for residential area A)

$\quad \quad x_1 + x_3 \geq 1$  (for residential area B)

$\quad \quad x_2 + x_3 + x_4 \geq 1$  (for residential area C)

$\quad \quad x_3 + x_4 \geq 1$  (for residential area D)

$\quad \quad x_4 \geq 1$  (for residential area E)

###### 2. Variable Constraints:

$ x_j \in \{0, 1\}, \quad j = 1,2,3,4 $

###### Retrieved Information

Residential areas: $I = \{\text{A}, \text{B}, \text{C}, \text{D}, \text{E}\}$

Possible facility locations (chain store sites): $J = \{1, 2, 3, 4\}$

Coverage matrix $A = [a_{ij}]$ (rows: A, B, C, D, E; columns: 1, 2, 3, 4):

$
A =
\begin{bmatrix}
1 & 1 & 0 & 0 \\
1 & 0 & 1 & 0 \\
0 & 1 & 1 & 1 \\
0 & 0 & 1 & 1 \\
0 & 0 & 0 & 1 \\
\end{bmatrix}
$

Each $a_{ij} = 1$ if facility $j$ covers residential area $i$ (i.e., is within 800~m), $0$ otherwise. The mode

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Minimize**

$$
1000\, y_A + 20\, x_A
+ 920\, y_B + 24\, x_B
+ 800\, y_C + 16\, x_C
+ 700\, y_D + 28\, x_D
$$

**subject to**

$$
x_A + x_B + x_C + x_D = 2000
\quad (\text{total production requirement})
$$

$$
x_A \leq 900\, y_A
\quad (\text{device A capacity and activation})
$$

$$
x_B \leq 1000\, y_B
\quad (\text{device B capacity and activation})
$$

$$
x_C \leq 1200\, y_C
\quad (\text{device C capacity and activation})
$$

$$
x_D \leq 1600\, y_D
\quad (\text{device D capacity and activation})
$$

$$
y_A + y_B + y_C + y_D = 1
\quad (\text{only one device can be activated})
$$

**where**

$$
x_A, x_B, x_C, x_D \geq 0
\quad (\text{integer, units produced on each device})
$$

$$
y_A, y_B, y_C, y_D \in \{0,1\}
\quad (\text{binary, activation of each device})
$$[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descript

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Maximize**

$$
3\, x_1 + 2\, x_2 + 2.9\, x_3
$$

**subject to**

$$
8\, x_1 + 2\, x_2 + 10\, x_3 \leq 300
\quad (\text{Device A capacity})
$$

$$
10\, x_1 + 5\, x_2 + 8\, x_3 \leq 400
\quad (\text{Device B capacity})
$$

$$
2\, x_1 + 13\, x_2 + 10\, x_3 \leq 420
\quad (\text{Device C capacity})
$$

$$
x_1 \geq 0
\quad (\text{production amount of product I})
$$

$$
x_2 \geq 0
\quad (\text{production amount of product II})
$$

$$
x_3 \geq 0
\quad (\text{production amount of product III})
$$[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant information.

Action: FileQA

Action Input: "What is the problem type of the text? text:The number of drivers and crew members required for each time period of a certain day and night service bus 

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Minimize**

$$
x_1 + x_2 + x_3 + x_4 + x_5 + x_6
$$

**subject to**

$$
x_1 + x_6 + x_5 + x_4 \geq 60 \quad (\text{covers Shift 1: 6:00 ~ 10:00, Number Required = 60})
$$

$$
x_2 + x_1 + x_6 + x_5 \geq 70 \quad (\text{covers Shift 2: 10:00 ~ 14:00, Number Required = 70})
$$

$$
x_3 + x_2 + x_1 + x_6 \geq 60 \quad (\text{covers Shift 3: 14:00 ~ 18:00, Number Required = 60})
$$

$$
x_4 + x_3 + x_2 + x_1 \geq 50 \quad (\text{covers Shift 4: 18:00 ~ 22:00, Number Required = 50})
$$

$$
x_5 + x_4 + x_3 + x_2 \geq 20 \quad (\text{covers Shift 5: 22:00 ~ 2:00, Number Required = 20})
$$

$$
x_6 + x_5 + x_4 + x_3 \geq 30 \quad (\text{covers Shift 6: 2:00 ~ 6:00, Number Required = 30})
$$

**where**

$$
x_1, x_2, x_3, x_4, x_5, x_6 \geq 0 \quad (\text{integer, number of drivers and crew members starting at each shift})
$$[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: Now we need to determine 4 out of 5 workers to each complete one of the four tasks. Since each worker has different skill sets, the amount of time required for each worker to complete each task is also different. The time required for each worker to complete each task is shown in 15.csv.

Try to find a work assignment plan that minimizes the total working hours.
Thought: I need to retrieve all the data from 15.csv to formulate the mathematical model. Since there are 5 workers and 4 tasks, I need to select 4 workers and assign each to a unique task, minimizing the total time. I need to define variables for assignment and selection, and ensure that only 4 workers are chosen, each assigned to exactly one task, and each task is assigned to exactly one worker.
Action: CSVQA
Action Input: Retrieve all the data from 15.csv to formulate the mathematical model with no simplification or abbreviation.[0m
Observation: [36;1m

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mI now know the final answer
Final Answer: 
##### Objective Function: 

$\quad \quad \min \sum_{i=1}^{5} \sum_{j=1}^{4} c_{ij} x_{ij} $

##### Constraints

###### 1. Assignment Constraints: 

$\sum_{j=1}^{4} x_{ij} \leq 1 \quad \forall i \in \{1,2,3,4,5\} $

$\sum_{i=1}^{5} x_{ij} = 1 \quad \forall j \in \{1,2,3,4\}$

###### 2. Worker Selection Constraints:

$\sum_{i=1}^{5} y_i = 4$

$x_{ij} \leq y_i \quad \forall i \in \{1,2,3,4,5\}, \forall j \in \{1,2,3,4\}$

###### 3. Variable Constraints: 

$\quad \quad x_{ij} \in \{0,1\} \quad \forall i \in \{1,2,3,4,5\}, \forall j \in \{1,2,3,4\}$

$\quad \quad y_i \in \{0,1\} \quad \forall i \in \{1,2,3,4,5\}$

###### Retrieved Information

$c = \begin{bmatrix}
9 & 4 & 3 & 7 \\
4 & 6 & 5 & 6 \\
5 & 4 & 7 & 5 \\
7 & 5 & 2 & 3 \\
10 & 6 & 7 & 4 \\
\end{bmatrix}$[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descr

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Minimize**

$$
C_{\max}
$$

**subject to**

$$
\sum_{j=1}^{3} x_{i,j} = 1 \quad (\text{each task assigned to exactly one CPU}), \quad \forall i \in \{1,2,3,4,5,6,7\}
$$

$$
C_j = \sum_{i=1}^{7} \frac{b_i}{f_j} x_{i,j} \quad (\text{completion time of CPU } j), \quad \forall j \in \{1,2,3\}
$$

$$
C_{\max} \geq C_j \quad (\text{makespan at least as large as any CPU's completion time}), \quad \forall j \in \{1,2,3\}
$$

$$
x_{i,j} \in \{0,1\} \quad (\text{binary assignment variable}), \quad \forall i \in \{1,2,3,4,5,6,7\},\; j \in \{1,2,3\}
$$

$$
C_{\max} \geq 0
$$

**where**

- $b_1 = 1.1$, $b_2 = 2.1$, $b_3 = 3$, $b_4 = 1$, $b_5 = 0.7$, $b_6 = 5$, $b_7 = 3$ (number of instructions in BI for each task)
- $f_1 = 1.33$, $f_2 = 2$, $f_3 = 2.66$ (CPU frequencies in GHz)
- $x_{i,j}$ is 1 if task $i$ is assigned to CPU $j$, 0 otherwise
- $C_j$ is the total processing time on CPU $j$
- $C_{\max}$ is the makespan (completion time of the last t

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Minimize**

$$
\sum_{i=1}^{7} \sum_{j=1}^{7} d_{ij} \, x_{ij}
$$

**subject to**

$$
\sum_{j=1,\, j \neq i}^{7} x_{ij} = 1 \quad \forall\, i = 1, \ldots, 7 \quad (\text{leave each city once})
$$

$$
\sum_{i=1,\, i \neq j}^{7} x_{ij} = 1 \quad \forall\, j = 1, \ldots, 7 \quad (\text{enter each city once})
$$

$$
u_1 = 1
$$

$$
2 \leq u_i \leq 7 \quad \forall\, i = 2, \ldots, 7 \quad (\text{MTZ variables})
$$

$$
u_i - u_j + 7\, x_{ij} \leq 6 \quad \forall\, i = 2, \ldots, 7;\; j = 2, \ldots, 7;\; i \neq j \quad (\text{subtour elimination})
$$

$$
x_{ij} \in \{0,1\} \quad \forall\, i, j = 1, \ldots, 7;\; i \neq j
$$

**where**

- \( x_{ij} = 1 \) if the route goes from city \( i \) to city \( j \), 0 otherwise.
- \( u_i \) are auxiliary variables for subtour elimination (Miller-Tucker-Zemlin formulation).
- \( d_{ij} \) is the distance from city \( i \) to city \( j \), given by:

\[
\begin{array}{c|ccccccc}
d_{ij} & 1 & 2 & 3 & 4 & 5 &

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Maximize**

$$
Z = 5.0 \cdot S_{\text{Yellow}} + 5.5 \cdot S_{\text{Red}} + 4.8 \cdot S_{\text{Blue}}
- 6.0 \cdot (x_{\text{Yellow},\text{I}} + x_{\text{Red},\text{I}} + x_{\text{Blue},\text{I}})
- 4.5 \cdot (x_{\text{Yellow},\text{II}} + x_{\text{Red},\text{II}} + x_{\text{Blue},\text{II}})
- 3.0 \cdot (x_{\text{Yellow},\text{III}} + x_{\text{Red},\text{III}} + x_{\text{Blue},\text{III}})
$$

**subject to**

$$
x_{\text{Yellow},\text{I}} + x_{\text{Red},\text{I}} + x_{\text{Blue},\text{I}} \leq 1500
\quad (\text{Grade I daily supply})
$$

$$
x_{\text{Yellow},\text{II}} + x_{\text{Red},\text{II}} + x_{\text{Blue},\text{II}} \leq 2000
\quad (\text{Grade II daily supply})
$$

$$
x_{\text{Yellow},\text{III}} + x_{\text{Red},\text{III}} + x_{\text{Blue},\text{III}} \leq 1000
\quad (\text{Grade III daily supply})
$$

$$
S_{\text{Yellow}} = x_{\text{Yellow},\text{I}} + x_{\text{Yellow},\text{II}} + x_{\text{Yellow},\text{III}}
$$

$$
S_{\te

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Maximize**

$$
z
$$

**subject to**

$$
z \leq x_{A1} + x_{B1} + x_{C1} + x_{D1} \\
z \leq x_{A2} + x_{B2} + x_{C2} + x_{D2} \\
z \leq x_{A3} + x_{B3} + x_{C3} + x_{D3}
$$

$$
\frac{x_{C1}}{20} + \frac{x_{C2}}{5} + \frac{x_{C3}}{10} \leq 80 \quad (\text{Workshop C hours})
$$

$$
\frac{x_{D1}}{10} + \frac{x_{D2}}{15} + \frac{x_{D3}}{20} \leq 200 \quad (\text{Workshop D hours})
$$

$$
\frac{x_{B1}}{15} + \frac{x_{B2}}{10} + \frac{x_{B3}}{5} \leq 150 \quad (\text{Workshop B hours})
$$

$$
\frac{x_{A1}}{10} + \frac{x_{A2}}{15} + \frac{x_{A3}}{5} \leq 100 \quad (\text{Workshop A hours})
$$

$$
x_{ij} \geq 0 \quad \forall i \in \{A, B, C, D\},\ j \in \{1, 2, 3\}
$$

**where**

$$
z \geq 0 \quad (\text{number of completed products})
$$

$$
x_{ij} \geq 0 \quad (\text{units of component } j \text{ produced in workshop } i)
$$[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determin

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mI now know the final answer
Final Answer: 
## Mathematical Model

**Maximize**

$$
(124 - 73.3)\, x_1 - 170{,}000\, y_1
+ (109 - 52.9)\, x_2 - 150{,}000\, y_2
+ (115 - 65.4)\, x_3 - 100{,}000\, y_3
$$

**subject to**

$$
x_1 \leq 5300 
\quad (\text{product 1 demand})
$$

$$
x_2 \leq 4500 
\quad (\text{product 2 demand})
$$

$$
x_3 \leq 5400 
\quad (\text{product 3 demand})
$$

$$
x_1 \leq 22 \times 500 
\quad (\text{product 1 monthly quota})
$$

$$
x_2 \leq 22 \times 450 
\quad (\text{product 2 monthly quota})
$$

$$
x_3 \leq 22 \times 550 
\quad (\text{product 3 monthly quota})
$$

$$
x_1 \geq 20\, y_1 
\quad (\text{minimum batch for product 1})
$$

$$
x_2 \geq 20\, y_2 
\quad (\text{minimum batch for product 2})
$$

$$
x_3 \geq 16\, y_3 
\quad (\text{minimum batch for product 3})
$$

$$
x_1 \leq 5300\, y_1 
\quad (\text{linking product 1 activation})
$$

$$
x_2 \leq 4500\, y_2 
\quad (\text{linking product 2 activation})
$$

$$
x_3 \leq 5400\, y_3 
\quad (\text{linking p

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mI now know the final answer
Final Answer:
## Mathematical Model

**Minimize**

$$
Z = 28\, y_{1,1} + 28\, y_{2,1} + 28\, y_{3,1} + 28\, y_{4,1}
+ 45\, y_{1,2} + 45\, y_{2,2} + 45\, y_{3,2}
+ 60\, y_{1,3} + 60\, y_{2,3}
+ 73\, y_{1,4}
$$

**subject to**

$$
y_{1,1} + y_{1,2} + y_{1,3} + y_{1,4} \geq 1500
\quad (\text{warehouse area requirement for month 1})
$$

$$
y_{1,2} + y_{1,3} + y_{1,4} + y_{2,1} + y_{2,2} + y_{2,3} \geq 1000
\quad (\text{warehouse area requirement for month 2})
$$

$$
y_{1,3} + y_{1,4} + y_{2,2} + y_{2,3} + y_{3,1} + y_{3,2} \geq 2000
\quad (\text{warehouse area requirement for month 3})
$$

$$
y_{1,4} + y_{2,3} + y_{3,2} + y_{4,1} \geq 1200
\quad (\text{warehouse area requirement for month 4})
$$

$$
y_{1,1} \geq 0,\; y_{1,2} \geq 0,\; y_{1,3} \geq 0,\; y_{1,4} \geq 0
$$

$$
y_{2,1} \geq 0,\; y_{2,2} \geq 0,\; y_{2,3} \geq 0
$$

$$
y_{3,1} \geq 0,\; y_{3,2} \geq 0
$$

$$
y_{4,1} \geq 0
$$

**where**

$$
y_{k,j} \geq 0 \quad (\text{area in m}^2 \text{

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Maximize**

$$
(120\, x_1 - 60\, x_1) + (80\, x_2 - 40\, x_2) + (180\, x_3 - 80\, x_3) - 4500
$$

**subject to**

$$
3\, x_1 + 2\, x_2 + 6\, x_3 \leq 150 
\quad (\text{labor capacity})
$$

$$
4\, x_1 + 3\, x_2 + 6\, x_3 \leq 160 
\quad (\text{material capacity})
$$

$$
x_1 \geq 0 
\quad (\text{integer, number of shirts produced})
$$

$$
x_2 \geq 0 
\quad (\text{integer, number of short-sleeved shirts produced})
$$

$$
x_3 \geq 0 
\quad (\text{integer, number of casual wear produced})
$$

**where**

$$
x_1 = \text{number of shirts produced per week}
$$

$$
x_2 = \text{number of short-sleeved shirts produced per week}
$$

$$
x_3 = \text{number of casual wear produced per week}
$$[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine the problem type of the text. The Query contains descriptions like '.csv' or 'column'. I'll use the FileQA tool to retrieve the relevant inf

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.


[32;1m[1;3mFinal Answer:
## Mathematical Model

**Minimize**

$$
x_1 + x_2 + x_3 + x_4 + x_5 + x_6
$$

**subject to**

$$
x_1 + x_2 \geq 60 \quad (\text{6:00-10:00 requirement})
$$

$$
x_1 + x_2 + x_3 \geq 70 \quad (\text{10:00-14:00 requirement})
$$

$$
x_2 + x_3 + x_4 \geq 60 \quad (\text{14:00-18:00 requirement})
$$

$$
x_3 + x_4 + x_5 \geq 50 \quad (\text{18:00-22:00 requirement})
$$

$$
x_4 + x_5 + x_6 \geq 20 \quad (\text{22:00-2:00 requirement})
$$

$$
x_5 + x_6 \geq 30 \quad (\text{2:00-6:00 requirement})
$$

**where**

$$
x_1 \geq 0 \quad (\text{integer, number of staff starting at 6:00})
$$

$$
x_2 \geq 0 \quad (\text{integer, number of staff starting at 10:00})
$$

$$
x_3 \geq 0 \quad (\text{integer, number of staff starting at 14:00})
$$

$$
x_4 \geq 0 \quad (\text{integer, number of staff starting at 18:00})
$$

$$
x_5 \geq 0 \quad (\text{integer, number of staff starting at 22:00})
$$

$$
x_6 \geq 0 \quad (\text{integer, number of staff starting at 2:00})
$$[0m

[1m> 

In [None]:
# Keep everything in test_3 except its last three entries.
test_3_new = test_3[slice(None, -3)]

In [None]:
# NOTE(review): the run_test call that would define output_model_nrm1,
# output_code_nrm1 and classification is disabled below, so this cell relies on
# those names already existing in the kernel -- confirm before re-running.
# output_model_nrm1, output_code_nrm1,classification = run_test(test_3_new,classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_3_new['Query'],
        'model_output': output_model_nrm1,
        'code_output': output_code_nrm1,
        'classification': classification,
    }
)
output_df.to_csv("others_3.csv", index=False)

In [None]:
# NOTE(review): `q` is not assigned anywhere visible, so evaluating it raises
# NameError; presumably a deliberate stop marker for "Run All" -- confirm.
q

NameError: name 'q' is not defined

### Test NRM

In [None]:
# Load the large-scale benchmark and cut rows 9-33 into the three batches
# that the following cells process one at a time.
testnrm = pd.read_csv('Test_Dataset/Large-scale-or/final_large_scale_OR_New.csv')
testnrm_1, testnrm_2, testnrm_3 = (
    testnrm[start:stop] for start, stop in ((9, 18), (18, 26), (26, 34))
)

In [None]:
# Run run_test on the first NRM batch and save its two outputs next to the queries.
output_model_nrm1, output_code_nrm1 = run_test(testnrm_1, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testnrm_1['Query'],
        'model_output': output_model_nrm1,
        'code_output': output_code_nrm1,
    }
)
output_df.to_csv("nrm1.csv", index=False)

KeyboardInterrupt: 

In [None]:
# Run run_test on the second NRM batch and save its two outputs next to the queries.
output_model_nrm2, output_code_nrm2 = run_test(testnrm_2, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testnrm_2['Query'],
        'model_output': output_model_nrm2,
        'code_output': output_code_nrm2,
    }
)
output_df.to_csv("nrm2.csv", index=False)

In [None]:
# Run run_test on the third NRM batch and save its two outputs next to the queries.
output_model_nrm3, output_code_nrm3 = run_test(testnrm_3, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testnrm_3['Query'],
        'model_output': output_model_nrm3,
        'code_output': output_code_nrm3,
    }
)
output_df.to_csv("nrm3.csv", index=False)

In [None]:
# Combine the saved NRM batches, execute every generated Gurobi program via
# run_gurobi_code, and write the returned values to nrm_result.csv.
file_order = [
    "nrm1.csv",
    "nrm2.csv",
    "nrm3.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs concatenates the files, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

output_file = "nrm_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test RA

In [None]:
# Load the large-scale benchmark and cut rows 34-56 into the three batches
# that the following cells process one at a time.
testra = pd.read_csv('Test_Dataset/Large-scale-or/final_large_scale_OR_New.csv')
testra_1, testra_2, testra_3 = (
    testra[start:stop] for start, stop in ((34, 42), (42, 50), (50, 57))
)

In [None]:
# Run run_test on the first RA batch and save its two outputs next to the queries.
output_model_ra1, output_code_ra1 = run_test(testra_1, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testra_1['Query'],
        'model_output': output_model_ra1,
        'code_output': output_code_ra1,
    }
)
output_df.to_csv("ra1.csv", index=False)

In [None]:
# Run run_test on the second RA batch and save its two outputs next to the queries.
output_model_ra2, output_code_ra2 = run_test(testra_2, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testra_2['Query'],
        'model_output': output_model_ra2,
        'code_output': output_code_ra2,
    }
)
output_df.to_csv("ra2.csv", index=False)

In [None]:
# Run run_test on the third RA batch and save its two outputs next to the queries.
output_model_ra3, output_code_ra3 = run_test(testra_3, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testra_3['Query'],
        'model_output': output_model_ra3,
        'code_output': output_code_ra3,
    }
)
output_df.to_csv("ra3.csv", index=False)

In [None]:
# Combine the saved RA batches, execute every generated Gurobi program via
# run_gurobi_code, and write the returned values to ra_result.csv.
file_order = [
    "ra1.csv",
    "ra2.csv",
    "ra3.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs concatenates the files, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

output_file = "ra_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test TP

In [None]:
# Load the large-scale benchmark, keep only its first nine rows, and display them.
testtp = pd.read_csv('Test_Dataset/Large-scale-or/final_large_scale_OR_New.csv')[:9]
testtp

In [None]:
# Run run_test on the TP rows and save its two outputs next to the queries.
output_model_tp, output_code_tp = run_test(testtp, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testtp['Query'],
        'model_output': output_model_tp,
        'code_output': output_code_tp,
    }
)
output_df.to_csv("tp.csv", index=False)

In [None]:
# Load the saved TP outputs, execute every generated Gurobi program via
# run_gurobi_code, and write the returned values to tp_result.csv.
file_order = [
    "tp.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs builds the frame, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

output_file = "tp_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test AP

In [None]:
# Load the large-scale benchmark, keep rows from position 66 onward, and display them.
testap = pd.read_csv('Test_Dataset/Large-scale-or/final_large_scale_OR_New.csv')[66:]
testap

In [None]:
# Run run_test on the AP rows and save its two outputs next to the queries.
output_model_ap, output_code_ap = run_test(testap, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testap['Query'],
        'model_output': output_model_ap,
        'code_output': output_code_ap,
    }
)
output_df.to_csv("ap.csv", index=False)

In [None]:
# Load the saved AP outputs, execute every generated Gurobi program via
# run_gurobi_code, and write the returned values to ap_result.csv.
file_order = [
    "ap.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs builds the frame, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

output_file = "ap_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test FLP

In [None]:
# Load the large-scale benchmark, keep rows 57-65, and display them.
testflp = pd.read_csv('Test_Dataset/Large-scale-or/final_large_scale_OR_New.csv')[57:66]
testflp

In [None]:
# Run run_test on the FLP rows and save its two outputs next to the queries.
output_model_flp, output_code_flp = run_test(testflp, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': testflp['Query'],
        'model_output': output_model_flp,
        'code_output': output_code_flp,
    }
)
output_df.to_csv("flp.csv", index=False)

In [None]:
# Load the saved FLP outputs, execute every generated Gurobi program via
# run_gurobi_code, and write the returned values to flp_result.csv.
file_order = [
    "flp.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs builds the frame, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

output_file = "flp_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

## Test Small-Scale Datasets

### Test NL4OPT

In [None]:
# Load NL4OPT and split it into seven 30-row batches plus a final batch that
# takes whatever remains from position 210 onward.
test_nl4opt = pd.read_csv('Test_Dataset/Small-scale/NL4OPT.csv')
(
    test_nl4opt1,
    test_nl4opt2,
    test_nl4opt3,
    test_nl4opt4,
    test_nl4opt5,
    test_nl4opt6,
    test_nl4opt7,
) = (test_nl4opt[start:start + 30] for start in range(0, 210, 30))
test_nl4opt8 = test_nl4opt[210:]

In [None]:
# Run run_test on NL4OPT batch 1 and save its two outputs next to the queries.
output_model_nl4opt1, output_code_nl4opt1 = run_test(test_nl4opt1, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt1['Query'],
        'model_output': output_model_nl4opt1,
        'code_output': output_code_nl4opt1,
    }
)
output_df.to_csv("NL4OPT_1-30.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 2 and save its two outputs next to the queries.
output_model_nl4opt2, output_code_nl4opt2 = run_test(test_nl4opt2, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt2['Query'],
        'model_output': output_model_nl4opt2,
        'code_output': output_code_nl4opt2,
    }
)
output_df.to_csv("NL4OPT_31-60.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 3 and save its two outputs next to the queries.
output_model_nl4opt3, output_code_nl4opt3 = run_test(test_nl4opt3, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt3['Query'],
        'model_output': output_model_nl4opt3,
        'code_output': output_code_nl4opt3,
    }
)
output_df.to_csv("NL4OPT_61-90.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 4 and save its two outputs next to the queries.
output_model_nl4opt4, output_code_nl4opt4 = run_test(test_nl4opt4, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt4['Query'],
        'model_output': output_model_nl4opt4,
        'code_output': output_code_nl4opt4,
    }
)
output_df.to_csv("NL4OPT_91-120.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 5 and save its two outputs next to the queries.
output_model_nl4opt5, output_code_nl4opt5 = run_test(test_nl4opt5, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt5['Query'],
        'model_output': output_model_nl4opt5,
        'code_output': output_code_nl4opt5,
    }
)
output_df.to_csv("NL4OPT_121-150.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 6 and save its two outputs next to the queries.
output_model_nl4opt6, output_code_nl4opt6 = run_test(test_nl4opt6, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt6['Query'],
        'model_output': output_model_nl4opt6,
        'code_output': output_code_nl4opt6,
    }
)
output_df.to_csv("NL4OPT_151-180.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 7 and save its two outputs next to the queries.
output_model_nl4opt7, output_code_nl4opt7 = run_test(test_nl4opt7, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt7['Query'],
        'model_output': output_model_nl4opt7,
        'code_output': output_code_nl4opt7,
    }
)
output_df.to_csv("NL4OPT_181-210.csv", index=False)

In [None]:
# Run run_test on NL4OPT batch 8 and save its two outputs next to the queries.
output_model_nl4opt8, output_code_nl4opt8 = run_test(test_nl4opt8, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_nl4opt8['Query'],
        'model_output': output_model_nl4opt8,
        'code_output': output_code_nl4opt8,
    }
)
output_df.to_csv("NL4OPT_211-245.csv", index=False)

In [None]:
# Combine the saved NL4OPT batches, execute every generated Gurobi program via
# run_gurobi_code, and write the returned values to NL4OPT_result.csv.
file_order = [
    "NL4OPT_1-30.csv",
    "NL4OPT_31-60.csv",
    "NL4OPT_61-90.csv",
    "NL4OPT_91-120.csv",
    "NL4OPT_121-150.csv",
    "NL4OPT_151-180.csv",
    "NL4OPT_181-210.csv",
    "NL4OPT_211-245.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs concatenates the files, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

# Add results column
combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

# Save results
output_file = "NL4OPT_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")

### Test IndustryOR

In [None]:
# Load IndustryOR (GBK-encoded), split it into three 25-row batches plus a
# final batch from position 75 onward, then display the full table.
test_industryOR = pd.read_csv('Test_Dataset/Small-scale/IndustryOR.csv', encoding='gbk')
test_industryOR1, test_industryOR2, test_industryOR3 = (
    test_industryOR[start:start + 25] for start in (0, 25, 50)
)
test_industryOR4 = test_industryOR[75:]
test_industryOR

In [None]:
# Run run_test on IndustryOR batch 1 and save its two outputs next to the queries.
output_model_industryOR1, output_code_industryOR1 = run_test(test_industryOR1, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR1['Query'],
        'model_output': output_model_industryOR1,
        'code_output': output_code_industryOR1,
    }
)
output_df.to_csv("IndustryOR_1-25.csv", index=False)

In [None]:
# Run run_test on IndustryOR batch 2 and save its two outputs next to the queries.
output_model_industryOR2, output_code_industryOR2 = run_test(test_industryOR2, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR2['Query'],
        'model_output': output_model_industryOR2,
        'code_output': output_code_industryOR2,
    }
)
output_df.to_csv("IndustryOR_26-50.csv", index=False)

In [None]:
# Run run_test on IndustryOR batch 3 and save its two outputs next to the queries.
output_model_industryOR3, output_code_industryOR3 = run_test(test_industryOR3, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR3['Query'],
        'model_output': output_model_industryOR3,
        'code_output': output_code_industryOR3,
    }
)
output_df.to_csv("IndustryOR_51-75.csv", index=False)

In [None]:
# Run run_test on IndustryOR batch 4 and save its two outputs next to the queries.
output_model_industryOR4, output_code_industryOR4 = run_test(test_industryOR4, classification_agent)
output_df = pd.DataFrame(
    {
        'Query': test_industryOR4['Query'],
        'model_output': output_model_industryOR4,
        'code_output': output_code_industryOR4,
    }
)
output_df.to_csv("IndustryOR_76-100.csv", index=False)

In [None]:
# Combine the saved IndustryOR batches, execute every generated Gurobi program
# via run_gurobi_code, and write the returned values to IndustryOR_result.csv.
file_order = [
    "IndustryOR_1-25.csv",
    "IndustryOR_26-50.csv",
    "IndustryOR_51-75.csv",
    "IndustryOR_76-100.csv",
]
try:
    combined_df = read_and_combine_csvs(file_order)
    print(f"Total rows: {len(combined_df)}")
except Exception as e:
    print(f"File processing failed: {str(e)}")
    sys.exit(1)

print("\n Running Gurobi Code...")
results = []
start_time = time.time()
total_rows = len(combined_df)

# Count rows by position: the index labels of combined_df depend on how
# read_and_combine_csvs concatenates the files, so they are not a safe counter.
for position, (_, row) in enumerate(combined_df.iterrows(), start=1):
    print(f"Processing row {position}/{total_rows}...", end='\r')
    code = row['code_output']
    if not isinstance(code, str):
        # A missing cell comes back from CSV as NaN (a float) and would make
        # re.sub raise TypeError; record a failure (None) and keep going.
        results.append(None)
        continue
    # Trim whitespace first so the ^ / $ anchors actually sit on the fences.
    code = re.sub(r'^```python|```$', '', code.strip(), flags=re.IGNORECASE).strip()
    result = run_gurobi_code(code)
    results.append(result)

combined_df['objective_value'] = results
print(f"\nCode execution completed! Time used: {time.time()-start_time:.2f} seconds")
# None entries are the ones counted as unsuccessful.
print(f"Success: {sum(x is not None for x in results)}/{len(results)}")

output_file = "IndustryOR_result.csv"
combined_df.to_csv(output_file, index=False)
print(f"Output saved to: {output_file}")