In [1]:
import os
from pathlib import Path
import json
import logging

from procureme.vectordb.lance_vectordb import LanceDBVectorStore
from procureme.clients.ollama_embedder import OllamaEmbeddingClient
from typing import Optional
from procureme.configurations.aimodels import ChatModelSelection
from procureme.configurations.app_configs import Settings
from sqlmodel import Session, create_engine, select
from procureme.dbmodels.doc_summery import ContractSummary
from procureme.dbmodels.doc_metadata import ContractMetadata
from procureme.clients.openai_chat import OpenAIClient
from enum import StrEnum
import pandas as pd

In [2]:
# Select the "local" runtime profile. This is set before Settings() is instantiated in the
# next cell -- presumably Settings reads RUNTIME_ENV; TODO confirm.
os.environ["RUNTIME_ENV"] = "local"

In [3]:
# Application settings; OPENAI_API_KEY is read from this object when the chat client is built.
setting = Settings()

In [4]:
# Configure logging: root logger at INFO with a timestamp / logger-name / level prefix.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Notebook-wide logger used by every function defined below.
logger = logging.getLogger("AppAgent")

In [5]:
# Shared chat client used for every LLM call in this notebook (client.chat / client.select_tool).
client = OpenAIClient(api_key=setting.OPENAI_API_KEY, model=ChatModelSelection.GPT4_1_MINI)

In [6]:
# response = client.chat(
#     [
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": "Who's the main actor in the movie Matrix and what other movies is that person in?"},
#     ]
# )
# print(response)

In [7]:
def get_vector_store():
    """Build the LanceDB-backed vector store used for contract retrieval.

    The database path is ``vectordb/contracts.db`` under the parent of the
    current working directory, and the ``contracts_naive`` table is opened
    with an Ollama embedding client.

    Returns:
        LanceDBVectorStore: the configured vector store.
    """
    store = LanceDBVectorStore(
        db_path=Path().absolute().parent / "vectordb" / "contracts.db",
        table_name="contracts_naive",
        embedding_client=OllamaEmbeddingClient(),
    )
    logger.info("Vector DB Loaded successfully.")
    return store

# Test vector store

In [8]:
# VECTOR_STORE = get_vector_store()
# query= "What do you know about Supplier: Alpha Suppliers Inc. and what it supplies?"
# VECTOR_STORE.search(query=query, top_k=5)[1]

In [9]:
# System prompt for the query-rewrite step. It asks the model to return the (possibly
# rewritten) question as a JSON object under the "question" key, which query_update parses.
query_update_prompt = """
    You are an expert at updating questions to make them ask for one thing only, more atomic, specific and easier to find the answer for.
    You do this by filling in missing information in the question, with the extra information provided to you in previous answers. 
    
    You respond with the updated question that has all information in it.
    Only edit the question if needed. If the original question already is atomic, specific and easy to answer, you keep the original.
    Do not ask for more information than the original question. Only rephrase the question to make it more complete.
    
    JSON template to use:
    {
        "question": "question1"
    }
"""

def query_update(input: str, answers: list[dict[str, str]]) -> str:
    """Rewrite the user question into a more atomic, self-contained question.

    The system prompt, the previous conversation turns and the raw question are
    sent to the chat model, which is asked to reply with a JSON object of the
    form ``{"question": "..."}``.

    Args:
        input: The raw user question.
        answers: Previous chat turns (``{"role": ..., "content": ...}`` dicts)
            spliced between the system prompt and the user turn.

    Returns:
        The rewritten question. If the model reply cannot be parsed, lacks a
        usable ``"question"`` string, or is empty, the original ``input`` is
        returned so that callers always receive a string.
    """
    logger.info("=== Entering Query Update Node ===")
    if len(answers) == 0:
        logger.info("Query Update endpoint called with Initial User turn")
    else:
        logger.info(f"Query Update endpoint called with Intermediate User and Assistant turns: {len(answers)}")

    messages = [
        {"role": "system", "content": query_update_prompt},
        *answers,
        {"role": "user", "content": f"The user question to rewrite: '{input}'"},
    ]

    config = {"response_format": {"type": "json_object"}}
    output = client.chat(messages, config=config)
    try:
        updated_question = json.loads(output)["question"]
    except (json.JSONDecodeError, TypeError, KeyError) as exc:
        # TypeError covers a None reply or a non-object JSON value such as a list;
        # KeyError covers a JSON object without the "question" key.
        logger.warning("Could not parse query update response (%s); keeping the original question.", exc)
        return input

    if not isinstance(updated_question, str) or not updated_question.strip():
        logger.warning("Query update returned no usable question; keeping the original question.")
        return input

    logger.info(f"Updated Query: {updated_question}")
    return updated_question

In [10]:
# Smoke test: rewrite a compound question with no prior conversation turns.
query_update("What do you know about Supplier: Alpha Suppliers Inc. and what they usually supply?", answers=[])

2025-06-05 17:04:47,478 - AppAgent - INFO - === Entering Query Update Node ===
2025-06-05 17:04:47,482 - AppAgent - INFO - Query Update endpoint called with Initial User turn
2025-06-05 17:04:48,707 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-05 17:04:48,715 - procureme.clients.openai_chat - INFO - Chat endpoint called with Model gpt-4.1-mini-2025-04-14, Request: {'role': 'user', 'content': "The user question to rewrite: 'What do you know about Supplier: Alpha Suppliers Inc. and what they usually supply?'"}
2025-06-05 17:04:48,717 - AppAgent - INFO - Updated Query: What products or services does Alpha Suppliers Inc. usually supply?


'What products or services does Alpha Suppliers Inc. usually supply?'

# Tools and Their Definitions

In [11]:
# Tool schema advertised to the model for the `answer_given` handler defined below.
# The model must supply one required string argument, "answer".
answer_given_description = {
    "type": "function",
    "function": {
        "name": "answer_given",
        "description": "If the conversation already contains a complete answer to the question, "
        "use this tool to extract it. Additionally, if the user engages in small talk, "
        "use this tool to remind them that you can only answer questions about contracts.",
        "parameters": {
            "type": "object",
            "properties": {
                "answer": {
                    "type": "string",
                    "description": "Respond directly with the answer",
                }
            },
            "required": ["answer"],
        },
    },
}


def answer_given(answer: str) -> str:
    """Tool handler for ``answer_given``: log the supplied answer and return it unchanged.

    Args:
        answer: The answer text chosen by the model.

    Returns:
        The same ``answer`` value.
    """
    logger.info("Answer found in text: %s", answer)
    return answer

In [None]:
# Tool schema for off-topic questions. The schema name is "respond_unrelated_question";
# the Python handler is `respond_unrelated_question_tool`, linked through the `tools` registry.
respond_unrelated_question_description = {
    "type": "function",
    "function": {
        "name": "respond_unrelated_question",
        "description": "Respond with a message if the question is not related to the source document.",
        "parameters": {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": "The user question or query to find the answer for",
                }
            },
            "required": ["question"],
        },
    },
}


def respond_unrelated_question_tool(question: str) -> str:
    """Build the canned reply for a question the source document cannot answer.

    Args:
        question: The user question that was judged unrelated.

    Returns:
        A fixed sentence that echoes ``question`` back to the user.
    """
    reply = f"The source document does not contain the answer to the question: {question}"
    return reply

In [12]:
# Tool schema for vector-database retrieval; its description marks it as the fallback tool.
# The schema name is "retriver_tool"; the Python handler is `vector_db_retriver_tool`,
# linked through the `tools` registry.
vectordb_retriver_tool_description = {
    "type": "function",
    "function": {
        "name": "retriver_tool",
        "description": "Query the vector database with a user question to pull the most relevant chunks. When other tools don't fit, fallback to use this one.",
        "parameters": {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": "The user question or query to find the answer for",
                }
            },
            "required": ["question"],
        },
    },
}


def vector_db_retriver_tool(question: str, top_k: int = 5) -> str:
    """Retrieve the chunks most relevant to ``question`` from the vector store.

    Args:
        question: The user question used as the search query.
        top_k: Number of chunks to request from the vector store (default 5).

    Returns:
        The ``"content"`` of every hit joined by blank lines. If the lookup
        fails, a human-readable error message is returned instead, so the
        return type is always ``str`` (the previous implementation returned a
        one-element list on failure).
    """
    logger.info("=== Entering Retrival Tool ===")
    try:
        vector_store = get_vector_store()
        documents = vector_store.search(query=question, top_k=top_k)
        context = "\n\n".join(doc["content"] for doc in documents)
    except Exception as e:
        # Best-effort tool: surface the failure to the model as text, but keep the
        # traceback in the logs instead of silently dropping it.
        logger.exception("Retrival Tool failed.")
        return f"Could not query vector store, cause an error: {e}"
    logger.info("Retrival Tool executed successfully.")
    return context

In [13]:
class SummaryType(StrEnum):
    """Summary lengths that can be requested from the database.

    The string values are the same ones listed in the ``enum`` of the summary
    tool schema ("short", "medium", "long").
    """
    SHORT = "short"
    MEDIUM = "medium"
    LONG = "long"

In [14]:
# Tool schema for fetching a stored contract summary by CWID and summary length.
# NOTE: the schema name is spelled "sumery_retriver_tool" while the Python handler is
# `summery_retriver_tool`; the two are linked through the `tools` registry key.
sumery_retriver_tool_description = {
    "type": "function",
    "function": {
        "name": "sumery_retriver_tool",
        "description": "Query the database with a user provide cwid and summary type to pull the most relevant summary. The tool is only when only user ask to get summary of a contract.",
        "parameters": {
            "type": "object",
            "properties": {
                "cwid": {
                    "type": "string",
                    "description": "The CWID of the contract",
                },
                "summary_type": {
                    "type": "string",
                    "enum": ["short", "medium", "long"],
                }
            },
            "required": ["cwid", "summary_type"],
        },
    },
}


def summery_retriver_tool(cwid: str, summary_type: SummaryType = SummaryType.SHORT) -> str:
    """Fetch a stored summary of one contract.

    Args:
        cwid: The CWID of the contract to look up.
        summary_type: Which summary to return; a ``SummaryType`` member or the
            equivalent plain string ("short", "medium", "long").

    Returns:
        The requested summary text, or a "No contract found" message when no
        row exists for ``cwid``.

    Raises:
        ValueError: If ``summary_type`` is not a valid summary type (checked
            before any database access), or if the requested summary is empty
            for this contract.
    """
    logger.info("=== Entering Summery Retrival Tool ===")

    # Validate the argument first so an invalid request never opens a DB connection.
    try:
        kind = SummaryType(summary_type)
    except ValueError:
        raise ValueError("Invalid summary type. Use 'short', 'medium', or 'long'.") from None

    settings = Settings()
    engine = create_engine(settings.pg_database_url)
    try:
        with Session(engine) as session:
            statement = select(ContractSummary).where(ContractSummary.cwid == cwid)
            result = session.exec(statement).first()

            if not result:
                return f"No contract found with CWID: {cwid}"

            # Attributes read here: summary_short / summary_medium / summary_long.
            summary = getattr(result, f"summary_{kind.value}")
            if not summary:
                raise ValueError(f"{kind.value.capitalize()} summary is not available for CWID: {cwid}")

            logger.info("Summery Retrival Tool executed successfully.")
            return summary
    finally:
        # The engine is created per call, so release its connection pool on every exit path.
        engine.dispose()

In [15]:
# Tool schema for comparing contracts. The single required argument "cwid" is an
# array of CWID strings, matching the `cwid: list[str]` parameter of compare_contract_tool.
compare_contract_tool_description = {
    "type": "function",
    "function": {
        "name": "compare_contract_tool",
        "description": "Query the database with a list of CWID to compare the contracts. The tool is only when only user ask to compare contracts.",
        "parameters": {
            "type": "object",
            "properties": {
                "cwid": {
                    "type": "array",
                    "items": {
                        "type": "string",
                    },
                    "description": "The CWID of the contracts to compare",
                }
            },
            "required": ["cwid"],
        },
    },
}

def compare_contract_tool(cwid: list[str]) -> str:
    """Build a side-by-side markdown comparison of contract metadata.

    Args:
        cwid: CWIDs of the contracts to compare. A single CWID passed as a
            plain string is treated as a one-element list.

    Returns:
        A markdown table with one row per metadata field (first column
        ``Field``) and one column per contract found, or a "No contract found"
        message when none of the CWIDs exist. CWIDs that are missing while
        others are found are reported in the log only.
    """
    logger.info("=== Entering Compare Contract Tool ===")
    if isinstance(cwid, str):
        # Guard against a bare string, which would otherwise be iterated character by character.
        cwid = [cwid]

    settings = Settings()
    engine = create_engine(settings.pg_database_url)
    try:
        with Session(engine) as session:
            statement = select(ContractMetadata).where(ContractMetadata.cwid.in_(cwid))
            result = session.exec(statement).all()
            if not result:
                return f"No contract found with CWID: {(', '.join(cwid))}"

            # Dump to plain dicts while the session is still open.
            contracts_data = {contract.cwid: contract.model_dump() for contract in result}
    finally:
        # The engine is created per call, so release its connection pool on every exit path.
        engine.dispose()

    missing = [requested for requested in cwid if requested not in contracts_data]
    if missing:
        logger.warning("No contract metadata found for CWID(s): %s", ", ".join(missing))

    # Columns are contracts and rows are metadata fields; the field names come from the index.
    markdown_table = (
        pd.DataFrame(contracts_data)
        .reset_index()
        .rename(columns={"index": "Field"})
        .to_markdown(index=False)
    )
    logger.info("Compare Contract Tool executed successfully.")
    return markdown_table

In [16]:
# temp_compare =compare_contract_tool(["CW0307", "CW0344"])
# print(temp_compare)

In [None]:
# Tool registry. Each key is the tool name the model returns (tool_call.function.name);
# handle_tool_calls looks the handler up by that key, so every key must equal the "name"
# inside the corresponding description. "description" is the schema sent to the model and
# "function" is the Python callable executed with the model-supplied arguments.
tools = {
    "retriver_tool": {
        "description": vectordb_retriver_tool_description,
        "function": vector_db_retriver_tool
    },
    "answer_given": {
        "description": answer_given_description,
        "function": answer_given
    },
    "sumery_retriver_tool": {
        "description": sumery_retriver_tool_description,
        "function": summery_retriver_tool
    },
    "compare_contract_tool": {
        "description": compare_contract_tool_description,
        "function": compare_contract_tool
    },
    "respond_unrelated_question": {
        "description": respond_unrelated_question_description,
        "function": respond_unrelated_question_tool
    }
}

In [None]:
def handle_tool_calls(tools: dict[str, dict], llm_tool_calls: Optional[list]):
    """Execute the tool calls selected by the model and collect their results.

    Args:
        tools: Registry mapping a tool name to a dict with a ``"description"``
            schema and a ``"function"`` callable.
        llm_tool_calls: Tool-call objects exposing ``.function.name`` and
            ``.function.arguments`` (a JSON string); may be ``None`` or empty.

    Returns:
        A list with one entry per tool call, in order. Calls that name an
        unknown tool or carry arguments that are not a JSON object yield an
        explanatory error string instead of raising. Exceptions raised by the
        tool function itself still propagate.
    """
    logger.info("=== Selecting Tools ===")
    output = []
    available_tools = [tool["description"]["function"]["name"] for tool in tools.values()]
    if llm_tool_calls:
        tool_list = [tool_call.function.name for tool_call in llm_tool_calls]
        logger.info(f"Follwing tools are select for execution: {tool_list} from available tools: {available_tools}")
        for tool_call in llm_tool_calls:
            tool_name = tool_call.function.name
            entry = tools.get(tool_name)
            if entry is None:
                # The model can return a name that is not registered.
                logger.warning("Model requested unknown tool: %s", tool_name)
                output.append(f"Unknown tool requested: {tool_name}")
                continue

            try:
                # Empty or missing arguments are treated as "no arguments".
                function_args = json.loads(tool_call.function.arguments or "{}")
            except json.JSONDecodeError as exc:
                logger.warning("Invalid JSON arguments for tool %s: %s", tool_name, exc)
                output.append(f"Could not parse arguments for tool '{tool_name}': {exc}")
                continue

            if not isinstance(function_args, dict):
                logger.warning("Arguments for tool %s are not a JSON object.", tool_name)
                output.append(f"Arguments for tool '{tool_name}' must be a JSON object.")
                continue

            output.append(entry["function"](**function_args))
    logger.info("Tool execution finished!")
    return output

In [None]:
# System prompt for the routing step: the model only has to pick one of the offered tools
# and fill in its arguments (see route_question).
tool_picker_prompt = """
    Your job is to chose the right tool needed to respond to the user question. 
    The available tools are provided to you in the prompt.
    Make sure to pass the right and the complete arguments to the chosen tool.
"""

def route_question(question: str, tools: dict[str, any], answers: list[dict[str, str]]):
    """Ask the model which tool fits ``question`` and run the selected tool(s).

    Args:
        question: The (already rewritten) user question.
        tools: Tool registry; each entry's ``"description"`` is offered to the model.
        answers: Previous chat turns inserted between the system and user messages.

    Returns:
        Whatever ``handle_tool_calls`` returns for the model's tool selection.
    """
    logger.info("=== Entering Tool Selector Router ===")
    system_turn = {"role": "system", "content": tool_picker_prompt}
    user_turn = {
        "role": "user",
        "content": f"The user question or satement to find a tool to answer: '{question}'",
    }
    tool_schemas = [entry["description"] for entry in tools.values()]
    selected_calls = client.select_tool(
        messages=[system_turn, *answers, user_turn],
        tools=tool_schemas,
    )
    return handle_tool_calls(tools, selected_calls)

def handle_user_input(input: str, answers: Optional[list[dict[str, str]]] = None):
    """Run one rewrite -> route -> record cycle for a user input.

    Args:
        input: The user question (or follow-up questions joined into one string).
        answers: Conversation turns accumulated so far; ``None`` or empty starts
            a new conversation.

    Returns:
        A new list holding the previous turns plus one assistant turn that
        records the rewritten question and the JSON-encoded tool output. The
        list passed in is never mutated; callers must use the return value.
    """
    # Always work on a copy: the original code mutated the caller's list only when it
    # was non-empty, which made the aliasing behaviour depend on the input.
    answers = list(answers) if answers else []
    logger.info(f"User input: {input}, with initial No. of User and Assistant turns: {len(answers)}")
    updated_question = query_update(input, answers)
    if not isinstance(updated_question, str) or not updated_question.strip():
        # A failed rewrite must not reach the router as a non-question value.
        logger.warning("Query rewrite returned no usable question; using the original input.")
        updated_question = input

    response = route_question(updated_question, tools, answers)
    answers.append({"role": "assistant", "content": f"For the question: '{updated_question}', we have the answer: '{json.dumps(response)}'"})
    return answers

In [20]:
# System prompt for the critique step. The model replies with a JSON object whose
# "questions" list holds follow-up questions (empty when nothing is missing, and for
# summary / comparison requests); critique_answers parses that key.
answer_critique_prompt = """
    You are an expert at identifying if questions has been fully answered or if there is an opportunity to enrich the answer.
    The user will provide a question, and you will scan through the provided information to see if the question is answered.
    If anything is missing from the answer, you will provide a set of new questions that can be asked to gather the missing information.
    All new questions must be complete, atomic and specific.
    - If user ask for summery of a certain contract as the summery is pulled by the tool you will respond with an empty list.
    - If user ask to compare contrats as the comparison is done by the tool you will respond with an empty list.
    However, if the provided information is enough to answer the original question, you will respond with an empty list.
    

    JSON template to use for finding missing information:
    {
        "questions": ["question1", "question2"]
    }
    JSON template to use when user asked for summery:
    {
        "questions": []
    }
    JSON template to use when user asked to compare contrats:
    {
        "questions": []
    }
"""

def critique_answers(question: str, answers: list[dict[str, str]]) -> list[str]:
    """Ask the model which follow-up questions are needed to complete the answer.

    Args:
        question: The original user question.
        answers: Conversation turns gathered so far, inserted between the
            system prompt and the user turn.

    Returns:
        The non-empty string entries of the reply's ``"questions"`` list. An
        empty list is returned when the reply is not valid JSON, is not a JSON
        object, or has no ``"questions"`` list, so callers can always iterate
        or join the result.
    """
    messages = [
        {
            "role": "system",
            "content": answer_critique_prompt,
        },
        *answers,
        {
            "role": "user",
            "content": f"The original user question to answer: {question}",
        },
    ]
    config = {"response_format": {"type": "json_object"}}
    output = client.chat(messages, config=config)
    logger.info(f"Answer critique response: {output}")
    try:
        parsed = json.loads(output)
    except (json.JSONDecodeError, TypeError) as exc:
        # TypeError covers a None reply from the client.
        logger.warning("Could not parse critique response (%s); assuming nothing is missing.", exc)
        return []

    questions = parsed.get("questions") if isinstance(parsed, dict) else None
    if not isinstance(questions, list):
        logger.warning("Critique response has no 'questions' list; assuming nothing is missing.")
        return []

    # Keep only usable strings: the caller joins these with " ".join(...).
    return [item for item in questions if isinstance(item, str) and item.strip()]

In [21]:
# System prompt for the final answer: restricts the model to the information gathered
# in the preceding turns (see main).
main_prompt = """
    Your job is to help the user with their questions.
    You will receive user questions and information needed to answer the questions
    If the information is missing to answer part of or the whole question, you will say that the information 
    is missing. You will only use the information provided to you in the prompt to answer the questions.
    You are not allowed to make anything up or use external information.
"""

def main(input: str):
    """Answer a user question end to end.

    Steps: gather information for the question, let the critique step propose
    follow-up questions, run one additional gathering round if any were
    proposed, then ask the model for the final answer based on the collected turns.

    Args:
        input: The user question.

    Returns:
        The reply returned by ``client.chat`` for the final prompt.
    """
    history = handle_user_input(input)
    follow_ups = critique_answers(input, history)
    if follow_ups:
        # All follow-up questions are sent through a single extra round.
        history = handle_user_input(" ".join(follow_ups), history)

    final_messages = [
        {"role": "system", "content": main_prompt},
        *history,
        {"role": "user", "content": f"The user question to answer: {input}"},
    ]
    return client.chat(final_messages)

In [None]:
# response = main("Who's the main actor in the movie Matrix and what other movies is that person in?")
# print(f"Main response: {response}")

In [22]:
# Demo: open question about a supplier.
response = main("What do you know about Supplier: Alpha Suppliers Inc. and what it supplies?")
print(f"Main response: {response}")

2025-06-05 16:46:05,477 - AppAgent - INFO - User input: What do you know about Supplier: Alpha Suppliers Inc. and what it supplies?, with initial No. of User and Assistant turns: 0
2025-06-05 16:46:05,478 - AppAgent - INFO - === Entering Query Update Node ===
2025-06-05 16:46:05,479 - AppAgent - INFO - Query Update endpoint called with Initial User turn
2025-06-05 16:46:07,113 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-05 16:46:07,120 - procureme.clients.openai_chat - INFO - Chat endpoint called with Model gpt-4.1-mini-2025-04-14, Request: {'role': 'user', 'content': "The user question to rewrite: 'What do you know about Supplier: Alpha Suppliers Inc. and what it supplies?'"}
2025-06-05 16:46:07,122 - AppAgent - INFO - Updated Query: What products or services does Alpha Suppliers Inc. provide?
2025-06-05 16:46:07,123 - AppAgent - INFO - === Entering Tool Selector Router ===
2025-06-05 16:46:08,341 - httpx - INFO - HTTP Reque

Main response: Alpha Suppliers Inc. is a supplier engaged under a procurement contract with XYZ Corporation effective from January 1, 2023, to December 31, 2023. Alpha Suppliers Inc. provides office stationery and supplies, including pens, paper, folders, and other stationery items as specified in the attached Purchase Order. They also supply any additional stationery items requested by the Buyer during the contract period. The Buyer issues Purchase Orders specifying quantities as per their requirements, with pricing and delivery schedules agreed upon between the Buyer and Supplier. Payment terms are net 30 days from the date of receipt and acceptance of the deliverables.


In [26]:
# Demo: request a long summary of contract CW0307.
response = main("Provide me a long summery of the contract number CW0307")
print(f"Main response: {response}")

2025-06-05 17:07:30,254 - AppAgent - INFO - User input: Provide me a long summery of the contract number CW0307, with initial No. of User and Assistant turns: 0
2025-06-05 17:07:30,255 - AppAgent - INFO - === Entering Query Update Node ===
2025-06-05 17:07:30,255 - AppAgent - INFO - Query Update endpoint called with Initial User turn
2025-06-05 17:07:31,225 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-05 17:07:31,230 - procureme.clients.openai_chat - INFO - Chat endpoint called with Model gpt-4.1-mini-2025-04-14, Request: {'role': 'user', 'content': "The user question to rewrite: 'Provide me a long summery of the contract number CW0307'"}
2025-06-05 17:07:31,231 - AppAgent - INFO - Updated Query: Provide a detailed summary of the contract with the number CW0307.
2025-06-05 17:07:31,232 - AppAgent - INFO - === Entering Tool Selector Router ===
2025-06-05 17:07:32,306 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1

Main response: The contract number CW0307 is a Procurement Contract established between RBC Suppliers ("Supplier") and Spark Company ("Buyer") to govern the supply of specified products over a defined term.

The main objectives of the contract are to outline the supply of Products A, B, and C by the Supplier to the Buyer, and to define the pricing, volume discounts, payment terms, confidentiality obligations, termination rights, and governing law applicable to the agreement.

The parties involved are:

- Supplier: RBC Suppliers  
  - Address: 123 Main Street, Anytown, USA  
  - Contact: John Smith  
  - Email: john@abcsuppliers.com  
  - Phone: +1-123-456-7890  

- Buyer: Spark Company

The contract term begins on January 1, 2023, and is set to expire five years later on December 31, 2027, unless terminated earlier according to the contract terms.

Regarding purchase details, the Supplier agrees to provide Products A, B, and C. The pricing is set at $10 per unit for Product A, $20 per 

In [27]:
# Demo: ask for the differences between contracts CW0307 and CW0343.
response = main("what is the difference between the contract CW0307 and CW0343")
print(f"Main response: {response}")

2025-06-05 23:14:25,162 - AppAgent - INFO - User input: what is the difference between the contract CW0307 and CW0343, with initial No. of User and Assistant turns: 0
2025-06-05 23:14:25,164 - AppAgent - INFO - === Entering Query Update Node ===
2025-06-05 23:14:25,166 - AppAgent - INFO - Query Update endpoint called with Initial User turn
2025-06-05 23:14:26,910 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-05 23:14:26,913 - procureme.clients.openai_chat - INFO - Chat endpoint called with Model gpt-4.1-mini-2025-04-14, Request: {'role': 'user', 'content': "The user question to rewrite: 'what is the difference between the contract CW0307 and CW0343'"}
2025-06-05 23:14:26,914 - AppAgent - INFO - Updated Query: What are the differences between contract CW0307 and contract CW0343?
2025-06-05 23:14:26,915 - AppAgent - INFO - === Entering Tool Selector Router ===
2025-06-05 23:14:28,079 - httpx - INFO - HTTP Request: POST https://ap

Main response: The differences between contract CW0307 and contract CW0343 are as follows:

| Field                     | CW0307                                                                                         | CW0343                                                                                         |
|---------------------------|------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|
| Supplier Name             | RBC Suppliers                                                                                  | C6                                                                                             |
| Expiry Date               | 2028-01-01                                                                                     | 2025-12-01                                                                                    |
| Objective  