In [1]:
# warnings
import warnings
warnings.filterwarnings("ignore")

# standard library
import logging
import os
import re
from typing import List, Dict, Any

# third-party
from langgraph.graph import StateGraph, END
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from typing_extensions import TypedDict
from transformers import AutoTokenizer
from huggingface_hub import login

# project-local
from Tools.Logger import setup_logger
from db_create import CargaDeArchivos

#tools
from Tools.Tool import run_sql_workflow, run_think_task, remove_think_tags

#Prompts
from Tools.Prompts import  plan_prompt, final_ans_prompt




A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.p

In [2]:
# === Logger instantiation ===
# Run the project's logging setup helper first (its configuration lives in
# Tools.Logger and is not shown here), then create the module-level logger
# that the later cells use for progress and error reporting.
setup_logger()
logger = logging.getLogger(__name__)

In [3]:
# === Hugging Face login and tokenizer setup ===
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in (or leaked through) the notebook file.
# SECURITY: any token that was previously committed in this cell must be
# treated as compromised and revoked in the Hugging Face account settings.
try:
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise RuntimeError(
            "HF_TOKEN environment variable is not set; "
            "export a Hugging Face access token before running this cell."
        )
    login(token=hf_token)
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
except Exception as e:
    # Log with traceback, then re-raise: the rest of the notebook cannot run
    # without a tokenizer.
    logger.error(f"An error occurred during tokenizer setup: {e}", exc_info=True)
    raise

# === Database population and connection ===
# Instantiate the project's loader, run it, and keep its connection in the
# module-level `db_conn` that planner_node later copies into the graph state.
# Any failure is logged with its traceback and re-raised.
try:
    db_manager = CargaDeArchivos()
    db_manager.run()
    db_conn = db_manager.conn
except Exception as db_error:
    logger.error(
        f"An error occurred during database population and connection: {db_error}",
        exc_info=True,
    )
    raise


In [4]:
# == Orchestrator state ==
class AgentState(TypedDict):
    """Shared state passed between the orchestrator's graph nodes.

    Only ``question`` is supplied by the caller; ``planner_node`` fills in the
    plan and bookkeeping keys, ``execute_task_node`` appends per-step results,
    and ``final_answer_node`` writes ``final_answer``.
    """
    # The user's original natural-language question.
    question: str
    # Parsed plan: one dict per ACTIVITY with "id", "type", "description",
    # "reason" and "steps" (zero-based indices of prerequisite steps).
    plan: List[dict]
    # Zero-based index of the next plan step to execute.
    current_step: int
    # Answer of each executed step, keyed by the step index as a string.
    results: Dict[str, Any]
    # Raw result of each executed step, in execution order.
    query_results: List[str]
    # Database connection handed to the SQL workflow; its concrete type comes
    # from the project's db_create module, so it is typed as Any here.
    db_conn: Any
    # Tokenizer handed to the SQL workflow (was annotated with the builtin
    # function `any`, which is not a type).
    tokenizer: Any
    # Not written by any node visible in this notebook.
    use_case: str
    # Zero-based indices of the steps whose results feed the final answer.
    ans_req: List[int]
    # Final answer text produced for the user.
    final_answer: str

## 1. Orchestrator nodes

In [5]:
# == Orchestrator nodes ==
# Regexes used to pick apart the planner's JSON-like output line by line.
_ACTIVITY_HEADER_RE = re.compile(r'"?(ACTIVITY\d+)"?\s*:\s*{')
_FINAL_HEADER_RE = re.compile(r'"?(FINAL)"?\s*:\s*{')
_STEPS_RE = re.compile(r'"steps"\s*:\s*\[([^\]]*)\]')
# String-valued activity fields, in the order they are tested on each line.
_TEXT_FIELDS = ("type", "description", "reason")


def _finalize_activity(activity):
    """Fill in defaults for fields the planner omitted and return the activity."""
    activity.setdefault("type", "[THINK]")
    activity.setdefault("description", "")
    activity.setdefault("reason", "")
    activity.setdefault("steps", [])
    return activity


def _parse_step_refs(line):
    """Return the zero-based step indices of a '"steps": [...]' line, or None.

    Only the digits of each reference are used, so 1, "1" and "ACTIVITY1" all
    resolve to index 0 instead of raising ValueError on non-numeric tokens.
    """
    found = _STEPS_RE.search(line)
    if found is None:
        return None
    return [int(number) - 1 for number in re.findall(r"\d+", found.group(1))]


def _parse_activity_field(activity, line):
    """Store the field found on `line` (at most one per line) into `activity`."""
    for key in _TEXT_FIELDS:
        if f'"{key}"' in line:
            found = re.search(rf'"{key}"\s*:\s*"([^"]+)",?', line)
            if found:
                activity[key] = found.group(1)
            return
    if '"steps"' in line:
        refs = _parse_step_refs(line)
        if refs is not None:
            activity["steps"] = refs


def _parse_plan(raw_plan):
    """Parse the planner output into (activities, final_steps).

    `final_steps` is None when no FINAL "steps" list was found.
    """
    activities = []
    final_steps = None
    current = None
    inside_final = False

    for raw_line in raw_plan.strip().splitlines():
        line = raw_line.strip()

        # A new ACTIVITY header closes the previous activity, if any.
        header = _ACTIVITY_HEADER_RE.match(line)
        if header:
            if current is not None:
                activities.append(_finalize_activity(current))
            current = {"id": header.group(1)}
            inside_final = False
            continue

        # The FINAL header must also close the activity in progress; otherwise
        # the last ACTIVITY before FINAL is silently dropped from the plan.
        if _FINAL_HEADER_RE.match(line):
            if current is not None:
                activities.append(_finalize_activity(current))
                current = None
            inside_final = True
            continue

        if current is not None:
            _parse_activity_field(current, line)
        elif inside_final and '"steps"' in line:
            refs = _parse_step_refs(line)
            if refs is not None:
                final_steps = refs

    # Close the trailing activity when the output has no FINAL block.
    if current is not None:
        activities.append(_finalize_activity(current))

    return activities, final_steps


def planner_node(state: AgentState) -> AgentState:
    """Ask the LLM for a step-by-step plan and initialise the graph state.

    Reads ``state["question"]``, prompts the model with ``plan_prompt``,
    strips think tags, prints the raw plan, and parses its ACTIVITY/FINAL
    blocks.

    Returns:
        A state update with the parsed ``plan``, ``current_step`` reset to 0,
        empty ``results``/``query_results``, the module-level ``db_conn`` and
        ``tokenizer``, the original ``question`` and ``ans_req`` (zero-based
        indices of the steps needed for the final answer). When the plan has
        no usable FINAL block, ``ans_req`` falls back to every plan step.

    Raises:
        Exception: any error is logged with its traceback and re-raised.
    """
    try:
        user_question = state["question"]

        llm = OllamaLLM(model="qwen3:8b", temperature=0.0, enable_thinking=False)
        planner = ChatPromptTemplate.from_messages([
            ("system", plan_prompt),
            ("human", "user question: {task}"),
        ]) | llm | StrOutputParser()

        raw_plan = remove_think_tags(planner.invoke({"task": user_question}))
        print(raw_plan)

        steps, final_steps = _parse_plan(raw_plan)
        if not steps:
            logger.warning("Planner output contained no ACTIVITY blocks; the plan is empty.")
        if final_steps is None:
            # Without this fallback the final-answer node would receive None
            # and fail while selecting its context.
            logger.warning("Planner output had no FINAL steps; using every plan step for the final answer.")
            final_steps = list(range(len(steps)))

        return {
            "plan": steps,
            "current_step": 0,  # Start from the first step
            "results": {},
            "query_results": [],
            "db_conn": db_conn,
            "tokenizer": tokenizer,
            "question": user_question,
            "ans_req": final_steps,
        }

    except Exception as e:
        logger.exception(f"Error in planner_node: {e}")
        raise


def execute_task_node(state: AgentState) -> AgentState:
    """Execute the plan step at ``state["current_step"]`` and record its answer.

    The answers of the steps listed in the activity's ``"steps"`` field are
    joined into a bulleted context string. Activities whose type contains
    "SQL" go through ``run_sql_workflow``; all others go through
    ``run_think_task``.

    Returns:
        A state update in which ``results`` gains the new answer under the
        current step index (as a string), ``query_results`` gains the raw
        result, and ``current_step`` is advanced by one.

    Raises:
        Exception: any error is logged with its traceback and re-raised.
    """
    try:
        plan = state["plan"]
        step_index = state["current_step"]
        activity = plan[step_index]

        task = activity["description"]
        dependencies = activity["steps"]
        logger.info(f"Previous steps: {dependencies}")
        task_type = activity["type"]

        # Collect the answers of prerequisite steps in ascending step order.
        prior_results = state["results"]
        context_lines = []
        for key in sorted(prior_results, key=int):
            if int(key) in dependencies:
                context_lines.append(f"- {prior_results[key]}")
        context = "\n".join(context_lines)
        logger.info(f"Context: {context}")

        print(f"\n[Task {step_index + 1}] {task}")

        if "SQL" not in task_type:
            answer = run_think_task(task, context)
            raw_result = answer
        else:
            answer, raw_result = run_sql_workflow(
                task, state["db_conn"], state["tokenizer"], context
            )

        # Store the answer under the current index before the index advances.
        updated_results = dict(prior_results)
        updated_results[str(step_index)] = answer

        return {
            "plan": plan,
            "results": updated_results,
            "current_step": step_index + 1,
            "query_results": state["query_results"] + [raw_result],
            "db_conn": state["db_conn"],
            "tokenizer": state["tokenizer"],
            "question": state["question"],
        }

    except Exception as task_error:
        logger.exception(f"Error in execute_task_node: {task_error}")
        raise

def final_answer_node(state: AgentState) -> AgentState:
    """Compose the final answer from the results of the required steps.

    Selects the answers of the steps listed in ``state["ans_req"]`` (falling
    back to every recorded result when that list is missing), injects them
    into ``final_ans_prompt`` as context, and asks the LLM to answer
    ``state["question"]``.

    Returns:
        A copy of the state with ``final_answer`` set. On any failure the
        error is logged and ``final_answer`` is set to a fixed error message
        instead of raising.
    """
    try:
        user_question = state["question"]
        results = state["results"]

        final_steps = state.get("ans_req")
        if final_steps is None:
            # A plan without a parsed FINAL block leaves ans_req unset; use
            # every recorded result rather than failing on `in None`.
            logger.warning("No final-answer steps were provided; using all step results as context.")
            final_steps = [int(key) for key in results]
        logger.info(f"Final answer required steps: {final_steps}")

        final_context = "\n".join(
            f" - {results[key]}" for key in sorted(results, key=int) if int(key) in final_steps
        )
        logger.info(f"Final context: {final_context}")

        # The formatted system prompt is parsed again by ChatPromptTemplate as
        # an f-string template, so literal braces coming from step results
        # (JSON, dict reprs, ...) must be doubled or they would be mistaken
        # for template variables.
        escaped_context = final_context.replace("{", "{{").replace("}", "}}")

        llm = OllamaLLM(model="qwen3:8b", temperature=0.0, enable_thinking=False)
        model = ChatPromptTemplate.from_messages([
            ("system", final_ans_prompt.format(context=escaped_context)),
            ("human", "user question: {task}"),
        ]) | llm | StrOutputParser()

        final_answer = model.invoke({"task": user_question})

        # Return a new dict instead of mutating the incoming state in place.
        return {**state, "final_answer": remove_think_tags(final_answer)}

    except Exception as e:
        # Best effort: report the failure to the caller as the answer text.
        logger.exception(f"Error in final_answer_node: {e}")
        return {**state, "final_answer": "Error generating final answer."}

In [6]:
# === Orchestrator routers ===
def node_router(state: AgentState) -> str:
    """Choose the next graph node based on plan progress.

    Returns:
        ``"generate_final_answer"`` once ``current_step`` has reached the end
        of the plan, otherwise ``"execute_task"``. If the state cannot be
        read, the error is logged and routing falls back to
        ``"generate_final_answer"``.
    """
    try:
        next_node = "generate_final_answer" if state["current_step"] >= len(state["plan"]) else "execute_task"
    except Exception as e:
        logger.exception(f"Error in node_router: {e}")
        # The fallback must name a node that is registered in the graph; the
        # final-answer node is registered as "generate_final_answer".
        next_node = "generate_final_answer"
    return next_node

In [7]:
# === Orchestrator workflow ===
def build_orchestrator_workflow():
    """Build and compile the planner -> execute -> final-answer graph.

    Both the planner and the task executor are followed by ``node_router``,
    which loops back to ``execute_task`` while plan steps remain and moves to
    ``generate_final_answer`` otherwise.

    Returns:
        The compiled LangGraph workflow.

    Raises:
        Exception: any error while building is logged and re-raised.
    """
    try:
        graph = StateGraph(AgentState)
        graph.add_node("planner", planner_node)
        graph.add_node("execute_task", execute_task_node)
        graph.add_node("generate_final_answer", final_answer_node)

        graph.set_entry_point("planner")
        # Route after planning as well: with an unconditional edge, an empty
        # plan would make execute_task_node index plan[0] and raise IndexError.
        graph.add_conditional_edges("planner", node_router)
        graph.add_conditional_edges("execute_task", node_router)
        graph.set_finish_point("generate_final_answer")

        return graph.compile()
    except Exception as e:
        logger.exception(f" Error building orchestrator workflow: {e}")
        raise

In [8]:
# Compile the graph, then run it end to end for a single sample question.
# Only "question" is supplied here; planner_node fills in the other state keys.
workflow = build_orchestrator_workflow()
output = workflow.invoke({"question": "How many invoices are duplicated?"})

{
    "ACTIVITY1": {
        "type": "[SQL]",
        "description": "Retrieve all invoice records from the database.",
        "reason": "To identify all invoices that may be duplicated, we need to first gather all invoice data.",
        "steps": []
    },
    "ACTIVITY2": {
        "type": "[THINK]",
        "description": "Analyze the invoice records to identify potential duplicates.",
        "reason": "Duplicates can be identified by comparing invoice numbers, dates, and amounts, which requires reasoning and interpretation of the data.",
        "steps": [1]
    },
    "ACTIVITY3": {
        "type": "[SQL]",
        "description": "Count the number of invoices for each unique invoice number.",
        "reason": "This will help identify how many times each invoice number appears, which is essential for determining duplicates.",
        "steps": [2]
    },
    "ACTIVITY4": {
        " "type": "[THINK]",
        "description": "Determine which invoice numbers appear more than once."

In [9]:
# Per-step answers, keyed by the zero-based step index as a string.
output["results"]

{'0': 'To retrieve all invoice records from the database, you can use the following SQL query:\n\n```sql\nSELECT * FROM invoices;\n```\n\nThis query will return all columns and rows from the `invoices` table, including the following fields for each invoice:\n\n- **Invoice Number**  \n- **Invoice Date**  \n- **Customer Name**  \n- **Item Description**  \n- **Quantity**  \n- **Unit Price**  \n- **Discount**  \n- **Tax**  \n- **Total Amount**  \n- **Status**  \n\n### Sample Data (as provided):\n| Invoice Number | Invoice Date       | Customer Name     | Item Description | Quantity | Unit Price | Discount | Tax | Total Amount | Status         |\n|----------------|--------------------|-------------------|------------------|----------|------------|----------|-----|--------------|----------------|\n| 1001           | 2025-03-04         | John Doe          | Widget A         | 2        | $50.00     | 5%       | 10% | $105.00      | Paid           |\n| 1002           | 2025-03-19         | Jane

In [10]:
# Final answer text written by final_answer_node.
output["final_answer"]

'To determine the number of duplicated invoices, a detailed review and analysis of the invoice records are required. Please provide access to the relevant invoice data or system, and I can assist in identifying duplicates efficiently.'