In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('..')

In [3]:
import uuid
from langchain_core.messages import HumanMessage
from src.ai.autodf_ml_assistant import build_graph

In [4]:
async def visualize_dataframe_assistant_graph():
    graph = await build_graph()
    return graph

In [5]:
assistant_graph = await visualize_dataframe_assistant_graph()
print(assistant_graph.get_graph().draw_mermaid())

---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	conversation(conversation)
	pandas_agent(pandas_agent)
	ml_agent(ml_agent)
	__end__([<p>__end__</p>]):::last
	__start__ --> conversation;
	ml_agent --> __end__;
	pandas_agent --> __end__;
	conversation -.-> pandas_agent;
	conversation -.-> ml_agent;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc



In [6]:
# assistant_graph

In [7]:
thread_id = str(uuid.uuid4())
config = {"configurable": {"thread_id" : thread_id}}

In [8]:
async def log_assistant_graph_steps(query: str, config: dict):
    step = 1
    current_tool = None
    async for token, metadata in assistant_graph.astream(
            {"messages": [HumanMessage(content=query)]}, config=config, stream_mode="messages"
    ):
        current_node = metadata.get('langgraph_node', 'unknown')
        
        # Check if this is a tool call by looking at the message content
        if hasattr(token, 'tool_calls') and token.tool_calls:
            for tool_call in token.tool_calls:
                tool_name = tool_call.get('name', 'unknown_tool')
                print(f"\n🔧 Calling tool: {tool_name}")
                current_tool = tool_name
        
        # Print step metadata when it changes
        if metadata['langgraph_step'] == step:
            print(f"\n🔄 Step {step}: Node '{current_node}'")
            if current_node == 'tools' and current_tool:
                print(f"   └── Executing tool: {current_tool}")
            print("Metadata", metadata, "\n")
            step += 1
            
        print(token.content, end='', flush=True)

In [9]:
query = "List the columns of my dataset."
await log_assistant_graph_steps(query, config)


🔄 Step 1: Node 'conversation'
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 1, 'langgraph_node': 'conversation', 'langgraph_triggers': ('branch:to:conversation', 'start:conversation'), 'langgraph_path': ('__pregel_pull', 'conversation'), 'langgraph_checkpoint_ns': 'conversation:8ece4fa0-2c15-5ee0-3ef1-7491fcd17d91', 'checkpoint_ns': 'conversation:8ece4fa0-2c15-5ee0-3ef1-7491fcd17d91', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 

pandas_agent
🔧 Calling tool: python_repl_ast

🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'pandas_agent:aaa4668e-b47e-5b31-40d9-6f8c73d69dfa|tools:c43f3392-911f-8f48-e466-e059c02c71be', 'checkpoint_ns': 'pandas_agent:aaa466

In [10]:
query = "please give me descriptive statistics of the data"
await log_assistant_graph_steps(query, config)

pandas_agent
🔄 Step 1: Node 'agent'
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 1, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'start:agent', 'tools'), 'langgraph_path': ('__pregel_pull', 'agent'), 'langgraph_checkpoint_ns': 'pandas_agent:499fc0d6-4651-af32-ec31-ab799e3d7985|agent:7219d886-d815-3022-d785-02528d8442bd', 'checkpoint_ns': 'pandas_agent:499fc0d6-4651-af32-ec31-ab799e3d7985', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 


🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'pandas_agent:499fc0d6-4651-af32-ec31-ab799e3d7985|tools:44c9e6f9-e9a1-7c85-c12f-dc7ad4d3c3a6', 'checkpoint_ns': 'pandas_agent:499fc0d6-4651-af32-ec3

In [11]:
query = "Let's now build a decision tree model using as our target variable 'disease_risk'"
await log_assistant_graph_steps(query, config)

ml_agent
🔄 Step 1: Node 'agent'
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 1, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'start:agent', 'tools'), 'langgraph_path': ('__pregel_pull', 'agent'), 'langgraph_checkpoint_ns': 'ml_agent:766f2c3b-6422-9610-60ef-421efc731223|agent:734c3bdd-302c-45e4-376f-9120c90cc222', 'checkpoint_ns': 'ml_agent:766f2c3b-6422-9610-60ef-421efc731223', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 



2025-10-10 07:08:45,412 - dataframe-chatbot - INFO - #Train examples = 80000
2025-10-10 07:08:45,412 - dataframe-chatbot - INFO - #Test examples = 20000
2025-10-10 07:08:50,555 - dataframe-chatbot - INFO - Best Parameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}
2025-10-10 07:08:50,555 - dataframe-chatbot - INFO - Best CV Macro F1: 0.3439806638365158
2025-10-10 07:08:50,556 - dataframe-chatbot - INFO - ----------

2025-10-10 07:08:50,581 - dataframe-chatbot - INFO - ---- Train Results ----
2025-10-10 07:08:50,582 - dataframe-chatbot - INFO - Recall (binary) = 0.6385153850027698
2025-10-10 07:08:50,582 - dataframe-chatbot - INFO - Precision (binary) = 0.27773761801493946
2025-10-10 07:08:50,582 - dataframe-chatbot - INFO - F1-score (binary) = 0.38709775905232946
2025-10-10 07:08:50,582 - dataframe-chatbot - INFO - ----------

2025-10-10 07:08:50,582 - dataframe-chatbot - INFO - ---- Test Results ----
2025-10-10 07:08:50,582 - dataframe-chatbot - INFO - Recall (b


🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'ml_agent:766f2c3b-6422-9610-60ef-421efc731223|tools:370a7aba-e7ae-3fb7-f398-f7068f6fa372', 'checkpoint_ns': 'ml_agent:766f2c3b-6422-9610-60ef-421efc731223'} 

{"model_uri": "/home/davit/medium/dataframe_chatbot/models/decision_tree/model.joblib", "metrics": {"train": {"Recall (binary)": 0.6385153850027698, "Precision (binary)": 0.27773761801493946, "F1-score (binary)": 0.38709775905232946}, "test": {"Recall (binary)": 0.5703062046736502, "Precision (binary)": 0.24561860142287004, "F1-score (binary)": 0.3433596118859915}}}
🔄 Step 3: Node 'agent'
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 3, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'sta

In [12]:
query = "Please give me the value distribution of the column 'disease_risk'"
await log_assistant_graph_steps(query, config)

pandas_agent
🔄 Step 1: Node 'agent'
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 1, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'start:agent', 'tools'), 'langgraph_path': ('__pregel_pull', 'agent'), 'langgraph_checkpoint_ns': 'pandas_agent:b9b7e620-7e2d-472b-e3cb-8f9437d6e547|agent:f2e4c006-f414-0328-2292-aa092231dbb8', 'checkpoint_ns': 'pandas_agent:b9b7e620-7e2d-472b-e3cb-8f9437d6e547', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 


🔧 Calling tool: python_repl_ast

🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'cf078d61-f2af-4783-8ffe-b7eb065df3e3', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'pandas_agent:b9b7e620-7e2d-472b-e3cb-8f9437d6e547|tools:1cfe93fa-7cb3-8df7-f119-fa29c7bbb591', 'checkpoint_ns': 'pa