In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('..')

In [3]:
import uuid
from langchain_core.messages import HumanMessage
from src.ai.autodf_ml_assistant import build_graph

In [4]:
async def visualize_dataframe_assistant_graph():
    graph = await build_graph()
    return graph

In [5]:
assistant_graph = await visualize_dataframe_assistant_graph()
print(assistant_graph.get_graph().draw_mermaid())

---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	conversation(conversation)
	pandas_agent(pandas_agent)
	ml_agent(ml_agent)
	__end__([<p>__end__</p>]):::last
	__start__ --> conversation;
	ml_agent --> __end__;
	pandas_agent --> __end__;
	conversation -.-> pandas_agent;
	conversation -.-> ml_agent;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc



In [6]:
# assistant_graph

In [7]:
thread_id = str(uuid.uuid4())
config = {"configurable": {"thread_id" : thread_id}}

In [8]:
async def log_assistant_graph_steps(query: str, config: dict):
    step = 1
    current_tool = None
    async for token, metadata in assistant_graph.astream(
            {"messages": [HumanMessage(content=query)]}, config=config, stream_mode="messages"
    ):
        current_node = metadata.get('langgraph_node', 'unknown')
        
        # Check if this is a tool call by looking at the message content
        if hasattr(token, 'tool_calls') and token.tool_calls:
            for tool_call in token.tool_calls:
                tool_name = tool_call.get('name', 'unknown_tool')
                print(f"\n🔧 Calling tool: {tool_name}")
                current_tool = tool_name
        
        # Print step metadata when it changes
        if metadata['langgraph_step'] == step:
            print(f"\n🔄 Step {step}: Node '{current_node}'")
            if current_node == 'tools' and current_tool:
                print(f"   └── Executing tool: {current_tool}")
            print("Metadata", metadata, "\n")
            step += 1
            
        print(token.content, end='', flush=True)

In [9]:
query = "List the columns of my dataset."
await log_assistant_graph_steps(query, config)


🔄 Step 1: Node 'conversation'
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 1, 'langgraph_node': 'conversation', 'langgraph_triggers': ('branch:to:conversation', 'start:conversation'), 'langgraph_path': ('__pregel_pull', 'conversation'), 'langgraph_checkpoint_ns': 'conversation:03e186b6-01ab-fe02-8955-26f23b30d520', 'checkpoint_ns': 'conversation:03e186b6-01ab-fe02-8955-26f23b30d520', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 

default
🔧 Calling tool: python_repl_ast

🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'pandas_agent:466460f0-7701-4739-63ea-a1d3bedc2960|tools:2b5697ff-4463-64fa-bfe3-779b52641683', 'checkpoint_ns': 'pandas_agent:466460f0-77

In [10]:
query = "please give me descriptive statistics of the data"
await log_assistant_graph_steps(query, config)

pandas_agent
🔄 Step 1: Node 'agent'
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 1, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'start:agent', 'tools'), 'langgraph_path': ('__pregel_pull', 'agent'), 'langgraph_checkpoint_ns': 'pandas_agent:1e676f25-48f1-01c7-96af-c5a099235064|agent:aee4ad90-c7cb-1492-835e-9e49be32d1ed', 'checkpoint_ns': 'pandas_agent:1e676f25-48f1-01c7-96af-c5a099235064', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 


🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'pandas_agent:1e676f25-48f1-01c7-96af-c5a099235064|tools:debff8e5-d9c9-f0e0-4635-9dd2d6437210', 'checkpoint_ns': 'pandas_agent:1e676f25-48f1-01c7-96a

In [11]:
query = "Let's now build a decision tree model using as our target variable 'disease_risk'"
await log_assistant_graph_steps(query, config)

ml_agent
🔄 Step 1: Node 'agent'
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 1, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'start:agent', 'tools'), 'langgraph_path': ('__pregel_pull', 'agent'), 'langgraph_checkpoint_ns': 'ml_agent:77fb2ef2-f3e2-5584-2833-5a39b6f4bd7b|agent:a11af55c-66c1-1e48-02fd-826dc6dc67f1', 'checkpoint_ns': 'ml_agent:77fb2ef2-f3e2-5584-2833-5a39b6f4bd7b', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 


🔧 Calling tool: python_repl_ast


2025-10-10 06:52:18,188 - dataframe-chatbot - INFO - #Train examples = 80000
2025-10-10 06:52:18,188 - dataframe-chatbot - INFO - #Test examples = 20000
2025-10-10 06:52:23,360 - dataframe-chatbot - INFO - Best Parameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}
2025-10-10 06:52:23,361 - dataframe-chatbot - INFO - Best CV Macro F1: 0.3439806638365158
2025-10-10 06:52:23,361 - dataframe-chatbot - INFO - ----------

2025-10-10 06:52:23,387 - dataframe-chatbot - INFO - ---- Train Results ----
2025-10-10 06:52:23,387 - dataframe-chatbot - INFO - Recall (binary) = 0.6385153850027698
2025-10-10 06:52:23,388 - dataframe-chatbot - INFO - Precision (binary) = 0.27773761801493946
2025-10-10 06:52:23,388 - dataframe-chatbot - INFO - F1-score (binary) = 0.38709775905232946
2025-10-10 06:52:23,388 - dataframe-chatbot - INFO - ----------

2025-10-10 06:52:23,388 - dataframe-chatbot - INFO - ---- Test Results ----
2025-10-10 06:52:23,389 - dataframe-chatbot - INFO - Recall (b


🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'ml_agent:77fb2ef2-f3e2-5584-2833-5a39b6f4bd7b|tools:67758101-091f-a790-82d9-f057ef42054e', 'checkpoint_ns': 'ml_agent:77fb2ef2-f3e2-5584-2833-5a39b6f4bd7b'} 

{"model_uri": "/home/davit/medium/dataframe_chatbot/models/decision_tree/model.joblib", "metrics": {"train": {"Recall (binary)": 0.6385153850027698, "Precision (binary)": 0.27773761801493946, "F1-score (binary)": 0.38709775905232946}, "test": {"Recall (binary)": 0.5703062046736502, "Precision (binary)": 0.24561860142287004, "F1-score (binary)": 0.3433596118859915}}}
🔄 Step 3: Node 'agent'
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 3, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'sta

In [12]:
query = "Please give me value distribution of 'disease_risk'"
await log_assistant_graph_steps(query, config)

pandas_agent
🔄 Step 1: Node 'agent'
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 1, 'langgraph_node': 'agent', 'langgraph_triggers': ('branch:to:agent', 'start:agent', 'tools'), 'langgraph_path': ('__pregel_pull', 'agent'), 'langgraph_checkpoint_ns': 'pandas_agent:02c37a5d-216b-b430-caa7-780a1ebf1cf9|agent:e10b3da6-69ee-0272-0ff4-bac2471cfaef', 'checkpoint_ns': 'pandas_agent:02c37a5d-216b-b430-caa7-780a1ebf1cf9', 'ls_provider': 'ollama', 'ls_model_name': 'gpt-oss:20b', 'ls_model_type': 'chat', 'ls_temperature': 0.7} 


🔧 Calling tool: python_repl_ast

🔄 Step 2: Node 'tools'
   └── Executing tool: python_repl_ast
Metadata {'thread_id': 'a712a6b5-1831-4e4e-a86f-6532d5bc649d', 'langgraph_step': 2, 'langgraph_node': 'tools', 'langgraph_triggers': ('branch:to:tools',), 'langgraph_path': ('__pregel_pull', 'tools'), 'langgraph_checkpoint_ns': 'pandas_agent:02c37a5d-216b-b430-caa7-780a1ebf1cf9|tools:26fbe288-892a-e677-1a61-c7339c906dcf', 'checkpoint_ns': 'pa