In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("/root/ProtAgent")
import os
import yaml
import json

import json_repair
import pandas as pd
from agent.tools.tool_manager import ToolManager

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# set pwd to  /root/ProtAgent
os.chdir("/root/ProtAgent")

In [3]:
tool_manager = ToolManager(enable_quick_run=False)
tool_manager.set_out_dir("/home/public/ProtAgent/examples")

#### Backbone


In [4]:
import json_repair
from agent.workflow.workflow import Workflow

config_str = """ \
    { "step_1": { "tool": "fasta2seq", "status": "EXECUTED", "parameter_origins": { "fasta_file": { "source": "user_input" } }, "tool_args": { "fasta_file": "protein_sequence.fasta" }, "results": { "protein_sequence": "MLGYKCLHWNNLIDLPPLKDPETFSLPSSIPHWPPGQGFGSGTINLGKLQVIKITDFEFIWRYRSTEKKKNISFYKPKGLLPKDFHCLGHYCQSDSHPLRGYVLAARDLVDSLEQVEKPALVEPVDFTLVWSSNDSAENECSSKSECGYFWLPQPPEGYRSIGFVVTKTSVKPELNEVRCVRADLTDICEPHNVIVTAVSESLGVPLFIWRTRPSDRGMWGKGVSAGTFFCRTRLVAAREDLGIGIACLKNLDLSLHAMPNVDQIQALIQHYGPTLVFHPGETYLPSSVSWFFKNGAVLCEKGNPIEEPIDENGSNLPQGGSNDKQFWIDLPCDDQQRDFVKRGNLESSKLYIHIKPALGGTFTDLVFWIFCPFNGPATLKLGLVDISLISIGQHVCDWEHFTLRISNFSGELYSIYLSQHSGGEWIEAYDLEIIPGSNKAVVYSSKHGHASFPRAGTYLQGSTMLGIGIRNDTARSELLVDSSSRYEIIAAEYLSGNSVLAEPPWLQYMREWGPKVVYDSREEIERLVNRFPRTVRVSLATVLRKLPVELSGEEGPTGPKEKNNWYGDERC" } }, "step_2": { "tool": "interproscan", "status": "EXECUTED", "parameter_origins": { "fasta_file": { "source": "user_input" }, "goterms": { "source": "default" }, "pathways": { "source": "default" } }, "tool_args": { "fasta_file": "protein_sequence.fasta" }, "results": { "parsed_tsv": "interproscan/20250716_0901/protein_sequence.fasta.parsed.tsv", "output_dir": "interproscan/20250716_0901", "duration": 158.639686, "top_accession": "IPR009291", "top_accession_name": "Vacuolar protein sorting-associated protein 62", "total_hits": 4, "num_unique_annotations": 1 } }, "step_3": { "tool": "chat", "status": "EXECUTED", "parameter_origins": {} } }
    """

config = json_repair.loads(config_str)

workflow_cls = Workflow.from_config(config, tool_manager=tool_manager)


--- Creating Workflow from configuration ---
Initializing node 'step_1' with tool 'fasta2seq':
Initializing node 'step_2' with tool 'interproscan':
Initializing node 'step_3' with tool 'chat':
--- Workflow creation complete ---



In [4]:
query = "What family does af2_predicted_structure.pdb possibly belong to?"


In [7]:
from agent.agent.backbone import MultiAgentBackbone
from agent.workflow.workflow_manager import WorkflowManager
from agent.utils.constants import AGENT_STATUS

agent = MultiAgentBackbone("qwen-turbo", tool_manager)

for response in agent.stream_chat(query): 
    from IPython import display
    display.clear_output(wait=True)
    if response.status == AGENT_STATUS.ERROR:
        print("Error:", response.error)
        break
    
    if response.status == AGENT_STATUS.GENERATING:
        for msg in json_repair.loads(response.content):
            if isinstance(msg, str):
                msg = json_repair.loads(msg)
            print(json.dumps(msg, indent=4))
    
    # if response.status == AGENT_STATUS.WORKFLOW:
    #     print(response.workflow)


{
    "sender": "query_parser",
    "analysis": "The user is asking about the family of a protein structure file named 'af2_predicted_structure.pdb'. This suggests that the file is a PDB-formatted structure, and the goal is to determine its Pfam family. The input is the PDB file, and the expected output would be the Pfam ID or related functional annotation.",
    "content": {
        "input": {
            "protein_1": {
                "STRUCTURE_PATH": [
                    "af2_predicted_structure.pdb"
                ]
            }
        },
        "output": {
            "protein_1": [
                "PFAM_ID",
                "INTERPRO_ID",
                "INTERPRO_NAME"
            ],
            "general": []
        }
    }
}


KeyboardInterrupt: 

In [18]:
for msg in json_repair.loads(response.content):
    if isinstance(msg, str):
        msg = json_repair.loads(msg)
    print(json.dumps(msg, indent=4))

{
    "sender": "query_parser",
    "analysis": "The user is asking about the family of a protein structure file named 'af2_predicted_structure.pdb'. This suggests that the file is a PDB-formatted structure, and the goal is to determine its Pfam family. The most relevant input type is STRUCTURE_PATH, which corresponds to the provided PDB file. The expected output would be the Pfam ID, as it represents the family classification of the protein.",
    "content": {
        "input": {
            "protein_1": {
                "STRUCTURE_PATH": [
                    "af2_predicted_structure.pdb"
                ]
            }
        },
        "output": {
            "protein_1": [
                "PFAM_ID"
            ]
        }
    }
}
{
    "sender": "planner",
    "content": {
        "user_request": "What family does af2_predicted_structure.pdb possibly belong to?",
        "analysis": "The user is asking about the Pfam family of a protein structure file named 'af2_predicted_structu

In [10]:
agent.workflow_manager.workflow2json()

{}

#### Update arg descriptions

In [6]:
input_args = []
output_args = []
for toolname, tool in tool_manager.tools.items():
    document = tool.config["document"]
    for para in document["required_parameters"]:
        if para["detailed_type"] not in input_args:
            input_args.append(para)
    for para in document["return_values"]:
        if para["detailed_type"] not in output_args:
            output_args.append(para)

In [7]:
len(input_args), len(output_args)

(73, 90)

#### Overall startup

In [8]:
# query = "scaffold the residues index 10-30 of pdb id 1xiw chain A. I want both the aa sequence and 3D structure of the designed scaffold."

In [20]:
message_pool = []

In [10]:
# query = "What family does AFASDFADSEWAFAWE possibly belong to?"

In [21]:
from agent.agent.subagents.query_parser import QueryParser
from agent.agent.subagents.plan_generator import PlanGenerator
from agent.agent.subagents.tool_connector import ToolConnector
from agent.agent.subagents.tool_executor import ToolExecutor

from agent.workflow.workflow_manager import WorkflowManager

query_parser = QueryParser(model_name="qwen-turbo", tool_manager=tool_manager)
plan_generator = PlanGenerator(model_name="qwen-turbo", tool_manager=tool_manager)
tool_connector = ToolConnector(model_name="qwen-turbo", tool_manager=tool_manager)
tool_executor = ToolExecutor(model_name="qwen-turbo", tool_manager=tool_manager)


workflow_manager = WorkflowManager(tool_manager=tool_manager)


message_pool = []

message_pool.append("")
for response in query_parser.stream_chat(query):
    message_pool[-1] = response.content
    # from IPython import display
    # display.clear_output(wait=True)
    # print(response.content)

query_parser_output = json_repair.loads(message_pool[-1])["content"]
workflow_manager.set_query_io(query_parser_output)
# print(workflow_manager.workflow2json())

MAX_PLAN_TURN = 3
plan = None

for _ in range(MAX_PLAN_TURN):

    if plan is None:
        message_pool.append("")
        for response in plan_generator.stream_chat(query, message_pool[:-1]):
            message_pool[-1] = response.content
            from IPython import display
            display.clear_output(wait=True)
            print(response.content)

        plan_generator_output = json_repair.loads(message_pool[-1])["content"]
        plan = json_repair.loads(plan_generator_output) if not isinstance(plan_generator_output, dict) else plan_generator_output
        plan = {k: v for k, v in plan.items() if k.startswith("step_")}
    workflow_manager.set_workflow(plan_generator_output)
    # print(workflow_manager.workflow2json())

    for i in range(1, len(plan) + 1):
        step_id = f"step_{i}"
        
        if plan[step_id].get('executed','').lower() == "yes" or plan[step_id].get('status', '').lower() == "executed":
            workflow_manager.current_step = step_id
            continue
        
        message_pool.append("")
        for response in tool_connector.stream_chat(user_request=query, plan=plan, step_id=step_id, message_pool=message_pool[:-1]):
            message_pool[-1] = response.content
            from IPython import display
            display.clear_output(wait=True)
            print(response.content)
        
        
        connector_output = json_repair.loads(message_pool[-1])["content"]
        if "error" in connector_output:
            try:
                workflow_manager.insert_tool_chain(connector_output)
                plan = workflow_manager.workflow2json()
            except:
                connector_msg ={
                    "sender": "tool_connector",
                    "analysis": "Tried all possible tool chains but failed to connect the tools.",
                    "content": json.dumps(connector_output, indent=4)
                }
                message_pool[-1] = json.dumps(connector_msg, indent=4)
                plan = None
            break
        workflow_manager.connect_tool_node(connector_output)
        # print(workflow_manager.workflow2json())
        
        message_pool.append("")
        for response in tool_executor.stream_chat(plan=plan, step_id=step_id, message_pool=message_pool[:-1]):
            message_pool[-1] = response.content
            from IPython import display
            display.clear_output(wait=True)
            print(response.content)
        
        tool_executor_output = json_repair.loads(message_pool[-1])["content"]
        if tool_executor_output.get("status", "error").lower() == "error":
            plan = None
            break
        else:
            workflow_manager.execute_toolnode(tool_executor_output)
        # print(workflow_manager.workflow2json())
        # break
        





{
    "sender": "tool_executor",
    "content": {
        "current_step": "step_4",
        "tool_name": "chat",
        "analysis": "The required parameters for the 'chat' tool are not explicitly defined, and no additional arguments are needed. The final answer can be derived from the results of the previous steps.",
        "tool_arg": {},
        "status": "success"
    }
}
--- Creating Workflow from configuration ---
Initializing node 'step_1' with tool 'pdb2aaseq':
Initializing node 'step_2' with tool 'seq2fasta':
Initializing node 'step_3' with tool 'hmmscan':
Initializing node 'step_4' with tool 'chat':
--- Workflow creation complete ---



KeyError: 'results'

In [12]:
for response in message_pool:
    print(response)

{
    "sender": "query_parser",
    "analysis": "The user is asking about the family of a predicted structure file, 'af2_predicted_structure.pdb'. This suggests that the file is a PDB-formatted structure, and the goal is to determine its Pfam family. The most relevant output type would be PFAM_ID, which identifies the protein family. Since the input is a PDB file, it corresponds to the STRUCTURE_PATH input type. No other inputs are explicitly mentioned in the request.",
    "content": {
        "input": {
            "protein_1": {
                "STRUCTURE_PATH": [
                    "af2_predicted_structure.pdb"
                ]
            }
        },
        "output": {
            "protein_1": [
                "PFAM_ID"
            ]
        }
    }
}

{
    "sender": "planner",
    "content": {
        "user_request": "What family does af2_predicted_structure.pdb possibly belong to?",
        "analysis": "The user is asking about the Pfam family of a predicted structure file

#### Query Parser

In [None]:
from agent.agent.subagents.query_parser import QueryParser


query_parser = QueryParser(tool_manager=tool_manager, model_name="qwen-turbo")

In [None]:
message_pool.append("")
for response in query_parser.stream_chat(query):
    message_pool[-1] = response.content
    from IPython import display
    display.clear_output(wait=True)
    print(response.content)

In [None]:
print(message_pool[-1])

#### Plan Generator

In [None]:
from agent.agent.subagents.plan_generator import PlanGenerator

plan_generator = PlanGenerator(tool_manager=tool_manager, model_name="qwen-turbo")

In [None]:
message_pool.append("")
for response in plan_generator.stream_chat(query, message_pool=message_pool):
    message_pool[-1] = response.content
    from IPython import display
    display.clear_output(wait=True)
    print(response.content)

In [None]:
print(response.content)

#### Initialize workflow

In [None]:
from agent.workflow.workflow_manager import WorkflowManager


plan = json_repair.loads(message_pool[-1])["content"]

# only keep 'step_x' keys
plan = {k: v for k, v in plan.items() if k.startswith('step_')}

workflow_manager = WorkflowManager(tool_manager=tool_manager)
workflow_manager.set_workflow(plan)

#### Tool Executor

In [None]:
from agent.agent.subagents.tool_connector import ToolConnector
from agent.agent.subagents.tool_executor import ToolExecutor

tool_connector = ToolConnector(tool_manager=tool_manager, model_name="qwen-turbo")
tool_executor = ToolExecutor(tool_manager=tool_manager, model_name="qwen-turbo")

In [None]:
message_pool = message_pool[:2]
message_pool

In [None]:
for i in range(len(plan)):
    step_id = f"step_{i + 1}"
    if plan[step_id]["tool"] == "chat":
        break
    
    message_pool.append("")
    for response in tool_connector.stream_chat(
        user_request=query,
        plan = plan,
        step_id = step_id, 
        message_pool = message_pool[:-1]
    ):
        message_pool[-1] = response.content
        from IPython import display
        display.clear_output(wait=True)
        print(response.content)
        
    connector_msg = json_repair.loads(message_pool[-1])["content"]
    if "error" in connector_msg:
        break
    workflow_manager.connect_tool_node(connector_msg)
    
    
    message_pool.append("")
    for response in tool_executor.stream_chat(
        plan = plan,
        step_id = step_id, 
        message_pool = message_pool
    ):
        message_pool[-1] = response.content
        from IPython import display
        display.clear_output(wait=True)
        print(response.content)

In [None]:
message_pool=message_pool[:5]
message_pool