# Start

In [1]:
from crewai import Agent, Task, Crew, Process, LLM
from crewai.tools import BaseTool
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
import random 
from pydantic import BaseModel, Field
from typing import List,Optional
import json
from rdkit.Chem import AllChem
import requests
import os
from typing import List, Dict

import re
from pydantic import BaseModel
from dotenv import load_dotenv
from typing import List,Type
import subprocess
import tempfile

load_dotenv()

llm = LLM(
    model="azure/o3-mini",
    api_key=os.environ["AZURE_API_KEY"],
    api_base=os.environ["AZURE_API_BASE"],
    api_version=os.environ["AZURE_API_VERSION"]
)



# Tools

In [4]:


class UniProtToolSchema(BaseModel):
    target_name: str

class UniProtTool(BaseTool):
    name: str = "UniProt Fetcher"
    description: str = "Fetch protein information from UniProt given a target name."
    args_schema = UniProtToolSchema

    def _run(self, target_name):
        print(f"\n[UniProtTool] Querying UniProt for: {target_name}")
        url = f"https://rest.uniprot.org/uniprotkb/search?query=({target_name})AND(organism_name:human)&format=json"
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()
            if data and "results" in data and len(data["results"]) > 0:
                first_result = data["results"][0]
                uniprot_id = first_result.get("primaryAccession")
                sequence = first_result.get("sequence", {}).get("value", "")
                protein_name = first_result.get("protein", {}).get("recommendedName", {}).get("fullName", {}).get("value", target_name)
                alternative_names = [name.get("value") for name in first_result.get("protein", {}).get("alternativeName", []) if name.get("value")]
                pdb_entries = first_result.get("uniProtKBCrossReferences", [])
                pdb_ids = [entry.get("id") for entry in pdb_entries if entry.get("database") == "PDB"]

                output = {
                    "target_name": protein_name,
                    "alternative_names": alternative_names,
                    "uniprot_id": uniprot_id,
                    "sequence": sequence,
                    "potential_pdb_ids": pdb_ids
                }
                print(f"[UniProtTool] Output: {json.dumps(output, indent=2)}")
                return output
            else:
                print(f"[UniProtTool] No results found for {target_name}")
                return None
        except Exception as e:
            print(f"[UniProtTool] Error: {e}")
            return None

class BioinformaticsEnrichmentToolSchema(BaseModel):
    target_name: str
    uniprot_id: str
    pdb_ids: list[str] = []
    file_format: str = 'pdb'

class BioinformaticsEnrichmentTool(BaseTool):
    name: str = "Bioinformatics Enrichment Tool"
    description: str = "Fetch compound info from PubChem, bioactivity from ChEMBL, and 3D structure from PDB."
    args_schema = BioinformaticsEnrichmentToolSchema

    def _run(self, target_name, uniprot_id, pdb_ids=[], file_format='pdb'):
        result = {
            "pubchem_compounds": [],
            "chembl_bioactivities": [],
            "pdb_structures": []
        }

        # --- PubChem Part ---
        try:
            print(f"\n[PubChem] Querying PubChem for: {target_name}, {uniprot_id}")
            url_cid = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{target_name}/cids/JSON"
            response = requests.get(url_cid)
            response.raise_for_status()
            cid_data = response.json()

            cids = cid_data.get('IdentifierList', {}).get('CID', [])
            compounds = []
            for cid in cids:
                url_xrefs = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/xrefs/JSON"
                xref_response = requests.get(url_xrefs)
                xref_response.raise_for_status()
                xref_data = xref_response.json()

                information_list = xref_data.get('InformationList', {}).get('Information', [])
                is_linked = any('UniProt' in info and uniprot_id in info['UniProt'] for info in information_list)

                if is_linked:
                    smiles_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/CanonicalSMILES/JSON"
                    smiles_response = requests.get(smiles_url)
                    smiles_response.raise_for_status()
                    smiles_data = smiles_response.json()
                    smiles = smiles_data.get('PropertyTable', {}).get('Properties', [{}])[0].get('CanonicalSMILES')
                    if smiles:
                        compounds.append({
                            "smiles": smiles,
                            "cid": cid,
                            "name": f"Compound_{cid}"
                        })
            result["pubchem_compounds"] = compounds
            print(f"[PubChem] Found {len(compounds)} compounds.")
        except Exception as e:
            print(f"[PubChem] Error: {e}")

        # --- ChEMBL Part ---
        try:
            print(f"\n[ChEMBL] Querying ChEMBL for UniProt ID: {uniprot_id}")
            target_url = f"https://www.ebi.ac.uk/chembl/api/data/target?target_components.accession={uniprot_id}&format=json"
            target_response = requests.get(target_url)
            target_response.raise_for_status()
            target_data = target_response.json()
            targets = target_data.get("targets", [])
            if targets:
                target_chembl_id = targets[0].get("target_chembl_id")
                print(f"[ChEMBL] Found target_chembl_id: {target_chembl_id}")
                activity_url = f"https://www.ebi.ac.uk/chembl/api/data/activity?target_chembl_id={target_chembl_id}&format=json"
                activity_response = requests.get(activity_url)
                activity_response.raise_for_status()
                activity_data = activity_response.json()
                activities = activity_data.get("activities", [])

                chembl_output = []
                for activity in activities:
                    molecule_chembl_id = activity.get("molecule_chembl_id")
                    if not molecule_chembl_id:
                        continue
                    molecule_url = f"https://www.ebi.ac.uk/chembl/api/data/molecule/{molecule_chembl_id}?format=json"
                    molecule_response = requests.get(molecule_url)
                    if molecule_response.status_code != 200:
                        continue
                    molecule_data = molecule_response.json()
                    smiles = molecule_data.get("molecule_structures", {}).get("canonical_smiles")
                    if smiles:
                        chembl_output.append({
                            "chembl_id": molecule_chembl_id,
                            "smiles": smiles,
                            "target_chembl_id": activity.get("target_chembl_id"),
                            "activity": activity.get("standard_type"),
                            "value": activity.get("standard_value"),
                            "unit": activity.get("standard_units")
                        })
                result["chembl_bioactivities"] = chembl_output
                print(f"[ChEMBL] Found {len(chembl_output)} bioactivities.")
            else:
                print(f"[ChEMBL] No target found for UniProt ID: {uniprot_id}")
        except Exception as e:
            print(f"[ChEMBL] Error: {e}")

        import os
        import glob

        # Create the folder if it doesn't exist
        os.makedirs('pdb_files', exist_ok=True)

        # Delete all files in the folder if it exists
        if os.path.exists('pdb_files'):
            for f in glob.glob('./pdb_files/*'):
                if os.path.isfile(f):
                    os.remove(f)

        for pdb_id in pdb_ids:
            # pass
            pdb_id = pdb_id.upper()
            print(f"\n[PDB] Retrieving structure for PDB ID: {pdb_id}")
            if not re.match(r'^[A-Z0-9]{4}$', pdb_id):
                print(f"[PDB] Invalid PDB ID: {pdb_id}")
                continue

            url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
            pdb_file_path = f'pdb_files/{pdb_id}.pdb'
            try:
                response = requests.get(url, stream=True)
                response.raise_for_status()
                with open(pdb_file_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                result["pdb_structures"].append({
                    "pdb_id": pdb_id,
                    "file_path": pdb_file_path
                })
                print(f"[PDB] Saved structure for {pdb_id} at {pdb_file_path}.")
            except requests.exceptions.HTTPError as http_err:
                print(f"[PDB] HTTP error for {pdb_id}: {http_err}")
            except Exception as err:
                print(f"[PDB] Other error for {pdb_id}: {err}")

        return result



class PropertyPrioritizationSchema(BaseModel):
    pubchem_compounds: list = []
    chembl_bioactivities: list = []

class PropertyPrioritizationTool(BaseTool):
    name: str = "Property Prioritization Tool"
    description: str = "Analyze molecular properties and prioritize based on drug-likeness criteria."
    args_schema : type = PropertyPrioritizationSchema

    def _run(self, pubchem_compounds=None, chembl_bioactivities=None):
        print("\n[PropertyPrioritization] Evaluating molecular properties...")

        properties = {
            "molecular_weight_range": [200, 500],
            "logp_range": [0.5, 4.5],
            "hydrogen_bond_donors_range": [0, 5],
            "hydrogen_bond_acceptors_range": [2, 8],
            "num_rings": None
        }

        # Try PubChem compounds first, then ChEMBL
        smiles = None
        if pubchem_compounds:
            for c in pubchem_compounds:
                smiles = c.get("smiles")
                if smiles:
                    break
        if not smiles and chembl_bioactivities:
            for b in chembl_bioactivities:
                smiles = b.get("smiles")
                if smiles:
                    break

        if smiles:
            mol = Chem.MolFromSmiles(smiles)
            if mol:
                properties["num_rings"] = rdMolDescriptors.CalcNumRings(mol)
                # print(properties["num_rings"])

        print(f"[PropertyPrioritization] Output: {json.dumps(properties, indent=2)}")
        return {"prioritized_properties": properties}


class MoleculeStructure(BaseModel):
    original: str = Field(..., description="Original SMILES string")
    modified: str = Field(..., description="Modified SMILES string")

class MoleculeStructureList(BaseModel):
    molecules: List[MoleculeStructure]

class MoleculeStructurewithMOL(BaseModel):
    original: str = Field(..., description="Original SMILES string")
    modified: str = Field(..., description="Modified SMILES string")
    mol_block: Optional[str] = None 

class MoleculeStructureListwithMOL(BaseModel):
    molecules: List[MoleculeStructurewithMOL]

class MoleculeGenerationSchema(BaseModel):
    smiles_list: list

class MoleculeGenerationTool(BaseTool):
    name: str = "Molecule Generation Tool"
    description: str = "Modify known ligands using RDKit to generate novel analogs."
    args_schema: type = MoleculeGenerationSchema

    def _run(self, smiles_list):
        modified_molecules = []
        for smiles in smiles_list:
            mol = Chem.MolFromSmiles(smiles)
            if not mol:
                continue

            # Add explicit hydrogens before modification
            mol_with_H = Chem.AddHs(mol)
            hydrogen_idxs = [atom.GetIdx() for atom in mol_with_H.GetAtoms() if atom.GetAtomicNum() == 1]

            if not hydrogen_idxs:
                continue  # No replaceable hydrogens

            # Replace one H with a methyl group (-CH3)
            replace_idx = random.choice(hydrogen_idxs)
            editable = Chem.RWMol(mol_with_H)
            editable.ReplaceAtom(replace_idx, Chem.Atom("C"))

            # Sanitize and get SMILES of modified molecule
            modified_mol = editable.GetMol()
            try:
                Chem.SanitizeMol(modified_mol)
                modified_smiles = Chem.MolToSmiles(modified_mol)
                modified_molecules.append({
                    "original": smiles,
                    "modified": modified_smiles
                })
            except:
                continue  # Skip if invalid

        return modified_molecules
        # return MoleculeStructureList(molecules=modified_molecules)


from rdkit.Chem import AllChem

# Define the schema for molecules
class MoleculeStructure(BaseModel):
    original: str  # original SMILES
    modified: str  # modified SMILES

# Define the schema for the tool's input
class StructureGenerationSchema(BaseModel):
    molecules_list: List[MoleculeStructure]  # List of MoleculeStructure

# Tool class for 3D structure generation
class StructureGenerationTool(BaseTool):
    name: str = "Structure Generation Tool"
    description: str = "Generate 3D structures from modified SMILES using RDKit."
    args_schema: type = StructureGenerationSchema

    # Run method to generate 3D structures
    def _run(self, molecules_list: List[Dict[str, str]]):
        molecules_3d = []
        # Iterate through the list of molecule data
        for mol_data in molecules_list:
            print("Processing molecule:")
            print(mol_data)
            
            smiles = mol_data.get("modified")
            if not smiles:
                continue

            # Convert SMILES to RDKit molecule object
            mol = Chem.MolFromSmiles(smiles)
            if not mol:
                print(f"Invalid SMILES: {smiles}")
                continue

            # Add hydrogens and perform 3D embedding
            mol = Chem.AddHs(mol)
            try:
                success = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
                if success == 0:
                    AllChem.UFFOptimizeMolecule(mol)
                    mol_block = Chem.MolToMolBlock(mol)
                    molecules_3d.append({
                        "original": mol_data.get("original"),
                        "modified": smiles,
                        "mol_block": mol_block
                    })
                else:
                    print(f"3D embedding failed for {smiles}")
            except Exception as e:
                print(f"[3D Generation Error] for {smiles}: {e}")

        return molecules_3d









# class MoleculeStructure(BaseModel):
#     original: str = Field(..., description="Original SMILES string")
#     modified: str = Field(..., description="Modified SMILES string")

# class StructureGenerationSchema(BaseModel):
#     molecules_list: List[MoleculeStructure]  # list of {"original": ..., "modified": ...}

# class StructureGenerationTool(BaseTool):
#     name: str = "Structure Generation Tool"
#     description: str = "Generate 3D structures from modified SMILES using RDKit."
#     args_schema: type = StructureGenerationSchema

#     # def _run(self, molecules_list: List[MoleculeStructure]):
#     def _run(self, molecules_list):
#         molecules_3d = []
#         # print(">>>"*20)
#         # print(molecules_list)
#         # print(">>>"*20)
#         for mol_data in molecules_list:
#             print("##"*30)
#             print(mol_data)
#             smiles = mol_data.get("modified") 
            
#             print("**"*30)
#             print(smiles)
#             print("**"*20)
#             mol = Chem.MolFromSmiles(smiles)
#             if not mol:
#                 continue
#             mol = Chem.AddHs(mol)
#             try:
#                 success = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
#                 if success == 0:
#                     AllChem.UFFOptimizeMolecule(mol)
#                     mol_block = Chem.MolToMolBlock(mol)
#                     molecules_3d.append({
#                         "original": mol_data.get("original"),
#                         "modified" : smiles,
#                         "mol_block" : mol_block  
#                     })
#             except Exception as e:
#                 print(f"[3D Generation Error] for {smiles}: {e}")
#         # return MoleculeStructureListwithMOL(molecules_3d)
#         return molecules_3d


class ReceptorInput(BaseModel):
    pdb_id: str
    file_path: str

class LigandInput(BaseModel):
    original: str
    modified: str
    mol_block: str

class PDBQTConversionInput(BaseModel):
    pdb_structures: List[ReceptorInput] = Field(..., description="List of receptor PDB structures.")
    ligands: List[LigandInput] = Field(..., description="List of ligands with mol_block data.")

class PDBQTConversionTool(BaseTool):
    name:str = "PDBQT Conversion Tool"
    description: str = "Converts receptor PDB files and ligand mol_blocks into .pdbqt format using MGLTools."
    args_schema: type = PDBQTConversionInput

    def _run(self, pdb_structures: List[dict], ligands: List[dict]) -> dict:
        receptors = pdb_structures
        ligands = ligands
        print("***"*20)
        print("Recep :",receptors)
        print("##"*20)
        print("Ligands : ",ligands)

        output = {"receptors": [], "ligands": []}
        receptor_dir = "receptor_pdbqt"
        ligand_dir = "ligand_pdbqt"
        os.makedirs(receptor_dir, exist_ok=True)
        os.makedirs(ligand_dir, exist_ok=True)

        # Convert Receptors
        for rec in receptors:
            pdb_id = rec["pdb_id"]
            pdb_path = rec["file_path"]
            # pdb_id = rec.pdb_id
            # pdb_path = rec.file_path
            pdbqt_path = os.path.join(receptor_dir, f"{pdb_id}.pdbqt")
            try:
                subprocess.run([
                    "prepare_receptor4",
                    "-r", pdb_path,
                    "-o", pdbqt_path,
                    "-A", "hydrogens"
                ], check=True)
    
                output["receptors"].append({
                    "pdb_id": pdb_id,
                    "pdb_file": pdb_path,
                    "pdbqt_file": pdbqt_path
                })

            except Exception as e:
                print(e)
        # Convert Ligands
        for i, ligand in enumerate(ligands):
            ligand_id = ligand["original"] or f"lig_{i}"
            mol_block = ligand["mol_block"]
            # ligand_id = ligand.original or f"lig_{i}"
            # mol_block = ligand.mol_block
            mol_path = os.path.join(ligand_dir, f"{ligand_id}.mol")
            pdb_path = mol_path.replace(".mol", ".pdb")
            pdbqt_path = mol_path.replace(".mol", ".pdbqt")

            try:
                # Save mol_block and convert to PDB using Open Babel
                with open(mol_path, "w") as f:
                    f.write(mol_block)
    
                subprocess.run(["obabel", mol_path, "-O", pdb_path], check=True)
    
                subprocess.run([
                    "prepare_ligand4",
                    "-l", pdb_path,
                    "-o", pdbqt_path,
                    "-A", "hydrogens"
                ], check=True)
    
                output["ligands"].append({
                    "ligand_id": ligand_id,
                    "mol_file": mol_path,
                    "pdb_file": pdb_path,
                    "pdbqt_file": pdbqt_path
                })
            except Exception as e:
                print(e)

        return output



class DockingInput(BaseModel):
    receptors: List[str] = Field(..., description="Paths to receptor .pdbqt files.")
    ligands: List[str] = Field(..., description="Paths to ligand .pdbqt files.")
    center_x: float = Field(10.0, description="X-coordinate of the docking box center.")
    center_y: float = Field(12.5, description="Y-coordinate of the docking box center.")
    center_z: float = Field(15.0, description="Z-coordinate of the docking box center.")
    size_x: float = Field(20.0, description="Size of the docking box along the X-axis.")
    size_y: float = Field(20.0, description="Size of the docking box along the Y-axis.")
    size_z: float = Field(20.0, description="Size of the docking box along the Z-axis.")
    exhaustiveness: int = Field(8, description="Exhaustiveness of the global search.")
    num_modes: int = Field(9, description="Maximum number of binding modes to generate.")


class AutoDockVinaTool(BaseTool):
    name :str = "AutoDock Vina Tool"
    description : str = "Performs molecular docking using AutoDock Vina."
    args_schema: Type[BaseModel] = DockingInput

    def _run(
        self,
        receptors: List[str],
        ligands: List[str],
        center_x: float,
        center_y: float,
        center_z: float,
        size_x: float,
        size_y: float,
        size_z: float,
        exhaustiveness: int,
        num_modes: int
    ) -> dict:
        output = {"docking_results": []}
        output_dir = "docking_results"
        os.makedirs(output_dir, exist_ok=True)

        print("Receptors : : : :",receptors)
        print("ligands : : : :",ligands)
        
        for receptor_path in receptors:
            receptor_name = os.path.splitext(os.path.basename(receptor_path))[0]
            for ligand_path in ligands:
                ligand_name = os.path.splitext(os.path.basename(ligand_path))[0]
                result_prefix = f"{receptor_name}_{ligand_name}"
                out_pdbqt = os.path.join(output_dir, f"{result_prefix}_out.pdbqt")
                log_file = os.path.join(output_dir, f"{result_prefix}_log.txt")

                try:
                    subprocess.run([
                        "vina",
                        "--receptor", receptor_path,
                        "--ligand", ligand_path,
                        "--center_x", str(center_x),
                        "--center_y", str(center_y),
                        "--center_z", str(center_z),
                        "--size_x", str(size_x),
                        "--size_y", str(size_y),
                        "--size_z", str(size_z),
                        "--exhaustiveness", str(exhaustiveness),
                        "--num_modes", str(num_modes),
                        "--out", out_pdbqt,
                        # "--log", log_file
                    ], check=True)
    
                    output["docking_results"].append({
                        "receptor": receptor_path,
                        "ligand": ligand_path,
                        "output_pdbqt": out_pdbqt,
                        # "log": log_file
                    })
                except Exception as e:
                    print(e)
    
        return output


# Update ReceptorRankingInput
class ReceptorRankingInput(BaseModel):
    docking_results: List[dict] = Field(..., description="List of docking results with receptor and ligand information.")

# Update ReceptorRankingTool
class ReceptorRankingTool(BaseTool):
    name: str = "Receptor Ranking Tool"
    description: str = "Ranks receptors based on their binding affinity with ligands extracted from docking results."
    args_schema: Type[BaseModel] = ReceptorRankingInput

    def _run(self, docking_results: List[dict]) -> List[dict]:
        ranked_receptors = []
        for result in docking_results:
            receptor_path = result.get('receptor')
            ligand_path = result.get('ligand')
            output_pdbqt = result.get('output_pdbqt')
            receptor_id = os.path.basename(receptor_path).split('.')[0] if receptor_path else 'unknown_receptor'
            affinity = None

            if output_pdbqt and os.path.exists(output_pdbqt):
                try:
                    with open(output_pdbqt, 'r') as f:
                        for line in f:
                            if line.startswith('REMARK VINA RESULT:'):
                                parts = line.strip().split()
                                if len(parts) >= 4:
                                    affinity = float(parts[3])
                                    break
                except Exception as e:
                    print(f"Error reading output PDBQT file {output_pdbqt}: {e}")

            if affinity is not None:
                ranked_receptors.append({
                    'receptor_id': receptor_id,
                    'binding_affinity': affinity
                })

        # Sort receptors by binding affinity (more negative is better)
        ranked_receptors.sort(key=lambda x: x['binding_affinity'])
        return ranked_receptors


# Agent

In [7]:

# -------------------- Agents --------------------

target_agent = Agent(
    role="Protein Target Identifier",
    goal="Identify protein information from UniProt.",
    backstory="An expert bioinformatician skilled in protein databases.",
    tools=[UniProtTool()],
        #    , FileReadTool()],  # Add FileReadTool
    llm=llm
)

enrichment_agent = Agent(
    role="Protein Information Enricher",
    goal="Find inhibitors, bioactivity data, and 3D structures for the given protein.",
    backstory="Specializes in drug discovery and protein structure retrieval.",
    tools=[BioinformaticsEnrichmentTool()],
    #   FileReadTool()],  # Add FileReadTool
    llm=llm
)

property_agent = Agent(
    role="Molecular Property Prioritizer",
    goal="Evaluate and prioritize molecular properties to guide compound selection.",
    backstory="A chemoinformatics expert using drug-likeness heuristics to guide design decisions.",
    tools=[PropertyPrioritizationTool()],
    llm=llm
)


molecule_generation_agent = Agent(
    role="Ligand Designer",
    goal="Create new ligands by modifying existing molecules from PubChem or ChEMBL.",
    backstory="A molecular chemist with expertise in synthetic drug design and structure-activity relationships.",
    tools=[MoleculeGenerationTool()],
    llm=llm,
    # output_pydantic=
)


structure_generation_agent = Agent(
    role="3D Structure Generator",
    goal="Convert 2D molecules to 3D-optimized structures using cheminformatics tools. Use the result of generation_task which contains list of smiles original & modified.",
    backstory="A computational chemist skilled in conformer generation and molecular geometry optimization.",
    tools=[StructureGenerationTool()],
    llm=llm
)


pdbqt_conversion_agent = Agent(
    role="Molecule Docking Preparation Agent",
    goal="Convert ligand and receptor structures to .pdbqt format using AutoDockTools",
    backstory=(
        "An expert in molecular docking prep, this agent ensures all ligands and receptors "
        "are properly converted to the .pdbqt format for AutoDock simulations."
    ),
    verbose=True,
    llm = llm,
    tools=[PDBQTConversionTool()]
)

docking_agent = Agent(
    name="DockingAgent",
    role="Molecular Docking Specialist",
    goal="Perform molecular docking simulations to evaluate ligand binding affinities.",
    backstory=(
        "An expert in computational chemistry, proficient in using AutoDock Vina "
        "for simulating ligand-receptor interactions to aid in drug discovery."
    ),
    llm = llm,
    tools=[AutoDockVinaTool()]
)

receptor_analyst_agent = Agent(
    name="ReceptorAnalyst",
    role="Receptor Evaluation Specialist",
    goal="Evaluate and rank receptors based on their binding affinity with ligands to identify the most promising therapeutic targets.",
    backstory=(
        "An expert in evaluating receptor-ligand interactions, focused on identifying receptors with the highest binding affinities "
        "to guide drug discovery and development for disease treatment."
    ),
    llm=llm,
    tools=[ReceptorRankingTool()]  # Using ReceptorRankingTool to rank receptors
)


# Tasks

In [8]:


identify_task = Task(
    description="Identify the protein target from the user's query and fetch UniProt data.",
    expected_output="Protein target information including UniProt ID, sequence, and potential PDB IDs.",
    agent=target_agent,
)

enrich_task = Task(
    description="Using the UniProt data, find inhibitors from PubChem, bioactivity from ChEMBL, and download structures from PDB. Use FileReadTool to read structures if needed.",
    expected_output="Enriched data with known inhibitors, bioactivity results, and 3D structure file paths.",
    agent=enrichment_agent,
    context=[identify_task]
)

prioritize_task = Task(
    description="Prioritize molecular properties using SMILES data collected from PubChem and ChEMBL sources, applying RDKit descriptors. If PubChem compounds are unavailable, fallback to ChEMBL bioactivities.",
    expected_output="A dictionary of prioritized drug-like properties including molecular weight range, logP range, H-bond donor/acceptor counts, and ring counts.",
    agent=property_agent,
    context=[enrich_task])

generation_task = Task(
    description=(
        "Using known ligand SMILES retrieved from PubChem or ChEMBL, create structurally related analogs "
        "by applying simple modifications such as replacing hydrogen with a methyl group."
    ),
    expected_output="A list of modified SMILES with corresponding original molecules. The output has to be list containing dictionries of modified and its orginal smile for every smile pass.",
    agent=molecule_generation_agent,
    context=[enrich_task,prioritize_task],
)

structure_task = Task(
    description="Take a list of orginal and modified smiles.Generate 3D structures for the modified ligands using RDKit, including energy minimization and conformer embedding.",
    expected_output="A list of 3D-optimized molecules with MolBlock data.",
    agent=structure_generation_agent,
    context=[generation_task]
)


pdbqt_conversion_task = Task(
    description=(
        "Convert ligands and receptor structures to .pdbqt format using AutoDockTools. "
        "Use ligands from the structure generation task and receptors from the enrichment task."
        "ligands is the list of dictionaries contains keys like original,modified and mol_block , Pass entire list as input."
        "receptors is the list of dictornaries contains keys like pubchem_compounds,chembl_bioactivities,pdb_structures, pass only pdb_structures as input."
    ),
    expected_output="A dictionary with 'ligands' and 'receptors' keys, each containing .pdbqt file paths.",
    agent=pdbqt_conversion_agent,
    context=[structure_task,enrich_task
    ]
)

perform_docking_task = Task(
    name="PerformDocking",
    description=( "Execute molecular docking simulations using AutoDock Vina."  ),
    agent=docking_agent,
    context=[pdbqt_conversion_task],
    expected_output="Dictionary containing docking results for each receptor-ligand pair."
)



# Update Task for Ranking Receptors
rank_receptors_task = Task(
    name="RankReceptors",
    description="Rank receptors based on their binding affinity with ligands extracted from docking output PDBQT files.",
    agent=receptor_analyst_agent,  # Updated agent to ReceptorAnalyst
    context=[perform_docking_task],  # Assuming perform_docking_task is defined elsewhere
    expected_output="A list of receptors ranked by their binding affinity with ligands."
)


# Crew

In [9]:

# -------------------- Crew --------------------

bioinfo_crew = Crew(
    agents=[target_agent, enrichment_agent, property_agent,molecule_generation_agent,
            structure_generation_agent,pdbqt_conversion_agent,docking_agent,receptor_analyst_agent
            
            ],
    tasks=[identify_task, enrich_task, prioritize_task
           ,generation_task,structure_task,pdbqt_conversion_task,perform_docking_task,rank_receptors_task
           ],
    process=Process.sequential,
    verbose=True
)

# -------------------- Run Crew --------------------

def run_bioinformatics_pipeline(query):
    print("\n=== Running Bioinformatics Crew ===")
    results = bioinfo_crew.kickoff(inputs={"user_input": query})
    print("\n=== Final Results ===")
    print(results)
    return results

if __name__ == "__main__":
    run_bioinformatics_pipeline("EGFR")



=== Running Bioinformatics Crew ===


[1m[95m# Agent:[00m [1m[92mProtein Target Identifier[00m
[95m## Task:[00m [92mIdentify the protein target from the user's query and fetch UniProt data.[00m



[UniProtTool] Querying UniProt for: P53_HUMAN
[UniProtTool] Output: {
  "target_name": "P53_HUMAN",
  "alternative_names": [],
  "uniprot_id": "P04637",
  "sequence": "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD",
  "potential_pdb_ids": [
    "1A1U",
    "1AIE",
    "1C26",
    "1DT7",
    "1GZH",
    "1H26",
    "1HS5",
    "1JSP",
    "1KZY",
    "1MA3",
    "1OLG",
    "1OLH",
    "1PES",
    "1PET",
    "1SAE",
    "1SAF",
    "1SAK",
    "1SAL",
    "1TSR",
    "1TUP",
    "1UOL",
    "1XQH",
    "1YC5",
    "1YCQ",
    "1YCR",
    "1YCS",
    "2AC0",
    "2ADY",
    "2AHI",
    "2ATA",
    "2B3G",
    "2BIM",
    "2BIN",
    "2BIO",
  



[1m[95m# Agent:[00m [1m[92mProtein Target Identifier[00m
[95m## Final Answer:[00m [92m
{
  "UniProt_ID": "P04637",
  "Sequence": "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD",
  "Potential_PDB_IDs": ["1A1U", "1AIE", "1C26", "1DT7", "1GZH", "1H26", "1HS5", "1JSP", "1KZY", "1MA3", "1OLG", "1OLH", "1PES", "1PET", "1SAE", "1SAF", "1SAK", "1SAL", "1TSR", "1TUP", "1UOL", "1XQH", "1YC5", "1YCQ", "1YCR", "1YCS", "2AC0", "2ADY", "2AHI", "2ATA", "2B3G", "2BIM", "2BIN", "2BIO", "2BIP", "2BIQ", "2F1X", "2FEJ", "2FOJ", "2FOO", "2GS0", "2H1L", "2H2D", "2H2F", "2H4F", "2H4H", "2H4J", "2H59", "2J0Z", "2J10", "2J11", "2J1W", "2J1X", "2J1Y", "2J1Z",

[1m[95m# Agent:[00m [1m[92mProtein Information Enricher[00m
[95m## Task:[00m [92mUsing the UniProt data, find inhibitors from PubChem, bioactivity from ChEMBL, and download structures from PDB. Use FileReadTool to read structures if needed.[00m



[PubChem] Querying PubChem for: p53, P04637
[PubChem] Error: 400 Client Error: PUGREST.BadRequest for url: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/4274/xrefs/JSON

[ChEMBL] Querying ChEMBL for UniProt ID: P04637
[ChEMBL] Found target_chembl_id: CHEMBL2074
[ChEMBL] Found 20 bioactivities.

[PDB] Retrieving structure for PDB ID: 1A1U
[PDB] Saved structure for 1A1U at pdb_files/1A1U.pdb.

[PDB] Retrieving structure for PDB ID: 1AIE
[PDB] Saved structure for 1AIE at pdb_files/1AIE.pdb.

[PDB] Retrieving structure for PDB ID: 1C26
[PDB] Saved structure for 1C26 at pdb_files/1C26.pdb.

[PDB] Retrieving structure for PDB ID: 1DT7
[PDB] Saved structure for 1DT7 at pdb_files/1DT7.pdb.

[PDB] Retrieving structure for PDB ID: 1GZH
[PDB] Saved structure for 1GZH at pdb_files/1GZH.pdb.


[1m[95m# Agent:[00m [1m[92mProtein Information Enricher[00m
[95m## Thought:[00m [92mThought: I will use the Bioinformatics Enrichment Tool to fetch known inhibitors (with PubChem compound I



[1m[95m# Agent:[00m [1m[92mProtein Information Enricher[00m
[95m## Final Answer:[00m [92m
{
  "Inhibitors from PubChem": {
    "pubchem_compounds": []
  },
  "ChEMBL Bioactivity Data": {
    "chembl_bioactivities": [
      {
        "chembl_id": "CHEMBL2114210",
        "smiles": "OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O",
        "target_chembl_id": "CHEMBL2074",
        "activity": "IC50",
        "value": "110000.0",
        "unit": "nM"
      },
      {
        "chembl_id": "CHEMBL2114210",
        "smiles": "OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O",
        "target_chembl_id": "CHEMBL2074",
        "activity": "Ki",
        "value": "6200.0",
        "unit": "nM"
      },
      {
        "chembl_id": "CHEMBL87169",
        "smiles": "OCC1NC(CO)[C@@H](O)C(O)C1O",
        "target_chembl_id": "CHEMBL2074",
        "activity": "IC50",
        "value": "700.0",
        "unit": "nM"
      },
      {
        "chembl_id": "CHEMBL87169",
        "smiles": "OCC1NC(CO)[C@@H](O)C(O)

[1m[95m# Agent:[00m [1m[92mMolecular Property Prioritizer[00m
[95m## Task:[00m [92mPrioritize molecular properties using SMILES data collected from PubChem and ChEMBL sources, applying RDKit descriptors. If PubChem compounds are unavailable, fallback to ChEMBL bioactivities.[00m



[PropertyPrioritization] Evaluating molecular properties...
[PropertyPrioritization] Output: {
  "molecular_weight_range": [
    200,
    500
  ],
  "logp_range": [
    0.5,
    4.5
  ],
  "hydrogen_bond_donors_range": [
    0,
    5
  ],
  "hydrogen_bond_acceptors_range": [
    2,
    8
  ],
  "num_rings": 1
}


[1m[95m# Agent:[00m [1m[92mMolecular Property Prioritizer[00m
[95m## Thought:[00m [92mThought: The PubChem compounds list is empty so I will use the ChEMBL bioactivities SMILES data as a fallback to compute RDKit descriptors. I will prioritize drug-like properties (molecular weight, logP, H-bond donors, H-bond acceptors, and ring counts) and return a dictionary with these ranges.[00m
[95m## Using tool:[00m [92mProperty Prioritization Tool[00m
[95m## Tool Input:[00m [92m
"{\"pubchem_compounds\": [], \"chembl_bioactivities\": [{\"chembl_id\": \"CHEMBL2114210\", \"smiles\": \"OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O\", \"target_chembl_id\": \"CHEMBL2074\", \"acti



[1m[95m# Agent:[00m [1m[92mMolecular Property Prioritizer[00m
[95m## Final Answer:[00m [92m
{"molecular_weight_range": [200, 500], "logp_range": [0.5, 4.5], "hydrogen_bond_donors_range": [0, 5], "hydrogen_bond_acceptors_range": [2, 8], "num_rings": 1}[00m




[1m[95m# Agent:[00m [1m[92mLigand Designer[00m
[95m## Task:[00m [92mUsing known ligand SMILES retrieved from PubChem or ChEMBL, create structurally related analogs by applying simple modifications such as replacing hydrogen with a methyl group.[00m




[1m[95m# Agent:[00m [1m[92mLigand Designer[00m
[95m## Thought:[00m [92mThought: I will select the unique known ligand SMILES from the ChEMBL bioactivity data and then apply a simple modification (replacing a hydrogen with a methyl group) to each. I will simulate this by replacing (when present) a “CO” fragment with “C(C)O” (or for those without an obvious “CO” we will insert a CH3 appropriately) while keeping track of the original SMILES. I then call the Molecule Generation Tool with these SMILES and return for each the original SMILES and its corresponding modified analog.[00m
[95m## Using tool:[00m [92mMolecule Generation Tool[00m
[95m## Tool Input:[00m [92m
"{\"smiles_list\": [\"OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O\", \"OCC1NC(CO)[C@@H](O)C(O)C1O\", \"OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@@H]1O\", \"OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O\", \"OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O\", \"CN1[C@H](CO)[C@H](O)[C@@H](O)[C@H](O)[C@H]1CO\", \"CCCCN1[C@H](CO)



[1m[95m# Agent:[00m [1m[92mLigand Designer[00m
[95m## Final Answer:[00m [92m
[{'original': 'OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O', 'modified': '[H]OC1([H])[C@]([H])(O[H])[C@@]([H])(C([H])([H])O[H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]1([H])OC'}, {'original': 'OCC1NC(CO)[C@@H](O)C(O)C1O', 'modified': '[H]OC([H])([H])C1([H])N([H])C([H])(C([H])([H])O[H])[C@@]([H])(O[H])C(C)(O[H])C1([H])O[H]'}, {'original': 'OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@@H]1O', 'modified': '[H]OC([H])(C)[C@]1([H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]([H])(O[H])[C@]([H])(O[H])[C@]1([H])O[H]'}, {'original': 'OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O', 'modified': '[H]OC([H])([H])[C@@]1([H])N([H])[C@]([H])(C([H])([H])O[H])[C@]([H])(O[H])[C@@](C)(O[H])[C@]1([H])O[H]'}, {'original': 'OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O', 'modified': '[H]OC([H])([H])[C@@]1([H])N([H])[C@@]([H])(C([H])([H])OC)[C@]([H])(O[H])[C@@]([H])(O[H])[C@]1([H])O[H]'}, {'original': 'CN1[C@H](CO)[C@H](O)[C@@H](O)[C@H](O)[C@H]1C

[1m[95m# Agent:[00m [1m[92m3D Structure Generator[00m
[95m## Task:[00m [92mTake a list of orginal and modified smiles.Generate 3D structures for the modified ligands using RDKit, including energy minimization and conformer embedding.[00m




[1m[95m# Agent:[00m [1m[92m3D Structure Generator[00m
[95m## Thought:[00m [92mThought: I will call the Structure Generation Tool using the modified SMILES from the provided list to generate 3D-optimized molecules with MolBlock data.[00m
[95m## Using tool:[00m [92mStructure Generation Tool[00m
[95m## Tool Input:[00m [92m
"{\"molecules_list\": [{\"smiles\": \"[H]OC1([H])[C@]([H])(O[H])[C@@]([H])(C([H])([H])O[H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]1([H])OC\"}, {\"smiles\": \"[H]OC([H])([H])C1([H])N([H])C([H])(C([H])([H])O[H])[C@@]([H])(O[H])C(C)(O[H])C1([H])O[H]\"}, {\"smiles\": \"[H]OC([H])(C)[C@]1([H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]([H])(O[H])[C@]([H])(O[H])[C@]1([H])O[H]\"}, {\"smiles\": \"[H]OC([H])([H])[C@@]1([H])N([H])[C@]([H])(C([H])([H])O[H])[C@]([H])(O[H])[C@@](C)(O[H])[C@]1([H])O[H]\"}, {\"smiles\": \"[H]OC([H])([H])[C@@]1([H])N([H])[C@@]([H])(C([H])([H])OC)[C@]([H])(O[H])[C@@]([H])(O[H])[C@]1([H])O[H]\"}, {\"smiles\": \"[H]OC([H])(C)[C@@]1([H])N(C([H])

Processing molecule:
{'original': 'OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O', 'modified': '[H]OC1([H])[C@]([H])(O[H])[C@@]([H])(C([H])([H])O[H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]1([H])OC'}
Processing molecule:
{'original': 'OCC1NC(CO)[C@@H](O)C(O)C1O', 'modified': '[H]OC([H])([H])C1([H])N([H])C([H])(C([H])([H])O[H])[C@@]([H])(O[H])C(C)(O[H])C1([H])O[H]'}
Processing molecule:
{'original': 'OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@@H]1O', 'modified': '[H]OC([H])(C)[C@]1([H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]([H])(O[H])[C@]([H])(O[H])[C@]1([H])O[H]'}
Processing molecule:
{'original': 'OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O', 'modified': '[H]OC([H])([H])[C@@]1([H])N([H])[C@]([H])(C([H])([H])O[H])[C@]([H])(O[H])[C@@](C)(O[H])[C@]1([H])O[H]'}
Processing molecule:
{'original': 'OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O', 'modified': '[H]OC([H])([H])[C@@]1([H])N([H])[C@@]([H])(C([H])([H])OC)[C@]([H])(O[H])[C@@]([H])(O[H])[C@]1([H])O[H]'}
Processing molecule:
{'original': 'CN1[C@H](CO)



[1m[95m# Agent:[00m [1m[92m3D Structure Generator[00m
[95m## Final Answer:[00m [92m
[{'original': 'OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O', 'modified': '[H]OC1([H])[C@]([H])(O[H])[C@@]([H])(C([H])([H])O[H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]1([H])OC', 'mol_block': '\n     RDKit          3D\n\n 31 31  0  0  0  0  0  0  0  0999 V2000\n   -0.4401   -0.3563    2.3284 O   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.4281   -1.0261    1.0919 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.0286   -1.2895    0.6618 C   0  0  2  0  0  0  0  0  0  0  0  0\n    1.6899   -2.1235    1.5824 O   0  0  0  0  0  0  0  0  0  0  0  0\n    1.8029    0.0470    0.4481 C   0  0  1  0  0  0  0  0  0  0  0  0\n    2.9904   -0.1525   -0.5072 C   0  0  0  0  0  0  0  0  0  0  0  0\n    3.7240    1.0356   -0.6364 O   0  0  0  0  0  0  0  0  0  0  0  0\n    0.9384    1.1420   -0.0491 N   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.2006    0.6551   -0.8471 C   0  0  2  0  0  0  0  0  0  0  0  0\n   -0.9052  

[1m[95m# Agent:[00m [1m[92mMolecule Docking Preparation Agent[00m
[95m## Task:[00m [92mConvert ligands and receptor structures to .pdbqt format using AutoDockTools. Use ligands from the structure generation task and receptors from the enrichment task.ligands is the list of dictionaries contains keys like original,modified and mol_block , Pass entire list as input.receptors is the list of dictornaries contains keys like pubchem_compounds,chembl_bioactivities,pdb_structures, pass only pdb_structures as input.[00m


************************************************************
Recep : [{'pdb_id': '1A1U', 'file_path': 'pdb_files/1A1U.pdb'}, {'pdb_id': '1AIE', 'file_path': 'pdb_files/1AIE.pdb'}, {'pdb_id': '1C26', 'file_path': 'pdb_files/1C26.pdb'}, {'pdb_id': '1DT7', 'file_path': 'pdb_files/1DT7.pdb'}, {'pdb_id': '1GZH', 'file_path': 'pdb_files/1GZH.pdb'}]
########################################
Ligands :  [{'original': 'OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O', 'modified': '[H]OC1([H])[C@]([H])(O[H])[C@@]([H])(C([H])([H])O[H])N([H])[C@]([H])(C([H])([H])O[H])[C@@]1([H])OC', 'mol_block': '\n     RDKit          3D\n\n 31 31  0  0  0  0  0  0  0  0999 V2000\n   -0.4401   -0.3563    2.3284 O   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.4281   -1.0261    1.0919 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.0286   -1.2895    0.6618 C   0  0  2  0  0  0  0  0  0  0  0  0\n    1.6899   -2.1235    1.5824 O   0  0  0  0  0  0  0  0  0  0  0  0\n    1.8029    0.0470    0.4481 C   0  0  1  0  0  0  0  0  0  0  

1 molecule converted
Invalid bond specification

0 molecules converted
Traceback (most recent call last):
  File "/home/abinash/anaconda3/envs/py3112/bin/prepare_ligand4", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/abinash/anaconda3/envs/py3112/lib/python3.11/site-packages/AutoDockTools/Utilities24/prepare_ligand4.py", line 189, in main
    mol = mols[0]
          ~~~~^^^
TypeError: 'NoneType' object is not subscriptable


Command '['prepare_ligand4', '-l', 'ligand_pdbqt/OCC1NC(CO)[C@@H](O)C(O)C1O.pdb', '-o', 'ligand_pdbqt/OCC1NC(CO)[C@@H](O)C(O)C1O.pdbqt', '-A', 'hydrogens']' returned non-zero exit status 1.


1 molecule converted


Sorry, there are no Gasteiger parameters available for atom OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O


1 molecule converted


Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O


M  END
Invalid bond specification, atom numbers or bond order are wrong;
each should be in a field of three characters.

0 molecules converted
Traceback (most recent call last):
  File "/home/abinash/anaconda3/envs/py3112/bin/prepare_ligand4", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/abinash/anaconda3/envs/py3112/lib/python3.11/site-packages/AutoDockTools/Utilities24/prepare_ligand4.py", line 189, in main
    mol = mols[0]
          ~~~~^^^
TypeError: 'NoneType' object is not subscriptable


Command '['prepare_ligand4', '-l', 'ligand_pdbqt/CN1[C@H](CO)[C@H](O)[C@@H](O)[C@H](O)[C@H]1CO.pdb', '-o', 'ligand_pdbqt/CN1[C@H](CO)[C@H](O)[C@@H](O)[C@H](O)[C@H]1CO.pdbqt', '-A', 'hydrogens']' returned non-zero exit status 1.


1 molecule converted


Unable to assign HAD type to atom N
Unable to assign valence to atom CN1[C@H](CO)[C@@H](O)[C@H](O)[C@H]1CO: :UNL1:N type = N
Unable to assign MAP type to atom N
Sorry, there are no Gasteiger parameters available for atom CN1[C@H](CO)[C@@H](O)[C@H](O)[C@H]1CO: :UNL1:N
Sorry, there are no Gasteiger parameters available for atom CN1[C@H](CO)[C@@H](O)[C@H](O)[C@H]1CO: :UNL1:O
Unable to assign XYZ type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign XYZ type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign XYZ type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to assign HYB type to atom N
Unable to ass

1 molecule converted


Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O
Sorry, there are no Gasteiger parameters available for atom OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O: :UNL1:O


[1m[95m# Agent:[00m [1m[92mMolecule Docking Preparation Agent[00m
[95m## Thought:[00m [92mThought: I will now convert the ligands (using the full list with all keys including "mol_block") and the receptors (using only the pdb_structures list from the enrichment task) to .pdbqt format via the PDBQT Conversion Tool.[00m
[95m## Using tool:[00m [92mPDBQT Conversion Tool[00m
[95m## Tool Input:[00m [92m
"{\"pdb_structures\": [{\"pdb_id\": \"1A1U\", \"file_path\": \"pdb_files/1A1U.pdb\"}, {\"pdb_id\": \"1AIE\", \"file_path\": \"pdb_fi



[1m[95m# Agent:[00m [1m[92mMolecule Docking Preparation Agent[00m
[95m## Final Answer:[00m [92m
{
  "receptors": [
    {
      "pdb_id": "1A1U",
      "pdb_file": "pdb_files/1A1U.pdb",
      "pdbqt_file": "receptor_pdbqt/1A1U.pdbqt"
    },
    {
      "pdb_id": "1AIE",
      "pdb_file": "pdb_files/1AIE.pdb",
      "pdbqt_file": "receptor_pdbqt/1AIE.pdbqt"
    },
    {
      "pdb_id": "1C26",
      "pdb_file": "pdb_files/1C26.pdb",
      "pdbqt_file": "receptor_pdbqt/1C26.pdbqt"
    },
    {
      "pdb_id": "1DT7",
      "pdb_file": "pdb_files/1DT7.pdb",
      "pdbqt_file": "receptor_pdbqt/1DT7.pdbqt"
    },
    {
      "pdb_id": "1GZH",
      "pdb_file": "pdb_files/1GZH.pdb",
      "pdbqt_file": "receptor_pdbqt/1GZH.pdbqt"
    }
  ],
  "ligands": [
    {
      "ligand_id": "OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O",
      "mol_file": "ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O.mol",
      "pdb_file": "ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O.pdb",
      

[1m[95m# Agent:[00m [1m[92mMolecular Docking Specialist[00m
[95m## Task:[00m [92mExecute molecular docking simulations using AutoDock Vina.[00m


Receptors : : : : ['receptor_pdbqt/1A1U.pdbqt', 'receptor_pdbqt/1AIE.pdbqt', 'receptor_pdbqt/1C26.pdbqt', 'receptor_pdbqt/1DT7.pdbqt', 'receptor_pdbqt/1GZH.pdbqt']
ligands : : : : ['ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O.pdbqt', 'ligand_pdbqt/OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt', 'ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt', 'ligand_pdbqt/CN1[C@H](CO)[C@@H](O)[C@H](O)[C@H]1CO.pdbqt', 'ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt']
AutoDock Vina 52ec525-mod
#################################################################
# If you used AutoDock Vina in your work, please cite:          #
#                                                               #
# J. Eberhardt, D. Santos-Martins, A. F. Tillack, and S. Forli  #
# AutoDock Vina 1.2.0: New Docking Methods, Expanded Force      #
# Field, and Python Bindings, J. Chem. Inf. Model. (2021)       #
# DOI 10.1021/acs.jcim.1c00203                                  #
#            



PDBQT parsing error: Coordinate "4  13.87" is not valid.
 > ATOM    131  HH1A1ARG A 335      12.581  18.714  13.873  1.00  0.00     0.174 HD


PDBQT parsing error: Coordinate "4  13.87" is not valid.
 > ATOM    131  HH1A1ARG A 335      12.581  18.714  13.873  1.00  0.00     0.174 HD


PDBQT parsing error: Coordinate "4  13.87" is not valid.
 > ATOM    131  HH1A1ARG A 335      12.581  18.714  13.873  1.00  0.00     0.174 HD


PDBQT parsing error: Coordinate "4  13.87" is not valid.
 > ATOM    131  HH1A1ARG A 335      12.581  18.714  13.873  1.00  0.00     0.174 HD


PDBQT parsing error: Coordinate "4  13.87" is not valid.
 > ATOM    131  HH1A1ARG A 335      12.581  18.714  13.873  1.00  0.00     0.174 HD


AutoDock Vina 52ec525-mod
#################################################################
# If you used AutoDock Vina in your work, please cite:          #
#                                                               #
# J. Eberhardt, D. Santos-Martins, A. F. Tillack, and S. Forli  #
# AutoDock Vina 1.2.0: New Docking Methods, Expanded Force      #
# Field, and Python Bindings, J. Chem. Inf. Model. (2021)       #
# DOI 10.1021/acs.jcim.1c00203                                  #
#                                                               #
# O. Trott, A. J. Olson,                                        #
# AutoDock Vina: improving the speed and accuracy of docking    #
# with a new scoring function, efficient optimization and       #
# multithreading, J. Comp. Chem. (2010)                         #
# DOI 10.1002/jcc.21334                                         #
#                                                               #
# Please see https://github.com/ccsb-scripps/AutoD



[1m[95m# Agent:[00m [1m[92mMolecular Docking Specialist[00m
[95m## Final Answer:[00m [92m
{"docking_results": [{"receptor": "receptor_pdbqt/1A1U.pdbqt", "ligand": "ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O.pdbqt", "output_pdbqt": "docking_results/1A1U_OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O_out.pdbqt"}, {"receptor": "receptor_pdbqt/1A1U.pdbqt", "ligand": "ligand_pdbqt/OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt", "output_pdbqt": "docking_results/1A1U_OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O_out.pdbqt"}, {"receptor": "receptor_pdbqt/1A1U.pdbqt", "ligand": "ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt", "output_pdbqt": "docking_results/1A1U_OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O_out.pdbqt"}, {"receptor": "receptor_pdbqt/1A1U.pdbqt", "ligand": "ligand_pdbqt/CN1[C@H](CO)[C@@H](O)[C@H](O)[C@H]1CO.pdbqt", "output_pdbqt": "docking_results/1A1U_CN1[C@H](CO)[C@@H](O)[C@H](O)[C@H]1CO_out.pdbqt"}, {"receptor": "receptor_pdbqt/1A1U.pdbqt", "ligan

[1m[95m# Agent:[00m [1m[92mReceptor Evaluation Specialist[00m
[95m## Task:[00m [92mRank receptors based on their binding affinity with ligands extracted from docking output PDBQT files.[00m




[1m[95m# Agent:[00m [1m[92mReceptor Evaluation Specialist[00m
[95m## Thought:[00m [92mThought: I will use the Receptor Ranking Tool to rank the receptors based on their binding affinity from the provided docking results.[00m
[95m## Using tool:[00m [92mReceptor Ranking Tool[00m
[95m## Tool Input:[00m [92m
"{\"docking_results\": [{\"receptor\": \"receptor_pdbqt/1A1U.pdbqt\", \"ligand\": \"ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O.pdbqt\", \"output_pdbqt\": \"docking_results/1A1U_OC[C@H]1N[C@H](CO)[C@@H](O)C(O)[C@@H]1O_out.pdbqt\"}, {\"receptor\": \"receptor_pdbqt/1A1U.pdbqt\", \"ligand\": \"ligand_pdbqt/OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt\", \"output_pdbqt\": \"docking_results/1A1U_OC[C@@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O_out.pdbqt\"}, {\"receptor\": \"receptor_pdbqt/1A1U.pdbqt\", \"ligand\": \"ligand_pdbqt/OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O.pdbqt\", \"output_pdbqt\": \"docking_results/1A1U_OC[C@H]1N[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O_



[1m[95m# Agent:[00m [1m[92mReceptor Evaluation Specialist[00m
[95m## Thought:[00m [92mThought: The ranking of the receptors should be based on the binding affinity values from the docking outputs. Lower (more negative) binding affinity values indicate stronger binding. The output shows that receptors from the file 1DT7 have the most favorable (i.e., most negative) binding affinities followed by 1GZH, while both 1A1U and 1C26 show a binding affinity of 0.0. Thus, the receptors can be ranked as follows:
1. receptor_pdbqt/1DT7.pdbqt – Binding affinities: -4.906, -4.795, -4.696, -4.696, -4.692
2. receptor_pdbqt/1GZH.pdbqt – Binding affinities: -1.386, -1.325, -1.325, -1.23, -1.197
3. receptor_pdbqt/1A1U.pdbqt – Binding affinities: 0.0 (all entries)
4. receptor_pdbqt/1C26.pdbqt – Binding affinities: 0.0 (all entries)[00m
[95m## Using tool:[00m [92m[00m
[95m## Tool Input:[00m [92m
"{}"[00m
[95m## Tool Output:[00m [92m
I encountered an error: I forgot the Action name, t


=== Final Results ===
Based on the docking results, the receptors are ranked from best to worst binding affinity (more negative values indicate stronger binding) as follows:
1. receptor_pdbqt/1DT7.pdbqt – Binding affinities range from -4.906 to -4.692, indicating the strongest binding.
2. receptor_pdbqt/1GZH.pdbqt – Binding affinities range from -1.386 to -1.197, showing moderate binding.
3. receptor_pdbqt/1A1U.pdbqt – All entries have a binding affinity of 0.0, indicating no effective binding.
4. receptor_pdbqt/1C26.pdbqt – All entries have a binding affinity of 0.0, indicating no effective binding.


In [None]:
from pydantic import BaseModel
from typing import List, Dict
from rdkit import Chem
from rdkit.Chem import AllChem

# Define the schema for molecules
class MoleculeStructure(BaseModel):
    original: str  # original SMILES
    modified: str  # modified SMILES

# Define the schema for the tool's input
class StructureGenerationSchema(BaseModel):
    molecules_list: List[MoleculeStructure]  # List of MoleculeStructure

# Tool class for 3D structure generation
class StructureGenerationTool(BaseTool):
    name: str = "Structure Generation Tool"
    description: str = "Generate 3D structures from modified SMILES using RDKit."
    args_schema: type = StructureGenerationSchema

    # Run method to generate 3D structures
    def _run(self, molecules_list: List[Dict[str, str]]):
        molecules_3d = []
        # Iterate through the list of molecule data
        for mol_data in molecules_list:
            print("Processing molecule:")
            print(mol_data)
            
            smiles = mol_data.get("modified")
            if not smiles:
                continue

            # Convert SMILES to RDKit molecule object
            mol = Chem.MolFromSmiles(smiles)
            if not mol:
                print(f"Invalid SMILES: {smiles}")
                continue

            # Add hydrogens and perform 3D embedding
            mol = Chem.AddHs(mol)
            try:
                success = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
                if success == 0:
                    AllChem.UFFOptimizeMolecule(mol)
                    mol_block = Chem.MolToMolBlock(mol)
                    molecules_3d.append({
                        "original": mol_data.get("original"),
                        "modified": smiles,
                        "mol_block": mol_block
                    })
                else:
                    print(f"3D embedding failed for {smiles}")
            except Exception as e:
                print(f"[3D Generation Error] for {smiles}: {e}")

        return molecules_3d
