In [2]:
from searchapi import (
    text_search,
    sequence_search,
    structure_search,
    chemical_search,
    organism_search,
    method_search,
    high_quality_structures,
)
from pydantic import BaseModel

In [None]:
# One-sentence, human-readable summary of each search helper, keyed by the
# helper's function name as imported from `searchapi`.
# NOTE(review): no other cell visible in this notebook reads this mapping;
# presumably it is meant to feed LLM tool definitions -- confirm.
TOOL_DESCRIPTIONS = {
    "text_search": (
        "Search PDB entries using text queries like protein names or PDB IDs."
    ),
    "sequence_search": (
        "Find structures with similar amino acid or nucleotide sequences using BLAST-like algorithm."
    ),
    "structure_search": (
        "Find structures with similar 3D shapes using a reference PDB structure."
    ),
    "chemical_search": (
        "Find structures containing specific chemical compounds using SMILES or InChI identifiers."
    ),
    "organism_search": (
        "Find structures from a specific organism using its scientific name."
    ),
    "method_search": (
        "Find structures determined by a specific experimental method like X-ray or NMR."
    ),
    "high_quality_structures": (
        "Find high-quality X-ray structures based on resolution and R-work criteria."
    ),
}

In [16]:
# Quality-filtered search: cap both resolution and R-work, then show the
# result as the cell output.
quality_query = {
    "max_resolution": 2.0,  # upper bound on resolution, in Angstroms
    "max_r_work": 0.25,     # upper bound on the R-work value
    "limit": 10,            # return at most 10 results
    "timeout": 30,          # request timeout, in seconds
    "max_retries": 3,       # maximum number of retry attempts
}
high_quality_structures(**quality_query)

{'pdb_ids': ['3NIR',
  '5D8V',
  '1EJG',
  '3P4J',
  '5NW3',
  '1I0T',
  '7ATG',
  '1J8G',
  '1UCS',
  '3X2M'],
 'total_count': 95259,
 'scores': {'3NIR': 1.0,
  '5D8V': 1.0,
  '1EJG': 1.0,
  '3P4J': 1.0,
  '5NW3': 1.0,
  '1I0T': 1.0,
  '7ATG': 1.0,
  '1J8G': 1.0,
  '1UCS': 1.0,
  '3X2M': 1.0},
 'returned_count': 10}

In [15]:
# Search by experimental method; the result is shown as the cell output.
method_query = {
    # Experimental method name (e.g., "X-RAY DIFFRACTION", "ELECTRON MICROSCOPY", "NMR")
    "method": "X-RAY DIFFRACTION",
    "limit": 10,       # return at most 10 results
    "timeout": 30,     # request timeout, in seconds
    "max_retries": 3,  # maximum number of retry attempts
}
method_search(**method_query)

{'pdb_ids': ['100D',
  '101D',
  '101M',
  '102D',
  '102L',
  '102M',
  '103L',
  '103M',
  '104L',
  '104M'],
 'total_count': 195382,
 'scores': {'100D': 1.0,
  '101D': 1.0,
  '101M': 1.0,
  '102D': 1.0,
  '102L': 1.0,
  '102M': 1.0,
  '103L': 1.0,
  '103M': 1.0,
  '104L': 1.0,
  '104M': 1.0},
 'returned_count': 10}

In [14]:
# Look up structures whose source organism is Homo sapiens.
organism_query = {
    "organism": "Homo sapiens",  # scientific name of the organism
    "limit": 10,                 # return at most 10 results
}
organism_search(**organism_query)

{'pdb_ids': ['10GS',
  '11GS',
  '121P',
  '12CA',
  '12GS',
  '133L',
  '134L',
  '13GS',
  '14GS',
  '16GS'],
 'total_count': 75108,
 'scores': {'10GS': 1.0,
  '11GS': 1.0,
  '121P': 1.0,
  '12CA': 1.0,
  '12GS': 1.0,
  '133L': 1.0,
  '134L': 1.0,
  '13GS': 1.0,
  '14GS': 1.0,
  '16GS': 1.0},
 'returned_count': 10}

In [13]:
# Search for entries containing a given chemical compound, identified here
# by a SMILES string.
chemical_query = {
    "identifier": "CC(=O)O",         # SMILES for acetic acid
    "identifier_type": "SMILES",     # notation used by `identifier`
    "match_type": "graph-relaxed",   # flexible matching
    "limit": 10,                     # return at most 10 results
}
chemical_search(**chemical_query)

{'pdb_ids': ['19HC',
  '1A3I',
  '1A44',
  '1A7P',
  '1AA5',
  '1AGN',
  '1AOK',
  '1AQ0',
  '1AS4',
  '1ATG'],
 'total_count': 8376,
 'scores': {'19HC': 1.0,
  '1A3I': 1.0,
  '1A44': 1.0,
  '1A7P': 1.0,
  '1AA5': 1.0,
  '1AGN': 1.0,
  '1AOK': 1.0,
  '1AQ0': 1.0,
  '1AS4': 1.0,
  '1ATG': 1.0},
 'returned_count': 10}

In [11]:
# Shape-similarity search seeded with PDB entry 2PGH. In the recorded output
# the reference entry itself comes back first with a score of 1.0.
structure_query = {
    "pdb_id": "2PGH",         # reference entry to compare against
    "assembly_id": "1",       # passed as a string, not an int
    "match_type": "relaxed",
    "limit": 3,               # return at most 3 results
}
structure_search(**structure_query)

{'pdb_ids': ['2PGH', '1QPW', '1SDL'],
 'total_count': 1188,
 'scores': {'2PGH': 1.0,
  '1QPW': 0.8762649951856613,
  '1SDL': 0.8457025372810714},
 'returned_count': 3}

In [9]:
# Sequence-similarity search with a protein query sequence.
sequence_query = {
    "sequence": "VHLSAEEKEAVLGLWGKVNVDEVGGEALGRLLVVYPWTQRF",
    "sequence_type": "protein",
    "limit": 3,  # return at most 3 results
}
sequence_search(**sequence_query)

{'pdb_ids': ['1QPW', '2PGH', '4F4O'],
 'total_count': 448,
 'scores': {'1QPW': 1.0, '2PGH': 1.0, '4F4O': 1.0},
 'returned_count': 3}

In [None]:
# Function-calling smoke test against the OpenAI chat completions endpoint:
# advertise a single tool, send one user message, and print the tool calls
# found on the first choice of the response.
# NOTE(review): `get_weather` is unrelated to the searchapi helpers used in
# the rest of this notebook -- it appears to be a placeholder example.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top.
from openai import OpenAI

client = OpenAI()

# JSON Schema for the arguments of the tool: one required string, no extras.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "City and country e.g. Bogotá, Colombia",
        },
    },
    "required": ["location"],
    "additionalProperties": False,
}

# Tool definition in the shape passed to `tools=` below.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current temperature for a given location.",
        "parameters": weather_parameters,
        "strict": True,
    },
}
tools = [weather_tool]

completion = client.chat.completions.create(
    model="gpt-4.1",
    messages=[{"role": "user", "content": "What is the weather like in Paris today?"}],
    tools=tools,
)

print(completion.choices[0].message.tool_calls)

In [3]:
# Free-text search; the result is kept in `text_search_result` so that a
# later cell can display it.
text_query = {
    "query": "haptoglobin protein",
    "limit": 10,  # return at most 10 results
}
text_search_result = text_search(**text_query)

In [4]:
# Show the stored text-search result as this cell's output.
text_search_result

{'pdb_ids': ['4X0J',
  '4E40',
  '5EE2',
  '4WJG',
  '1CG5',
  '1CG8',
  '9FNM',
  '1C40',
  '1T1N',
  '3WR1'],
 'total_count': 508,
 'scores': {'4X0J': 1.0,
  '4E40': 0.9716970575828646,
  '5EE2': 0.9062692997790307,
  '4WJG': 0.8989484434576879,
  '1CG5': 0.8876781796127291,
  '1CG8': 0.8876781796127291,
  '9FNM': 0.8842872915091516,
  '1C40': 0.87768981091188,
  '1T1N': 0.877688299729093,
  '3WR1': 0.8776866511660527},
 'returned_count': 10}