In [13]:
## Definition of the Origene SCP server, including basic operations such as connect, disconnect, list_tools, and parse_result.

import asyncio
import json
from mcp.client.streamable_http import streamablehttp_client
from mcp import ClientSession
import gseapy as gp
import mygene
import requests

def ensemblID2Symbol(id_list: list[str])-> list:
    """
    A function to convert Ensembl gene ids to gene symbols.

    Parameters
    ----------
    id_list:
        A list containing ensembl ids. e.g. ['ENSG00000139618', 'ENSG00000225972', 'ENSG00000186092']

    Return
    ------
        gene symbols for the query gene ids. e.g. ['BRCA2', 'MTND1P23', 'OR4F5']
    """
    mg = mygene.MyGeneInfo()
    symbols = mg.querymany(id_list, scopes='ensembl.gene', fields='symbol', species='human')
    symbols = [i["symbol"] for i in symbols]
    return symbols

def gene_set_enrichment(gene_list: list[str]) -> list:
    """
    A function to do pathway enrichment analysis for the provided gene list.

    Parameters
    ----------
    gene_list:
        A list containing gene symbols. e.g. ['BRCA1', 'TP53', 'GAPDH', 'IL6', 'TNF']

    Return
    ------
        Significant pathway terms such as ['Regulation of lipolysis in adipocytes', 'Arachidonic acid metabolism', ...]
    """
    enr = gp.enrichr(gene_list = gene_list, gene_sets='KEGG_2019_Human')
    results = enr.results
    significant = results[results['Adjusted P-value'] < 0.05]
    return significant.Term.tolist()

def get_protein_ids_for_gene(ensembl_gene_id: str) -> list:
    """
    A function to fetche Ensembl Protein IDs for a given Ensembl Gene ID using the Ensembl REST API.
    Handles the one-to-many relationship between a gene and its protein products.

    Parameters
    ----------
    enseml_gene_id:
        A string representing the Ensembl gene id. e.g. 'ENSG00000157764'

    Return
    ------
        Ensembl protein ID for the query gene. e.g. ['ENSP00000493543', 'ENSP00000288602', 'ENSP00000493678', ...]
    """
    server = "https://rest.ensembl.org"
    # The 'expand=1' option is crucial to get transcript and translation information
    ext = f"/lookup/id/{ensembl_gene_id}?expand=1"
    
    headers = {"Content-Type": "application/json"}
    
    try:
        r = requests.get(server + ext, headers=headers)
        r.raise_for_status()  # This will raise an HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error for {ensembl_gene_id}: {err}")
        return []
    except requests.exceptions.RequestException as err:
        print(f"Request Error for {ensembl_gene_id}: {err}")
        return []

    decoded = r.json()
    protein_ids = []
    
    # A gene can have multiple transcripts, and each transcript can have a translation
    if 'Transcript' in decoded:
        for transcript in decoded['Transcript']:
            if 'Translation' in transcript:
                protein_id = transcript['Translation'].get('id')
                if protein_id:
                    protein_ids.append(protein_id)
                    
    return list(set(protein_ids)) # Use set to ensure unique IDs

class OrigeneClient:    
    def __init__(self, server_url: str):
        self.server_url = server_url
        self.session = None
        
    async def connect(self):
        print(f"server url: {self.server_url}")
        
        try:
            # 建立streamable-http传输连接
            self.transport = streamablehttp_client(
                url=self.server_url,
                headers={"SCP-HUB-API-KEY": "sk-a0033dde-b3cd-413b-adbe-980bc78d6126"}
            )
            self.read, self.write, self.get_session_id = await self.transport.__aenter__()
            
            # 创建客户端会话
            self.session_ctx = ClientSession(self.read, self.write)
            self.session = await self.session_ctx.__aenter__()
            
            # 初始化会话
            await self.session.initialize()
            session_id = self.get_session_id()
            
            print(f"✓ connect success")
            return True
            
        except Exception as e:
            print(f"✗ connect failure: {e}")
            import traceback
            traceback.print_exc()
            return False
    
    async def disconnect(self):
        try:
            if self.session:
                await self.session_ctx.__aexit__(None, None, None)
            if hasattr(self, 'transport'):
                await self.transport.__aexit__(None, None, None)
            print("✓ already disconnect")
        except Exception as e:
            print(f"✗ disconnect error: {e}")

    async def list_tools(self):    
        try:
            tools_list = await self.session.list_tools()
            print(f"tool count: {len(tools_list.tools)}")
            
            for i, tool in enumerate(tools_list.tools, 1):
                print(f"{i:2d}. {tool.name}")
                if tool.description:
                    # 只显示描述的第一行
                    desc_line = tool.description.split('\n')[0]
                    print(f"    {desc_line}")
            
            print(f"✓ Get tool list success")
            return tools_list.tools
            
        except Exception as e:
            print(f"✗ Get tool list fail: {e}")
            return []
    
    def parse_result(self, result):
        if isinstance(result, dict):
            content_list = result.get("content") or []
        else:
            content_list = getattr(result, "content", []) or []

        texts = []
        for item in content_list:
            if isinstance(item, dict):
                if item.get("type") == "text":
                    texts.append(item.get("text") or "")
            else:
                if getattr(item, "type", None) == "text":
                    texts.append(getattr(item, "text", "") or "")
        return "".join(texts)

In [None]:
'''
Example 1 - Drug to Target Pathways 
This workflow identifies the biological pathways affected by a drug's targets, aiding in understanding its mechanism of action.
''' 

import re

async def main():
    SERVER_URL = "https://scp.intern-ai.org.cn/api/v1/mcp/15/Origene-OpenTargets"
    client = OrigeneClient(SERVER_URL)
    if not await client.connect():
        print("connection failed")
        return
    
    input_chemblId = "CHEMBL25" # Example Chembl ID for testing
    
    # step 1: Use get_associated_targets_by_drug_chemblId (Open Targets) to retrieve Ensembl IDs of targets for a given drug ChEMBL ID.
    result = await client.session.call_tool(
        "get_associated_targets_by_drug_chemblId",
        arguments={
            "chemblId": input_chemblId
        }
    )
    
    result_data = client.parse_result(result)    
    step_1_output_pattern = re.compile("ENSG\d+")
    step_1_output = [*step_1_output_pattern.finditer(result_data)]
    step_1_output = [i.group() for i in step_1_output]

    print (step_1_output)

    # step 2: Use ensemblID2Symbol to map the Ensembl IDs to gene symbols.
    step_2_input = ensemblID2Symbol(step_1_output)
    
    # step 3: Use gene_set_enrichment to retrieve significant pathways.
    step_2_result = gene_set_enrichment(step_2_input)
    
    print (step_2_result)

    await client.disconnect()
    

if __name__ == '__main__':
    await main()

  step_1_output_pattern = re.compile("ENSG\d+")


server url: https://scp.intern-ai.org.cn/api/v1/mcp/15/Origene-OpenTargets
✓ connect success


Input sequence provided is already in string format. No operation performed
Input sequence provided is already in string format. No operation performed


['ENSG00000073756', 'ENSG00000095303']
['Regulation of lipolysis in adipocytes', 'Arachidonic acid metabolism', 'Serotonergic synapse', 'Ovarian steroidogenesis', 'VEGF signaling pathway', 'Leishmaniasis', 'Chemical carcinogenesis', 'IL-17 signaling pathway', 'Small cell lung cancer', 'NF-kappa B signaling pathway', 'C-type lectin receptor signaling pathway', 'TNF signaling pathway', 'Platelet activation', 'Retrograde endocannabinoid signaling', 'Oxytocin signaling pathway', 'Kaposi sarcoma-associated herpesvirus infection', 'Human cytomegalovirus infection', 'MicroRNAs in cancer', 'Human papillomavirus infection']
✓ already disconnect


In [None]:
'''
Example 2 - Protein to Homologues and Interactions
This workflow explores evolutionary relationships and interaction networks, useful for comparative genomics and functional studies.
''' 

import re

async def main():
    client_1 = OrigeneClient("https://scp.intern-ai.org.cn/api/v1/mcp/15/Origene-OpenTargets")
    if not await client_1.connect():
        print("connection failed")
        return
    
    client_2 = OrigeneClient("https://scp.intern-ai.org.cn/api/v1/mcp/6/Origene-STRING")
    if not await client_2.connect():
        print("connection failed")
        return
    
    input_ensemblId = "ENSG00000105851" # Example Ensembl ID for testing
    
    # step 1: Use get_target_homologues_by_ensemblID (Ensembl or Open Targets) to find homologous proteins for a given target Ensembl ID, potentially across species.
    result = await client_1.session.call_tool(
        "get_target_homologues_by_ensemblID",
        arguments={
            "ensemblId": input_ensemblId
        }
    )

    result_data = client_1.parse_result(result)    
    step_1_output_pattern = re.compile("ENSG\d+")
    step_1_output = [*step_1_output_pattern.finditer(result_data)]
    step_1_output = [i.group() for i in step_1_output]

    print (step_1_output)
    
    # step 2: Use get_protein_ids_for_gene to get protein IDs for each gene
    if len(step_1_output) > 1:
        step_2_input = list()
        for gene_id in step_1_output:
            step_2_input.extend(get_protein_ids_for_gene(gene_id))
    else:
        step_2_input = get_protein_ids_for_gene(step_1_output)
    
    print (step_2_input)
    
    # step 3: Use get_ppi_enrichment (STRING) to retrieve the interaction network of those homologous proteins.
    result = await client_2.session.call_tool(
        "get_ppi_enrichment",
        arguments={
            "identifiers": step_2_input,
            "species": 9606
        }
    )
    
    step_2_result = client_2.parse_result(result)
    print (step_2_result)

    await client_2.disconnect()
    await client_1.disconnect()
    

if __name__ == '__main__':
    await main()

  step_1_output_pattern = re.compile("ENSG\d+")


server url: https://scp.intern-ai.org.cn/api/v1/mcp/15/Origene-OpenTargets
✓ connect success
server url: https://scp.intern-ai.org.cn/api/v1/mcp/6/Origene-STRING
✓ connect success
['ENSG00000105851', 'ENSG00000078142', 'ENSG00000241973', 'ENSG00000011405', 'ENSG00000121879', 'ENSG00000139144', 'ENSG00000143393', 'ENSG00000133056', 'ENSG00000051382', 'ENSG00000171608']
['ENSP00000521157', 'ENSP00000419260', 'ENSP00000623484', 'ENSP00000417623', 'ENSP00000392258', 'ENSP00000352121', 'ENSP00000528128', 'ENSP00000473061', 'ENSP00000528125', 'ENSP00000466946', 'ENSP00000528126', 'ENSP00000629809', 'ENSP00000471957', 'ENSP00000262039', 'ENSP00000381845', 'ENSP00000464909', 'ENSP00000528129', 'ENSP00000608659', 'ENSP00000528127', 'ENSP00000629810', 'ENSP00000465621', 'ENSP00000609469', 'ENSP00000627061', 'ENSP00000627062', 'ENSP00000550871', 'ENSP00000609472', 'ENSP00000550872', 'ENSP00000255882', 'ENSP00000402437', 'ENSP00000609468', 'ENSP00000609474', 'ENSP00000382162', 'ENSP00000627060', '

In [None]:
'''
Example 3 - Disease to Phenotype Details
This workflow provides a deeper understanding of the phenotypic manifestations of a disease, aiding in clinical and research applications.
''' 

import re

async def main():
    client_1 = OrigeneClient("https://scp.intern-ai.org.cn/api/v1/mcp/15/Origene-OpenTargets")
    if not await client_1.connect():
        print("connection failed")
        return
    
    client_2 = OrigeneClient("https://scp.intern-ai.org.cn/api/v1/mcp/16/Origene-Monarch")
    if not await client_2.connect():
        print("connection failed")
        return

    input_efoId = "EFO_0003767" # Example EFO ID for testing

    # step 1: Use get_associated_phenotypes_by_disease_efoId (Open Targets) to retrieve a list of HPO (Human Phenotype Ontology) IDs associated with a disease.
    result = await client_1.session.call_tool(
        "get_associated_phenotypes_by_disease_efoId",
        arguments={
            "efoId": input_efoId
        }
    )

    result_data = client_1.parse_result(result)    
    step_1_output_pattern = re.compile("HP_\d+")
    step_1_output = [*step_1_output_pattern.finditer(result_data)]
    step_1_output = [i.group() for i in step_1_output]
    step_1_output = [re.sub("_", ":", i) for i in step_1_output]

    print (step_1_output)
    
    # step 2: Use get_phenotype_by_HPO_ID (likely from HPO-related functions) to get detailed descriptions or annotations of those phenotypes.
    step_2_output = list()
    for id in step_1_output:
        result = await client_2.session.call_tool(
            "get_phenotype_by_HPO_ID",
            arguments={
                "id": id
            }
        )
        
        result_data = client_2.parse_result(result)    
        step_2_output.append(result_data)
    
    print (step_2_output)

    await client_2.disconnect()
    await client_1.disconnect()
    

if __name__ == '__main__':
    await main()

  step_1_output_pattern = re.compile("HP_\d+")


server url: https://scp.intern-ai.org.cn/api/v1/mcp/15/Origene-OpenTargets
✓ connect success
server url: https://scp.intern-ai.org.cn/api/v1/mcp/16/Origene-Monarch
✓ connect success
['HP:0100280', 'HP:0003829', 'HP:0100279', 'HP:0000006', 'HP:0002037', 'HP:0002037']
['{"id": "HP:0100280", "category": "biolink:PhenotypicFeature", "name": "Crohn\'s disease", "description": "A chronic granulomatous inflammatory disease of the intestines that may affect any part of the gastrointestinal tract from mouth to anus, causing a wide variety of symptoms. It primarily causes abdominal pain, diarrhea which may be bloody, vomiting, or weight loss, but may also cause complications outside of the gastrointestinal tract such as skin rashes, arthritis, inflammation of the eye, tiredness, and lack of concentration. Crohn\'s disease is thought to be an autoimmune disease, in which the body\'s immune system attacks the gastrointestinal tract, causing inflammation.", "xref": ["SNOMEDCT_US:34000006", "UMLS:C0