## Click the badge below to run this notebook in Binder

[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/BaranziniLab/spoke_api_demo/main?labpath=connect_gene_using_spoke_api.ipynb)


In [None]:
import requests
import pandas as pd


## Custom functions for the API calls

In [2]:
def get_api_resp(END_POINT, params=None, timeout=30):
    """Send a GET request to ``BASE_URI + END_POINT`` and return the response.

    Parameters
    ----------
    END_POINT : str
        Path appended verbatim to the module-level ``BASE_URI``.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``. ``None`` or an
        empty dict sends the request without a query string.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` can block forever on a stalled
        connection, which would hang the notebook kernel.

    Returns
    -------
    requests.Response
        The raw response; the HTTP status is not checked here.

    Raises
    ------
    requests.exceptions.RequestException
        On connection failure or when ``timeout`` expires.
    """
    URI = BASE_URI + END_POINT
    # ``params=None`` is already the requests default, so no branching is needed.
    return requests.get(URI, params=params or None, timeout=timeout)
    
    

def get_phenotype_nbrs(gene_name, params=None):
    """Fetch the Disease and Symptom neighbors of a gene together with their edge data.

    The neighborhood response is expected to be a JSON list whose items each
    carry a ``'data'`` dict: node items expose ``'id'``, ``'neo4j_type'`` and
    ``'properties'['name']``; edge items expose ``'source'``, ``'target'`` and
    ``'properties'``.  (Schema inferred from the keys read below -- confirm
    against the SPOKE API documentation.)

    Parameters
    ----------
    gene_name : str
        Value looked up through the module-level ``NODE_TYPE`` / ``ATTRIBUTE``.
    params : dict, optional
        Query-string parameters forwarded to ``get_api_resp``.

    Returns
    -------
    list
        NumPy records ``(phenotype_name, properties)``, one per edge that
        touches a Disease or Symptom neighbor; diseases come first, then
        symptoms.  Empty when the response contains no such neighbors.

    Raises
    ------
    ValueError
        If the decoded response is not a JSON list.
    """
    # NBR_END_POINT may carry a trailing '/', which would otherwise yield a
    # '//' in the request path once joined with the format string.
    END_POINT = '{}/{}/{}/{}'.format(NBR_END_POINT.rstrip('/'), NODE_TYPE, ATTRIBUTE, gene_name)
    result = get_api_resp(END_POINT, params=params)
    neighbors = result.json()
    if not isinstance(neighbors, list):
        raise ValueError(
            'Unexpected response for {!r}: expected a JSON list, got {}'.format(
                gene_name, type(neighbors).__name__))

    # Collect (id, name) pairs in a single pass.  Keeping each pair together
    # guarantees ids and names can never drift out of alignment, which could
    # happen when the two lists were filtered independently by truthiness.
    nodes_by_type = {'Disease': [], 'Symptom': []}
    for item in neighbors:
        data = item['data']
        bucket = nodes_by_type.get(data.get('neo4j_type'))
        if bucket is None or data.get('id') is None:
            continue
        bucket.append((data['id'], data['properties']['name']))
    phenotype_rows = nodes_by_type['Disease'] + nodes_by_type['Symptom']
    phenotype_df = pd.DataFrame(phenotype_rows, columns=['phenotype_id', 'phenotype_name'])

    # Set membership keeps the edge scan linear in the number of items.
    phenotype_ids = {node_id for node_id, _ in phenotype_rows}

    nbr_edge_list = []
    for item in neighbors:
        data = item['data']
        try:
            if data['source'] in phenotype_ids:
                endpoint = data['source']
            elif data['target'] in phenotype_ids:
                endpoint = data['target']
            else:
                continue
            nbr_edge_list.append((endpoint, data['properties']))
        except (KeyError, TypeError):
            # KeyError: node items have no 'source'/'target' (or no 'properties').
            # TypeError: an unhashable endpoint value cannot be a phenotype id.
            continue

    nbr_edge_df = pd.DataFrame(nbr_edge_list, columns=['phenotype_id', 'properties'])
    # Inner join attaches each edge's properties to the phenotype's name.
    phenotype_nbr_df = pd.merge(phenotype_df, nbr_edge_df, on='phenotype_id').drop('phenotype_id', axis=1)
    return list(phenotype_nbr_df.to_records(index=False))


## API parameters

In [3]:
# Root URL of the SPOKE service and the path of its neighborhood endpoint.
BASE_URI = 'https://spoke.rbvi.ucsf.edu'
NBR_END_POINT = '/api/v1/neighborhood/'

# Node type and attribute used to address the query node in the request path.
NODE_TYPE, ATTRIBUTE = 'Gene', 'name'

# Query-string parameters passed along with the neighborhood request.
# NOTE(review): the keys look like Disease-associates-Gene edge filters
# (accepted evidence sources and a text-mining cutoff) -- confirm against the
# SPOKE API documentation.
API_PARAMS = dict(
    cutoff_DaG_diseases_sources=['knowledge', 'experiments', 'textmining'],
    cutoff_DaG_textmining=3,
)



## Enter a gene name to get its associated phenotypes


In [6]:
%%time

# Gene to look up. 'hello' is a placeholder: the stored output of this
# notebook shows it is not found in SPOKE, so replace it with a real gene name.
gene_name = 'hello'

# List of (phenotype_name, edge_properties) records; empty when nothing matches.
phenotype_nbrs = get_phenotype_nbrs(gene_name, params=API_PARAMS)



CPU times: user 36.2 ms, sys: 7.91 ms, total: 44.1 ms
Wall time: 168 ms


## Printing the extracted neighbors as Markdown

In [14]:
from IPython.display import Markdown

# Assemble the report as a list of fragments and join once at the end.
if len(phenotype_nbrs) == 0:
    # Nothing came back for this gene name: show the "not found" notice.
    fragments = [
        "### Selected Gene name not found in SPOKE graph!\n",
        "### Either there will be a typo in the given Gene name or the Gene has a different name in SPOKE",
    ]
else:
    fragments = ["### Phenotypes associated with the gene {} in SPOKE:\n\n".format(gene_name)]
    for rank, (phenotype, data) in enumerate(phenotype_nbrs, start=1):
        # One numbered entry per phenotype, followed by its edge properties.
        fragments.append(f"{rank}. **{phenotype}**\n")
        for key, value in data.items():
            # List-valued properties are flattened to a comma-separated string.
            shown = ', '.join(map(str, value)) if isinstance(value, list) else value
            fragments.append(f"   - {key.capitalize()}: {shown}\n")
        fragments.append("\n")

markdown_output = "".join(fragments)
display(Markdown(markdown_output))


### Selected Gene name not found in SPOKE graph!
### Either there will be a typo in the given Gene name or the Gene has a different name in SPOKE