# MELODI Presto API Example Usage

In [33]:
import json
import pandas as pd
import requests

## Configure parameters

In [34]:
# Base URL of the MELODI Presto API. Keep it free of a trailing slash:
# every endpoint path appended to it already starts with "/".
# Public instance: "https://melodi-presto.mrcieu.ac.uk/api"
API_URL = "http://localhost:8000/api"

# Ask the service for its status before running anything else
status_url = f"{API_URL}/status"
requests.get(status_url).json()

True

## Create functions for the endpoints

Each of the endpoints can be wrapped up in a function:

In [35]:
#enrich
def enrich(q):
    """Query the ``/enrich/`` endpoint and return the result as a DataFrame.

    Parameters
    ----------
    q : str
        Search term, sent to the API as the ``query`` field of a JSON body.

    Returns
    -------
    pandas.DataFrame or list
        The JSON response flattened with ``pd.json_normalize``. If the
        response cannot be decoded or flattened, ``'No data'`` is printed and
        an empty list is returned.
    """
    endpoint = "/enrich/"
    url = f"{API_URL}{endpoint}"
    params = {
        "query": q,
    }
    response = requests.post(url, data=json.dumps(params))
    try:
        return pd.json_normalize(response.json())
    except Exception:
        # Deliberately best-effort, but ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit still stop the notebook.
        print('No data')
        return []
    
#overlap
def overlap(q1,q2):
    """Query the ``/overlap/`` endpoint for two lists of terms.

    ``q1`` and ``q2`` are posted as the ``x`` and ``y`` fields of a JSON body.
    When the decoded response contains a ``data`` entry it is flattened with
    ``pd.json_normalize`` and returned; otherwise an empty DataFrame is
    returned.
    """
    route = "/overlap/"
    body = json.dumps({"x": q1, "y": q2})
    reply = requests.post(f"{API_URL}{route}", data=body).json()

    # Guard clause: nothing to flatten when the response carries no 'data'
    if 'data' not in reply:
        return pd.DataFrame()
    return pd.json_normalize(reply['data'])

def sentence(q):
    """Query the ``/sentence/`` endpoint for one PubMed ID.

    Parameters
    ----------
    q : str
        PubMed ID, sent to the API as the ``pmid`` field of a JSON body.

    Returns
    -------
    pandas.DataFrame
        The ``data`` entry of the response flattened with
        ``pd.json_normalize``, or an empty DataFrame when the response has no
        ``data`` entry (same convention as ``overlap``).
    """
    endpoint = "/sentence/"
    url = f"{API_URL}{endpoint}"
    params = {
        # Send the argument itself, not any notebook-level variable
        "pmid": q,
    }
    response = requests.post(url, data=json.dumps(params))
    res = response.json()
    if 'data' in res:
        pub_df = pd.json_normalize(res['data'])
    else:
        pub_df = pd.DataFrame()
    return pub_df

#### Some extra functions

It may also be useful to filter the overlap records to exclude any results derived from the same publication.

In [36]:
def pub_check(row):
    """Return True when the two sides of an overlap row share no publication.

    Parameters
    ----------
    row : mapping
        Must provide ``pmids_x`` and ``pmids_y``, each a string of
        whitespace-separated PubMed IDs.

    Returns
    -------
    bool
        ``False`` if at least one PubMed ID appears on both sides,
        ``True`` otherwise.
    """
    # split() with no argument drops empty tokens, so empty strings, trailing
    # spaces or double spaces cannot create a spurious shared '' entry.
    px = set(row['pmids_x'].split())
    py = set(row['pmids_y'].split())
    return px.isdisjoint(py)

### Get enriched SemMedDB objects for a given search term

Starting from a PubMed search, return all SemMedDB subject-predicate-object triples, their enrichment metrics and publication information.

In [29]:
# Free-text search term passed to enrich()
query_term='chronic kidney disease'

# enrich() returns a DataFrame on success, or prints 'No data' and returns [] on failure
enrich_df = enrich(query_term)
# Last expression of the cell, so the notebook displays it
enrich_df

Unnamed: 0,query,triple,subject_name,subject_type,subject_id,predicate,object_name,object_type,object_id,localCount,localTotal,globalCount,globalTotal,odds,pval,pmids
0,chronic_kidney_disease,Angiotensin-Converting Enzyme Inhibitors:INHIB...,Angiotensin-Converting Enzyme Inhibitors,phsu,C0003015,INHIBITS,Angiotensin Receptor,gngm,C0034787,115,20219,862,9775514,64.8648478030252,4.2344171589560276e-158,22565200 27706879 29602404 19417857 16157081 3...
1,chronic_kidney_disease,Calcium Channel Blockers:INHIBITS:Angiotensin ...,Calcium Channel Blockers,phsu,C0006684,INHIBITS,Angiotensin Receptor,gngm,C0034787,41,20219,325,9775514,61.11496130591581,1.476567983125179e-56,17215651 22565200 19847947 17398315 15938037 3...
2,chronic_kidney_disease,"N,N-dimethylarginine:INHIBITS:Nitric Oxide Syn...","N,N-dimethylarginine",aapp,C0067385,INHIBITS,Nitric Oxide Synthase,aapp,C0132555,39,20219,325,9775514,58.12798216055501,4.274615013951481e-53,21445101 22200421 24970872 18204093 16807406 2...
3,chronic_kidney_disease,Adrenergic beta-Antagonists:INHIBITS:Angiotens...,Adrenergic beta-Antagonists,phsu,C0001645,INHIBITS,Angiotensin Receptor,gngm,C0034787,29,20219,443,9775514,31.694059817736022,8.936924342716107e-33,22565200 29667759 12045788 28736557 24791185 1...
4,chronic_kidney_disease,paricalcitol:STIMULATES:Vitamin D3 Receptor,paricalcitol,orch,C0249582,STIMULATES,Vitamin D3 Receptor,gngm,C0108082,25,20219,36,9775514,336.16551395904173,5.955777868610629e-51,17115413 23258805 31035488 25363733 28835982 2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,chronic_kidney_disease,Ferritin:INTERACTS_WITH:Erythropoietin|EPO,Ferritin,aapp,C0015879,INTERACTS_WITH,Erythropoietin|EPO,horm,C0014822|2056,2,20219,3,9775514,322.35283837034837,4.242584106723107e-05,8142226
1965,chronic_kidney_disease,maxacalcitol:INTERACTS_WITH:Calcitriol,maxacalcitol,phsu,C0043872,INTERACTS_WITH,Calcitriol,horm,C0006674,2,20219,4,9775514,241.7646040460998,6.355124591272181e-05,7816252
1966,chronic_kidney_disease,Cytochrome P-450 CYP2D6:INTERACTS_WITH:Autoant...,Cytochrome P-450 CYP2D6,aapp,C0057223,INTERACTS_WITH,Autoantibodies,aapp,C0004358,2,20219,2,9775514,483.52930701884554,2.5490565171674514e-05,15679907 12144908
1967,chronic_kidney_disease,"NOS1 protein, human|NOS1|NANOS1:COEXISTS_WITH:...","NOS1 protein, human|NOS1|NANOS1",aapp,C0907533|4842|340719,COEXISTS_WITH,N-(2-mercaptoproprionyl)-glycine|MPG,gngm,C0671444|4350,2,20219,2,9775514,483.52930701884554,2.5490565171674514e-05,10492240


### Overlapping terms

We can provide two lists of query terms and identify overlapping data

In [38]:
# Two lists of query terms to compare. Alternative example:
#   x=['MLH1','MSH2','MLH3','MSH6','PMS1','PMS2','APC']
#   y=['Hereditary non-polyposis colon cancer']
x=['pcsk9']
y=['marfan syndrome']

overlap_df = overlap(x,y)
print(overlap_df.shape)

#remove records found in the same publication
# Build the mask row by row so it is always a boolean Series aligned to
# overlap_df, including when overlap() returned an empty frame.
pub_filter = pd.Series(
    [pub_check(row) for _, row in overlap_df.iterrows()],
    index=overlap_df.index,
    dtype=bool,
)
pub_removed = overlap_df.shape[0]-pub_filter.sum()
print(pub_removed,'records were found in the same publication')
overlap_df=overlap_df[pub_filter]
print(overlap_df.shape)
overlap_df

(2, 32)
0 records were found in the same publication
(2, 32)


Unnamed: 0,triple_x,subject_name_x,subject_type_x,subject_id_x,predicate_x,object_name_x,object_type_x,object_id_x,localCount_x,localTotal_x,...,object_type_y,object_id_y,localCount_y,localTotal_y,globalCount_y,globalTotal_y,odds_y,pval_y,pmids_y,set_y
0,COG2:INHIBITS:Pharmaceutical Preparations,COG2,gngm,22796,INHIBITS,Pharmaceutical Preparations,phsu,C0013227,6,1733,...,gngm,C0040690,3,499,20,9775514,2956.298589,2e-10,31284709 21493863 22277429,marfan_syndrome
1,APP gene:INTERACTS_WITH:Pharmaceutical Prepara...,APP gene,gngm,C1364818,INTERACTS_WITH,Pharmaceutical Preparations,phsu,C0013227,2,1733,...,gngm,C0040690,3,499,20,9775514,2956.298589,2e-10,31284709 21493863 22277429,marfan_syndrome


Look at the overlapping data in more detail

In [15]:
# Count how often each x-side object name occurs within each x-side object type
names_by_type = overlap_df.groupby('object_type_x')['object_name_x']
overlap_counts = names_by_type.value_counts()
overlap_counts

object_type_x  object_name_x       
aapp           MLH1 gene|MLH1           78
               beta catenin             35
               APC                       3
gngm           MLH1 gene|MLH1          668
               MSH2 gene|MSH2          606
               MSH6 gene|MSH6          129
               PMS2 gene|PMS2           96
               APC                      63
               beta catenin             16
               BRAF gene|BRAF            7
               DNA Repair Gene           4
               TACSTD2 gene|TACSTD2      3
               Oncogenes                 1
orch           Sulindac                  2
Name: object_name_x, dtype: int64

### Publication data

We can retrieve the SemMedDB data for a given PubMed ID

In [17]:
# PubMed ID to look up, kept as a string
pmid = '19755659'
# Fetch the records for this PubMed ID from the /sentence/ endpoint as a DataFrame
pub_df = sentence(pmid)
# Last expression of the cell, so the notebook displays it
pub_df

Unnamed: 0,PREDICATION_ID,SENTENCE_ID,PMID,PREDICATE,SUBJECT_CUI,SUBJECT_NAME,SUBJECT_SEMTYPE,SUBJECT_NOVELTY,OBJECT_CUI,OBJECT_NAME,...,NUMBER,SENT_START_INDEX,SENT_END_INDEX,SECTION_HEADER,NORMALIZED_SECTION_HEADER,SENTENCE,ISSN,DP,EDAT,PYEAR
0,131291271,124272128,19755659,INTERACTS_WITH,C0879389|4292,MLH1 gene|MLH1,gngm,1,C0038792,Sulindac,...,1,21,126,"""""""""""""","""""""""""""",Sulindac effects on inflammation and tumorigen...,1460-2180,2009 Nov,2009-9-17,2009
1,131291235,124272128,19755659,INTERACTS_WITH,324,APC,gngm,1,C0038792,Sulindac,...,1,21,126,"""""""""""""","""""""""""""",Sulindac effects on inflammation and tumorigen...,1460-2180,2009 Nov,2009-9-17,2009
2,131291696,124272130,19755659,INTERACTS_WITH,324,APC,gngm,1,C0038792,Sulindac,...,2,367,802,"""""""""""""","""""""""""""",To further explore intestinal regional respons...,1460-2180,2009 Nov,2009-9-17,2009
