# MELODI Presto API Example Usage

In [1]:
import json
import pandas as pd
import requests

## Configure parameters

In [2]:
# Base URL of the MELODI Presto API. It is stored WITHOUT a trailing slash
# because every path appended to it in this notebook starts with "/"; with a
# trailing slash the resulting URLs contain a double slash ("api//status").
API_URL = "https://melodi-presto.mrcieu.ac.uk/api"

# Quick availability check: request the status endpoint and show its decoded
# JSON body as the cell output.
requests.get(f"{API_URL}/status").json()

True

## Create functions for the endpoints

Each of the endpoints can be wrapped up in a function:

In [3]:
#enrich
def enrich(q):
    """Query the ``/enrich/`` endpoint and return the result as a DataFrame.

    Parameters
    ----------
    q : str
        Search term; sent as the ``query`` field of the JSON request body.

    Returns
    -------
    pandas.DataFrame
        The decoded JSON response flattened with ``pd.json_normalize``.
        If the response body cannot be decoded or flattened, ``'No data'`` is
        printed and an empty DataFrame is returned, so callers always receive
        a DataFrame (matching what ``overlap`` returns when it has no data).

    Notes
    -----
    Errors raised by the HTTP request itself are not caught here.
    """
    # Drop any trailing slash on the base URL so the path never becomes
    # ".../api//enrich/".
    url = f"{API_URL.rstrip('/')}/enrich/"
    params = {"query": q}
    response = requests.post(url, data=json.dumps(params))
    try:
        return pd.json_normalize(response.json())
    except Exception:
        # Best-effort fallback for an undecodable or unexpected payload.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate instead of being reported as "No data".
        print('No data')
        return pd.DataFrame()
    
#overlap
def overlap(q1,q2):
    """Query the ``/overlap/`` endpoint for two sets of query terms.

    Parameters
    ----------
    q1 :
        Sent as the ``x`` field of the JSON request body.
    q2 :
        Sent as the ``y`` field of the JSON request body.

    Returns
    -------
    pandas.DataFrame
        ``res['data']`` flattened with ``pd.json_normalize`` when the decoded
        response contains a ``'data'`` entry, otherwise an empty DataFrame.
    """
    # Drop any trailing slash on the base URL so the path never becomes
    # ".../api//overlap/".
    url = f"{API_URL.rstrip('/')}/overlap/"
    params = {"x": q1, "y": q2}
    response = requests.post(url, data=json.dumps(params))
    res = response.json()
    if 'data' in res:
        return pd.json_normalize(res['data'])
    # No 'data' entry in the response: hand back an empty frame so callers
    # can treat both outcomes uniformly.
    return pd.DataFrame()

def sentence(q):
    """Query the ``/sentence/`` endpoint for a PubMed ID.

    Parameters
    ----------
    q : str
        PubMed ID; sent as the ``pmid`` field of the JSON request body.

    Returns
    -------
    pandas.DataFrame
        ``res['data']`` flattened with ``pd.json_normalize`` when the decoded
        response contains a ``'data'`` entry, otherwise an empty DataFrame
        (the same convention ``overlap`` uses).
    """
    # Drop any trailing slash on the base URL so the path never becomes
    # ".../api//sentence/".
    url = f"{API_URL.rstrip('/')}/sentence/"
    # Use the argument itself. Reading a global named ``pmid`` here would
    # ignore the caller's value and fail when no such global is defined.
    params = {"pmid": q}
    response = requests.post(url, data=json.dumps(params))
    res = response.json()
    if 'data' in res:
        return pd.json_normalize(res['data'])
    return pd.DataFrame()

#### Some extra functions

It may also be useful to filter the overlap records to exclude any results derived from the same publication.

In [4]:
def pub_check(row):
    """Tell whether the two sides of an overlap record share no PubMed ID.

    Parameters
    ----------
    row : mapping
        Must provide ``'pmids_x'`` and ``'pmids_y'``, each a string of
        IDs separated by single spaces.

    Returns
    -------
    bool
        ``False`` if at least one ID appears in both strings, ``True``
        otherwise. Intended for ``DataFrame.apply(pub_check, axis=1)`` to
        build a keep-mask.
    """
    px = set(row['pmids_x'].split(' '))
    py = set(row['pmids_y'].split(' '))
    return px.isdisjoint(py)

### Get enriched SemMedDB objects for a given search term

Starting from a PubMed search, return all SemMedDB subject-predicate-object triples, their enrichment metrics and publication information.

In [5]:
# Search term handed to the enrich endpoint wrapper.
query_term = "chronic kidney disease"

# Fetch the enrichment table for the term.
enrich_df = enrich(query_term)

# Bare name on the last line: rendered as the cell output.
enrich_df

Unnamed: 0,query,triple,subject_name,subject_type,subject_id,predicate,object_name,object_type,object_id,localCount,localTotal,globalCount,globalTotal,odds,pval,pmids
0,chronic_kidney_disease,Diabetic Nephropathy:CAUSES:End stage renal fa...,Diabetic Nephropathy,dsyn,C0011881,CAUSES,End stage renal failure,dsyn,C1261469,389,99428,976,8295443,33.379669,0.000000,27275335 29745907 25871838 23816723 30101924 2...
1,chronic_kidney_disease,"Anemia:COEXISTS_WITH:Kidney Failure, Chronic",Anemia,dsyn,C0002871,COEXISTS_WITH,"Kidney Failure, Chronic",dsyn,C0022661,357,99428,368,8295443,81.225832,0.000000,24691014 25817226 27742192 30022586 21722599 3...
2,chronic_kidney_disease,"Kidney Failure, Chronic:PREDISPOSES:Cardiovasc...","Kidney Failure, Chronic",dsyn,C0022661,PREDISPOSES,Cardiovascular Diseases,dsyn,C0007222,329,99428,331,8295443,83.199533,0.000000,20424360 27865823 18657657 29866459 18987465 1...
3,chronic_kidney_disease,Cardiovascular Diseases:COEXISTS_WITH:Kidney F...,Cardiovascular Diseases,dsyn,C0007222,COEXISTS_WITH,"Kidney Failure, Chronic",dsyn,C0022661,318,99428,322,8295443,82.656402,0.000000,25923753 18266955 27796755 17198930 25055351 1...
4,chronic_kidney_disease,Hypertensive disease:COEXISTS_WITH:Kidney Fail...,Hypertensive disease,dsyn,C0020538,COEXISTS_WITH,"Kidney Failure, Chronic",dsyn,C0022661,306,99428,314,8295443,81.553925,0.000000,1197258 16336577 24770618 24304473 17173252 18...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11748,chronic_kidney_disease,Dihydrotachysterol:TREATS:Bone Diseases,Dihydrotachysterol,phsu,C0012319,TREATS,Bone Diseases,dsyn,C0005940,2,99428,2,8295443,83.433317,0.000828,951878 907948
11749,chronic_kidney_disease,Chronic Kidney Insufficiency:COEXISTS_WITH:Uro...,Chronic Kidney Insufficiency,dsyn,C0403447,COEXISTS_WITH,Urolithiasis,dsyn,C0451641,2,99428,2,8295443,83.433317,0.000828,7052364 1223368
11750,chronic_kidney_disease,Chronic Kidney Insufficiency:COEXISTS_WITH:Hyp...,Chronic Kidney Insufficiency,dsyn,C0403447,COEXISTS_WITH,"Hyperparathyroidism, Secondary",dsyn,C0020503,2,99428,2,8295443,83.433317,0.000828,5595568 1189399
11751,chronic_kidney_disease,"Kidney Failure, Chronic:CAUSES:Zinc deficiency","Kidney Failure, Chronic",dsyn,C0022661,CAUSES,Zinc deficiency,dsyn,C0235950,2,99428,2,8295443,83.433317,0.000828,6361778


### Overlapping terms

We can provide two lists of query terms and identify overlapping data

In [6]:
# The two query lists passed to the overlap endpoint wrapper.
x = ["MLH1", "MSH2", "MLH3", "MSH6", "PMS1", "PMS2", "APC"]
y = ["Hereditary non-polyposis colon cancer"]

overlap_df = overlap(x, y)
print(overlap_df.shape)

# Row-wise mask from pub_check: True where the x and y sides share no PMID,
# i.e. the rows to keep.
pub_filter = overlap_df.apply(pub_check, axis=1)

# Rows dropped = total rows minus rows kept.
pub_removed = len(overlap_df) - pub_filter.sum()
print(pub_removed, 'records were found in the same publication')

# Rebind overlap_df to the filtered rows and show the new shape and table.
overlap_df = overlap_df[pub_filter]
print(overlap_df.shape)
overlap_df

(1817, 32)
49 records were found in the same publication
(1768, 32)


Unnamed: 0,triple_x,subject_name_x,subject_type_x,subject_id_x,predicate_x,object_name_x,object_type_x,object_id_x,localCount_x,localTotal_x,...,object_type_y,object_id_y,localCount_y,localTotal_y,globalCount_y,globalTotal_y,odds_y,pval_y,pmids_y,set_y
1,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,52,1969,...,gngm,C0879393|2956,15,630,22,8295443,9196.697339,1.273174e-52,29672549 26657901 22495361 28932927 25106712 2...,hereditary_non-polyposis_colon_cancer
2,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,52,1969,...,dsyn,C0039082,8,630,9,8295443,11854.853876,2.569749e-29,22361722 19900449 26509248 15782118 9538124 16...,hereditary_non-polyposis_colon_cancer
3,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,52,1969,...,dsyn,C0039082,7,630,10,8295443,9320.711236,2.737411e-25,17278092 25319978 25701956 12145848 18709565 2...,hereditary_non-polyposis_colon_cancer
5,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,52,1969,...,gngm,C0879391|5395,4,630,4,8295443,13251.500000,0.000000e+00,25701956 29758216 18781192 26657901,hereditary_non-polyposis_colon_cancer
6,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,52,1969,...,inch,C0170127,4,630,5,8295443,10601.198722,0.000000e+00,30916491 11208710 28646840 18257912,hereditary_non-polyposis_colon_cancer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1812,Phosphotransferases:COEXISTS_WITH:APC,Phosphotransferases,aapp,C0031727,COEXISTS_WITH,APC,gngm,324,3,21141,...,orch,C0038792,2,630,3,8295443,8806.199575,5.760000e-08,19755659,hereditary_non-polyposis_colon_cancer
1813,APCS:COEXISTS_WITH:APC,APCS,gngm,325,COEXISTS_WITH,APC,gngm,324,3,21141,...,orch,C0038792,2,630,3,8295443,8806.199575,5.760000e-08,19755659,hereditary_non-polyposis_colon_cancer
1814,Interleukin-2:INTERACTS_WITH:APC,Interleukin-2,aapp,C0021756,INTERACTS_WITH,APC,gngm,324,3,21141,...,orch,C0038792,2,630,3,8295443,8806.199575,5.760000e-08,19755659,hereditary_non-polyposis_colon_cancer
1815,CD40 Ligand|CD40LG:STIMULATES:APC,CD40 Ligand|CD40LG,aapp,C0167627|959,STIMULATES,APC,gngm,324,3,21141,...,orch,C0038792,2,630,3,8295443,8806.199575,5.760000e-08,19755659,hereditary_non-polyposis_colon_cancer


Look at the overlapping data in more detail

In [7]:
# For each value of object_type_x, count how many rows carry each
# object_name_x.
overlap_counts = (
    overlap_df
    .groupby('object_type_x')['object_name_x']
    .value_counts()
)
overlap_counts

object_type_x  object_name_x       
aapp           MLH1 gene|MLH1           78
               beta catenin             36
               APC                       2
gngm           MLH1 gene|MLH1          668
               MSH2 gene|MSH2          661
               MSH6 gene|MSH6          129
               PMS2 gene|PMS2           96
               APC                      64
               beta catenin             16
               BRAF gene|BRAF            8
               DNA Repair Gene           4
               TACSTD2 gene|TACSTD2      3
               Oncogenes                 1
orch           Sulindac                  2
Name: object_name_x, dtype: int64

### Publication data

We can retrieve the SemMedDB data for a given PubMed ID.

In [8]:
# PubMed ID to look up.
pmid = "19755659"

# Fetch the records returned by the sentence endpoint wrapper for that ID.
pub_df = sentence(pmid)

# Bare name on the last line: rendered as the cell output.
pub_df

Unnamed: 0,PREDICATION_ID,SENTENCE_ID,PMID,PREDICATE,SUBJECT_CUI,SUBJECT_NAME,SUBJECT_SEMTYPE,SUBJECT_NOVELTY,OBJECT_CUI,OBJECT_NAME,...,NUMBER,SENT_START_INDEX,SENT_END_INDEX,SECTION_HEADER,NORMALIZED_SECTION_HEADER,SENTENCE,ISSN,DP,EDAT,PYEAR
0,131291235,124272128,19755659,INTERACTS_WITH,324,APC,gngm,1,C0038792,Sulindac,...,1,21,126,"""""""""""""","""""""""""""",Sulindac effects on inflammation and tumorigen...,1460-2180,2009 Nov,2009-9-17,2009
1,131291271,124272128,19755659,INTERACTS_WITH,C0879389|4292,MLH1 gene|MLH1,gngm,1,C0038792,Sulindac,...,1,21,126,"""""""""""""","""""""""""""",Sulindac effects on inflammation and tumorigen...,1460-2180,2009 Nov,2009-9-17,2009
2,131291696,124272130,19755659,INTERACTS_WITH,324,APC,gngm,1,C0038792,Sulindac,...,2,367,802,"""""""""""""","""""""""""""",To further explore intestinal regional respons...,1460-2180,2009 Nov,2009-9-17,2009
