# MELODI Presto API Example Usage

In [1]:
import json
import pandas as pd
import requests

## Configure parameters

In [2]:
# Base URL of the MELODI Presto REST API.
# Deliberately written WITHOUT a trailing slash: every endpoint string in this
# notebook starts with "/", so a trailing slash here would yield malformed
# ".../api//<endpoint>" URLs that only work if the server collapses slashes.
API_URL = "https://melodi-presto.mrcieu.ac.uk/api"

# Health check. A timeout stops the cell from hanging forever on an unreachable
# host, and raise_for_status() surfaces HTTP errors before JSON decoding.
status_response = requests.get(f"{API_URL}/status", timeout=30)
status_response.raise_for_status()
status_response.json()

True

### Get enriched SemMedDB objects for a given search term

Starting from a PubMed search, return all SemMedDB subject-predicate-object triples, their enrichment metrics and publication information.

In [10]:
query_term = 'chronic kidney disease'

endpoint = "/enrich/"
# Drop any trailing slash from the base URL before appending the endpoint
# (which already starts with "/") so the path never contains "//".
url = f"{API_URL.rstrip('/')}{endpoint}"
params = {
    "query": query_term,
}
# Parameters are sent as a JSON-encoded request body.
# The timeout is generous because the request may take a while server-side;
# without one, requests would wait indefinitely.
response = requests.post(url, data=json.dumps(params), timeout=300)
# Fail loudly on 4xx/5xx instead of normalising an error payload into a frame.
response.raise_for_status()
res = response.json()
# The decoded response is passed to json_normalize directly (no 'data' key lookup).
enrich_df = pd.json_normalize(res)
enrich_df

Unnamed: 0,query,triple,subject_name,subject_type,subject_id,predicate,object_name,object_type,object_id,localCount,localTotal,globalCount,globalTotal,odds,pval,pmids
0,chronic_kidney_disease,Diabetic Nephropathy:CAUSES:End stage renal fa...,Diabetic Nephropathy,dsyn,C0011881,CAUSES,End stage renal failure,dsyn,C1261469,367,74425,886,6611441,36.364777181870636,0.0,29080119 19414963 25193923 29629274 11352187 2...
1,chronic_kidney_disease,"Anemia:COEXISTS_WITH:Kidney Failure, Chronic",Anemia,dsyn,C0002871,COEXISTS_WITH,"Kidney Failure, Chronic",dsyn,C0022661,329,74425,340,6611441,84.95042763144895,0.0,16498815 30108502 2648520 29225817 9375826 226...
2,chronic_kidney_disease,"Kidney Failure, Chronic:PREDISPOSES:Cardiovasc...","Kidney Failure, Chronic",dsyn,C0022661,PREDISPOSES,Cardiovascular Diseases,dsyn,C0007222,307,74425,309,6611441,87.22249078968967,0.0,24800495 16609298 27865823 21422054 18786751 2...
3,chronic_kidney_disease,Cardiovascular Diseases:COEXISTS_WITH:Kidney F...,Cardiovascular Diseases,dsyn,C0007222,COEXISTS_WITH,"Kidney Failure, Chronic",dsyn,C0022661,297,74425,301,6611441,86.62406145900408,0.0,16609298 23115140 28228293 20586894 20682604 2...
4,chronic_kidney_disease,Hypertensive disease:COEXISTS_WITH:Kidney Fail...,Hypertensive disease,dsyn,C0020538,COEXISTS_WITH,"Kidney Failure, Chronic",dsyn,C0022661,290,74425,298,6611441,85.43391775423662,0.0,21393360 23154587 5376775 27932204 20339374 20...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8526,chronic_kidney_disease,NPS-568:PREVENTS:Parathyroid hyperplasia,NPS-568,orch,C0907461,PREVENTS,Parathyroid hyperplasia,dsyn,C0271844,2,74425,2,6611441,87.79071548538798,0.0007497044125845847,10620187
8527,chronic_kidney_disease,"Angiotensinogen:INTERACTS_WITH:TGFB1 protein, ...",Angiotensinogen,aapp,C0003017,INTERACTS_WITH,"TGFB1 protein, human|TGFB1",aapp,C0080222|7040,2,74425,2,6611441,87.79071548538798,0.0007497044125845847,15698429
8528,chronic_kidney_disease,Cyclosporine:INHIBITS:Cytochrome P450,Cyclosporine,aapp,C0010592,INHIBITS,Cytochrome P450,aapp,C0010762,2,74425,5,6611441,35.11628619415519,0.002565466764230501,9648082
8529,chronic_kidney_disease,RenaGel:TREATS:Hyperphosphatemia,RenaGel,orch,C0526563,TREATS,Hyperphosphatemia,dsyn,C0085681,2,74425,4,6611441,43.89535774269399,0.001846286743093013,9988142


### Overlapping terms

We can provide two lists of query terms and identify overlapping data

In [11]:
# Two sets of query terms whose results will be compared against each other.
x = ['MLH1', 'MSH2', 'MLH3', 'MSH6', 'PMS1', 'PMS2', 'APC']
y = ['Hereditary non-polyposis colon cancer']

endpoint = "/overlap/"
# Drop any trailing slash from the base URL before appending the endpoint
# (which already starts with "/") so the path never contains "//".
url = f"{API_URL.rstrip('/')}{endpoint}"
params = {
    "x": x,
    "y": y,
}
# Parameters are sent as a JSON-encoded request body.
# The timeout is generous because the request may take a while server-side;
# without one, requests would wait indefinitely.
response = requests.post(url, data=json.dumps(params), timeout=300)
# Fail loudly on 4xx/5xx rather than hitting an opaque error on res['data'] below.
response.raise_for_status()
res = response.json()
# The records to tabulate are stored under the 'data' key of the response.
overlap_df = pd.json_normalize(res['data'])
overlap_df

Unnamed: 0,triple_x,subject_name_x,subject_type_x,subject_id_x,predicate_x,object_name_x,object_type_x,object_id_x,localCount_x,localTotal_x,...,object_type_y,object_id_y,localCount_y,localTotal_y,globalCount_y,globalTotal_y,odds_y,pval_y,pmids_y,set_y
0,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,46,1754,...,gngm,C0879389|4292,28,583,30,6611441,10460.095597,2.359079e-97,18301449 15949572 12655564 19930554 16830052 8...,hereditary_non-polyposis_colon_cancer
1,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,46,1754,...,gngm,C0879393|2956,15,583,22,6611441,7641.303602,2.071033e-51,25106712 14961575 20924129 26248088 25701956 1...,hereditary_non-polyposis_colon_cancer
2,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,46,1754,...,dsyn,C0039082,8,583,9,6611441,9961.995807,1.037182e-28,19900449 22361722 16500024 23729658 9538124 15...,hereditary_non-polyposis_colon_cancer
3,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,46,1754,...,dsyn,C0039082,6,583,9,6611441,7471.496855,2.614930e-21,25701956 26746812 25319978 17278092 18709565 1...,hereditary_non-polyposis_colon_cancer
4,MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2,MLH1 gene|MLH1,gngm,C0879389|4292,COEXISTS_WITH,MSH2 gene|MSH2,gngm,C0879290|4436,46,1754,...,gngm,C0879389|4292,6,583,8,6611441,8405.433962,1.569080e-21,14574010 23091106 11600610,hereditary_non-polyposis_colon_cancer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1557,Anthocyanins:COEXISTS_WITH:APC,Anthocyanins,orch,C0003161,COEXISTS_WITH,APC,gngm,324,3,19358,...,orch,C0038792,2,583,3,6611441,7471.496855,8.000000e-08,19755659,hereditary_non-polyposis_colon_cancer
1558,Protein C:INTERACTS_WITH:APC,Protein C,aapp,C0033621,INTERACTS_WITH,APC,gngm,324,3,19358,...,orch,C0038792,2,583,3,6611441,7471.496855,8.000000e-08,19755659,hereditary_non-polyposis_colon_cancer
1559,Phosphotransferases:COEXISTS_WITH:APC,Phosphotransferases,aapp,C0031727,COEXISTS_WITH,APC,gngm,324,3,19358,...,orch,C0038792,2,583,3,6611441,7471.496855,8.000000e-08,19755659,hereditary_non-polyposis_colon_cancer
1560,Factor Va:INHIBITS:APC,Factor Va,aapp,C0015501,INHIBITS,APC,gngm,324,3,19358,...,orch,C0038792,2,583,3,6611441,7471.496855,8.000000e-08,19755659,hereditary_non-polyposis_colon_cancer


Look at the overlapping data in more detail

In [12]:
# Count how often each x-side object name occurs, per x-side object type.
type_groups = overlap_df.groupby('object_type_x')
overlap_counts = type_groups['object_name_x'].value_counts()
overlap_counts

object_type_x  object_name_x       
aapp           MLH1 gene|MLH1           66
               beta catenin             33
               APC                       3
gngm           MLH1 gene|MLH1          572
               MSH2 gene|MSH2          572
               MSH6 gene|MSH6          132
               PMS2 gene|PMS2          100
               APC                      58
               beta catenin             16
               DNA Repair Gene           4
               TACSTD2 gene|TACSTD2      3
               Oncogenes                 1
orch           Sulindac                  2
Name: object_name_x, dtype: int64

### Publication data

We can retrieve the SemMedDB data for a given PubMed ID.

In [13]:
pmid = '19755659'
endpoint = "/sentence/"
# Drop any trailing slash from the base URL before appending the endpoint
# (which already starts with "/") so the path never contains "//".
url = f"{API_URL.rstrip('/')}{endpoint}"
params = {
    "pmid": pmid,
}
# Parameters are sent as a JSON-encoded request body.
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.post(url, data=json.dumps(params), timeout=300)
# Fail loudly on 4xx/5xx rather than hitting an opaque error on res['data'] below.
response.raise_for_status()
res = response.json()
# The records to tabulate are stored under the 'data' key of the response.
pub_df = pd.json_normalize(res['data'])
pub_df

Unnamed: 0,PREDICATION_ID,SENTENCE_ID,PMID,PREDICATE,SUBJECT_CUI,SUBJECT_NAME,SUBJECT_SEMTYPE,SUBJECT_NOVELTY,OBJECT_CUI,OBJECT_NAME,OBJECT_SEMTYPE,OBJECT_NOVELTY,SUB_PRED_OBJ,NORMALIZED_SECTION_HEADER,SECTION_HEADER,SENT_START_INDEX,SENTENCE,TYPE,NUMBER,SENT_END_INDEX
0,131291271,124272128,19755659,INTERACTS_WITH,C0879389|4292,MLH1 gene|MLH1,gngm,1,C0038792,Sulindac,orch,1,MLH1 gene|MLH1:INTERACTS_WITH:Sulindac,"""""""""""""","""""""""""""",21,Sulindac effects on inflammation and tumorigen...,ti,1,126
1,131291235,124272128,19755659,INTERACTS_WITH,324,APC,gngm,1,C0038792,Sulindac,orch,1,APC:INTERACTS_WITH:Sulindac,"""""""""""""","""""""""""""",21,Sulindac effects on inflammation and tumorigen...,ti,1,126
2,131291696,124272130,19755659,INTERACTS_WITH,324,APC,gngm,1,C0038792,Sulindac,orch,1,APC:INTERACTS_WITH:Sulindac,"""""""""""""","""""""""""""",367,To further explore intestinal regional respons...,ab,2,802
