# MELODI Lite API Example Usage

In [2]:
import json
import pandas as pd
import requests

## Configure parameters

In [3]:
# Base URL of the MELODI Lite API. It is kept WITHOUT a trailing slash because
# every endpoint path in this notebook ("/status", "/enrich/", "/overlap/",
# "/sentence/") starts with "/"; a trailing slash here would yield "api//...".
API_URL = "https://melodi-lite.mrcieu.ac.uk/api"

# Query the status endpoint; the decoded JSON body is shown as the cell output.
requests.get(f"{API_URL}/status").json()

True

### Get enriched SemMedDB objects for a given search term

Starting from a PubMed search, return all SemMedDB subject-predicate-object triples, their enrichment metrics, and publication information.

In [10]:
# Search terms sent to the enrich endpoint under the "query" key.
query_term_list = ['chronic kidney disease']

endpoint = "/enrich/"
url = f"{API_URL}{endpoint}"
params = {
    "query": query_term_list,
}
# `json=` serialises the payload and sets the JSON Content-Type header.
response = requests.post(url, json=params)
# Surface HTTP errors here instead of as a decode error or KeyError below.
response.raise_for_status()
res = response.json()

# Results are looked up under each term with spaces replaced by underscores.
# One DataFrame is kept per term so that earlier terms are not lost when the
# list holds more than one query.
enrich_dfs = {}
for term in query_term_list:
    result_key = term.replace(' ', '_')
    # `enrich_df` keeps its original meaning: the frame for the latest term.
    enrich_df = pd.json_normalize(res[result_key])
    enrich_dfs[term] = enrich_df
    print(enrich_df)

                                                 triple  \
0     Diabetic Nephropathy:CAUSES:End stage renal fa...   
1          Anemia:COEXISTS_WITH:Kidney Failure, Chronic   
2     Kidney Failure, Chronic:PREDISPOSES:Cardiovasc...   
3     Cardiovascular Diseases:COEXISTS_WITH:Kidney F...   
4     Hypertensive disease:COEXISTS_WITH:Kidney Fail...   
...                                                 ...   
8526           NPS-568:PREVENTS:Parathyroid hyperplasia   
8527  Angiotensinogen:INTERACTS_WITH:TGFB1 protein, ...   
8528              Cyclosporine:INHIBITS:Cytochrome P450   
8529                   RenaGel:TREATS:Hyperphosphatemia   
8530  Complement System Proteins:AFFECTS:Kidney Dise...   

                    subject_name subject_type subject_id       predicate  \
0           Diabetic Nephropathy         dsyn   C0011881          CAUSES   
1                         Anemia         dsyn   C0002871   COEXISTS_WITH   
2        Kidney Failure, Chronic         dsyn   C0022661     PR

### Overlapping terms

We can provide two lists of query terms and identify overlapping data

In [34]:
# Two lists of query terms, sent to the overlap endpoint as "x" and "y".
x = ['MLH1', 'MSH2', 'MLH3', 'MSH6', 'PMS1', 'PMS2', 'APC']
y = ['Hereditary non-polyposis colon cancer']

endpoint = "/overlap/"
url = f"{API_URL}{endpoint}"
params = {
    "x": x,
    "y": y,
}
# `json=` serialises the payload and sets the JSON Content-Type header.
response = requests.post(url, json=params)
# Surface HTTP errors here instead of as a KeyError on res['data'] below.
response.raise_for_status()
res = response.json()

# Flatten the records under the "data" key into a tabular DataFrame.
overlap_df = pd.json_normalize(res['data'])
print(overlap_df)

                                         triple_x       subject_name_x  \
0     MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2       MLH1 gene|MLH1   
1     MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2       MLH1 gene|MLH1   
2     MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2       MLH1 gene|MLH1   
3     MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2       MLH1 gene|MLH1   
4     MLH1 gene|MLH1:COEXISTS_WITH:MSH2 gene|MSH2       MLH1 gene|MLH1   
...                                           ...                  ...   
1683               Anthocyanins:COEXISTS_WITH:APC         Anthocyanins   
1684                 Protein C:INTERACTS_WITH:APC            Protein C   
1685        Phosphotransferases:COEXISTS_WITH:APC  Phosphotransferases   
1686                       Factor Va:INHIBITS:APC            Factor Va   
1687            beta catenin:STIMULATES:Oncogenes         beta catenin   

     subject_type_x   subject_id_x     predicate_x   object_name_x  \
0              gngm  C0879389|4292   COEX

Look at the overlapping data in more detail

In [35]:
# Count how often each object name occurs within each object type.
objects_by_type = overlap_df.groupby('object_type_x')
overlap_counts = objects_by_type['object_name_x'].value_counts()
print(overlap_counts)

object_type_x  object_name_x                 
aapp           MLH1 gene|MLH1                     66
               beta catenin                       33
               APC                                 3
gngm           MLH1 gene|MLH1                    627
               MSH2 gene|MSH2                    616
               MSH6 gene|MSH6                    132
               PMS2 gene|PMS2                    116
               APC                                59
               beta catenin                       16
               DNA Repair Gene                    10
               TACSTD2 gene|TACSTD2                3
               Oncogenes                           1
nusq           Single Nucleotide Polymorphism      4
orch           Sulindac                            2
Name: object_name_x, dtype: int64


### Publication data

We can retrieve the SemMedDB data for a given PubMed ID.

In [33]:
# PubMed ID sent to the sentence endpoint under the "pmid" key.
pmid = '19755659'
endpoint = "/sentence/"
url = f"{API_URL}{endpoint}"
params = {
    "pmid": pmid,
}
# `json=` serialises the payload and sets the JSON Content-Type header.
response = requests.post(url, json=params)
# Surface HTTP errors here instead of as a KeyError on res['results'] below.
response.raise_for_status()
res = response.json()

# Use the pandas module (`pd`) to build the frame: the original called
# `pub_df.json_normalize`, which referenced `pub_df` before it was assigned.
pub_df = pd.json_normalize(res['results'])
# Print the frame built in this cell, not a `df` left over from kernel state.
print(pub_df)

  PREDICATION_ID SENTENCE_ID      PMID       PREDICATE    SUBJECT_CUI  \
0      131291271   124272128  19755659  INTERACTS_WITH  C0879389|4292   
1      131291235   124272128  19755659  INTERACTS_WITH            324   
2      131291696   124272130  19755659  INTERACTS_WITH            324   

     SUBJECT_NAME SUBJECT_SEMTYPE  SUBJECT_NOVELTY OBJECT_CUI OBJECT_NAME  \
0  MLH1 gene|MLH1            gngm                1   C0038792    Sulindac   
1             APC            gngm                1   C0038792    Sulindac   
2             APC            gngm                1   C0038792    Sulindac   

  OBJECT_SEMTYPE  OBJECT_NOVELTY                            SUB_PRED_OBJ  \
0           orch               1  MLH1 gene|MLH1:INTERACTS_WITH:Sulindac   
1           orch               1             APC:INTERACTS_WITH:Sulindac   
2           orch               1             APC:INTERACTS_WITH:Sulindac   

  NORMALIZED_SECTION_HEADER SECTION_HEADER SENT_START_INDEX  \
0                    """"""   