# MELODI Presto Analysis Example

In [1]:
import json
import pandas as pd
import requests
import matplotlib.pyplot as plt

## Configure parameters

In [2]:
API_URL = "https://melodi-presto.mrcieu.ac.uk/api/"

requests.get(f"{API_URL}/status").json()

True

### Set up some functions

In [3]:
#overlap
def overlap(q1,q2):
    endpoint = "/overlap/"
    url = f"{API_URL}{endpoint}"
    params = {
        "x": q1,
        "y": q2,
    }
    response = requests.post(url, data=json.dumps(params))
    res = response.json()
    overlap_df = (
        pd.json_normalize(res['data'])
    )
    overlap_df
    return overlap_df

### COVID-19 risk factors



In [16]:
q1=['risk factors']
q2=['covid-19','coronavirus']

overlap_df = overlap(q1,q2)

In [17]:
overlap_counts = overlap_df.groupby(['set_y','object_type_x'])['object_name_x'].value_counts()
overlap_counts

set_y        object_type_x  object_name_x                       
coronavirus  dsyn           Infection                               484
                            Diabetes                                220
                            Hypertensive disease                     99
                            Pneumonia                                50
                            Acute myocardial infarction              18
                            Respiratory Distress Syndrome, Adult      9
                            Inflammatory Bowel Diseases               6
                            Syndrome                                  5
                            Septicemia                                3
                            Autoimmune Diseases                       2
             gngm           Cyclosporine                              1
                            Phosphotransferases                       1
covid-19     dsyn           Hypertensive disease                    297

### COVID-19 risk factors
Factors associated with hospitalization and critical illness among 4,103 patients with COVID-19 disease in New York City:

https://www.medrxiv.org/content/10.1101/2020.04.08.20057794v1

In [44]:
q1=['C-reactive protein','ferritin','d-dimer']
q2=['covid-19','coronavirus']

overlap_df = overlap(q1,q2)

Unnamed: 0,triple_x,subject_name_x,subject_type_x,subject_id_x,predicate_x,object_name_x,object_type_x,object_id_x,localCount_x,localTotal_x,...,object_type_y,object_id_y,localCount_y,localTotal_y,globalCount_y,globalTotal_y,odds_y,pval_y,pmids_y,set_y
0,C-reactive protein:PREDISPOSES:Cardiovascular ...,C-reactive protein,aapp,C0006560,PREDISPOSES,Cardiovascular Diseases,dsyn,C0007222,282,62335,...,dsyn,C5203670,2,740,2,6862606,9298.921409,6.960000e-08,32281055 32304798,covid-19
1,C-reactive protein:ASSOCIATED_WITH:Cardiovascu...,C-reactive protein,aapp,C0006560,ASSOCIATED_WITH,Cardiovascular Diseases,dsyn,C0007222,169,62335,...,dsyn,C5203670,2,740,2,6862606,9298.921409,6.960000e-08,32281055 32304798,covid-19
2,Metabolic syndrome:PREDISPOSES:Cardiovascular ...,Metabolic syndrome,dsyn,C0948265,PREDISPOSES,Cardiovascular Diseases,dsyn,C0007222,73,62335,...,dsyn,C5203670,2,740,2,6862606,9298.921409,6.960000e-08,32281055 32304798,covid-19
3,Obesity:PREDISPOSES:Cardiovascular Diseases,Obesity,dsyn,C0028754,PREDISPOSES,Cardiovascular Diseases,dsyn,C0007222,46,62335,...,dsyn,C5203670,2,740,2,6862606,9298.921409,6.960000e-08,32281055 32304798,covid-19
4,Hypertensive disease:PREDISPOSES:Cardiovascula...,Hypertensive disease,dsyn,C0020538,PREDISPOSES,Cardiovascular Diseases,dsyn,C0007222,35,62335,...,dsyn,C5203670,2,740,2,6862606,9298.921409,6.960000e-08,32281055 32304798,covid-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
872,Ferritin:INTERACTS_WITH:Monoclonal Antibodies,Ferritin,aapp,C0015879,INTERACTS_WITH,Monoclonal Antibodies,aapp,C0003250,3,16878,...,aapp,C0065827,2,4335,2,6862606,1583.799677,2.388600e-06,30877355,coronavirus
873,Septicemia:CAUSES:Blood Coagulation Disorders,Septicemia,dsyn,C0036690,CAUSES,Blood Coagulation Disorders,dsyn,C0005779,8,5356,...,dsyn,C5203670,3,740,3,6862606,9311.537313,0.000000e+00,32302442 32302462,covid-19
874,Pulmonary Embolism:NEG_COEXISTS_WITH:Blood Coa...,Pulmonary Embolism,dsyn,C0034065,NEG_COEXISTS_WITH,Blood Coagulation Disorders,dsyn,C0005779,2,5356,...,dsyn,C5203670,3,740,3,6862606,9311.537313,0.000000e+00,32302442 32302462,covid-19
875,Chronic Obstructive Airway Disease:COEXISTS_WI...,Chronic Obstructive Airway Disease,dsyn,C0024117,COEXISTS_WITH,Respiratory Failure,dsyn,C1145670,7,5356,...,dsyn,C5203670,3,740,3,6862606,9311.537313,0.000000e+00,32299776 32277836 32320677,covid-19


In [51]:
overlap_counts = overlap_df.groupby(['set_x','set_y','object_type_x'])['object_name_x'].value_counts()
overlap_counts.to_csv('test.csv')

### Psoriasis

https://www.cell.com/ajhg/fulltext/S0002-9297(12)00157-7

In [18]:
q1=['AP1S3','IL36RN','CARD14']
q2=['Psoriasis']

overlap_df = overlap(q1,q2)

In [19]:
overlap_counts = overlap_df.groupby(['set_x','object_type_x'])['object_name_x'].value_counts()
overlap_counts

set_x   object_type_x  object_name_x                
ap1s3   gngm           CARD14                             5
card14  aapp           NF-kappa B                         3
        dsyn           Psoriasis                        861
                       Autoimmune Diseases               10
                       skin disorder                      8
        gngm           NF-kappa B                        21
                       CARD14                             5
il36rn  aapp           Interleukin Receptor               1
        dsyn           Arthritis, Psoriatic              33
                       Pustulosis of Palms and Soles      3
        gngm           CARD14                             5
                       Interleukin Receptor               1
Name: object_name_x, dtype: int64

### Drug repurposing

https://www.medrxiv.org/content/10.1101/2020.05.07.20093286v1

In [20]:
q1=['DHODH', 'ITGB5', 'JAK2']
q2=['Leflunomide','Cilengitide','Baricitinib']

overlap_df = overlap(q1,q2)

In [21]:
overlap_counts = overlap_df.groupby(['set_x','set_y','object_type_x'])['object_name_x'].value_counts()
overlap_counts

set_x  set_y        object_type_x  object_name_x                     
dhodh  leflunomide  aapp           Phosphotransferases                    1
                    dsyn           Rheumatoid Arthritis                   9
                    gngm           Dihydroorotate dehydrogenase          50
                                   Dihydroorotate dehydrogenase|DHODH     1
                    orch           leflunomide                           92
                                   Pyrimidine                            12
jak2   baricitinib  aapp           Janus kinase                           4
                                   Janus kinase 1|JAK1                    4
                                   cytokine                               3
                    gngm           Janus kinase                          11
                                   Janus kinase 1|JAK1                    4
                                   cytokine                               1
                  

### Other drug repurposing

https://www.nature.com/articles/nrd.2018.168/tables/1lo

In [22]:
q1=['(Ketoconazole) AND (("1900"[Date - Create] : "2000"[Date - Create]))']
q2=['(Cushing syndrome) AND (("1900"[Date - Create] : "2000"[Date - Create]))']

overlap_df = overlap(q1,q2)

In [23]:
overlap_counts = overlap_df.groupby(['object_type_x'])['object_name_x'].value_counts()
overlap_counts

object_type_x  object_name_x                  
aapp           Corticotropin|POMC                  62
               Glucocorticoid Receptor              1
dsyn           Cushing Syndrome                    36
               Hypertensive disease                 6
               Adrenal Gland Hyperfunction          3
               Infection                            3
               Mycoses                              3
               Adrenal gland hypofunction           1
               Histoplasmosis                       1
gngm           Corticotropin|POMC                  31
               Corticotropin-Releasing Hormone     20
horm           Hydrocortisone                     138
               Adrenal Cortex Hormones             14
               Testosterone                        14
               Corticotropin                       13
               Androgens                            6
               Estradiol                            2
               Steroid hormone     

##### Changes over time

Loop through 20 years and look at change

In [6]:
#
#year_range=range(1990,2021)
year_range=range(1990,1992)

master_df=pd.DataFrame()

for y in year_range:
    q1=['(Ketoconazole) AND (("1900"[Date - Create] : "'+str(y).zfill(2)+'"[Date - Create]))']
    q2=['(Cushing syndrome) AND (("1900"[Date - Create] : "'+str(y).zfill(2)+'"[Date - Create]))']
    print(y)
    overlap_df = overlap(q1,q2)
    overlap_counts = overlap_df.groupby(['object_type_x'])['object_name_x'].value_counts()
    overlap_counts_df = overlap_counts.rename_axis(['type','name']).reset_index(name='counts').sort_values(by='counts',ascending=False)
    #add year
    overlap_counts_df['year']=y
    #add relative counts
    overlap_counts_df['rel_counts']=(overlap_counts_df['counts']/sum(overlap_counts_df['counts']))*100
    print(overlap_counts_df.shape)
    #print(overlap_counts_df)
    master_df = master_df.append(overlap_counts_df)
    
print(master_df)

1990
(14, 5)
1991
(15, 5)
    type                                   name  counts  year  rel_counts
13  orch                           Ketoconazole      72  1990   32.727273
0   aapp                     Corticotropin|POMC      48  1990   21.818182
6   horm                         Hydrocortisone      48  1990   21.818182
5   gngm                     Corticotropin|POMC      24  1990   10.909091
7   horm                Adrenal Cortex Hormones       8  1990    3.636364
8   horm                           Testosterone       5  1990    2.272727
9   horm                              Androgens       4  1990    1.818182
1   dsyn                         Histoplasmosis       2  1990    0.909091
2   dsyn                                Mycoses       2  1990    0.909091
3   dsyn  Pituitary-dependent Cushing's disease       2  1990    0.909091
10  horm                        Steroid hormone       2  1990    0.909091
4   dsyn             Adrenal gland hypofunction       1  1990    0.454545
11  horm    

In [7]:
master_df = master_df.pivot(index='name', columns='year', values='rel_counts')

master_df.plot()

ValueError: Index contains duplicate entries, cannot reshape