# Checking drugs linked to novel genes for their most common approved indications (diseases treated)

In [1]:
import json
import warnings

import numpy as np
import pandas as pd
import requests

warnings.simplefilter(action='ignore')

In [2]:
def ot_drug_query(query_string, chemid, base_url):
    """POST a GraphQL drug query to ``base_url`` for a single ChEMBL id.

    Parameters
    ----------
    query_string : str
        GraphQL query text. It is sent together with one variable,
        ``chemblId``.
    chemid : str
        Value supplied for the ``chemblId`` variable.
    base_url : str
        URL of the GraphQL endpoint that receives the POST request.

    Returns
    -------
    dict or int
        The decoded JSON body when the response status is 200; otherwise the
        integer HTTP status code, so callers can tell the two apart with
        ``isinstance``.
    """
    # Set variables object of arguments to be passed to endpoint
    variables = {"chemblId": chemid}

    # Perform POST request
    r = requests.post(base_url, json={"query": query_string, "variables": variables})

    # Check the status *before* decoding: an error response is not guaranteed
    # to carry a JSON body, and decoding it first would raise instead of
    # returning the status code.
    if r.status_code != 200:
        return r.status_code

    # Transform API response from JSON into a Python dictionary
    return json.loads(r.text)

In [3]:
# Disease abbreviation -> ontology id (a mix of MONDO_ and EFO_ prefixed ids)
dx_efo = {
    'AD': 'MONDO_0004975',
    'LOAD': 'EFO_1001870',
    'ALS': 'MONDO_0004976',
    'ALSt4': 'MONDO_0011223',
    'FTD&|ALS': 'MONDO_0007105',
    'FTD': 'MONDO_0017276',
    'LBD': 'EFO_0006792',
    'PD': 'MONDO_0005180',
    'Parkinsonian': 'MONDO_0021095',
    'PSP': 'MONDO_0010997',
}

In [4]:
# Load the hits table, then keep the Tier1 rows whose Gene is not one of
# the genes listed in t2_genes
hits = pd.read_csv('t1t2_hits_drugsthresh2.csv')

t2_genes = ['MAPT', 'ADORA2B', 'KCNN4']
novel_hits = hits[(hits['tier'] == 'Tier1') & ~hits['Gene'].isin(t2_genes)]
novel_hits

Unnamed: 0,Omic,Disease,Gene,probeID,topRSID,b_SMR,se_SMR,p_SMR,p_SMR_multi,p_HEIDI,gene_name,interaction_types,drug_claim_name,drug_claim_primary_name,drug_name,chemblid,tier
0,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,eculizumab,eculizumab,eculizumab,CHEMBL1201828,Tier1
1,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,CDX-1135,CDX-1135,cdx-1135,CHEMBL4297720,Tier1
2,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,eculizumab,eculizumab,eculizumab,CHEMBL1201828,Tier1
3,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,CDX-1135,CDX-1135,cdx-1135,CHEMBL4297720,Tier1
4,Cortex_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.147259,0.013974,5.752421e-26,3.826448e-15,0.060285,CR1,,eculizumab,eculizumab,eculizumab,CHEMBL1201828,Tier1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3132,blood_eQTLgen,AD,RABEP1,ENSG00000029725,rs1065483,0.026696,0.013352,4.556008e-02,8.895786e-09,0.012615,RABEP1,,valproic acid,valproic acid,valproic acid,none,Tier1
3133,blood_eQTLgen,PD,BST1,ENSG00000109743,rs34559912,-0.258321,0.035240,2.295035e-13,7.480451e-11,0.158446,BST1,,"Beta blocking agents, selective","Beta blocking agents, selective","beta blocking agents, selective",none,Tier1
3134,blood_eQTLgen,PD,ITGAX,ENSG00000140678,rs11574631,0.053384,0.026491,4.388470e-02,2.591740e-07,0.083626,ITGAX,,PROTEASE INHIBITORS,PROTEASE INHIBITORS,protease inhibitors,none,Tier1
3135,blood_eQTLgen,PD,ITGAX,ENSG00000140678,rs11574631,0.053384,0.026491,4.388470e-02,2.591740e-07,0.083626,ITGAX,,PMA,PMA,pma,none,Tier1


In [5]:
# drop rows whose chemblid is the placeholder string "none"
novel_chembl = novel_hits[novel_hits['chemblid'] != 'none']
novel_chembl

Unnamed: 0,Omic,Disease,Gene,probeID,topRSID,b_SMR,se_SMR,p_SMR,p_SMR_multi,p_HEIDI,gene_name,interaction_types,drug_claim_name,drug_claim_primary_name,drug_name,chemblid,tier
0,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,eculizumab,eculizumab,eculizumab,CHEMBL1201828,Tier1
1,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,CDX-1135,CDX-1135,cdx-1135,CHEMBL4297720,Tier1
2,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,eculizumab,eculizumab,eculizumab,CHEMBL1201828,Tier1
3,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,CDX-1135,CDX-1135,cdx-1135,CHEMBL4297720,Tier1
4,Cortex_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.147259,0.013974,5.752421e-26,3.826448e-15,0.060285,CR1,,eculizumab,eculizumab,eculizumab,CHEMBL1201828,Tier1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2213,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,docetaxel,docetaxel,docetaxel,CHEMBL92,Tier1
2214,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,platinum,platinum,platinum,CHEMBL1235478,Tier1
2215,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,cisplatin,cisplatin,cisplatin,CHEMBL11359,Tier1
2216,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,oxaliplatin,oxaliplatin,oxaliplatin,CHEMBL414804,Tier1


In [6]:
# unique chemblids among the novel-gene drug hits (one API query per id later)
nov_chemblids = novel_chembl['chemblid'].unique()

In [7]:
# GraphQL query: for the drug identified by $chemblId, request its name, id and
# approvedIndications (the approved diseases the drug can be used to treat)
query_string = """
      query drugs($chemblId: String!){
        drug(chemblId: $chemblId){
        name
        id
        approvedIndications
          }
        }
    """

# Base URL of the GraphQL API endpoint that every query in this notebook is POSTed to
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

In [8]:
# Query the API once per unique chemblid and sort each id into a bucket:
#   no_drug_chembl - ids for which the response holds no drug record
#   no_linked_dx   - drugs whose approvedIndications field is null
#   all_dx         - one list of approved-indication ids per remaining drug
chembl_dict = {}  # initialised here; nothing in this cell populates it
no_drug_chembl = []
no_linked_dx = []
all_dx = []
for cid in nov_chemblids:
    # run ot query
    out = ot_drug_query(query_string, cid, base_url)
    if isinstance(out, int):
        # ot_drug_query returns the HTTP status code for a non-200 response;
        # report which id failed so it can be retried
        print(f'something broke: {cid} (HTTP status {out})')
    elif isinstance(out, dict):
        drug = out['data']['drug']
        if drug is None:
            # chemblid did not return a drug record
            no_drug_chembl.append(cid)
        elif drug['approvedIndications'] is None:
            # drug record exists but has no approved uses: keep in separate list
            no_linked_dx.append(cid)
        else:
            # approved indications list
            all_dx.append(drug['approvedIndications'])

In [9]:
from collections import Counter, OrderedDict

# Collapse the per-drug indication lists into one flat list, preserving order
all_dx_flat = []
for indications in all_dx:
    all_dx_flat.extend(indications)

# Tally how many times each indication id occurs across all drugs
# (OrderedDict is imported above but not used in this cell)
dx_count = Counter(all_dx_flat)

In [10]:
# Collect the chemblids of drugs approved for use in MONDO_0004992 (Cancer).
# all_dx is deliberately left untouched: this cell never appends to it, and
# resetting it here would leave an empty list behind for the flatten/count
# cell if that cell is ever re-run after this one.
CANCER_ID = 'MONDO_0004992'
chemblid_cancer = []
no_drug_chembl = []
no_linked_dx = []
for cid in nov_chemblids:
    # run ot query
    out = ot_drug_query(query_string, cid, base_url)
    if isinstance(out, int):
        # ot_drug_query returns the HTTP status code for a non-200 response;
        # report which id failed so it can be retried
        print(f'something broke: {cid} (HTTP status {out})')
    elif isinstance(out, dict):
        drug = out['data']['drug']
        if drug is None:
            # chemblid did not return a drug record
            no_drug_chembl.append(cid)
        elif drug['approvedIndications'] is None:
            # drug record exists but has no approved uses: keep in separate list
            no_linked_dx.append(cid)
        else:
            # approved indications list
            approved_uses = drug['approvedIndications']
            if CANCER_ID in approved_uses:
                chemblid_cancer.append(cid)

In [11]:
# number of chemblids whose approved indications include MONDO_0004992
len(chemblid_cancer)

53

In [12]:
# keep only the hits whose chemblid is one of the cancer-approved chemblids
cancer_hits = novel_chembl[novel_chembl['chemblid'].isin(chemblid_cancer)]
cancer_hits

Unnamed: 0,Omic,Disease,Gene,probeID,topRSID,b_SMR,se_SMR,p_SMR,p_SMR_multi,p_HEIDI,gene_name,interaction_types,drug_claim_name,drug_claim_primary_name,drug_name,chemblid,tier
64,Cerebellum_metaBrain,AD,ACE,ENSG00000159640,rs4459609,-0.130730,0.022878,1.102263e-08,6.234866e-08,0.550246,ACE,,VORINOSTAT,VORINOSTAT,vorinostat,CHEMBL98,Tier1
168,blood_mcrae,AD,ACE,cg21657705,rs4353,-0.141431,0.024177,4.921966e-09,3.370011e-09,0.023251,ACE,,VORINOSTAT,VORINOSTAT,vorinostat,CHEMBL98,Tier1
272,Cortex_metaBrain,AD,ACE,ENSG00000159640,rs4291,-0.201045,0.029651,1.198726e-11,1.386269e-08,0.272148,ACE,,VORINOSTAT,VORINOSTAT,vorinostat,CHEMBL98,Tier1
376,psychEncode_prefrontal_cortex,AD,ACE,ENSG00000159640,rs6504163,-0.461543,0.091628,4.725424e-07,4.725424e-07,0.241039,ACE,,VORINOSTAT,VORINOSTAT,vorinostat,CHEMBL98,Tier1
480,blood_eQTLgen,AD,ACE,ENSG00000159640,rs4277405,-0.901763,0.154254,5.036854e-09,6.164485e-08,0.573264,ACE,,VORINOSTAT,VORINOSTAT,vorinostat,CHEMBL98,Tier1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2211,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,PACLITAXEL,PACLITAXEL,paclitaxel,CHEMBL428647,Tier1
2213,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,docetaxel,docetaxel,docetaxel,CHEMBL92,Tier1
2215,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,cisplatin,cisplatin,cisplatin,CHEMBL11359,Tier1
2216,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,oxaliplatin,oxaliplatin,oxaliplatin,CHEMBL414804,Tier1


In [13]:
# unique genes appearing in the cancer-approved drug hits
cancer_genes = cancer_hits['Gene'].unique()
cancer_genes

array(['ACE', 'STAG3', 'EPHX2', 'EPHA1', 'PSMC3', 'PSORS1C1', 'CDSN',
       'POU5F1', 'EGFR', 'CD38', 'EPHB4', 'DNTT', 'MINK1', 'ERCC2'],
      dtype=object)

In [14]:
# number of unique genes with at least one cancer-approved drug hit
len(cancer_genes)

14

In [15]:
def ot_disease_query(query_string, efo, base_url):
    """POST a GraphQL disease query to ``base_url`` for a single disease id.

    Parameters
    ----------
    query_string : str
        GraphQL query text. It is sent together with one variable, ``efoId``.
    efo : str
        Value supplied for the ``efoId`` variable.
    base_url : str
        URL of the GraphQL endpoint that receives the POST request.

    Returns
    -------
    dict or int
        The decoded JSON body when the response status is 200; otherwise the
        integer HTTP status code, so callers can tell the two apart with
        ``isinstance``.
    """
    # Set variables object of arguments to be passed to endpoint
    variables = {"efoId": efo}

    # Perform POST request
    r = requests.post(base_url, json={"query": query_string, "variables": variables})

    # Check the status *before* decoding: an error response is not guaranteed
    # to carry a JSON body, and decoding it first would raise instead of
    # returning the status code.
    if r.status_code != 200:
        return r.status_code

    # Transform API response from JSON into a Python dictionary
    return json.loads(r.text)

In [16]:
# GraphQL query: for the disease identified by $efoId, request its id and name
dx_name_query = """
query disease($efoId: String!){
    disease(efoId: $efoId){
        id
        name
          }
        }
    """

In [17]:
# Map every indication id counted in dx_count to its disease name
id_dx_map = {}
unnamed_dx = []  # ids for which the response held no disease record
for dx in dx_count.keys():
    # run query
    out = ot_disease_query(dx_name_query, dx, base_url)

    if isinstance(out, int):
        # ot_disease_query returns the HTTP status code for a non-200 response;
        # report which id failed so it can be retried
        print(f'something broke: {dx} (HTTP status {out})')
    elif isinstance(out, dict):
        disease = out['data']['disease']
        if disease is None:
            # Same guard the drug loops apply to a null drug record: without
            # it one unresolved id aborts the whole loop with a TypeError.
            # Unresolved ids stay out of id_dx_map.
            unnamed_dx.append(dx)
            continue

        # get disease name
        dx_name = disease['name']

        # append to dict
        id_dx_map[dx] = dx_name

In [18]:
# Two-column frame built from the counter: column 0 holds the indication id,
# column 1 holds how many times it was counted
df = pd.DataFrame(list(dx_count.items()))

In [19]:
# Look up each indication id (column 0) in id_dx_map and store the result in a
# new 'Disease Name' column; note this modifies df in place
df['Disease Name'] = df[0].map(id_dx_map)

In [20]:
# show indications from most to least frequently counted (column 1 = count)
df.sort_values(by=1, ascending=False)

Unnamed: 0,0,1,Disease Name
51,EFO_0000616,63,neoplasm
54,MONDO_0004992,53,cancer
13,EFO_0000537,32,hypertension
10,EFO_0000319,30,cardiovascular disease
143,EFO_0003060,21,non-small cell lung carcinoma
...,...,...,...
120,EFO_0000729,1,ulcerative colitis
121,EFO_0000464,1,emphysema
122,EFO_0006505,1,chronic bronchitis
123,EFO_0000341,1,chronic obstructive pulmonary disease
