# Drug Annotation and Inferences

In [1]:
import json
import warnings

import numpy as np
import pandas as pd
import requests
warnings.simplefilter(action='ignore')

In [2]:
def ot_drug_query(query_string, chemid, base_url):
    """POST a GraphQL drug query for a single ChEMBL ID and return the result.

    Parameters
    ----------
    query_string : str
        GraphQL query text. It is sent together with a ``chemblId`` variable.
    chemid : str
        ChEMBL identifier passed as the ``chemblId`` query variable.
    base_url : str
        URL of the GraphQL endpoint the request is posted to.

    Returns
    -------
    object or int
        The decoded JSON body when the HTTP status is 200, otherwise the
        integer HTTP status code.
    """
    # Variables object sent alongside the query text
    variables = {"chemblId": chemid}

    # Perform POST request
    r = requests.post(base_url, json={"query": query_string, "variables": variables})

    # Check the status BEFORE decoding: an error response is not guaranteed to
    # carry a JSON body, and decoding it first would raise instead of letting
    # the caller see the status code.
    if r.status_code != 200:
        return r.status_code

    # Transform API response from JSON into Python objects
    return json.loads(r.text)

In [3]:
# Load the significant drug-target result tables:
#   ma_hits2  - multi-ancestry replicated hits
#   all_hits2 - all significant hits
ma_hits2, all_hits2 = [
    pd.read_csv(csv_name)
    for csv_name in ('sig_genes_ma_chemblthresh2.csv', 'all_sig_genes_chemblthresh2.csv')
]

In [4]:
# Disease label -> ontology ID (EFO / MONDO) used to recognise
# approved indications of interest.
dx_efo = {
    'AD': 'MONDO_0004975',
    'LOAD': 'EFO_1001870',
    'ALS': 'MONDO_0004976',
    'ALSt4': 'MONDO_0011223',
    'FTD&|ALS': 'MONDO_0007105',
    'FTD': 'MONDO_0017276',
    'LBD': 'EFO_0006792',
    'PD': 'MONDO_0005180',
    'Parkinsonian': 'MONDO_0021095',
    'PSP': 'MONDO_0010997',
}

## 1. Multi-Ancestry Replicated Hits Threshold 2

In [5]:
# unique ChEMBL IDs present in the multi-ancestry replicated hits
ma_chembl = list(ma_hits2['chemblid'].unique())
len(ma_chembl)

1

In [6]:
# GraphQL endpoint every drug query is posted to
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

# GraphQL query: for one ChEMBL ID, fetch the drug's name, id and its
# approved indications
query_string = """
      query drugs($chemblId: String!){
        drug(chemblId: $chemblId){
        name
        id
        approvedIndications
          }
        }
    """

In [7]:
# For every multi-ancestry ChEMBL ID, query the drug endpoint and record which
# of the diseases in dx_efo appear among the drug's approved indications.
chembl_dict = {}      # ChEMBL ID -> list of dx_efo labels matched by its approved indications
no_drug_chembl = []   # ChEMBL IDs for which the response contained no drug record
no_linked_dx = []     # ChEMBL IDs whose drug record has no approved indications
for cid in ma_chembl:
    # run ot query
    out = ot_drug_query(query_string, cid, base_url)
    if isinstance(out, int):
        # ot_drug_query hands back the HTTP status code on a non-200 response
        print('something broke')
    elif isinstance(out, dict):
        # list to hold associated diseases
        asc_dx = []
        drug = out['data']['drug']
        # check if the ChEMBL ID returned a drug record at all
        if drug is None:
            no_drug_chembl.append(cid)
        # if no approved uses, put in separate list
        elif drug['approvedIndications'] is None:
            no_linked_dx.append(cid)
        else:
            # Walk over EVERY approved indication. The earlier version indexed
            # position 0 on each pass, so only the first indication was ever
            # compared and a match was appended once per indication.
            for efo_id in drug['approvedIndications']:
                # first dx_efo label whose ontology ID equals this indication, if any
                label = next((k for k, v in dx_efo.items() if v == efo_id), None)
                if label is not None:
                    asc_dx.append(label)
            chembl_dict[cid] = asc_dx

## 2. All Significant Hits Threshold 2

In [10]:
# unique ChEMBL IDs present across all significant hits
all_chembl = list(all_hits2['chemblid'].unique())
len(all_chembl)

856

In [11]:
# Same lookup as for the multi-ancestry hits, run over every significant ChEMBL ID.
chembl_dict_all = {}      # ChEMBL ID -> dx_efo labels matched by its approved indications
no_drug_chembl_all = []   # ChEMBL IDs with no drug record in the response
no_linked_dx_all = []     # ChEMBL IDs whose drug record has no approved indications
for cid in all_chembl:
    out = ot_drug_query(query_string, cid, base_url)

    # an int is the HTTP status code of a failed request
    if isinstance(out, int):
        print('something broke')
        continue
    if not isinstance(out, dict):
        continue

    # labels collected for this ChEMBL ID
    asc_dx_all = []

    drug_record = out['data']['drug']
    if drug_record is None:
        # the endpoint does not know a drug under this ChEMBL ID
        no_drug_chembl_all.append(cid)
        continue

    indications = drug_record['approvedIndications']
    if indications is None:
        # drug exists but carries no approved indications
        no_linked_dx_all.append(cid)
        continue

    for efo_id in indications:
        # keep the first dx_efo label whose ontology ID equals this indication
        hit = next((k for k, v in dx_efo.items() if v == efo_id), None)
        if hit is not None:
            asc_dx_all.append(hit)
    chembl_dict_all[cid] = asc_dx_all

In [12]:
# Split the lookup results by whether an NDD-related approved indication was found.

# ChEMBL IDs with at least one matched disease label
chembl_dict_ndd_all = dict(pair for pair in chembl_dict_all.items() if pair[1])
chembl_ndd_all = list(chembl_dict_ndd_all)

# ChEMBL IDs whose list of matched disease labels is empty
chembl_dict_nondd_all = dict(pair for pair in chembl_dict_all.items() if not pair[1])
chembl_nondd_all = list(chembl_dict_nondd_all)

In [19]:
# number of ChEMBL IDs whose drug record came back with approvedIndications set to None
len(no_linked_dx_all)

79

In [20]:
# number of ChEMBL IDs with an approvedIndications list in which no entry matched an ID in dx_efo
len(chembl_nondd_all)

370

In [21]:
# number of ChEMBL IDs for which the response contained no drug record (drug was None)
len(no_drug_chembl_all)

403

In [22]:
# tier 2: rows of all_hits2 whose ChEMBL ID is in the NDD-associated list
t2_hits_all = all_hits2[all_hits2['chemblid'].isin(chembl_ndd_all)]

In [24]:
# diseases represented among the tier 2 hits
t2_hits_all['Disease'].unique()

array(['AD', 'PD', 'PSP'], dtype=object)

In [25]:
# tier 1: rows of all_hits2 whose ChEMBL ID is NOT in the NDD-associated list
t1_hits_all = all_hits2[~all_hits2['chemblid'].isin(chembl_ndd_all)]

t1_hits_all

Unnamed: 0,Omic,Disease,Gene,probeID,topRSID,b_SMR,se_SMR,p_SMR,p_SMR_multi,p_HEIDI,gene_name,interaction_types,drug_claim_name,drug_claim_primary_name,drug_name,chemblid
0,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,eculizumab,eculizumab,ECULIZUMAB,CHEMBL1201828
1,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,CDX-1135,CDX-1135,CDX-1135,CHEMBL4297720
2,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,eculizumab,eculizumab,ECULIZUMAB,CHEMBL1201828
3,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,CDX-1135,CDX-1135,CDX-1135,CHEMBL4297720
4,Cortex_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.147259,0.013974,5.752421e-26,3.826448e-15,0.060285,CR1,,eculizumab,eculizumab,ECULIZUMAB,CHEMBL1201828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16132,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,docetaxel,docetaxel,DOCETAXEL,CHEMBL92
16133,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,platinum,platinum,PLATINUM,CHEMBL1235478
16134,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,cisplatin,cisplatin,CISPLATIN,CHEMBL11359
16135,blood_eQTLgen,AD,ERCC2,ENSG00000104884,rs171140,0.128369,0.036800,4.861232e-04,1.751872e-06,0.039685,ERCC2,,oxaliplatin,oxaliplatin,OXALIPLATIN,CHEMBL414804


In [26]:
# write the ChEMBL-annotated tier 1 hits to CSV without the row index
t1_hits_all.to_csv('tier1hits_all_OT_thresh2.csv', index=False)

In [27]:
# load the significant hits that did not have ChEMBL IDs (stored in their own CSV)
nochembl_sig = pd.read_csv('all_sig_genes_nochemblthresh2.csv')

In [28]:
# hard-coded list of tier 2 genes; rows for these genes are excluded from tier 1
t2_genes = ['MAPT', 'KCNN4', 'ADORA2B']

# stack the ChEMBL-annotated tier 1 hits with the hits lacking a ChEMBL ID,
# then drop every row that belongs to a tier 2 gene
t1_hits_all2 = (
    pd.concat([t1_hits_all, nochembl_sig])
    .query('Gene != @t2_genes')
)

In [29]:
# diseases represented among the combined tier 1 hits
t1_hits_all2['Disease'].unique()

array(['AD', 'PD', 'LBD', 'PSP'], dtype=object)

In [30]:
# number of distinct genes among the combined tier 1 hits (a missing value counts as one)
t1_hits_all2['Gene'].nunique(dropna=False)

41

In [32]:
# write the combined tier 1 hits to CSV without the row index
t1_hits_all2.to_csv('tier1hits_all_thresh2.csv', index=False)

In [33]:
# Add a label for the tier categories.
# Both frames are filtered results of an earlier .query() call; assigning a
# column onto such a result is the chained-assignment pattern pandas warns
# about. .assign() builds a new frame that carries the extra column instead.
t1_hits_all2 = t1_hits_all2.assign(tier='Tier1')
t2_hits_all = t2_hits_all.assign(tier='Tier2')

# stack tier 1 and tier 2 hits into one table (original row labels are kept)
drug_hits = pd.concat([t1_hits_all2, t2_hits_all])
drug_hits

Unnamed: 0,Omic,Disease,Gene,probeID,topRSID,b_SMR,se_SMR,p_SMR,p_SMR_multi,p_HEIDI,gene_name,interaction_types,drug_claim_name,drug_claim_primary_name,drug_name,chemblid,tier
0,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,eculizumab,eculizumab,ECULIZUMAB,CHEMBL1201828,Tier1
1,Cerebellum_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.245292,0.045868,8.901474e-08,1.723238e-08,0.109188,CR1,,CDX-1135,CDX-1135,CDX-1135,CHEMBL4297720,Tier1
2,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,eculizumab,eculizumab,ECULIZUMAB,CHEMBL1201828,Tier1
3,Basalganglia_metaBrain,AD,CR1,ENSG00000203710,rs6697005,0.165833,0.029748,2.479933e-08,2.479933e-08,0.078615,CR1,,CDX-1135,CDX-1135,CDX-1135,CHEMBL4297720,Tier1
4,Cortex_metaBrain,AD,CR1,ENSG00000203710,rs1830763,0.147259,0.013974,5.752421e-26,3.826448e-15,0.060285,CR1,,eculizumab,eculizumab,ECULIZUMAB,CHEMBL1201828,Tier1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13913,blood_mcrae,PSP,MAPT,cg05772917,rs11012,5.231440,0.831232,3.102029e-10,6.220856e-11,0.136632,MAPT,,CARBIDOPA,CARBIDOPA,CARBIDOPA,CHEMBL1200748,Tier2
14266,blood_mcrae,PSP,MAPT,cg05772917,rs11012,5.231440,0.831232,3.102029e-10,6.220856e-11,0.136632,MAPT,,APOMORPHINE,APOMORPHINE,APOMORPHINE,CHEMBL53,Tier2
16017,Muscle_Skeletal,PD,ADORA2B,ENSG00000170425,rs1045599,0.081000,0.016160,5.378991e-07,2.729232e-06,0.017355,ADORA2B,,ISTRADEFYLLINE,ISTRADEFYLLINE,ISTRADEFYLLINE,CHEMBL431770,Tier2
16031,Muscle_Skeletal,PD,ADORA2B,ENSG00000170425,rs1045599,0.081000,0.016160,5.378991e-07,2.729232e-06,0.017355,ADORA2B,antagonist,178102242,ISTRADEFYLLINE,ISTRADEFYLLINE,CHEMBL431770,Tier2


In [34]:
# Overwrite drug_name with the lower-cased primary claim name, then drop rows
# that are exact duplicates across all columns. Written as one chained
# expression that produces a new frame rather than mutating drug_hits in place.
# NOTE(review): if the frame carries a unique per-row column (the displayed
# "Unnamed: 0" may be one), no row is an exact duplicate - confirm.
drug_hits = (
    drug_hits
    .assign(drug_name=drug_hits['drug_claim_primary_name'].str.lower())
    .drop_duplicates()
)

In [35]:
# write the combined tier 1 + tier 2 drug hits to CSV without the row index
drug_hits.to_csv('t1t2_hits_drugsthresh2.csv', index=False)

In [6]:
# read the tier 1 / tier 2 drug hits back in from the CSV file
drug_hits = pd.read_csv('t1t2_hits_drugsthresh2.csv')