# Annotated Gene Networks via Open Targets
1. Obtain network partners for Tier 1/Novel gene and Unknown gene targets
2. Get information on each relationship (which gene regulates which)
3. Find drugs that target any of the identified companion genes

In [1]:
import requests
import json
import pandas as pd
import subprocess
import sys
import numpy as np
import os
import glob
#from omicsynth_func import *
import warnings
warnings.simplefilter(action='ignore')

In [2]:
def ot_query(query_string, ensg, base_url, source_database="signor"):
    """Run one GraphQL query against the Open Targets endpoint for a single target.

    Parameters
    ----------
    query_string : str
        GraphQL query document. It is sent together with the variables
        ``ensgId`` and ``sourceDatabase``.
    ensg : str
        Ensembl gene ID passed as the ``ensgId`` variable.
    base_url : str
        URL of the GraphQL endpoint the request is POSTed to.
    source_database : str, optional
        Value passed as the ``sourceDatabase`` variable (default ``"signor"``,
        the value that was previously hard-coded).

    Returns
    -------
    dict or int
        The decoded JSON response when the HTTP status is 200, otherwise the
        integer HTTP status code. Callers distinguish the two with
        ``isinstance``.
    """
    # Set variables object of arguments to be passed to endpoint
    variables = {"ensgId": ensg, "sourceDatabase": source_database}

    # Perform POST request
    r = requests.post(base_url, json={"query": query_string, "variables": variables})

    # Check the status BEFORE decoding: an error response is not guaranteed to
    # carry a JSON body, and decoding it first would raise instead of letting
    # the caller see the status code.
    if r.status_code != 200:
        return r.status_code

    # Transform API response from JSON into Python dictionary
    return json.loads(r.text)

## 1. Load in Unknown Gene data

In [56]:
# Load the table of significant unknown-gene hits; later cells read its
# `Gene` and `probeID` columns.
unknown = pd.read_csv(filepath_or_buffer='./Unknown_gene_hits_sig.csv')

In [4]:
# number of distinct gene symbols among the unknown-gene hits
unique_genes = unknown['Gene'].unique()
len(unique_genes)

115

In [5]:
# how many distinct genes have at least one row whose probeID contains 'ENSG'
ensg_rows = unknown.loc[unknown['probeID'].str.contains('ENSG')]
len(ensg_rows['Gene'].unique())

72

In [172]:
# Build `ensg_gene_dict` (ENSG ID -> gene symbol) for every unknown-gene hit.

# Rows whose probeID already contains an Ensembl gene ID.
# na=False treats a missing probeID as "not an ENSG" instead of yielding a NaN mask.
has_ensg = unknown.probeID.str.contains('ENSG', na=False)

# df that only has genes with ensgids
ensg_hits = unknown[has_ensg]

# Genes that still need an ENSG: the set difference drops any gene that has an
# ENSG probe in addition to a non-ENSG probe ID.
need_ensg = list(set(unknown.Gene.unique()) - set(ensg_hits.Gene.unique()))

# use pyensembl to get ENSG
import pyensembl
ensembl = pyensembl.EnsemblRelease(75)

gene_ensg_dict = {}  # ENSG ID -> gene symbol, for genes resolved by symbol lookup
no_ensg_genes = []   # symbols that could not be resolved; kept for inspection

for gene in need_ensg:
    # look the gene up by symbol and keep the first ID returned
    try:
        ensg_num = ensembl.gene_ids_of_gene_name(gene)
        gene_ensg_dict[ensg_num[0]] = gene
    except Exception:
        # Best-effort lookup: record the miss rather than storing a shared
        # 'no ENSG' placeholder key, which used to be sent to the API as if it
        # were a real Ensembl ID. `Exception` (not a bare except) so that
        # KeyboardInterrupt/SystemExit still propagate.
        no_ensg_genes.append(gene)

# create dictionary for ensg we already have
hits_ensg = dict(zip(ensg_hits['probeID'], ensg_hits['Gene']))

# symbol-derived IDs take precedence on a key collision (same merge order as before)
ensg_gene_dict = {**hits_ensg, **gene_ensg_dict}

In [173]:
# query to get list of companion genes
# GraphQL document: for one target ($ensgId) it requests one page of interaction
# rows from $sourceDatabase, returning both interactors (intA/intB), their
# biological roles, and the Ensembl ID / approved symbol of each side.
# NOTE(review): $index and $size default to 0 and 10 inside the query, and
# ot_query only sends ensgId and sourceDatabase, so a single page of at most
# 10 rows is requested per target even when `count` is larger — confirm this
# cap is intended.
query_string = """
     query InteractionsSectionQuery(
  $ensgId: String!
  $sourceDatabase: String
  $index: Int = 0
  $size: Int = 10
) {
  target(ensemblId: $ensgId) {
    id
    approvedName
    approvedSymbol

    interactions(
      sourceDatabase: $sourceDatabase
      page: { index: $index, size: $size }
    ) {
      count
      rows {
        intA
        intABiologicalRole
        targetA {
          id
          approvedSymbol
        }
        intB
        intBBiologicalRole
        targetB {
          id
          approvedSymbol
        }
      }
    }
  }
}
    """

# Set base URL of GraphQL API endpoint
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

In [174]:
# get network partners + direction of relationship for unknown genes
#
# For every (ENSG ID, gene symbol) pair the interactions query is run and each
# returned row is turned into one directed "regulator -> target" record.
# The gene of interest is labelled 'Difficult', its partner 'Partner'.
net_roles = []  # one dict per directed edge
net_genes = []  # list to hold all network partners
for eid, gene in ensg_gene_dict.items():
    # run ot query
    out = ot_query(query_string, eid, base_url)
    if isinstance(out, int):
        # ot_query returns the HTTP status code when the request failed
        print('something broke')
        continue
    if not isinstance(out, dict):
        continue

    # A missing/null `data`, `target` or `interactions` means there is no data
    # on the gene or no curated network partners: nothing to record.
    target = (out.get('data') or {}).get('target') or {}
    interactions = target.get('interactions') or {}

    for row in interactions.get('rows') or []:
        partner = row['targetB']
        if partner is None:
            continue
        goi_role = row['intABiologicalRole']  # role of interactor A in this row
        tar_b = partner['approvedSymbol']
        net_genes.append(tar_b)  # add gene to the list of all network partners
        if goi_role == 'regulator':
            net_roles.append({'Regulator': gene, 'Regulator_level': 'Difficult', 'Target': tar_b, 'Target_level': 'Partner', 'GOI_role': goi_role, 'Gene': gene, 'Partner': tar_b})
        elif goi_role == 'regulator target':
            net_roles.append({'Regulator': tar_b, 'Regulator_level': 'Partner', 'Target': gene, 'Target_level': 'Difficult', 'GOI_role': goi_role, 'Gene': gene, 'Partner': tar_b})
        # Any other role: skip just this row. The previous `break` silently
        # discarded every remaining interaction of the gene.

# Explicit columns keep the schema stable even when no edge was found.
net_df = pd.DataFrame(
    net_roles,
    columns=['Regulator', 'Regulator_level', 'Target', 'Target_level', 'GOI_role', 'Gene', 'Partner'],
)

net_df['relate'] = 'regulates'

In [175]:
# Display the regulator/target edge table built for the unknown ('Difficult') genes.
net_df

Unnamed: 0,Regulator,Regulator_level,Target,Target_level,GOI_role,Gene,Partner,relate
0,FOXP1,Partner,HIP1R,Difficult,regulator target,HIP1R,FOXP1,regulates
1,GRN,Difficult,TNFRSF1A,Partner,regulator,GRN,TNFRSF1A,regulates
2,SOSTDC1,Partner,WNT3,Difficult,regulator target,WNT3,SOSTDC1,regulates
3,WNT3,Difficult,LRP6,Partner,regulator,WNT3,LRP6,regulates
4,WNT3,Difficult,FZD3,Partner,regulator,WNT3,FZD3,regulates
...,...,...,...,...,...,...,...,...
61,KIF1C,Difficult,RAB6B,Partner,regulator,KIF1C,RAB6B,regulates
62,KIF1C,Difficult,RAB6C,Partner,regulator,KIF1C,RAB6C,regulates
63,CD2AP,Difficult,ACTB,Partner,regulator,CD2AP,ACTB,regulates
64,FAM83G,Partner,CD2AP,Difficult,regulator target,CD2AP,FAM83G,regulates


In [14]:
# Write the unknown-gene edge table to disk without the row index.
net_df.to_csv(path_or_buf='difficult_net_dir_relate.csv', index=None)

### Obtain drugs that target each unknown gene's partner/companion genes (if any)

In [11]:
# query to get the known drugs for a target, with the disease each drug row refers to
# NOTE: this rebinds `query_string`, which earlier held the interactions query.
query_string = """
     query InteractionsSectionQuery(
  $ensgId: String!) {
  target(ensemblId: $ensgId) {
    id
    approvedSymbol
    knownDrugs{
      rows{
        drugId
        prefName
        disease {
          name
        }
      }
    }
  }
}
    """

# need to get ensg id for genes
net_gene_ensg = {}  # partner gene symbol -> ENSG ID, or 'no ENSG' when the lookup fails
# dict.fromkeys de-duplicates while keeping first-seen order, so each symbol is
# looked up once even if it partners with several genes.
for gene in dict.fromkeys(net_genes):
    # look the partner up by gene symbol and keep the first ID returned
    try:
        net_gene_ensg[gene] = ensembl.gene_ids_of_gene_name(gene)[0]
    except Exception:
        # best-effort: mark the miss; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit are not swallowed
        net_gene_ensg[gene] = 'no ENSG'

# remove genes with no ENSG ID
net_gene_ensg_clean = {k: v for k, v in net_gene_ensg.items() if v != 'no ENSG'}


In [31]:
# Query the known drugs of every partner gene.
pg_drugdx = {}  # ENSG ID -> list of drug names found (or ['No known drugs'])
part_druggability_df = []  # will hold dicts for each row
for ng in net_gene_ensg_clean.values():
    out = ot_query(query_string, ng, base_url)
    # list to hold any druggable gene partners drugs
    part_druggability = []
    if isinstance(out, int):
        # ot_query returns the HTTP status code when the request failed
        print('something broke')

    elif isinstance(out, dict):
        target = (out.get('data') or {}).get('target')
        if target is None:
            print(f'{ng} has no data')
        elif target.get('knownDrugs') is None:
            part_druggability.append('No known drugs')
        else:
            for row in target['knownDrugs'].get('rows') or []:
                drug_name = row['prefName']
                if drug_name is None:
                    continue
                # The disease name was previously read here but never used, and
                # the lookup raised TypeError whenever `disease` was null, so it
                # is no longer read.
                if drug_name not in part_druggability:
                    # record the drug in the per-gene list, which previously
                    # never received any drug names
                    part_druggability.append(drug_name)
                part_druggability_df.append({'ENSGID': ng, 'Drug': drug_name})

    # combine into dict key:value pair
    pg_drugdx[ng] = part_druggability

ENSG00000132142 has no data


In [50]:
# Build the partner-gene / drug table for the unknown genes.
# ENSG ID -> gene symbol (inverse of net_gene_ensg_clean)
map_swap = {v: k for k, v in net_gene_ensg_clean.items()}
# Explicit columns so the 'ENSGID' lookup below still works when no drug rows
# were collected (an empty list would otherwise give a frame with no columns).
df_pg_drug_dx = pd.DataFrame(part_druggability_df, columns=['ENSGID', 'Drug'])
df_pg_drug_dx['Gene'] = df_pg_drug_dx['ENSGID'].map(map_swap)
# the same drug is collected once per returned drug row, so collapse repeats
df_pg_drug_dx.drop_duplicates(inplace=True)
df_pg_drug_dx

Unnamed: 0,ENSGID,Drug,Gene
0,ENSG00000067182,GSK-1995057,TNFRSF1A
1,ENSG00000173039,EDASALONEXENT,RELA
5,ENSG00000171720,VORINOSTAT,HDAC3
6,ENSG00000171720,BELINOSTAT,HDAC3
8,ENSG00000171720,PANOBINOSTAT LACTATE,HDAC3
...,...,...,...
304,ENSG00000169083,TESTOSTERONE UNDECANOATE,AR
309,ENSG00000163558,MIDOSTAURIN,PRKCI
318,ENSG00000163558,SOTRASTAURIN,PRKCI
320,ENSG00000163558,UCN-01,PRKCI


In [51]:
# Save the unknown-gene partner/drug table (no row index) for the neo4j import.
df_pg_drug_dx.to_csv(path_or_buf='unknown_genes_network_drugs.csv', index=None)

## 2. Novel Genes

In [134]:
# Load the novel-gene hit table.
novel_hits = pd.read_csv(filepath_or_buffer='tier1hits_all_thresh2.csv')

# MAPT, ADORA2B and KCNN4 are tier 2 gene targets, so they are excluded here
rem = ['MAPT', 'ADORA2B', 'KCNN4']

# keep every row whose Gene is not one of the excluded symbols
hits = novel_hits[~novel_hits['Gene'].isin(rem)]

In [176]:
# Build `ensg_gene_dict` (ENSG ID -> gene symbol) for the novel hits.

# rows whose probeID already contains an Ensembl gene ID
ensg_mask = hits['probeID'].str.contains('ENSG')
ensg_hits = hits.loc[ensg_mask]

# ENSG -> gene for the rows that already carry an ENSG probeID
hits_ensg = {
    probe: symbol
    for probe, symbol in zip(ensg_hits['probeID'], ensg_hits['Gene'])
}

# genes with no ENSG probeID at all; these are the ones annotated by hand below
all_genes = set(hits['Gene'].unique())
genes_with_ensg = set(ensg_hits['Gene'].unique())
need_ensg = list(all_genes - genes_with_ensg)

# hand-annotated ENSG ID -> gene symbol
ensg_gene = {
    'ENSG00000146904': 'EPHA1',
    'ENSG00000099365': 'STX1B',
    'ENSG00000165916': 'PSMC3',
    'ENSG00000204531': 'POU5F1',
    'ENSG00000204539': 'CDSN',
    'ENSG00000204516': 'MICB',
    'ENSG00000198648': 'STK39',
    'ENSG00000149534': 'MS4A2',
    'ENSG00000120885': 'CLU',
    'ENSG00000204540': 'PSORS1C1',
    'ENSG00000120915': 'EPHX2',
    'ENSG00000204385': 'SLC44A4',
    'ENSG00000151224': 'MAT1A',
    'ENSG00000099364': 'FBXL19',
}

# probe-derived entries first, then the hand annotations (which win on a clash)
ensg_gene_dict = dict(hits_ensg)
ensg_gene_dict.update(ensg_gene)

In [136]:
# query to get the list of companion (interaction partner) genes for a target
# GraphQL document: for one target ($ensgId) it requests one page of interaction
# rows from $sourceDatabase, returning both interactors (intA/intB), their
# biological roles, and the Ensembl ID / approved symbol of each side.
# NOTE(review): $index and $size default to 0 and 10 inside the query, and
# ot_query only sends ensgId and sourceDatabase, so a single page of at most
# 10 rows is requested per target — confirm this cap is intended.
novel_query_string = """
     query InteractionsSectionQuery(
  $ensgId: String!
  $sourceDatabase: String
  $index: Int = 0
  $size: Int = 10
) {
  target(ensemblId: $ensgId) {
    id
    approvedName
    approvedSymbol

    interactions(
      sourceDatabase: $sourceDatabase
      page: { index: $index, size: $size }
    ) {
      count
      rows {
        intA
        intABiologicalRole
        targetA {
          id
          approvedSymbol
        }
        intB
        intBBiologicalRole
        targetB {
          id
          approvedSymbol
        }
      }
    }
  }
}
    """

# Set base URL of GraphQL API endpoint
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

In [137]:
# get network partners + direction of relationship for novel genes
#
# For every (ENSG ID, gene symbol) pair the interactions query is run and each
# returned row is turned into one directed "regulator -> target" record.
# The gene of interest is labelled 'Novel', its partner 'Partner'.
novel_net_roles = []  # one dict per directed edge
novel_net_genes = []  # list to hold all network partners
for eid, gene in ensg_gene_dict.items():
    # run ot query
    out = ot_query(novel_query_string, eid, base_url)
    if isinstance(out, int):
        # ot_query returns the HTTP status code when the request failed
        print('something broke')
        continue
    if not isinstance(out, dict):
        continue

    # A null `target` (no data on the gene) used to raise TypeError here; it is
    # now treated like "no curated network partners", matching the
    # unknown-gene loop.
    target = (out.get('data') or {}).get('target') or {}
    interactions = target.get('interactions') or {}

    for row in interactions.get('rows') or []:
        partner = row['targetB']
        if partner is None:
            continue
        goi_role = row['intABiologicalRole']  # role of interactor A in this row
        tar_b = partner['approvedSymbol']
        novel_net_genes.append(tar_b)  # add gene to the list of all network partners
        if goi_role == 'regulator':
            novel_net_roles.append({'Regulator': gene, 'Regulator_level': 'Novel', 'Target': tar_b, 'Target_level': 'Partner', 'GOI_role': goi_role, 'Gene': gene, 'Partner': tar_b})
        elif goi_role == 'regulator target':
            novel_net_roles.append({'Regulator': tar_b, 'Regulator_level': 'Partner', 'Target': gene, 'Target_level': 'Novel', 'GOI_role': goi_role, 'Gene': gene, 'Partner': tar_b})
        # Any other role: skip just this row. The previous `break` silently
        # discarded every remaining interaction of the gene.

# Explicit columns keep the schema stable even when no edge was found.
novel_net_df = pd.DataFrame(
    novel_net_roles,
    columns=['Regulator', 'Regulator_level', 'Target', 'Target_level', 'GOI_role', 'Gene', 'Partner'],
)

novel_net_df['relate'] = 'regulates'

In [45]:
# query to get the known drugs for a target, with the disease each drug row refers to
novel_drug_query_string = """
     query InteractionsSectionQuery(
  $ensgId: String!) {
  target(ensemblId: $ensgId) {
    id
    approvedSymbol
    knownDrugs{
      rows{
        drugId
        prefName
        disease {
          name
        }
      }
    }
  }
}
    """

# need to get ensg id for genes
novel_net_gene_ensg = {}  # partner gene symbol -> ENSG ID, or 'no ENSG' when the lookup fails
# dict.fromkeys de-duplicates while keeping first-seen order, so each symbol is
# looked up once even if it partners with several genes.
for gene in dict.fromkeys(novel_net_genes):
    # look the partner up by gene symbol and keep the first ID returned
    try:
        novel_net_gene_ensg[gene] = ensembl.gene_ids_of_gene_name(gene)[0]
    except Exception:
        # best-effort: mark the miss; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit are not swallowed
        novel_net_gene_ensg[gene] = 'no ENSG'

# remove genes with no ENSG ID
novel_net_gene_ensg_clean = {k: v for k, v in novel_net_gene_ensg.items() if v != 'no ENSG'}


In [47]:
# Query the known drugs of every partner gene of the novel genes.
novel_pg_drugdx = {}  # ENSG ID -> list of drug names found (or ['No known drugs'])
novel_part_druggability_df = []  # will hold dicts for each row
for ng in novel_net_gene_ensg_clean.values():
    # Use the novel drug query explicitly. The global `query_string` is rebound
    # by other cells, so relying on it made this loop depend on which cell
    # happened to run last.
    out = ot_query(novel_drug_query_string, ng, base_url)
    # list to hold any druggable gene partners drugs
    part_druggability = []
    if isinstance(out, int):
        # ot_query returns the HTTP status code when the request failed
        print('something broke')

    elif isinstance(out, dict):
        target = (out.get('data') or {}).get('target')
        if target is None:
            print(f'{ng} has no data')
        elif target.get('knownDrugs') is None:
            part_druggability.append('No known drugs')
        else:
            for row in target['knownDrugs'].get('rows') or []:
                drug_name = row['prefName']
                if drug_name is None:
                    continue
                # The disease name was previously read here but never used, and
                # the lookup raised TypeError whenever `disease` was null, so it
                # is no longer read.
                if drug_name not in part_druggability:
                    # record the drug in the per-gene list, which previously
                    # never received any drug names
                    part_druggability.append(drug_name)
                novel_part_druggability_df.append({'ENSGID': ng, 'Drug': drug_name})

    # combine into dict key:value pair
    novel_pg_drugdx[ng] = part_druggability

In [49]:
# Build the partner-gene / drug table for the novel genes.
# ENSG ID -> gene symbol (inverse of novel_net_gene_ensg_clean)
map_swap = {v: k for k, v in novel_net_gene_ensg_clean.items()}
# Explicit columns so the 'ENSGID' lookup below still works when no drug rows
# were collected (an empty list would otherwise give a frame with no columns).
novel_df_pg_drug_dx = pd.DataFrame(novel_part_druggability_df, columns=['ENSGID', 'Drug'])
novel_df_pg_drug_dx['Gene'] = novel_df_pg_drug_dx['ENSGID'].map(map_swap)
# the same drug is collected once per returned drug row, so collapse repeats
novel_df_pg_drug_dx.drop_duplicates(inplace=True)
novel_df_pg_drug_dx

Unnamed: 0,ENSGID,Drug,Gene
0,ENSG00000197405,AVACOPAN,C5AR1
4,ENSG00000197405,AVDORALIMAB,C5AR1
14,ENSG00000135744,GSK-2586881,AGT
18,ENSG00000101266,SILMITASERTIB,CSNK2A1
24,ENSG00000141736,PERTUZUMAB,ERBB2
...,...,...,...
519,ENSG00000001626,CROFELEMER,CFTR
520,ENSG00000001626,TEZACAFTOR,CFTR
521,ENSG00000001626,IVACAFTOR,CFTR
523,ENSG00000001626,LUMACAFTOR,CFTR


In [52]:
# Save the novel-gene partner/drug table (no row index) for the neo4j import.
novel_df_pg_drug_dx.to_csv(path_or_buf='novel_genes_network_drugs.csv', index=None)

In [111]:
# Stack the novel-gene and unknown-gene partner/drug tables (novel first) and
# drop rows that appear in both.
drug_tables = [novel_df_pg_drug_dx, df_pg_drug_dx]
all_net_drug = pd.concat(drug_tables)
all_net_drug = all_net_drug.drop_duplicates()

In [177]:
# Stack the novel-gene and unknown-gene edge tables (novel first) and drop
# exact duplicate rows. The per-row Regulator_level / Target_level columns
# already record which side is 'Novel', 'Difficult' or 'Partner'.
edge_tables = [novel_net_df, net_df]
net_partners = pd.concat(edge_tables)
net_partners = net_partners.drop_duplicates()
net_partners

Unnamed: 0,Regulator,Regulator_level,Target,Target_level,GOI_role,Gene,Partner,relate
0,C5AR1,Partner,CR1,Novel,regulator target,CR1,C5AR1,regulates
1,C5AR2,Partner,CR1,Novel,regulator target,CR1,C5AR2,regulates
2,ACE,Novel,AGT,Partner,regulator,ACE,AGT,regulates
3,CSNK2A1,Partner,ACE,Novel,regulator target,ACE,CSNK2A1,regulates
4,AGT,Partner,ACE,Novel,regulator target,ACE,AGT,regulates
...,...,...,...,...,...,...,...,...
61,KIF1C,Difficult,RAB6B,Partner,regulator,KIF1C,RAB6B,regulates
62,KIF1C,Difficult,RAB6C,Partner,regulator,KIF1C,RAB6C,regulates
63,CD2AP,Difficult,ACTB,Partner,regulator,CD2AP,ACTB,regulates
64,FAM83G,Partner,CD2AP,Difficult,regulator target,CD2AP,FAM83G,regulates


In [179]:
# Attach drug rows to every partner edge. how='right' keeps each edge from
# net_partners even when its Partner has no row in all_net_drug.
all_df = pd.merge(
    all_net_drug,
    net_partners,
    how='right',
    left_on='Gene',
    right_on='Partner',
)

In [181]:
# Build the two edge lists (partner -> drug, regulator -> target) from all_df.

# Partner -> drug edges.
# all_df comes from a right merge, so a partner without any drug has NaN in both
# 'Gene_x' and 'Drug'. Those rows are dropped so no empty "Drug Target" edge is
# produced. .copy() makes an independent frame, so the column assignments below
# do not operate on a slice of all_df.
drug_weight = all_df[['Gene_x', 'Drug']].dropna(subset=['Drug']).copy()
drug_weight.columns = ['Source', 'Target']
drug_weight['relation'] = 'Drug Target'
drug_weight['Source_level'] = 'Partner'
drug_weight['Target_level'] = 'Drug'

# Regulator -> target edges; the relation column carries the GOI_role value.
net_weight = all_df[['Regulator', 'Target', 'GOI_role', 'Regulator_level', 'Target_level']].copy()
net_weight.columns = ['Source', 'Target', 'relation', 'Source_level', 'Target_level']

In [182]:
# Combine both edge lists into one table: gene-gene edges first, then drug edges.
edge_lists = [net_weight, drug_weight]
cyto = pd.concat(edge_lists)

In [184]:
# Write the de-duplicated edge list to disk without the row index.
unique_edges = cyto.drop_duplicates()
unique_edges.to_csv(path_or_buf='cyto_edges.csv', index=None)

In [78]:
# Join drugs to partner edges with the default merge type (only partners that
# have a drug row are kept) and export the result without the row index.
partner_drug_table = pd.merge(
    all_net_drug,
    net_partners,
    left_on='Gene',
    right_on='Partner',
)
partner_drug_table.to_csv(path_or_buf='network_partner_genes_network_drugs.csv', index=None)