# Workflow 1, Module 3

Given a disease, first find chemical agents that make the disease worse. Then look for genes that increase the amount of or response to the toxicant, and try to reduce the amount or action of those genes.

In [1]:
input_disease = 'MONDO:0005148'  #Type 2 diabetes

## Functions

In [2]:
import requests
import pandas as pd

robokop_server = 'robokop.renci.org'

def quick(question):
    url=f'http://{robokop_server}:80/api/simple/quick/'
    response = requests.post(url,json=question)
    print( f"Return Status: {response.status_code}" )
    if response.status_code == 200:
        return response.json()
    return response

In [9]:
def make_N_step_question(types,curies,props,forwards):
    question = {
                'machine_question': {
                    'nodes': [],
                    'edges': []
                }
            }
    for i,t in enumerate(types):
        newnode = {'id': f'n{i}', 'type': t}
        if curies[i] is not None:
            newnode['curie'] = curies[i]
        question['machine_question']['nodes'].append(newnode)
        if i > 0:
            if forwards[i-1]:
                edge = {'id': f'e{i}', 'source_id': f'n{i-1}', 'target_id': f'n{i}'}
            else:
                edge = {'id': f'e{i}', 'source_id': f'n{i}', 'target_id': f'n{i-1}'}
            if props[i-1] is not None:
                edge['type'] = props[i-1]
            question['machine_question']['edges'].append( edge )
    return question

In [4]:
def merge_answers(answerdict):
    """Take a dictionary of answer lists and return a single sorted answer list."""
    final_answer = {'misc_info': {'natural_question': None, 'num_total_paths': 0}, 'answers': []}
    for k,v in answerdict.items():
        if 'filename' in v: 
            final_answer['filename'] = v['filename']
        if 'timestamp' in v:
            final_answer['timestamp'] = v['timestamp']
        if 'misc_info' in v:
            final_answer['misc_info']['num_total_paths'] += v['misc_info']['num_total_paths']
        if 'answers' in v:
            final_answer['answers'].extend(v['answers'])
    final_answer['answers'].sort(key = lambda a: -a['score'])

    return final_answer

In [18]:
import pandas as pd
import json
def answers2frame(graph_answers,properties_a,properties_b):
    answers = []
    for graph_answer in graph_answers['answers']:
        try:
            #The return nodes don't necessarily come out in order. We can recognize the disease and the gene
            # by type, but we have to get the original toxicant by finding which node has the correct id.
            for edge in graph_answer['edges']:
                if edge['type']=='contributes_to':
                    toxicant_id = edge['source_id']
                elif edge['type'] in properties_a:
                    gene_tox = edge
                elif edge['type'] in properties_b:
                    chem_gene = edge
            for node in graph_answer['nodes']:
                if node['type'] == 'gene':
                    gene = node
                elif node['type'] == 'chemical_substance':
                    if node['id'] == toxicant_id:
                        toxicant=node
                    else:
                        drug=node
            ans = { 'score': graph_answer['score'],
                    'toxicant': toxicant['name'] if 'name' in toxicant else toxicant['id'],
                    'gene': gene['name'],
                    'chemical': drug['name'] if 'name' in drug else drug['id'],
                    'gene->toxicant': gene_tox['type'],
                    'chem->gene': chem_gene['type']}
        except IndexError:
            #this happens if chemical and toxicant are the same thing
            continue
        except UnboundLocalError:
            #this happens if chemical and toxicant are the same thing
            continue
        answers.append(ans)
    df = pd.DataFrame(answers)
    ordered_columns = ['score','chemical','chem->gene','gene','gene->toxicant','toxicant']
    df = df[ordered_columns]
    return df

## Set up the property lists

There are several properties at each step that are acceptable for our purposes. 

In [7]:
properties_a=['increases_synthesis_of',
              'increases_abundance_of',
              'increases_response_to',
              'decreases_updake_of']
properties_b=['decreases_activity_of',
              'decreases_expression_of',
              'increases_degradation_of',
              'decreases_stability_of',
              'decreases_synthesis_of',
              'decreases_secretion_of']
types = ['disease',
        'chemical_substance',
        'gene',
        'chemical_substance']
curies = [input_disease,None,None,None]
forwards = [False,False,False]

In [10]:
single_q = make_N_step_question(types,curies,['contributes_to',properties_a,properties_b],forwards)
single_answer = quick(single_q)

Return Status: 200


In [19]:

df = answers2frame(single_answer,properties_a,properties_b)
from IPython.display import display
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)

Unnamed: 0,score,chemical,chem->gene,gene,gene->toxicant,toxicant
0,1.463079,cholesterol,decreases_expression_of,LDLR,increases_response_to,zinc atom
1,1.397945,cholesterol,decreases_expression_of,LDLR,increases_response_to,pravastatin
2,1.371962,cholesterol,decreases_expression_of,LDLR,increases_response_to,Dietary Fats
3,1.352535,glucose,decreases_expression_of,NOS3,increases_response_to,Dietary Fats
4,1.342986,glucose,decreases_expression_of,SOD2,increases_response_to,L-ascorbic acid
5,1.327458,glucose,decreases_expression_of,SIRT1,increases_response_to,Dietary Fats
6,1.310164,metformin,decreases_activity_of,TNF,increases_synthesis_of,glutathione
7,1.305033,cholesterol,decreases_activity_of,PON1,increases_response_to,pesticide
8,1.304226,glucose,decreases_expression_of,PPARA,increases_response_to,perfluorooctanoic acid
9,1.297882,glucose,decreases_activity_of,NR1I2,increases_response_to,bisphenol A


Both glucose and cholesterol also show up in the chemical column, which is meant to be the potential treatment!   This seems counterintuitive, but what we are seeing is that the query is too simple.  The question posits a linear relationship: there is a toxicant, the action of a gene gets rid of the toxicant, and the treatment makes the gene do this more. 

But actual biology is not so simple!  Specifically, feedback loops substantially complicate the picture.  If a gene is involved in cleaning up a toxicant, then the presence of that toxicant usually stimulates the gene.  So it's no suprise to see glucose on the treatment side, as it does tend to stimulate the metabolic processes that are disregulated in diabetes.

We don't have the ability to model things so directly, but we can at least filter out rows where the chemical has a known causal effect on diabetes.  We'd also like to flag answers that are already known diabetes treatments.

In [20]:
#Get chemicals that contribute to the disease
cq=make_N_step_question(['disease','chemical_substance'],[input_disease,None],['contributes_to'],[False])
aq=quick(cq)

Return Status: 200


In [21]:
contributors = []
for answer in aq['answers']:
    nodes = answer['nodes']
    for node in nodes:
        if node['type'] == 'chemical_substance':
            contributors.append(node)
df_contributors = pd.DataFrame(contributors)
df_contributors = df_contributors[['name']]
df_contributors['contributors']=True

In [22]:
#Get treatments for the disease
tq=make_N_step_question(['disease','chemical_substance'],[input_disease,None],['treats'],[False])
ta=quick(tq)
treatments = []
for answer in ta['answers']:
    nodes = answer['nodes']
    for node in nodes:
        if node['type'] == 'chemical_substance':
            treatments.append(node)
df_treatments = pd.DataFrame(treatments)
df_treatments = df_treatments[['name']]
df_treatments['treatments']=True

Return Status: 200


In [23]:
#Merge in the contributors and treatments.
dfx = pd.merge(df,df_contributors,left_on='chemical',right_on='name',how='left')
dfx = pd.merge(dfx,df_treatments,left_on='chemical',right_on='name',how='left')
dfx = dfx.sort_values(by='score',ascending=False)
dfx.fillna(False,inplace=True)
del dfx['name_x']
del dfx['name_y']

In [24]:
dfx[(dfx.contributors==True) & (dfx.treatments==True)].head()

Unnamed: 0,score,chemical,chem->gene,gene,gene->toxicant,toxicant,contributors,treatments
51,0.730609,glyburide,decreases_activity_of,ABCC1,increases_response_to,silver atom,True,True
53,0.717957,zinc atom,decreases_activity_of,SLC6A3,increases_response_to,manganese atom,True,True
70,0.628998,zinc atom,increases_degradation_of,APP,increases_response_to,copper atom,True,True
76,0.621766,zinc atom,increases_degradation_of,APP,increases_synthesis_of,superoxide,True,True
169,0.503401,chromium atom,decreases_activity_of,TNF,increases_synthesis_of,glutathione,True,True


In [29]:
dfx_no_contrib = dfx[ (dfx.contributors == False) | (dfx.treatments == True)]
moretox = ['cholesterol','lipid','cortisol','reactive oxygen species','hydrogen peroxide','fatty acid']
newdfx = dfx_no_contrib[ ~dfx_no_contrib.chemical.isin(moretox)]
newdfx

Unnamed: 0,score,chemical,chem->gene,gene,gene->toxicant,toxicant,contributors,treatments
6,1.310164,metformin,decreases_activity_of,TNF,increases_synthesis_of,glutathione,False,True
16,1.236173,metformin,decreases_activity_of,TNF,increases_response_to,chlorpromazine,False,True
22,1.042370,rosiglitazone,decreases_secretion_of,TNF,increases_synthesis_of,glutathione,False,True
23,1.026845,ATP,decreases_activity_of,AKT1,increases_response_to,glucose,False,False
25,0.994647,ethanol,decreases_expression_of,SIRT1,increases_response_to,glucose,False,False
26,0.961246,rosiglitazone,decreases_expression_of,CCL2,increases_response_to,arsane,False,True
27,0.956541,rosiglitazone,decreases_activity_of,GSK3B,increases_response_to,cadmium atom,False,True
28,0.952145,pioglitazone,decreases_activity_of,CYP3A4,increases_response_to,nefazodone,False,True
29,0.950431,rosiglitazone,decreases_expression_of,IL6,increases_response_to,N-nitrosodiethylamine,False,True
30,0.946279,rosiglitazone,decreases_secretion_of,TNF,increases_response_to,chlorpromazine,False,True


In [30]:
newdfx.groupby('chemical').nth(0).reset_index().sort_values(by='score',ascending=False)

Unnamed: 0,chemical,chem->gene,contributors,gene,gene->toxicant,score,toxicant,treatments
49,metformin,decreases_activity_of,False,TNF,increases_synthesis_of,1.310164,glutathione,True
65,rosiglitazone,decreases_secretion_of,False,TNF,increases_synthesis_of,1.042370,glutathione,True
5,ATP,decreases_activity_of,False,AKT1,increases_response_to,1.026845,glucose,False
37,ethanol,decreases_expression_of,False,SIRT1,increases_response_to,0.994647,glucose,False
59,pioglitazone,decreases_activity_of,False,CYP3A4,increases_response_to,0.952145,nefazodone,True
47,lipopolysaccharide,decreases_expression_of,False,SIRT1,increases_response_to,0.740294,glucose,False
44,glyburide,decreases_activity_of,True,ABCC1,increases_response_to,0.730609,silver atom,True
74,troglitazone,decreases_expression_of,False,TNF,increases_synthesis_of,0.724362,glutathione,True
77,zinc atom,decreases_activity_of,True,SLC6A3,increases_response_to,0.717957,manganese atom,True
62,protoporphyrin,increases_degradation_of,False,HIF1A,increases_response_to,0.672333,iron atom,False


Notes on many of these results are in the 3a notebook.

protoporphyrin: part of heme: http://diabetes.diabetesjournals.org/content/57/6/1526 "Treatment of Obese Diabetic Mice With a Heme Oxygenase Inducer Reduces Visceral and Subcutaneous Adiposity, Increases Adiponectin Levels, and Improves Insulin Sensitivity and Glucose Tolerance"

wortmannin: steroid PI3K inhibitor "Wortmannin, a PI3-Kinase Inhibitor: Promoting Effect on Insulin Secretion from Pancreatic β Cells through a cAMP-Dependent Pathway" https://www.sciencedirect.com/science/article/pii/S0006291X00925142

LY294002: PI3K inhibitor also. PI3K inhibitors tend to cause hyperglycemia

trans-resveratrol: antioxidant (the bioactive version is the trans molecule) "Antidiabetic Effects of Resveratrol: The Way Forward in Its Clinical Utility"  https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5165160/

nicotinic acid: niacin.  used to treat high cholesterol, and there's a lot of crosstalk there with diabetes, eg: http://www.jlr.org/content/58/1/31.short


### Rare disease version

The above was using the identifier for the common-disease type 2 diabetes.  Here, we try the same analysis, but using rare-disease versions of diabetes.  This list is found by looking for ontological descendants of diabetes that are also rare genetic disorders.  (See notebook WF1Mod1_Ontology.ipynb)

In [31]:
rare_diseases = ['MONDO:0014785',
'MONDO:0011986',
'MONDO:0012819',
'MONDO:0012513',
'MONDO:0014458',
'MONDO:0012970',
'MONDO:0013478',
'MONDO:0013242',
'MONDO:0009575',
'MONDO:0016391',
'MONDO:0010802',
'MONDO:0016422',
'MONDO:0012971',
'MONDO:0011955',
'MONDO:0015308',
'MONDO:0014991',
'MONDO:0009517',
'MONDO:0013647',
'MONDO:0011363',
'MONDO:0014523',
'MONDO:0011386',
'MONDO:0017230',
'MONDO:0008763',
'MONDO:0012966',
'MONDO:0012436',
'MONDO:0012348',
'MONDO:0012520',
'MONDO:0007455',
'MONDO:0008812',
'MONDO:0020569',
'MONDO:0018105',
'MONDO:0012818',
'MONDO:0007669',
'MONDO:0014686',
'MONDO:0007452',
'MONDO:0013225',
'MONDO:0007453',
'MONDO:0012480',
'MONDO:0009874',
'MONDO:0013240',
'MONDO:0010020',
'MONDO:0012962',
'MONDO:0012923',
'MONDO:0011502',
'MONDO:0010800',
'MONDO:0014589',
'MONDO:0012963',
'MONDO:0008696',
'MONDO:0011027',
'MONDO:0011273',
'MONDO:0013673',
'MONDO:0018581',
'MONDO:0018911',
'MONDO:0011073',
'MONDO:0010773',
'MONDO:0011667',
'MONDO:0014488',
'MONDO:0011668',
'MONDO:0012192',
'MONDO:0020525',
'MONDO:0019207',
'MONDO:0010785',
'MONDO:0009192',
'MONDO:0018320',
'MONDO:0015967',
'MONDO:0014674',
'MONDO:0000208',
'MONDO:0018625',
'MONDO:0008185',
'MONDO:0009419',
'MONDO:0010894',
'MONDO:0011643',
'MONDO:0012071',
'MONDO:0009101',
'MONDO:0000065',
'MONDO:0018575',
'MONDO:0012969',
'MONDO:0014497',
'MONDO:0018629',
'MONDO:0018883',
'MONDO:0012522',
'MONDO:0010026',
'MONDO:0011072',
'MONDO:0008491']

In [32]:
outputs = {}
for rare_id in rare_diseases:
    rarecuries = [rare_id,None,None,None]
    single_disease_question = make_N_step_question(types,rarecuries,['contributes_to',properties_a,properties_b],forwards)
    rare_answer = quick(single_disease_question)
    if rare_answer == 'No results found':
        npaths = 0
    else:
        npaths = rare_answer['misc_info']['num_total_paths']
    print(rare_id,npaths)
    if npaths > 0:
        outputs[rare_id] = rare_answer

Return Status: 200
MONDO:0014785 0
Return Status: 200
MONDO:0011986 0
Return Status: 200
MONDO:0012819 250
Return Status: 200
MONDO:0012513 0
Return Status: 200
MONDO:0014458 0
Return Status: 200
MONDO:0012970 0
Return Status: 200
MONDO:0013478 0
Return Status: 200
MONDO:0013242 0
Return Status: 200
MONDO:0009575 0
Return Status: 200
MONDO:0016391 0
Return Status: 200
MONDO:0010802 0
Return Status: 200
MONDO:0016422 0
Return Status: 200
MONDO:0012971 0
Return Status: 200
MONDO:0011955 0
Return Status: 200
MONDO:0015308 0
Return Status: 200
MONDO:0014991 0
Return Status: 200
MONDO:0009517 0
Return Status: 200
MONDO:0013647 0
Return Status: 200
MONDO:0011363 0
Return Status: 200
MONDO:0014523 0
Return Status: 200
MONDO:0011386 0
Return Status: 200
MONDO:0017230 0
Return Status: 200
MONDO:0008763 0
Return Status: 200
MONDO:0012966 0
Return Status: 200
MONDO:0012436 0
Return Status: 200
MONDO:0012348 0
Return Status: 200
MONDO:0012520 0
Return Status: 200
MONDO:0007455 0
Return Status: 200

In [33]:
outputs.keys()

dict_keys(['MONDO:0012819', 'MONDO:0008491'])

In [34]:
rare_answers = merge_answers(outputs)
df = answers2frame(rare_answers,properties_a,properties_b)
dfg = df.groupby('chemical').nth(0).reset_index().sort_values(by='score',ascending=False)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(dfg)

Unnamed: 0,chemical,chem->gene,gene,gene->toxicant,score,toxicant
67,glucose,decreases_expression_of,PPARA,increases_response_to,0.903801,bezafibrate
93,olanzapine,decreases_activity_of,DRD2,increases_response_to,0.571318,clozapine
31,chlorpromazine,decreases_activity_of,DRD2,increases_response_to,0.449,clozapine
56,dopamine,decreases_activity_of,CYP2D6,increases_response_to,0.416764,risperidone
51,dexamethasone,decreases_expression_of,TNF,increases_response_to,0.402513,chlorpromazine
40,clozapine,decreases_activity_of,ABCB1,increases_response_to,0.393567,risperidone
117,simvastatin,decreases_expression_of,TNF,increases_response_to,0.392198,chlorpromazine
98,paroxetine,decreases_activity_of,CYP2D6,increases_response_to,0.390332,risperidone
41,cocaine,decreases_expression_of,DRD2,increases_response_to,0.388678,clozapine
133,troglitazone,decreases_expression_of,TNF,increases_response_to,0.385691,chlorpromazine


olanzapinem chlorpromazine,cloazpine: antipsychotics that apparently can lead to glucose dysregulation

paroxetine (paxil): SSRI inhibitor, https://www.sciencedaily.com/releases/2012/12/121214091616.htm "Antidepressant could do double duty as diabetes drug, study shows (2012).

enalapril: high blood pressure medication 