In [1]:
from BioLink.biolink_client import BioLinkWrapper
import pandas as pd
import cache_magic

# Workflow II Rare Disease Candidates

In [2]:
from Modules.Mod0_lookups import LookUp

# workflow input is a disease identifier
lu = LookUp()

Mod O DiseaseGeneLookup metadata:
{'data_type': 'disease',
 'input_type': {'complexity': 'single', 'id_type': ['MONDO', 'DO', 'OMIM']},
 'limit': None,
 'output_type': {'complexity': 'set', 'id_type': 'HGNC'},
 'predicate': 'blm:gene associated with condition',
 'source': 'Monarch Biolink',
 'taxon': 'human'}


In [3]:
input_disease = 'MONDO:0019600' # xeroderma pigmentosum  
input_object = {
    'input': input_disease,
    'parameters': {
        'taxon': 'human',
        'threshold': None,
    },
}

lu.load_input_object(input_object=input_object)
input_object = lu.input_object

{'description': 'Xeroderma pigmentosum (XP) is a rare genodermatosis '
                'characterized by extreme sensitivity to ultraviolet '
                '(UV)-induced changes in the skin and eyes, and multiple skin '
                'cancers. It is subdivided into 8 complementation groups, '
                'according to the affected gene: classical XP (XPA to XPG) and '
                'XP variant (XPV) (see these terms).',
 'id': 'MONDO:0019600',
 'label': 'xeroderma pigmentosum'}


In [4]:
# get genes associated with disease from Biolink
disease_associated_genes = lu.disease_geneset_lookup()
# create list of gene curies for downstream module input
%cache input_curie_set = disease_associated_genes[['hit_id', 'hit_symbol']].to_dict(orient='records')
# # show the 
disease_associated_genes['modules'] = 'Mod0'
disease_associated_genes
# genes to investigate TNF, FANCD2OS

Loading cached value for variable 'input_curie_set'. Time since caching: 3:09:17.658787


Unnamed: 0,input_id,input_symbol,hit_id,hit_symbol,sources,modules
0,MONDO:0019600,xeroderma pigmentosum,HGNC:12814,XPA,"omim, ctd, clinvar, orphane",Mod0
1,MONDO:0019600,xeroderma pigmentosum,HGNC:12816,XPC,"ctd, orphane, omim, ctd, clinvar, corie, clinvar",Mod0
2,MONDO:0019600,xeroderma pigmentosum,HGNC:2718,DDB2,"orphane, ctd, omim, clinvar, corie",Mod0
3,MONDO:0019600,xeroderma pigmentosum,HGNC:3434,ERCC2,"orphane, omim, ctd, clinvar, corie",Mod0
4,MONDO:0019600,xeroderma pigmentosum,HGNC:3435,ERCC3,"omim, ctd, clinvar, orphane",Mod0
5,MONDO:0019600,xeroderma pigmentosum,HGNC:3436,ERCC4,"ctd, omim, clinvar, orphane",Mod0
6,MONDO:0019600,xeroderma pigmentosum,HGNC:3437,ERCC5,"omim, ctd, corie, orphane",Mod0
7,MONDO:0019600,xeroderma pigmentosum,HGNC:43690,BIVM-ERCC5,clinvar,Mod0
8,MONDO:0019600,xeroderma pigmentosum,HGNC:9181,POLH,"ctd, ctd, omim, orphane, clinvar, corie",Mod0


# Mod1A Functional Similarity
## Find similar genes based on GO functional annotations using OntoBio Jaccard similarity

In [5]:
from Modules.Mod1A_functional_sim import FunctionalSimilarity

## Mod1A_human

In [6]:
# Module specification
mod1a_input_object_human = {
    'input': input_curie_set,
    'parameters': {
        'taxon': 'human',
        'threshold': 0.75,  # jaccard index threshold
    },
}
    
func_sim_human = FunctionalSimilarity()

Mod1A Functional Similarity metadata:
{'input_type': {'complexity': 'set', 'data_type': 'gene', 'id_type': 'HGNC'},
 'output_type': {'complexity': 'set', 'data_type': 'gene', 'id_type': 'HGNC'},
 'predicate': ['blm:macromolecular machine to biological process association',
               'macromolecular machine to molecular activity association'],
 'source': 'Monarch Biolink'}


In [7]:
func_sim_human.load_input_object(mod1a_input_object_human)
func_sim_human.load_gene_set() 

In [8]:
func_sim_human.load_associations()

In [9]:
mod1a_results = func_sim_human.compute_similarity()

In [31]:
Mod1A_results_human = pd.DataFrame(mod1a_results)
Mod1A_results_human = Mod1A_results_human[Mod1A_results_human['hit_symbol'] != Mod1A_results_human['input_symbol']].sort_values('score', ascending=False)
Mod1A_results_human['module'] = 'Mod1A'
Mod1A_results_human

Unnamed: 0,hit_id,hit_symbol,input_id,input_symbol,score,module
17,HGNC:21700,RAD9B,HGNC:3583,FANCB,0.92,Mod1A
21,HGNC:3586,FANCE,HGNC:3583,FANCB,0.916667,Mod1A
28,HGNC:3583,FANCB,HGNC:3586,FANCE,0.916667,Mod1A
15,HGNC:26171,FAAP100,HGNC:3583,FANCB,0.857143,Mod1A
22,HGNC:24994,INIP,HGNC:3583,FANCB,0.851852,Mod1A
26,HGNC:21700,RAD9B,HGNC:3586,FANCE,0.84,Mod1A
20,HGNC:28467,FAAP24,HGNC:3583,FANCB,0.827586,Mod1A
16,HGNC:25021,ASTE1,HGNC:3583,FANCB,0.793103,Mod1A
18,HGNC:22223,EEPD1,HGNC:3583,FANCB,0.785714,Mod1A
25,HGNC:26171,FAAP100,HGNC:3586,FANCE,0.785714,Mod1A


# MOD1B Phenotype Similarity
## Find similar genes based on OwlSim calculated Phenotype Similarity

## Mod1B Human

In [11]:
from Modules.Mod1B1_phenotype_similarity import PhenotypeSimilarity

In [12]:
# Module specification
mod1b_input_object_human = {
    'input': input_curie_set,
     'parameters': {
        'taxon': 'human',
        'threshold': .50,
    },
}


pheno_sim_human = PhenotypeSimilarity()

Mod1B Phenotype Similarity metadata:
{'input_type': {'complexity': 'set', 'data_type': 'gene', 'id_type': 'HGNC'},
 'output_type': {'complexity': 'set', 'data_type': 'gene', 'id_type': 'HGNC'},
 'predicate': ['blm:has phenotype'],
 'source': 'Monarch Biolink'}


In [13]:
pheno_sim_human.load_input_object(mod1b_input_object_human)
pheno_sim_human.load_gene_set()

In [14]:
pheno_sim_human.load_associations()

In [15]:
mod1b_results = pheno_sim_human.compute_similarity()

In [16]:
# compute phenotype similarity
Mod1B_results = pd.DataFrame(mod1b_results)
Mod1B_results = Mod1B_results[Mod1B_results['hit_symbol'] != Mod1B_results['input_symbol']].sort_values('score', ascending=False)
Mod1B_results['module'] = 'Mod1B'
Mod1B_results

Unnamed: 0,hit_id,hit_symbol,input_id,input_symbol,score,module
102,HGNC:12829,XRCC2,HGNC:23168,FANCM,1.000000,Mod1B
327,HGNC:23168,FANCM,HGNC:3587,FANCF,1.000000,Mod1B
38,HGNC:23168,FANCM,HGNC:12829,XRCC2,1.000000,Mod1B
348,HGNC:12829,XRCC2,HGNC:3588,FANCG,1.000000,Mod1B
347,HGNC:23168,FANCM,HGNC:3588,FANCG,1.000000,Mod1B
94,HGNC:3587,FANCF,HGNC:23168,FANCM,1.000000,Mod1B
31,HGNC:3587,FANCF,HGNC:12829,XRCC2,1.000000,Mod1B
103,HGNC:3588,FANCG,HGNC:23168,FANCM,1.000000,Mod1B
104,HGNC:25539,RFWD3,HGNC:23168,FANCM,1.000000,Mod1B
41,HGNC:25539,RFWD3,HGNC:12829,XRCC2,1.000000,Mod1B


# Mod1E Protein Interaction

## Mod1E Human

In [17]:
from Modules.Mod1E_interactions import GeneInteractions

In [18]:
# Module specification
mod1E_input_object_human = {
    'input': input_curie_set,
     'parameters': {
        'taxon': 'human',
        'threshold': None,
    },
}

interactions_human = GeneInteractions()

Mod1E Interaction Network metadata:
{'input_type': {'complexity': 'set', 'data_type': 'gene', 'id_type': 'HGNC'},
 'output_type': {'complexity': 'set', 'data_type': 'gene', 'id_type': 'HGNC'},
 'predicate': ['blm:interacts with'],
 'source': 'Monarch Biolink'}


In [19]:
interactions_human.load_input_object(mod1E_input_object_human)
interactions_human.load_gene_set()

In [20]:
mod1e_results = interactions_human.get_interactions()

In [21]:
Mod1E_results_human = pd.DataFrame(mod1e_results)

In [22]:
counts = Mod1E_results_human['hit_symbol'].value_counts().rename_axis('unique_values').to_frame('counts').reset_index()
high_counts = counts[counts['counts'] > 12]['unique_values'].tolist()

In [23]:
Mod1E_results_final = pd.DataFrame(Mod1E_results_human[Mod1E_results_human['hit_symbol'].isin(high_counts)])

In [24]:
Mod1E_results_final['module'] = 'Mod1E'

In [25]:
Mod1E_results_final.head()

Unnamed: 0,hit_id,hit_symbol,input_id,input_symbol,score,module
13,HGNC:10290,RPA2,HGNC:1100,BRCA1,0,Mod1E
60,HGNC:1925,CHEK1,HGNC:1100,BRCA1,0,Mod1E
70,HGNC:3584,FANCC,HGNC:1100,BRCA1,0,Mod1E
92,HGNC:25764,RMI1,HGNC:1100,BRCA1,0,Mod1E
107,HGNC:10289,RPA1,HGNC:1101,BRCA2,0,Mod1E


In [32]:
all_results = pd.concat([Mod1A_results_human.head(40), Mod1B_results.head(40)])

In [33]:
from Modules.StandardOutput import StandardOutput

In [34]:
so = StandardOutput(results=all_results.to_dict(orient='records'), input_object=input_object)

In [35]:
std_api_response_json = so.output_object

In [36]:
import requests

# get the URL for these results displayed in the RTX UI
RTX_UI_REQUEST_URL = "https://rtx.ncats.io/devED/api/rtx/v1/response/process"
to_post = {"options": ["Store", "ReturnResponseId"], "responses": [std_api_response_json]}
ui_url = requests.post(RTX_UI_REQUEST_URL, json=to_post)
print("Please visit the following website: https://rtx.ncats.io/?r=%s" % ui_url.json()['response_id'])

Please visit the following website: https://rtx.ncats.io/?r=572
