In [1]:
import indra

In [73]:
import pandas as pd
from pybel.io.jupyter import to_jupyter

from indra.assemblers.cag.assembler import CAGAssembler
from indra.assemblers.indranet.assembler import IndraNetAssembler
from indra.assemblers.pybel.assembler import PybelAssembler
from indra.assemblers.pysb import PysbAssembler
from indra.assemblers.sbgn.assembler import SBGNAssembler
from indra.assemblers.sif import SifAssembler
from indra.sources import bel
from indra.sources import trips

* Additional hypotheses beyond CARNIVAL
    * Priming hypothesis
    * MaxEnt hypothesis
    * InvCARNIVAL (no perturbations)
    * CARNIVAL (with perturbations) 
* Developing strategies for actionable interventions of CARNIVAL output
   * Minimal change 
   * Identifying druggable targets of the CARNIVAL outputs
   * Adverse effects of druggable targets
*	Identifying additional sources of knowledge that can be used to identify viral-host protein interactions as inputs to CARNIVAL
*	Generating evidence and visualizations that support the CARNIVAL outputs using dialogue.bio and pathwaymap.indra.bio
*	Curating the CARNIVAL output to add potential confounders
*	Curating the CARNIVAL output to discover feedback loops
*	Identifying and curating the molecular signatures of the CARNIVAL inputs
*	Analyzing similarities and differences in CARNIVAL outputs as a function of time
*	Analyzing similarities and differences in CARNIVAL outputs as a function of viral strain
* Extract molecular signature for double-circle nodes from Progeny/Dorothea

January goal:
* Demonstrate we can interrogate a model of Influenza pathogenesis using PNNL Pan-viral multiomics datasets.
    * Examples of types of questions one can ask of the model
        * What is the effect of the treatment on the treated?
        * What are the necessary causes of an effect?
        * What are sufficient causes of an effect?
        * What interventions could restore homeostasis?
    * Hypotheses that would generate different models:
        * Priming hypothesis
        * MaxEnt hypothesis
        * InvCARNIVAL (no perturbations)
        * CARNIVAL (with perturbations) 
    * How does this compare with what is known?
        * Is this surprising?
            * If so, is it wrong?
            * If not, is it new?
    * What are the limitations of the model, and how would we handle these threats to validity?
        * Unobserved confounders
        * Feedback loops
        * Missing data
        * Selection bias
        * Transportability
    * What experiments can improve the predictions or reduce uncertainty in the specification of the model?
    * Suggestions that narrow down experimental space
    * Proof that experiments reduce space
    * Actionable suggestions that improve output.
    * Confidence that additional experiments improve process
* Data comparisons that could answer interesting questions:
    * CARNIVAL output with and without known interactions
    * diff between virus and mock
    * diff between high-risk and low-risk virus
    * diff between early and late infection
* Ways to compare graphs--talk to Joseph
    
    
    

# Convert CARNIVAL output SIF into INDRA statements using TRIPS

In [3]:

# Read bob.txt and turn every newline into ". " so that each line is handed
# to the reader as its own sentence.
with open('bob.txt') as bob:
    statements = bob.read().replace('\n', '. ')

# Run TRIPS only after the file has been closed: the handle is not needed
# while the text is being processed.
trips_processor = trips.process_text(statements)

# Fail here with a clear message instead of with an AttributeError in a later
# cell that reads `trips_processor.statements`.
if trips_processor is None:
    raise RuntimeError('TRIPS returned no processor for the text in bob.txt')
        

In [26]:
# One assembler per output format, each fed the same list of extracted
# statements. Keys are the short format names used by the cells below.
assemblers = {
    'sif': SifAssembler(trips_processor.statements),
    'cag': CAGAssembler(trips_processor.statements),
    'sbgn': SBGNAssembler(trips_processor.statements),
    'indranet': IndraNetAssembler(trips_processor.statements),
    'pybel': PybelAssembler(trips_processor.statements),
    'pysb': PysbAssembler(trips_processor.statements),
}

# Assemble every format once and keep the results under the same keys.
model = {}
for fmt, assembler in assemblers.items():
    model[fmt] = assembler.make_model()


INFO: [2020-11-18 14:10:09] indra.assemblers.pysb.assembler - Using default assembly policy.


In [28]:

# Fetch the Boolean-network text from the SIF assembler, then print it.
boolean_net_text = assemblers['sif'].print_boolean_net()
print(boolean_net_text)

MAPK14 = False
STAT3 = False
ATF2 = False
AR = False
HBP1 = False
MEF2B = False
MAPK3 = False
SMAD2 = False
PARP1 = False
SREBF1 = False
AKT1 = False
MAP3K1 = False
MTA1 = False
BCL11A = False
ZEB2 = False
E2F4 = False
MMP9 = False
ATF3 = False
ZEB1 = False
NR4A1 = False
NR0B2 = False
NR5A2 = False
NR2F2 = False
ADCYAP1 = False
FHL5 = False
CREM = False
PRKACA = False
IRF3 = False
FOS = False
RARA = False
CD44 = False
TFAP2A = False
E2F1 = False
MYCN = False
TP73 = False
EGR1 = False
MEF2C = False
MAP2K4 = False
MAPK11 = False
PRKCD = False
RB1 = False
SP1 = False
SP3 = False
GSK3A = False
TCF4 = False
MEF2A = False
MAX = False
USF1 = False
AURKB = False
PRDM1 = False
ZBED1 = False
YAP1 = False
TEAD1 = False
KLF5 = False
MNT = False
TFAP4 = False
POU2AF1 = False
POU2F2 = False
HMGB1 = False
HOXD9 = False
MEIS1 = False
NANOG = False
USF2 = False
CDKL5 = False
PBX2 = False
TAF1 = False
YY1 = False
FBXO22 = False
BACH1 = False
PAX5 = False
LIMS1 = False
POU5F1 = False
KDM5B = False
KLF6 =

In [82]:
# Serialize before opening the output file: opening with 'wb' truncates it,
# so a failure inside print_model() would otherwise leave an empty file.
sbgn = assemblers['sbgn'].print_model()

# Binary mode is kept from the original cell; it is the right mode provided
# print_model() returns bytes (NOTE(review): confirm against the INDRA docs).
with open('VN1203_at_7h.sbgn', 'wb') as out:
    out.write(sbgn)
    

In [None]:
# NOTE(review): this only references the attribute (no call parentheses), so
# the cell evaluates to the method object itself, not to the SBGN output.
# The cell has no execution count; it looks like a leftover - confirm and remove.
assemblers['sbgn'].print_model

In [30]:
# Write the CAG assembler's TSV export to disk.
# The result is bound to its own name: the original cell assigned it to
# `model`, which overwrote the dict of assembled models built earlier.
cag_tsv = assemblers['cag'].print_tsv(file_name='vn1203_7h.causal.tsv')

# Assemble the CAG model and show it as the cell output.
assemblers['cag'].make_model()

<networkx.classes.multidigraph.MultiDiGraph at 0x7fc1b62b2910>

In [35]:
# Dump bob.boolean.net to the cell output via the shell.
# NOTE(review): no visible cell writes this file - confirm where it comes from.
!cat bob.boolean.net




In [29]:
# Assemble the PyBEL graph, then list every edge as
# "<source> <relation> <target>", one edge per line.
pybel_graph = assemblers['pybel'].make_model()
edge_triples = pybel_graph.edges(data=True)
for u, v, data in edge_triples:
    print(f'{u} {data["relation"]} {v}')

p(HGNC:6876 ! MAPK14) increases p(HGNC:11364 ! STAT3)
p(HGNC:6876 ! MAPK14) increases p(HGNC:11364 ! STAT3)
p(HGNC:6876 ! MAPK14) increases p(HGNC:784 ! ATF2)
p(HGNC:6876 ! MAPK14) increases p(HGNC:784 ! ATF2)
p(HGNC:6876 ! MAPK14) increases p(HGNC:644 ! AR)
p(HGNC:6876 ! MAPK14) increases p(HGNC:644 ! AR)
p(HGNC:6876 ! MAPK14) increases p(HGNC:23200 ! HBP1)
p(HGNC:6876 ! MAPK14) increases p(HGNC:23200 ! HBP1)
p(HGNC:6876 ! MAPK14) increases p(HGNC:6995 ! MEF2B)
p(HGNC:6876 ! MAPK14) increases p(HGNC:6995 ! MEF2B)
p(HGNC:6876 ! MAPK14) decreases p(HGNC:9884 ! RB1)
p(HGNC:6876 ! MAPK14) decreases p(HGNC:9884 ! RB1)
p(HGNC:11364 ! STAT3) increases p(HGNC:7176 ! MMP9)
p(HGNC:11364 ! STAT3) increases p(HGNC:7176 ! MMP9)
p(HGNC:784 ! ATF2) increases p(HGNC:785 ! ATF3)
p(HGNC:784 ! ATF2) increases p(HGNC:785 ! ATF3)
p(HGNC:6877 ! MAPK3) increases p(HGNC:6768 ! SMAD2)
p(HGNC:6877 ! MAPK3) increases p(HGNC:6768 ! SMAD2)
p(HGNC:6877 ! MAPK3) increases p(HGNC:11364 ! STAT3)
p(HGNC:6877 ! MAPK3) 

In [17]:
# `belgraph` was not defined by any cell in this notebook, so this cell raised
# NameError on a fresh kernel. Bind it to the graph built by the PyBEL
# assembler, the same way the edge-listing cell obtains its graph.
# NOTE(review): confirm this is the graph the original output was produced from.
belgraph = assemblers['pybel'].make_model()

# Alphabetically sorted BEL strings of all nodes in the graph.
sorted(node.as_bel() for node in belgraph)

['p(HGNC:11094 ! SNAI2)',
 'p(HGNC:11205 ! SP1)',
 'p(HGNC:11208 ! SP3)',
 'p(HGNC:11289 ! SREBF1)',
 'p(HGNC:11364 ! STAT3)',
 'p(HGNC:11390 ! AURKB)',
 'p(HGNC:11411 ! CDKL5)',
 'p(HGNC:11535 ! TAF1)',
 'p(HGNC:11634 ! TCF4)',
 'p(HGNC:11642 ! ZEB1)',
 'p(HGNC:11714 ! TEAD1)',
 'p(HGNC:11742 ! TFAP2A)',
 'p(HGNC:11745 ! TFAP4)',
 'p(HGNC:12003 ! TP73)',
 'p(HGNC:12593 ! USF1)',
 'p(HGNC:12594 ! USF2)',
 'p(HGNC:12856 ! YY1)',
 'p(HGNC:13221 ! BCL11A)',
 'p(HGNC:13593 ! FBXO22)',
 'p(HGNC:14881 ! ZEB2)',
 'p(HGNC:16262 ! YAP1)',
 'p(HGNC:1681 ! CD44)',
 'p(HGNC:17371 ! FHL5)',
 'p(HGNC:18039 ! KDM5B)',
 'p(HGNC:20857 ! NANOG)',
 'p(HGNC:2235 ! KLF6)',
 'p(HGNC:23200 ! HBP1)',
 'p(HGNC:2352 ! CREM)',
 'p(HGNC:241 ! ADCYAP1)',
 'p(HGNC:24605 ! ZNF521)',
 'p(HGNC:270 ! PARP1)',
 'p(HGNC:3113 ! E2F1)',
 'p(HGNC:3118 ! E2F4)',
 'p(HGNC:3126 ! EBF1)',
 'p(HGNC:3238 ! EGR1)',
 'p(HGNC:3796 ! FOS)',
 'p(HGNC:391 ! AKT1)',
 'p(HGNC:447 ! ZBED1)',
 'p(HGNC:4616 ! GSK3A)',
 'p(HGNC:4983 ! HMGB1)

In [40]:
# Count the lines of bob.txt that contain "Perturbation" (grep piped to wc -l).
!grep  Perturbation bob.txt |wc -l

      12


## Output statements from TRIPS

It appears that at least one non-perturbation line did not get parsed.

In [22]:
# Minimal two-sentence input used to check what TRIPS extracts.
example_text = 'active PRKACA inhibits Active SREBF1. active MAPK14 activates active STAT3'
single_statement = trips.process_text(example_text)

# Show the statements extracted from the example text.
single_statement.statements

[Activation(MAPK14(activity), STAT3(activity)),
 Inhibition(PRKACA(activity), SREBF1(activity))]

In [25]:
# Assemble the example statements into a PyBEL graph.
example_assembler = PybelAssembler(single_statement.statements)
pb = example_assembler.make_model()

# Alphabetically sorted BEL strings of the graph's nodes.
sorted([bel_node.as_bel() for bel_node in pb])

['p(HGNC:11289 ! SREBF1)',
 'p(HGNC:11364 ! STAT3)',
 'p(HGNC:6876 ! MAPK14)',
 'p(HGNC:9380 ! PRKACA)']

In [27]:
# Print the graph summary for the two-statement example (node, edge and
# component counts, as shown in the output below).
pb.summarize()

indra v68774096-f5af-4ce6-82d8-83e9f6773a2f
Number of Nodes: 4
Number of Edges: 2
Number of Citations: 1
Number of Authors: 0
Network Density: 1.67E-01
Number of Components: 2


In [26]:
# Render the example PyBEL graph inline in the notebook.
# `to_jupyter` is imported in the notebook's import cell rather than here,
# so all dependencies are declared in one place.
to_jupyter(pb)

<IPython.core.display.Javascript object>

In [42]:
# NOTE(review): leftover source inspection (`??`). `pa` is not defined in any
# visible cell, so this fails on a fresh kernel - confirm and remove.
pa.make_model??

# Omnipath data

In [68]:
# Load the OmniPath interaction table and drop rows that are exact duplicates.
# pandas is imported in the notebook's import cell, not here.
omnipath = pd.read_csv('OmnipathSignedDirectedInteractions.csv').drop_duplicates()
omnipath

Unnamed: 0,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,dip_url,sources,references,curation_effort,n_references,n_resources,type
0,P0DP24,P48995,CALM2,TRPC1,1,0,1,1,0,1,,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,3,3,1,ppi
1,Q03135,P48995,CAV1,TRPC1,1,1,0,1,1,0,http://dip.doe-mbi.ucla.edu/dip/DIPview.cgi?IK...,DIP;HPRD;IntAct;Lit-BM-17;TRIP,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit...,13,8,5,ppi
2,P14416,P48995,DRD2,TRPC1,1,1,0,1,1,0,,TRIP,TRIP:18261457,1,1,1,ppi
3,Q02790,P48995,FKBP4,TRPC1,1,0,1,1,0,1,,TRIP,TRIP:15199065;TRIP:19945390;TRIP:23228564,3,3,1,ppi
4,Q99750,P48995,MDFI,TRPC1,1,0,1,1,0,1,,HPRD;TRIP,HPRD:14530267;TRIP:14530267;TRIP:23770672,3,2,2,ppi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60860,O15264,Q96BA8,MAPK13,CREB3L1,1,1,0,1,1,0,,KEGG-MEDICUS,,0,0,1,ppi
60861,COMPLEX:P49023_Q05397_Q13418,COMPLEX:O43707_P18206_Q9Y4G6,ILK_PTK2_PXN,ACTN4_TLN2_VCL,1,1,0,1,1,0,,KEGG-MEDICUS,,0,0,1,ppi
60862,COMPLEX:P29466_Q96P20_Q9ULZ3,P01584,CASP1_NLRP3_PYCARD,IL1B,1,1,0,1,1,0,,KEGG-MEDICUS,,0,0,1,ppi
67443,P05129,P00533,PRKCG,EGFR,1,0,1,1,0,1,,ACSN;HPRD_MIMP;MIMP;PhosphoSite_MIMP;phosphoEL...,ACSN:11313945;ACSN:11447289;ACSN:11940581;ACSN...,14,14,2,ppi


In [64]:
# Load the 'RegulatoryInteractions' sheet of the CARNIVAL workbook.
vn1203_7h = pd.read_excel(
    'VN1203_at_7hr_CARNIVAL.xlsx',
    sheet_name='RegulatoryInteractions',
)

vn1203_7h

Unnamed: 0,Source,Relationship,Target
0,PRKACA,inhibits,SREBF1
1,MAPK14,activates,STAT3
2,MAPK14,activates,ATF2
3,MAPK14,activates,AR
4,MAPK14,activates,HBP1
...,...,...,...
110,Perturbation,activates,LIMS1
111,Perturbation,activates,LYL1
112,Perturbation,activates,POU2AF1
113,Perturbation,activates,SP2


In [72]:
# Left-join the CARNIVAL edges onto OmniPath by (source, target) gene symbol.
# Every CARNIVAL row is kept; rows without an OmniPath match get NaN in the
# OmniPath columns.
vn1203_omnipath = vn1203_7h.merge(
    omnipath,
    how='left',
    left_on=['Source', 'Target'],
    right_on=['source_genesymbol', 'target_genesymbol'],
)

# Save the annotated edge list, then display it.
vn1203_omnipath.to_excel('vn1203_omnipath.xlsx')
vn1203_omnipath

Unnamed: 0,Source,Relationship,Target,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,dip_url,sources,references,curation_effort,n_references,n_resources,type
0,PRKACA,inhibits,SREBF1,P17612,P36956,PRKACA,SREBF1,1.0,0.0,1.0,1.0,0.0,1.0,,MIMP;PhosphoSite;PhosphoSite_MIMP;PhosphoSite_...,PhosphoSite:16381800;ProtMapper:16381800;SIGNO...,4.0,1.0,5.0,ppi
1,MAPK14,activates,STAT3,Q16539,P40763,MAPK14,STAT3,1.0,1.0,0.0,1.0,1.0,0.0,,HPRD_MIMP;KEA;MIMP;NCI-PID_ProtMapper;NetworKI...,KEA:17570479;ProtMapper:10201984;ProtMapper:17...,10.0,5.0,7.0,ppi
2,MAPK14,activates,ATF2,Q16539,P15336,MAPK14,ATF2,1.0,1.0,0.0,1.0,1.0,0.0,,ACSN;BEL-Large-Corpus_ProtMapper;BioGRID;CA1;C...,ACSN:11274345;ACSN:15140942;ACSN:15187187;ACSN...,99.0,58.0,20.0,ppi
3,MAPK14,activates,AR,Q16539,P10275,MAPK14,AR,1.0,1.0,0.0,1.0,1.0,0.0,,HPRD_MIMP;MIMP;NCI-PID_ProtMapper;PhosphoSite;...,PhosphoSite:16282370;ProtMapper:16282370;ProtM...,4.0,3.0,4.0,ppi
4,MAPK14,activates,HBP1,Q16539,O60381,MAPK14,HBP1,1.0,1.0,0.0,1.0,1.0,0.0,,MIMP;PhosphoSite;PhosphoSite_MIMP;PhosphoSite_...,PhosphoSite:14612426;ProtMapper:14612426;SIGNO...,4.0,1.0,5.0,ppi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,Perturbation,activates,LIMS1,,,,,,,,,,,,,,,,,
111,Perturbation,activates,LYL1,,,,,,,,,,,,,,,,,
112,Perturbation,activates,POU2AF1,,,,,,,,,,,,,,,,,
113,Perturbation,activates,SP2,,,,,,,,,,,,,,,,,


In [67]:
# Spot-check: OmniPath rows whose source is MAPK14 and whose target is STAT3.
from_mapk14 = omnipath['source_genesymbol'] == 'MAPK14'
to_stat3 = omnipath['target_genesymbol'] == 'STAT3'
omnipath[from_mapk14 & to_stat3]

Unnamed: 0,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,dip_url,sources,references,curation_effort,n_references,n_resources,type
3926,Q16539,P40763,MAPK14,STAT3,1,1,0,1,1,0,,HPRD_MIMP;KEA;MIMP;NCI-PID_ProtMapper;NetworKI...,KEA:17570479;ProtMapper:10201984;ProtMapper:17...,10,5,7,ppi
21847,Q16539,P40763,MAPK14,STAT3,1,1,0,1,1,0,,HPRD_MIMP;KEA;MIMP;NCI-PID_ProtMapper;NetworKI...,KEA:17570479;ProtMapper:10201984;ProtMapper:17...,10,5,7,ppi
61703,Q16539,P40763,MAPK14,STAT3,1,1,0,1,1,0,,HPRD_MIMP;KEA;MIMP;NCI-PID_ProtMapper;NetworKI...,KEA:17570479;ProtMapper:10201984;ProtMapper:17...,10,5,7,ppi


In [60]:
# Re-index the OmniPath table by the (source_genesymbol, target_genesymbol) pair.
omnipath_idx = omnipath.set_index(['source_genesymbol','target_genesymbol'])
# Replace the index with a plain tuple built from its entries.
# NOTE(review): the index type pandas builds from a tuple of tuples may depend
# on the pandas version - confirm the result is the lookup key intended here.
omnipath_idx.index = tuple(omnipath_idx.index)
omnipath_idx

Unnamed: 0,Unnamed: 1,source,target,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,dip_url,sources,references,curation_effort,n_references,n_resources,type
CALM2,TRPC1,P0DP24,P48995,1,0,1,1,0,1,,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,3,3,1,ppi
CAV1,TRPC1,Q03135,P48995,1,1,0,1,1,0,http://dip.doe-mbi.ucla.edu/dip/DIPview.cgi?IK...,DIP;HPRD;IntAct;Lit-BM-17;TRIP,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit...,13,8,5,ppi
DRD2,TRPC1,P14416,P48995,1,1,0,1,1,0,,TRIP,TRIP:18261457,1,1,1,ppi
FKBP4,TRPC1,Q02790,P48995,1,0,1,1,0,1,,TRIP,TRIP:15199065;TRIP:19945390;TRIP:23228564,3,3,1,ppi
MDFI,TRPC1,Q99750,P48995,1,0,1,1,0,1,,HPRD;TRIP,HPRD:14530267;TRIP:14530267;TRIP:23770672,3,2,2,ppi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ANP32B,TNFRSF17,Q92688,Q02223,1,1,0,1,1,0,,Kirouac2010,,0,0,1,ppi
CCL13,CCR10,Q99616,P46092,1,1,0,1,1,0,,EMBRACE;Wang,,0,0,2,ppi
CCL13,CCR4,Q99616,P51679,1,1,0,1,1,0,,EMBRACE;Wang,,0,0,2,ppi
INHBE,ACVR1,P58166,Q04771,1,1,0,1,1,0,,EMBRACE;Wang;iTALK,,0,0,3,ppi
