# Building a boolean network from a knowledge graph

This notebook uses the SIGNOR database as the knowledge graph.

Given a list of genes for which we want to construct a BN:

1. Find the Steiner subgraph for the given list of genes.
2. For each node in the subgraph, find all of its input nodes (i.e. all edges leading into that node).
3. For each such node, there is an activating relation if the edge is "up-regulates", and a repressing relation if the edge is "down-regulates".
4. Combine all of the edges with a "joiner function" - AND (`&`), OR (`|`), inhibitor wins (`inhibitor_wins`), etc.

Or, given an existing model (with some number of genes), a list of nodes, and a knowledge graph, how do we incorporate the new nodes into the model?


In [2]:
from BNMPy import build_bn_from_kg

In [2]:
# Build a network for a three-gene query. No `joiner` argument is passed,
# so whatever default load_signor_network defines is used.
bn_string, relations = build_bn_from_kg.load_signor_network(
    ['TNFAIP3', 'CCND1', 'MACF1'],
)

['UNIPROT::P21580', 'UNIPROT::P24385', 'UNIPROT::Q9UPN3']
['UNIPROT::P21580', 'UNIPROT::P24385', 'UNIPROT::Q9UPN3']


In [3]:
# Show the generated rules: one "NODE = expression" line per node.
print(bn_string)

ITCH = ITCH
TNFAIP3 = (ITCH)
CCND1 = (! GSK3B) & (GLI1)
GSK3B = (GSK3B) & (! MACF1)
MACF1 = (! GSK3B)
GLI1 = (! ITCH) & (GLI1)


In [4]:
# Six-gene query with joiner='&': a node's inputs are combined with AND
# in the printed rules.
bn_string, relations = build_bn_from_kg.load_signor_network(
    ['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'],
    joiner='&',
)
print(bn_string)

['UNIPROT::P01116', 'UNIPROT::P63092', 'UNIPROT::P04637', 'UNIPROT::Q13485', 'UNIPROT::P42771', 'UNIPROT::Q68DV7']
['UNIPROT::P01116', 'UNIPROT::P63092', 'UNIPROT::P04637', 'UNIPROT::Q13485', 'UNIPROT::P42771', 'UNIPROT::Q68DV7']
GNAS = GNAS
MAPK3 = (MAPK3)
IKBKB = (! IKBKB) & (SRC)
MYC = (MAPK3) & (! CEBPA) & (! SMAD4)
CEBPA = (! MAPK3) & (CEBPA) & (! FZD2)
SRC = (GNAS) & (! SRC)
TP53 = (MAPK3)
FZD2 = (RNF43)
SMAD4 = (MAPK3)
KRAS = (SRC)
CDKN2A = (! IKBKB) & (! MYC)
RNF43 = RNF43


In [5]:
# Same six-gene query, this time with joiner='|': a node's inputs are
# combined with OR in the printed rules.
bn_string, relations = build_bn_from_kg.load_signor_network(
    ['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'],
    joiner='|',
)
print(bn_string)

['UNIPROT::P01116', 'UNIPROT::P63092', 'UNIPROT::P04637', 'UNIPROT::Q13485', 'UNIPROT::P42771', 'UNIPROT::Q68DV7']
['UNIPROT::P01116', 'UNIPROT::P63092', 'UNIPROT::P04637', 'UNIPROT::Q13485', 'UNIPROT::P42771', 'UNIPROT::Q68DV7']
GNAS = GNAS
MAPK3 = (MAPK3)
IKBKB = (! IKBKB) | (SRC)
MYC = (MAPK3) | (! CEBPA) | (! SMAD4)
CEBPA = (! MAPK3) | (CEBPA) | (! FZD2)
SRC = (GNAS) | (! SRC)
TP53 = (MAPK3)
FZD2 = (RNF43)
SMAD4 = (MAPK3)
KRAS = (SRC)
CDKN2A = (! IKBKB) | (! MYC)
RNF43 = RNF43


In [6]:
# Same six-gene query with joiner='inhibitor_wins'. In the printed rules the
# negated (inhibiting) inputs are grouped and AND-ed with the activating ones.
bn_string, relations = build_bn_from_kg.load_signor_network(
    ['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'],
    joiner='inhibitor_wins',
)
print(bn_string)

['UNIPROT::P01116', 'UNIPROT::P63092', 'UNIPROT::P04637', 'UNIPROT::Q13485', 'UNIPROT::P42771', 'UNIPROT::Q68DV7']
['UNIPROT::P01116', 'UNIPROT::P63092', 'UNIPROT::P04637', 'UNIPROT::Q13485', 'UNIPROT::P42771', 'UNIPROT::Q68DV7']
GNAS = GNAS
MAPK3 = MAPK3
IKBKB = (!IKBKB) & (SRC)
MYC = (!CEBPA & !SMAD4) & (MAPK3)
CEBPA = (!MAPK3 & !FZD2) & (CEBPA)
SRC = (!SRC) & (GNAS)
TP53 = MAPK3
FZD2 = RNF43
SMAD4 = MAPK3
KRAS = SRC
CDKN2A = !IKBKB & !MYC
RNF43 = RNF43


In [7]:
# Display the edge list returned by the most recent load_signor_network call:
# a list of (source, target, effect) tuples, effect being 'activate' or 'inhibit'.
relations

[('MAPK3', 'MAPK3', 'activate'),
 ('IKBKB', 'IKBKB', 'inhibit'),
 ('SRC', 'IKBKB', 'activate'),
 ('MAPK3', 'MYC', 'activate'),
 ('CEBPA', 'MYC', 'inhibit'),
 ('SMAD4', 'MYC', 'inhibit'),
 ('MAPK3', 'CEBPA', 'inhibit'),
 ('CEBPA', 'CEBPA', 'activate'),
 ('FZD2', 'CEBPA', 'inhibit'),
 ('GNAS', 'SRC', 'activate'),
 ('SRC', 'SRC', 'inhibit'),
 ('MAPK3', 'TP53', 'activate'),
 ('RNF43', 'FZD2', 'activate'),
 ('MAPK3', 'SMAD4', 'activate'),
 ('SRC', 'KRAS', 'activate'),
 ('IKBKB', 'CDKN2A', 'inhibit'),
 ('MYC', 'CDKN2A', 'inhibit')]

## Combining knowledge graphs

In [15]:
# Load the existing Vundavilli 2020 pancreatic model (a Boolean-network rule
# file, not a knowledge graph) and collect its node names.
from BNMPy import booleanNetwork as bn
from BNMPy import BMatrix

file = 'input_files/pancreatic_vundavilli_2020_fig3.txt'
network = BMatrix.load_network_from_file(file)
# nodeDict is keyed by node name; .keys() yields a dict view, not a list.
genes = network.nodeDict.keys()
print(f"number of genes: {len(genes)}")
# Bare last expression so the notebook renders the node names.
genes

No initial state provided, using a random initial state
number of genes: 38


dict_keys(['EGF', 'HBEGF', 'IGF', 'NRG1', 'PTEN', 'LKB1', 'EGFR', 'EFGR', 'IGFR1A_B', 'ERBB2', 'JAK5', 'STAT3', 'IRS1', 'GRB2', 'RAS', 'MEKK1', 'RAF', 'MKK4', 'MEK1', 'PIK3CA', 'JNK1', 'ERK1_2', 'PIP3', 'PDPK1', 'AKT1', 'AMPK', 'GSK3', 'TSC1_2', 'RHEB', 'mTOR', 'RPS6KB1', 'BAD', 'CCND1', 'BCL2', 'SRFELK1', 'FOS-JUN', 'SRFELK4', 'SP1'])

In [16]:
# Build a knowledge-graph network for the model's node names with the
# 'inhibitor_wins' joiner. This rebinds `bn_string` and `relations`, replacing
# the values produced by the earlier example cells.
bn_string, relations = build_bn_from_kg.load_signor_network(genes, joiner='inhibitor_wins')

number of genes found: 26
[1950, 1839, 3479, 3084, 5728, 6794, 1956, 2064, 6774, 3667, 2885, 4214, 22882, 6416, 5604, 5290, 5599, 5170, 207, 5562, 6008, 6198, 572, 595, 596, 5669]
['UNIPROT::P01133', 'UNIPROT::Q99075', 'UNIPROT::P05019', 'UNIPROT::Q02297', 'UNIPROT::P60484', 'UNIPROT::Q15831', 'UNIPROT::P00533', 'UNIPROT::P04626', 'UNIPROT::P40763', 'UNIPROT::P35568', 'UNIPROT::P62993', 'UNIPROT::Q13233', 'UNIPROT::Q9Y6X8', 'UNIPROT::P45985', 'UNIPROT::Q02750', 'UNIPROT::P42336', 'UNIPROT::P45983', 'UNIPROT::O15530', 'UNIPROT::P31749', 'UNIPROT::Q13131', 'UNIPROT::P23443', 'UNIPROT::Q92934', 'UNIPROT::P24385', 'UNIPROT::P10415', 'UNIPROT::P11464']
['UNIPROT::P01133', 'UNIPROT::Q99075', 'UNIPROT::P05019', 'UNIPROT::Q02297', 'UNIPROT::P60484', 'UNIPROT::Q15831', 'UNIPROT::P00533', 'UNIPROT::P04626', 'UNIPROT::P40763', 'UNIPROT::P35568', 'UNIPROT::P62993', 'UNIPROT::Q13233', 'UNIPROT::P45985', 'UNIPROT::Q02750', 'UNIPROT::P42336', 'UNIPROT::P45983', 'UNIPROT::O15530', 'UNIPROT::P31749', '

In [12]:
# Show the knowledge-graph-derived rules for the model's genes.
print(bn_string)

STAT3 = MAPK8 | EGFR | MAP3K1
CCND1 = STAT3
RPS6KB1 = PDPK1
MAPK8 = (!RPS6KB1) & (MAP2K4)
BAD = !MAPK8 & !AKT1
AKT1 = (!PTEN) & (MAPK8 | PDPK1 | PIK3CA)
EGFR = EGFR | ERBB2 | EGF | HBEGF
PDPK1 = PDPK1
PTEN = PTEN
ERBB2 = EGFR | ERBB2 | NRG1 | EGF
PIK3CA = (!PTEN) & (IRS1)
IRS1 = !RPS6KB1 & !MAPK8 & !PIK3CA & !MAP2K1
BCL2 = (!BAD) & (MAPK8)
MAP2K1 = PDPK1 | MAP2K1 | MAP3K1
MAP3K1 = MAP3K1
NRG1 = NRG1
STK11 = STK11
PPARGC1A = (!AKT1) & (PPARGC1A | PRKAA1)
GRB2 = ERBB2 | IRS1
MAP2K4 = (!AKT1) & (MAP3K1 | MAP2K4)
PRKAA1 = (!AKT1 & !PRKAA1) & (STK11)
IGF1 = PPARGC1A
EGF = EGF
HBEGF = HBEGF


In [17]:
# Load the model from the "standardized" rule file and collect its node names.
# NOTE: `file` and `genes` are rebound here, so any earlier cell that reads
# `genes` will see these names instead if it is re-run after this cell.
file = 'input_files/Vundavilli2020_standardized.txt'
network_std = BMatrix.load_network_from_file(file)
genes = network_std.nodeDict.keys()
print(f"number of genes: {len(genes)}")
print(genes)

No initial state provided, using a random initial state
number of genes: 38
dict_keys(['EGF', 'HBEGF', 'IGF1', 'NRG1', 'PTEN', 'STK11', 'EGFR', 'ERBB4', 'IGF1R', 'ERBB2', 'JAK1', 'STAT3', 'IRS1', 'GRB2', 'KRAS', 'MAP3K1', 'RAF1', 'MAP2K4', 'MAP2K1', 'PIK3CA', 'MAPK8', 'MAPK3', 'PIP3', 'PDPK1', 'AKT1', 'PRKAA1', 'GSK3B', 'TSC1', 'RHEB', 'MTOR', 'RPS6KB1', 'BAD', 'CCND1', 'BCL2', 'ELK1', 'FOS', 'ELK4', 'SP1'])


In [18]:
# Build the knowledge-graph network for the standardized node names; results
# go to the *_std variables so `bn_string` / `relations` are left untouched.
bn_string_std, relations_std = build_bn_from_kg.load_signor_network(genes, joiner='inhibitor_wins')
print(bn_string_std)
# The only gene missing should be SP1, which is an output node
# NOTE(review): the recorded run reports 37 of the 38 names as found; confirm
# that the unmatched name really is SP1 and not another node.

number of genes found: 37
[1950, 1839, 3479, 3084, 5728, 6794, 1956, 2066, 3480, 2064, 3716, 6774, 3667, 2885, 3845, 4214, 5894, 6416, 5604, 5290, 5599, 5595, 5170, 207, 5562, 2932, 7248, 6008, 2475, 6198, 572, 595, 596, 2002, 2353, 2005, 5669]
['UNIPROT::P01133', 'UNIPROT::Q99075', 'UNIPROT::P05019', 'UNIPROT::Q02297', 'UNIPROT::P60484', 'UNIPROT::Q15831', 'UNIPROT::P00533', 'UNIPROT::Q15303', 'UNIPROT::P08069', 'UNIPROT::P04626', 'UNIPROT::P23458', 'UNIPROT::P40763', 'UNIPROT::P35568', 'UNIPROT::P62993', 'UNIPROT::P01116', 'UNIPROT::Q13233', 'UNIPROT::P04049', 'UNIPROT::P45985', 'UNIPROT::Q02750', 'UNIPROT::P42336', 'UNIPROT::P45983', 'UNIPROT::P27361', 'UNIPROT::O15530', 'UNIPROT::P31749', 'UNIPROT::Q13131', 'UNIPROT::P49841', 'UNIPROT::Q92574', 'UNIPROT::P42345', 'UNIPROT::P23443', 'UNIPROT::Q92934', 'UNIPROT::P24385', 'UNIPROT::P10415', 'UNIPROT::P19419', 'UNIPROT::P01100', 'UNIPROT::P28324', 'UNIPROT::P11464']
['UNIPROT::P01133', 'UNIPROT::Q99075', 'UNIPROT::P05019', 'UNIPROT::Q0

In [13]:
# Merge the original (non-standardized) model `network` with the
# knowledge-graph rules in `bn_string`. In the recorded output, when a node has
# a rule in both sources the two expressions appear joined by the
# outer_joiner ('|').
merged_bn_string = build_bn_from_kg.merge_network(network, bn_string, outer_joiner='|')

In [14]:
# Show the merged rule set.
print(merged_bn_string)

PDPK1 = ( PIP3) | ( PDPK1)
MAP2K4 =  (!AKT1) & (MAP3K1 | MAP2K4)
TSC1_2 =  AMPK | ( ! AKT1)
AKT1 = ( PIP3) | ( (!PTEN) & (MAPK8 | PDPK1 | PIK3CA))
IGF1 =  PPARGC1A
PTEN = ( PTEN) | ( PTEN)
FOS-JUN =  RPS6KB1 & JNK1
RAF =  RAS
MAP2K1 =  PDPK1 | MAP2K1 | MAP3K1
MAPK8 =  (!RPS6KB1) & (MAP2K4)
PPARGC1A =  (!AKT1) & (PPARGC1A | PRKAA1)
BAD = ( ! (AKT1 | RPS6KB1)) | ( !MAPK8 & !AKT1)
PIK3CA = ( (ERBB2 | IRS1) | (RAS | STAT3)) | ( (!PTEN) & (IRS1))
MAP3K1 =  MAP3K1
NRG1 = ( NRG1) | ( NRG1)
STAT3 = ( JAK5) | ( MAPK8 | EGFR | MAP3K1)
HBEGF = ( HBEGF) | ( HBEGF)
PIP3 =  ! PTEN | PIK3CA
RHEB =  ! TSC1_2
EGF = ( EGF) | ( EGF)
MEK1 =  RAF
JNK1 =  MKK4
AMPK =  LKB1
ERK1_2 =  MEK1
JAK5 =  EGFR
RPS6KB1 = ( mTOR | (PDPK1 | ERK1_2)) | ( PDPK1)
IRS1 = ( IGFR1A_B) | ( !RPS6KB1 & !MAPK8 & !PIK3CA & !MAP2K1)
MKK4 =  MEKK1
STK11 =  STK11
CCND1 = ( ! GSK3) | ( STAT3)
IGFR1A_B =  IGF
mTOR =  RHEB
ERBB2 = ( NRG1) | ( EGFR | ERBB2 | NRG1 | EGF)
EGFR = ( EGF) | ( EGFR | ERBB2 | EGF | HBEGF)
PRKAA1 =  (!AKT1 & !PR

## Visualization

In [23]:
# Replace the invalid name '(-)-anisomycin' with 'anisomycin'.
# NOTE(review): presumably the parentheses and dashes clash with the rule
# syntax — confirm. This rebinds bn_string_std to the sanitized text.
bn_string_std = bn_string_std.replace('(-)-anisomycin', 'anisomycin')
# Save the logic rules to a file in the current working directory so the
# visualisation step can read them back.
with open('Vundavilli2020_kg_inhibitor_wins.txt', 'w') as f:
    f.write(bn_string_std)

In [24]:
from BNMPy import vis
# Read back the rules file written by the previous cell (same file name).
logic_rules = vis.read_logic_rules('Vundavilli2020_kg_inhibitor_wins.txt')
vis.vis_logic_graph(logic_rules, output_html = "kg_graph.html") # visualize the logic graph, in an html file. view it in a browser.


kg_graph.html
