# Building a boolean network from a knowledge graph

Use SigNOR

Given a list of genes for which we want to construct a BN:

1. Find the Steiner subgraph for the given list of genes.
2. For each node in the subgraph, find all if its input nodes (i.e. all edges leading into that node).
3. For each such node, there is an activating relation if the edge is "up-regulates", and a repressing relation if the edge is "down-regulates".
4. Combine all of the edges with a "joiner function" - AND, OR, repressor wins, etc.

Or, given an existing model (with some number of genes), a list of nodes, and a knowledge graph, how do we incorporate the new nodes into the model?


In [1]:
from BNMPy import build_bn_from_kg

In [2]:
bn_string, relations = build_bn_from_kg.load_signor_network(['TNFAIP3', 'CCND1', 'MACF1'])

number of genes found: 3
[7128, 595, 23499]


In [4]:
print(bn_string)

CCND1 = (! GSK3B) # inhibit_GSK3B:0.783
GSK3B = (GSK3B) & (! MACF1) # activate_GSK3B:0.2; inhibit_MACF1:0.436
MACF1 = (! GSK3B) # inhibit_GSK3B:0.436
TNFAIP3 = TNFAIP3
TRAF6 = (! GSK3B) & (TRAF6) & (! TNFAIP3) # inhibit_GSK3B:0.48; activate_TRAF6:0.2; inhibit_TNFAIP3:0.701


In [5]:
bn_string, relations = build_bn_from_kg.load_signor_network(['TNFAIP3', 'CCND1', 'MACF1'],score_cutoff = 0.5)
print(bn_string)

Applied score cutoff 0.5, filtered to 19342/40940 edges
number of genes found: 3
[7128, 595, 23499]
CCND1 = (CTNNB1) # activate_CTNNB1:0.8
CTNNB1 = (! SRC) # inhibit_SRC:0.76
SRC = SRC
TNFAIP3 = TNFAIP3
TRAF6 = (SRC) & (! TNFAIP3) # activate_SRC:0.566; inhibit_TNFAIP3:0.701


In [None]:
bn_string, relations = build_bn_from_kg.load_signor_network(['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'], joiner='&')
print(bn_string)

number of genes found: 6
[3845, 2778, 7157, 4089, 1029, 54894]
CDKN2A = (! MYC) # inhibit_MYC:0.765
GNAS = (! MDM2) # inhibit_MDM2:0.395
GSK3B = (GSK3B) # activate_GSK3B:0.2
KRAS = (SRC) # activate_SRC:0.656
MDM2 = (TP53) # activate_TP53:0.968
MYC = (! GSK3B) & (! SMAD4) # inhibit_GSK3B:0.719; inhibit_SMAD4:0.638
RNF43 = RNF43
SMAD4 = (! GSK3B) # inhibit_GSK3B:0.397
SRC = (GNAS) & (GSK3B) & (SRC) # activate_GNAS:0.506; activate_GSK3B:0.383; activate_SRC:0.2
TP53 = (! MDM2) & (GSK3B) & (! SRC) & (! RNF43) # inhibit_MDM2:0.968; activate_GSK3B:0.727; inhibit_SRC:0.524; inhibit_RNF43:0.452


In [8]:
bn_string, relations = build_bn_from_kg.load_signor_network(['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'], joiner='&', score_cutoff = 0.5)
print(bn_string)

number of genes found: 6
[3845, 2778, 7157, 4089, 1029, 54894]
CDKN2A = (! MYC) # inhibit_MYC:0.765
GNAS = GNAS
KRAS = (SRC) # activate_SRC:0.656
MAPK1 = MAPK1
MYC = (MAPK1) & (! SMAD4) # activate_MAPK1:0.733; inhibit_SMAD4:0.638
SMAD4 = (MAPK1) # activate_MAPK1:0.511
SRC = (GNAS) # activate_GNAS:0.506
TP53 = (! SRC) & (MAPK1) # inhibit_SRC:0.524; activate_MAPK1:0.777


In [5]:
bn_string, relations = build_bn_from_kg.load_signor_network(['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'], joiner='|')
print(bn_string)

number of genes found: 6
[3845, 2778, 7157, 4089, 1029, 54894]
CDKN2A = (! MYC) | (! IKBKB)
GNAS = GNAS
IKBKB = (SRC) | (! IKBKB)
KRAS = (SRC)
MYC = (! SMAD4)
RNF43 = RNF43
SMAD4 = SMAD4
SRC = (GNAS) | (SRC)
TP53 = (! SRC) | (IKBKB) | (! RNF43)


In [6]:
bn_string, relations = build_bn_from_kg.load_signor_network(['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'],
                                                            joiner='inhibitor_wins')
print(bn_string)

number of genes found: 6
[3845, 2778, 7157, 4089, 1029, 54894]
CDKN2A = (!MYC & !IKBKB)
GNAS = GNAS
IKBKB = !IKBKB & SRC
KRAS = SRC
MYC = !SMAD4
RNF43 = RNF43
SMAD4 = SMAD4
SRC = (GNAS | SRC)
TP53 = (!SRC & !RNF43) & IKBKB


In [7]:
relations

[('SMAD4', 'MYC', 'inhibit'),
 ('SRC', 'TP53', 'inhibit'),
 ('IKBKB', 'TP53', 'activate'),
 ('RNF43', 'TP53', 'inhibit'),
 ('GNAS', 'SRC', 'activate'),
 ('SRC', 'SRC', 'activate'),
 ('SRC', 'IKBKB', 'activate'),
 ('IKBKB', 'IKBKB', 'inhibit'),
 ('SRC', 'KRAS', 'activate'),
 ('MYC', 'CDKN2A', 'inhibit'),
 ('IKBKB', 'CDKN2A', 'inhibit')]

In [8]:
bn_string, relations = build_bn_from_kg.load_signor_network(['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'],
                                                            joiner='majority')
print(bn_string)

number of genes found: 6
[3845, 2778, 7157, 4089, 1029, 54894]
CDKN2A = 0
GNAS = GNAS
IKBKB = (SRC & !IKBKB)
KRAS = (SRC)
MYC = 0
RNF43 = RNF43
SMAD4 = SMAD4
SRC = (GNAS & !SRC) | (!GNAS & SRC) | (GNAS & SRC)
TP53 = (IKBKB & !SRC & !RNF43)


In [9]:
bn_string, relations = build_bn_from_kg.load_signor_network(['KRAS', 'GNAS', 'TP53', 'SMAD4', 'CDKN2A', 'RNF43'],
                                                            joiner='plurality')
print(bn_string)

number of genes found: 6
[3845, 2778, 7157, 4089, 1029, 54894]
CDKN2A = (!MYC & !IKBKB)
GNAS = GNAS
IKBKB = (!SRC & !IKBKB) | (SRC & !IKBKB) | (SRC & IKBKB)
KRAS = (!SRC) | (SRC)
MYC = (!SMAD4)
RNF43 = RNF43
SMAD4 = SMAD4
SRC = (!GNAS & !SRC) | (GNAS & !SRC) | (!GNAS & SRC) | (GNAS & SRC)
TP53 = (!IKBKB & !SRC & !RNF43) | (IKBKB & !SRC & !RNF43) | (IKBKB & SRC & !RNF43) | (IKBKB & !SRC & RNF43)


## Combining KG knowledge graphs

In [10]:
# load the Vundavalli KG
from BNMPy import booleanNetwork as bn
from BNMPy import BMatrix 

file = 'input_files/pancreatic_vundavilli_2020_fig3.txt'
network = BMatrix.load_network_from_file(file)
genes = network.nodeDict.keys()
print(f"number of genes: {len(genes)}")
genes

No initial state provided, using a random initial state
Network loaded successfully. There are 38 genes in the network.
number of genes: 38


dict_keys(['EGF', 'HBEGF', 'IGF', 'NRG1', 'PTEN', 'LKB1', 'EGFR', 'EFGR', 'IGFR1A_B', 'ERBB2', 'JAK5', 'STAT3', 'IRS1', 'GRB2', 'RAS', 'MEKK1', 'RAF', 'MKK4', 'MEK1', 'PIK3CA', 'JNK1', 'ERK1_2', 'PIP3', 'PDPK1', 'AKT1', 'AMPK', 'GSK3', 'TSC1_2', 'RHEB', 'mTOR', 'RPS6KB1', 'BAD', 'CCND1', 'BCL2', 'SRFELK1', 'FOS-JUN', 'SRFELK4', 'SP1'])

In [11]:
bn_string, relations = build_bn_from_kg.load_signor_network(genes, joiner='inhibitor_wins')

number of genes found: 26
[1950, 1839, 3479, 3084, 5728, 6794, 1956, 2064, 6774, 3667, 2885, 4214, 22882, 6416, 5604, 5290, 5599, 5170, 207, 5562, 6008, 6198, 572, 595, 596, 5669]


In [12]:
print(bn_string)

AKT1 = !PTEN & (PDPK1 | AKT1 | MAPK8 | PIK3CA)
BAD = (!AKT1 & !MAPK8)
BCL2 = (!MAPK8 & !BAD)
CCND1 = STAT3
EGF = EGF
EGFR = (EGF | ERBB2 | EGFR | HBEGF)
ERBB2 = (EGF | ERBB2 | EGFR | NRG1)
GRB2 = (IRS1 | ERBB2 | EGFR)
HBEGF = HBEGF
IGF1 = STAT5A
IRS1 = (!RPS6KB1 & !MAPK8 & !PIK3CA & !MAP2K1 & !PTEN)
MAP2K1 = (PDPK1 | MAP2K1 | MAP3K1)
MAP2K4 = !AKT1 & (MAP2K4 | MAP3K1)
MAP3K1 = MAP3K1
MAPK8 = !RPS6KB1 & MAP2K4
NRG1 = NRG1
PDPK1 = !RPS6KB1
PIK3CA = !PTEN & IRS1
PRKAA1 = (!AKT1 & !PRKAA1) & STK11
PTEN = !STK11 & PTEN
RPS6KB1 = !PTEN & PDPK1
STAT3 = (MAPK8 | EGFR | MAP3K1)
STAT5A = !PTEN & EGFR
STK11 = STK11


In [13]:
file = 'input_files/Vundavilli2020_standardized.txt'
network_std = BMatrix.load_network_from_file(file)
genes = network_std.nodeDict.keys()
print(f"number of genes: {len(genes)}")
print(genes)

No initial state provided, using a random initial state
Network loaded successfully. There are 38 genes in the network.
number of genes: 38
dict_keys(['EGF', 'HBEGF', 'IGF1', 'NRG1', 'PTEN', 'STK11', 'EGFR', 'ERBB4', 'IGF1R', 'ERBB2', 'JAK1', 'STAT3', 'IRS1', 'GRB2', 'KRAS', 'MAP3K1', 'RAF1', 'MAP2K4', 'MAP2K1', 'PIK3CA', 'MAPK8', 'MAPK3', 'PIP3', 'PDPK1', 'AKT1', 'PRKAA1', 'GSK3B', 'TSC1', 'RHEB', 'MTOR', 'RPS6KB1', 'BAD', 'CCND1', 'BCL2', 'ELK1', 'FOS', 'ELK4', 'SP1'])


In [14]:
bn_string_std, relations_std = build_bn_from_kg.load_signor_network(genes, joiner='inhibitor_wins')
print(bn_string_std)
# The only gene missing should be SP1, which is an output node

number of genes found: 37
[1950, 1839, 3479, 3084, 5728, 6794, 1956, 2066, 3480, 2064, 3716, 6774, 3667, 2885, 3845, 4214, 5894, 6416, 5604, 5290, 5599, 5595, 5170, 207, 5562, 2932, 7248, 6008, 2475, 6198, 572, 595, 596, 2002, 2353, 2005, 5669]
AKT1 = !PTEN & (PDPK1 | AKT1 | MTOR | MAPK8 | PIK3CA | IGF1R)
BAD = (!AKT1 & !MAPK3 & !MAPK8 & !RAF1)
BCL2 = (!MAPK8 & !GSK3B & !BAD) & MAPK3
CCND1 = !GSK3B & STAT3
CDK2 = (AKT1 | CDK2)
EGF = EGF
EGFR = !MAPK3 & (EGF | ERBB2 | EGFR | HBEGF)
ELK1 = (MAPK3 | MAPK8)
ELK4 = CDK2
ERBB2 = (EGF | ERBB2 | EGFR | NRG1)
ERBB4 = !MAPK3 & (ERBB2 | NRG1 | HBEGF)
FOS = MAPK3
GRB2 = (IRS1 | ERBB2 | EGFR | ERBB4)
GSK3B = (!AKT1 & !MAPK3 & !STK11) & (MAP2K1 | GSK3B)
HBEGF = HBEGF
IGF1 = IGF1
IGF1R = (IGF1R | IGF1)
IRS1 = (!MAPK3 & !MTOR & !RPS6KB1 & !MAPK8 & !PIK3CA & !MAP2K1 & !GSK3B & !PTEN) & (JAK1 | IGF1R)
JAK1 = EGFR
KRAS = KRAS
MAP2K1 = (PDPK1 | RAF1 | MAP2K1 | MAP3K1)
MAP2K4 = !AKT1 & (MAP2K4 | MAP3K1)
MAP3K1 = KRAS
MAPK3 = (MAPK3 | MAP2K1)
MAPK8 = !RPS6K

In [15]:
merged_bn_string = build_bn_from_kg.merge_PBN_string(network, bn_string)

AttributeError: 'BooleanNetwork' object has no attribute 'strip'

In [14]:
print(merged_bn_string)

PDPK1 = ( PIP3) | ( PDPK1)
MAP2K4 =  (!AKT1) & (MAP3K1 | MAP2K4)
TSC1_2 =  AMPK | ( ! AKT1)
AKT1 = ( PIP3) | ( (!PTEN) & (MAPK8 | PDPK1 | PIK3CA))
IGF1 =  PPARGC1A
PTEN = ( PTEN) | ( PTEN)
FOS-JUN =  RPS6KB1 & JNK1
RAF =  RAS
MAP2K1 =  PDPK1 | MAP2K1 | MAP3K1
MAPK8 =  (!RPS6KB1) & (MAP2K4)
PPARGC1A =  (!AKT1) & (PPARGC1A | PRKAA1)
BAD = ( ! (AKT1 | RPS6KB1)) | ( !MAPK8 & !AKT1)
PIK3CA = ( (ERBB2 | IRS1) | (RAS | STAT3)) | ( (!PTEN) & (IRS1))
MAP3K1 =  MAP3K1
NRG1 = ( NRG1) | ( NRG1)
STAT3 = ( JAK5) | ( MAPK8 | EGFR | MAP3K1)
HBEGF = ( HBEGF) | ( HBEGF)
PIP3 =  ! PTEN | PIK3CA
RHEB =  ! TSC1_2
EGF = ( EGF) | ( EGF)
MEK1 =  RAF
JNK1 =  MKK4
AMPK =  LKB1
ERK1_2 =  MEK1
JAK5 =  EGFR
RPS6KB1 = ( mTOR | (PDPK1 | ERK1_2)) | ( PDPK1)
IRS1 = ( IGFR1A_B) | ( !RPS6KB1 & !MAPK8 & !PIK3CA & !MAP2K1)
MKK4 =  MEKK1
STK11 =  STK11
CCND1 = ( ! GSK3) | ( STAT3)
IGFR1A_B =  IGF
mTOR =  RHEB
ERBB2 = ( NRG1) | ( EGFR | ERBB2 | NRG1 | EGF)
EGFR = ( EGF) | ( EGFR | ERBB2 | EGF | HBEGF)
PRKAA1 =  (!AKT1 & !PR

## Visualization

In [23]:
# replace the invalid name
bn_string_std = bn_string_std.replace('(-)-anisomycin', 'anisomycin')
# save the logic rules to a file
with open('Vundavilli2020_kg_inhibitor_wins.txt', 'w') as f:
    f.write(bn_string_std)

In [24]:
from BNMPy import vis
logic_rules = vis.read_logic_rules('Vundavilli2020_kg_inhibitor_wins.txt')
vis.network(logic_rules, output_html = "kg_graph.html", interactive=True) # visualize the logic graph, in an html file. view it in a browser.


kg_graph.html


## Test

In [None]:
import pandas as pd
signor_file = 'KG_files/SIGNOR_2025_08_14.tsv'
graph_table = pd.read_csv(signor_file, index_col=None, sep='\t')
print(graph_table.columns)

Index(['subject_id', 'object_id', 'subject_id_prefix', 'object_id_prefix',
       'subject_name', 'object_name', 'predicate', 'Primary_Knowledge_Source',
       'Knowledge_Source', 'publications', 'subject_category',
       'object_category', 'score'],
      dtype='object')


In [4]:
graph_table['score'].describe()

count    40935.000000
mean         0.501978
std          0.235996
min          0.100000
25%          0.278000
50%          0.468000
75%          0.727000
max          1.000000
Name: score, dtype: float64

In [None]:
from BNMPy import graph_info
graph = graph_info.df_to_graph(graph_table)
graph

KeyError: 'Attribute does not exist'