In [1]:
import collections
import itertools
import networkx as nx
import pandas as pd

In [2]:
# Load the two source networks from GraphML files in the working directory.
# Later cells read a 'name' attribute on nodes and a 'repress' attribute on edges
# (plus 'kind' on OmniPath edges), so both files must provide them.
omnipath = nx.read_graphml('omnipath.gxml')
trrust = nx.read_graphml('trrust.gxml')

In [3]:
# Build one directed network keyed by gene name. Every edge carries
# 'repress' (copied from the source database) and 'source' (provenance label).
combined = nx.DiGraph()

# OmniPath: keep only edges whose 'kind' is 'transcriptional'.
for u, v, attrs in omnipath.edges(data=True):
    if attrs['kind'] != 'transcriptional':
        continue
    combined.add_edge(omnipath.nodes[u]['name'], omnipath.nodes[v]['name'],
                      repress=attrs['repress'], source='OmniPath')

# TRRUST: add every edge; an edge already present is relabelled 'Both' when the
# two databases agree on 'repress', and dropped entirely when they disagree.
for u, v, attrs in trrust.edges(data=True):
    regulator_name = trrust.nodes[u]['name']
    target_name = trrust.nodes[v]['name']
    trrust_repress = attrs['repress']
    if not combined.has_edge(regulator_name, target_name):
        provenance = 'TRRUST2'
    elif trrust_repress == combined.edges[regulator_name, target_name]['repress']:
        provenance = 'Both'
    else:
        combined.remove_edge(regulator_name, target_name)
        continue
    combined.add_edge(regulator_name, target_name, repress=trrust_repress, source=provenance)

In [4]:
# Map each gene in the 'EMT_Genes' sheet to its 'Annotation' value.
df_emt = pd.read_excel('MMI2_MMI4_Genes_2022_0507.xlsx', sheet_name='EMT_Genes')
emt_genes = {}
for row in df_emt.to_dict('records'):
    emt_genes[row['Gene']] = row['Annotation']

In [5]:
def isrepression(edges):
    """Return the net sign of a chain of edges in the global ``combined`` graph.

    Parameters
    ----------
    edges : iterable
        Either a sequence of ``(source, target)`` tuples, or a path given as a
        sequence of node identifiers (consecutive nodes are paired into edges).
        The first element decides which form is assumed.

    Returns
    -------
    bool
        The XOR of the ``'repress'`` attribute over all edges, i.e. True when
        an odd number of edges along the chain are repressing. An empty chain
        (or a single-node path) yields False.

    Raises
    ------
    KeyError
        If an edge is missing from ``combined`` or has no ``'repress'`` attribute.
    """
    # Materialize so that generators work and an empty input can be detected
    # without indexing into it.
    steps = list(edges)
    if not steps:
        return False
    if not isinstance(steps[0], tuple):
        # A node path: pair up consecutive nodes to obtain its edges.
        steps = zip(steps, steps[1:])
    repress = False
    for edge in steps:
        repress ^= combined.edges[edge]['repress']
    return repress

## Genes with 4 miRNA binding sites
This is trivial because the spreadsheet already lists the EMT genes with 4 binding sites.

In [6]:
# The 'MMI4_1mi' sheet has no header row; its first column holds the gene names.
df_mmi4 = pd.read_excel('MMI2_MMI4_Genes_2022_0507.xlsx', sheet_name='MMI4_1mi', header=None)
mmi4_genes = set()
for row in df_mmi4.to_dict('records'):
    mmi4_genes.add(row[0])

In [7]:
# Number of genes from the 'MMI4_1mi' sheet that also appear in the EMT gene list.
len(mmi4_genes & set(emt_genes))

7

In [8]:
# The 'MMI4_2mis' sheet has no header row; its first column holds the gene names.
df_mmi22 = pd.read_excel('MMI2_MMI4_Genes_2022_0507.xlsx', sheet_name='MMI4_2mis', header=None)
mmi22_genes = set()
for row in df_mmi22.to_dict('records'):
    mmi22_genes.add(row[0])

In [9]:
# Number of distinct genes read from the 'MMI4_2mis' sheet.
len(mmi22_genes)

45

In [10]:
# Number of distinct genes across the 'MMI4_2mis' and 'MMI4_1mi' sheets together.
len(mmi22_genes | mmi4_genes)

46

## MMI2 targets regulated by another MMI2 target

In [11]:
# The 'MMI2' sheet has no header row; its first column holds the gene names.
df_mmi2 = pd.read_excel('MMI2_MMI4_Genes_2022_0507.xlsx', sheet_name='MMI2', header=None)
mmi2_genes = set()
for row in df_mmi2.to_dict('records'):
    mmi2_genes.add(row[0])

In [12]:
# Collect every direct edge between two distinct MMI2 genes present in the network.
# An edge is a "plausible core" edge when activation links genes with the same
# EMT annotation, or repression links genes with different annotations.
mmi2_in_network = sorted(mmi2_genes.intersection(combined.nodes))
mmi2_regulates_mmi2 = []
# permutations() yields the ordered pairs of distinct genes in the same order as
# a nested loop over the sorted list with the diagonal skipped.
for regulator, target in itertools.permutations(mmi2_in_network, 2):
    if not combined.has_edge(regulator, target):
        continue
    edge_info = combined.edges[regulator, target]
    same_annotation = emt_genes[regulator] == emt_genes[target]
    activates = not edge_info['repress']
    mmi2_regulates_mmi2.append({
        'regulator': regulator,
        'target': target,
        'repress': edge_info['repress'],
        'database': edge_info['source'],
        'plausible_core': same_annotation == activates,
    })
df_mmi2_reg_mmi2 = pd.DataFrame.from_dict(mmi2_regulates_mmi2)

In [13]:
# Number of direct MMI2 -> MMI2 regulation edges found.
df_mmi2_reg_mmi2.shape[0]

8

In [14]:
# How many of those edges are flagged as plausible core edges.
int(df_mmi2_reg_mmi2['plausible_core'].sum())

7

In [15]:
# Display the direct-regulation table.
# To export it instead, append: .to_csv('mmi2_reg_mmi2.csv', index=False)
df_mmi2_reg_mmi2

Unnamed: 0,database,plausible_core,regulator,repress,target
0,Both,True,NR3C1,True,ATP1B1
1,Both,True,NR3C1,False,CALD1
2,TRRUST2,True,SNAI2,True,CXADR
3,TRRUST2,True,SNAI2,True,HPGD
4,TRRUST2,True,ZEB1,True,CXADR
5,Both,True,ZEB1,True,GRHL2
6,TRRUST2,True,ZEB2,True,CXADR
7,Both,False,ZEB2,True,MEOX2


### Including indirect regulation

In [16]:
# For each ordered pair of distinct MMI2 genes, look for a simple path of at most
# 5 edges from regulator to target. One row is recorded per connected pair: the
# first plausible path if there is one, otherwise the last path examined.
mmi2_indirectreg_mmi2 = []
mmi2_in_network = sorted(mmi2_genes.intersection(combined.nodes))
for regulator, target in itertools.permutations(mmi2_in_network, 2):
    record = None
    for path in nx.all_simple_paths(combined, regulator, target, cutoff=5):
        repress = isrepression(path)
        plausible = (emt_genes[regulator] == emt_genes[target]) == (not repress)
        record = {'regulator': regulator,
                  'target': target,
                  'repress': repress,
                  'plausible_core': plausible}
        if plausible:
            # One plausible path is enough; stop enumerating.
            break
    if record is not None:
        mmi2_indirectreg_mmi2.append(record)
df_mmi2_indirectreg_mmi2 = pd.DataFrame.from_dict(mmi2_indirectreg_mmi2)

In [17]:
# Number of MMI2 gene pairs connected by a path of at most 5 edges.
df_mmi2_indirectreg_mmi2.shape[0]

171

In [18]:
# How many of those pairs have at least one plausible path.
int(df_mmi2_indirectreg_mmi2['plausible_core'].sum())

146

## Genes regulated by two MMI2 targets

In [19]:
# For every EMT gene in the network, list each unordered pair of MMI2 genes that
# both regulate it directly. Pairs are emitted once, with regulator1 < regulator2.
shared_mmi2_targets = []
for target in emt_genes:
    if target not in combined.nodes:
        continue
    for reg1, reg2 in itertools.permutations(combined.predecessors(target), 2):
        if not reg1 < reg2:
            continue
        if not (reg1 in mmi2_genes and reg2 in mmi2_genes):
            continue
        repress1 = combined.edges[reg1, target]['repress']
        repress2 = combined.edges[reg2, target]['repress']
        # Both edges must be consistent with the EMT annotations of their endpoints.
        plausible_core = (
            ((emt_genes[reg1] == emt_genes[target]) == (not repress1))
            and ((emt_genes[reg2] == emt_genes[target]) == (not repress2))
        )
        shared_mmi2_targets.append({
            'regulator1': reg1,
            'regulator1_repress': repress1,
            'regulator2': reg2,
            'regulator2_repress': repress2,
            'target': target,
            'plausible_core': plausible_core,
        })
df_shared_mmi2_targets = pd.DataFrame.from_dict(shared_mmi2_targets)

In [20]:
# Number of (regulator pair, target) combinations with direct co-regulation.
df_shared_mmi2_targets.shape[0]

18

In [21]:
# How many of those combinations are flagged as plausible core motifs.
int(df_shared_mmi2_targets['plausible_core'].sum())

10

In [22]:
# Display the co-regulation table ordered by target, then by regulator names.
# To export it instead, append: .to_csv('mmi2_shared_targets.csv', index=False)
df_shared_mmi2_targets.sort_values(['target', 'regulator1', 'regulator2'])

Unnamed: 0,plausible_core,regulator1,regulator1_repress,regulator2,regulator2_repress,target
8,False,KLF8,False,SNAI2,True,CDH1
6,False,KLF8,False,TWIST1,True,CDH1
9,False,KLF8,False,ZEB1,True,CDH1
7,False,KLF8,False,ZEB2,True,CDH1
2,True,SNAI2,True,TWIST1,True,CDH1
4,True,SNAI2,True,ZEB1,True,CDH1
3,True,SNAI2,True,ZEB2,True,CDH1
1,True,TWIST1,True,ZEB1,True,CDH1
0,True,TWIST1,True,ZEB2,True,CDH1
5,True,ZEB1,True,ZEB2,True,CDH1


### Including indirect regulation

In [23]:
# For every (MMI2 regulator, EMT target) pair joined by at least one simple path
# of at most 5 edges, store under emt_mmi2_regulators[target][regulator] a tuple:
#   [0] nodes common to ALL such paths (always includes regulator and target);
#   [1] nodes common to all PLAUSIBLE paths, or None when no path is plausible.
# A path is plausible when its net sign matches the EMT annotations of its ends.
emt_mmi2_regulators = collections.defaultdict(dict)

# Filter to genes present in the network once, instead of re-checking membership
# for every pair. Sorting the regulators (as the other MMI2 cells do) makes the
# insertion order of the result independent of set iteration order, which is not
# guaranteed to be stable across kernel restarts.
mmi2_in_network = sorted(mmi2_genes.intersection(combined.nodes))
emt_in_network = [gene for gene in emt_genes if gene in combined.nodes]

for regulator, target in itertools.product(mmi2_in_network, emt_in_network):
    if regulator == target:
        continue
    always_required_nodes = None
    plausible_required_nodes = None
    for path in nx.algorithms.simple_paths.all_simple_paths(combined, regulator, target, cutoff=5):
        plausible = (emt_genes[regulator] == emt_genes[target]) == (not isrepression(path))
        # Running intersection over every path seen so far.
        if always_required_nodes is None:
            always_required_nodes = frozenset(path)
        else:
            always_required_nodes = always_required_nodes.intersection(path)
        # Same, restricted to plausible paths.
        if plausible:
            if plausible_required_nodes is None:
                plausible_required_nodes = frozenset(path)
            else:
                plausible_required_nodes = plausible_required_nodes.intersection(path)
    # None means no path was found within the cutoff; such pairs are not recorded.
    if always_required_nodes is not None:
        emt_mmi2_regulators[target][regulator] = (always_required_nodes, plausible_required_nodes)

In [24]:
# For each target, list unordered pairs of MMI2 regulators that reach it through
# independent routes: neither regulator is required on the other's paths, and the
# two sets of always-required nodes share at most one node.
shared_mmi2_indirect_targets = []
for target, regulators in emt_mmi2_regulators.items():
    for reg1, reg2 in itertools.permutations(regulators, 2):
        if reg1 >= reg2:
            continue
        always_required1, plausible_required1 = regulators[reg1]
        always_required2, plausible_required2 = regulators[reg2]
        # Skip when one regulator only acts through the other.
        if reg2 in always_required1 or reg1 in always_required2:
            continue
        # Skip when both routes are forced through a common node besides the one allowed.
        if len(always_required1 & always_required2) > 1:
            continue
        # The same independence test, applied to the plausible paths only.
        plausible = (
            plausible_required1 is not None
            and reg2 not in plausible_required1
            and plausible_required2 is not None
            and reg1 not in plausible_required2
            and len(plausible_required1 & plausible_required2) == 1
        )
        shared_mmi2_indirect_targets.append({
            'regulator1': reg1,
            'regulator2': reg2,
            'target': target,
            'plausible_core': plausible,
        })
df_shared_mmi2_indirect_targets = pd.DataFrame.from_dict(shared_mmi2_indirect_targets)

In [25]:
# Number of (regulator pair, target) combinations with independent indirect routes.
df_shared_mmi2_indirect_targets.shape[0]

1312

In [26]:
# How many of those combinations are flagged as plausible.
int(df_shared_mmi2_indirect_targets['plausible_core'].sum())

1105

## Count of genes involved in direct-regulation instances

In [27]:
# Gather every EMT gene that takes part in at least one direct-regulation finding:
# genes from the two MMI4 sheets, plus all regulators and targets recorded in the
# direct MMI2 -> MMI2 and direct co-regulation lists.
involved_genes = {gene for gene in mmi4_genes | mmi22_genes if gene in emt_genes}
for chain in mmi2_regulates_mmi2:
    involved_genes.update((chain['regulator'], chain['target']))
for cotargeting in shared_mmi2_targets:
    involved_genes.update(
        cotargeting[role] for role in ('regulator1', 'regulator2', 'target')
    )
len(involved_genes)

55

In [28]:
# Total number of genes in the EMT gene list, for comparison with the count above.
len(emt_genes)

423