In [None]:
import networkx as nx
import pandas as pd

## 1. Loading pathway data

In [26]:
# Read the interaction table from 'omnipath_selected.csv' into a DataFrame.
# The bare name on the last line makes the notebook render the frame.
omnipath = pd.read_csv('omnipath_selected.csv')
omnipath

Unnamed: 0,source,target,source_genesymbol,target_genesymbol,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,sources,references,curation_effort,n_references,n_resources
0,P0DP23,P48995,CALM1,TRPC1,1,0,1,1,0,1,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,1
1,P0DP25,P48995,CALM3,TRPC1,1,0,1,1,0,1,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,1
2,P0DP24,P48995,CALM2,TRPC1,1,0,1,1,0,1,TRIP,TRIP:11290752;TRIP:11983166;TRIP:12601176,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,1
3,Q03135,P48995,CAV1,TRPC1,1,1,0,1,1,0,DIP;HPRD;IntAct;Lit-BM-17;TRIP,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit...,13,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit...,5
4,P14416,P48995,DRD2,TRPC1,1,1,0,1,1,0,TRIP,TRIP:18261457,1,TRIP:18261457,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67116,P43405,P25963,SYK,NFKBIA,1,1,0,1,1,0,NCI-PID_ProtMapper;ProtMapper;REACH_ProtMapper...,ProtMapper:12711606;ProtMapper:19014652;ProtMa...,5,ProtMapper:12711606;ProtMapper:19014652;ProtMa...,3
67117,P27361,P43694,MAPK3,GATA4,1,1,0,1,1,0,HPRD;PhosphoPoint;Wang;iPTMnet,HPRD:11585926;iPTMnet:22227582,2,HPRD:11585926;iPTMnet:22227582,4
67118,P34947,P61073,GRK5,CXCR4,1,1,0,1,1,0,ProtMapper;REACH_ProtMapper;RLIMS-P_ProtMapper...,ProtMapper:24632620;iPTMnet:24632620,2,ProtMapper:24632620;iPTMnet:24632620,3
67119,Q9H2G2,P61586,SLK,RHOA,1,0,1,1,0,1,ProtMapper;RLIMS-P_ProtMapper;iPTMnet,ProtMapper:18420945;iPTMnet:18420945,2,ProtMapper:18420945;iPTMnet:18420945,2


In [27]:
# Start from an empty directed graph; it is populated from `omnipath` in the next cell.
directed_signaling_pathways = nx.DiGraph()

In [29]:
# Add one directed edge (source gene symbol -> target gene symbol) per row of `omnipath`.
# The columns are iterated positionally with zip() instead of `omnipath[col][i]`:
# that form is a label-based lookup on every access, which is slow and only correct
# while the frame still has its default 0..n-1 index.
# NOTE(review): every row whose `is_stimulation` is falsy is labelled 'inhibition',
# including rows that might be neither - confirm the input file is pre-filtered.
for source, target, is_stimulation in zip(
    omnipath['source_genesymbol'],
    omnipath['target_genesymbol'],
    omnipath['is_stimulation'],
):
    effect = 'stimulation' if is_stimulation else 'inhibition'
    directed_signaling_pathways.add_edge(source, target, effect=effect)

In [30]:
# nx.info() is deprecated and was removed in NetworkX 3.0; printing the graph itself
# gives the same "DiGraph with N nodes and M edges" summary.
print(directed_signaling_pathways)

DiGraph with 6571 nodes and 66988 edges



  print(nx.info(directed_signaling_pathways))


### 1.1 Removing protein complexes

In [31]:
# A node whose name contains '_' is treated as a protein complex
# (equivalent to the name splitting into more than one part on '_').
node_is_complex = [
    node for node in directed_signaling_pathways.nodes() if '_' in node
]

# Removing a node also removes every edge attached to it.
directed_signaling_pathways.remove_nodes_from(node_is_complex)

# nx.info() is deprecated and was removed in NetworkX 3.0; printing the graph itself
# gives the same "DiGraph with N nodes and M edges" summary.
print(directed_signaling_pathways)

DiGraph with 5857 nodes and 16710 edges



  print(nx.info(directed_signaling_pathways))


## 2. Constructing driver-centered pathway network

### 2.1 Selecting pathway driver and its downstream effectors

In [15]:
# Gene(s) treated as the pathway driver.
drivers = ['VHL']

# Genes treated as the driver's downstream effectors.
downstream_effectors = [
    'SLC2A1',
    'VEGFA',
    'PDGFB',
    'TGFA',
    'CCND1',
]

### 2.2 Constructing driver-centered pathway manually (according to KEGG)

In [9]:
# Hand-curated driver-centered pathway: (regulator, target, effect) triples,
# listed in the order the edges are inserted into the graph.
VHL_pathway = nx.DiGraph()

VHL_pathway.add_edges_from(
    (regulator, target, {'effect': effect})
    for regulator, target, effect in [
        ('VHL', 'HIF1A', 'inhibition'),
        ('VHL', 'EPAS1', 'inhibition'),
        ('HIF1A', 'SLC2A1', 'stimulation'),
        ('HIF1A', 'VEGFA', 'stimulation'),
        ('HIF1A', 'PDGFB', 'stimulation'),
        ('HIF1A', 'TGFA', 'stimulation'),
        ('EPAS1', 'CCND1', 'stimulation'),
    ]
)

In [10]:
# Tag every node with a 'class' attribute: 'driver', 'effector' or 'other'.
# nodes(data=True) yields each node together with its attribute dict, so writing
# into that dict updates the graph in place.
for gene, attributes in VHL_pathway.nodes(data=True):
    if gene in drivers:
        role = 'driver'
    elif gene in downstream_effectors:
        role = 'effector'
    else:
        role = 'other'
    attributes['class'] = role

# Persist the annotated pathway as GML.
nx.write_gml(VHL_pathway, 'VHL_pathway.gml')

### 2.3 Adding influencers whose activity provides context for the driver-centered pathway and may promote or block signal transduction

In [11]:
# Load 'VHL_kidney_GEPs_T.csv', using its first column as the row index.
# The bare name on the last line makes the notebook render the frame.
GEP_df = pd.read_csv('VHL_kidney_GEPs_T.csv', index_col=0)
GEP_df

Unnamed: 0,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11
AURKB,0.000000,38.281752,0.000000,0.000000,0.000000,0.000000,0.000000,8.898212,0.000000,0.000000,0.000000
CEP55,0.672203,26.893208,0.677079,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
OTUD7A,5.321076,0.000000,8.688075,10.805916,0.000000,1.660756,9.393560,5.584060,0.000000,0.000000,1.177817
BCL2L11,12.018525,1.725301,7.283811,6.630207,0.000000,0.607753,1.196198,9.133753,0.000000,0.000000,0.092324
NOTCH3,17.635961,4.963044,24.318759,0.000000,2.144305,0.000000,19.088086,64.000079,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
PLK2,4.599441,0.882284,0.439337,0.000000,0.000000,0.076025,1.825165,2.381278,2.980261,0.677752,0.492411
SRSF7,33.119311,3.081757,6.812608,14.473799,0.717447,0.000000,1.110784,31.989432,0.000000,0.000000,0.000000
SIK1,2.595102,0.490211,1.436016,0.212519,0.000000,2.888544,0.602722,4.709764,0.000000,0.000000,0.095214
SCO2,2.376953,0.000000,0.886670,0.526543,0.193319,0.033534,0.367036,0.475974,0.000000,0.000000,0.000000


In [33]:
# Order the rows by their value in column "P9", largest first.
sorted_GEP_df = GEP_df.sort_values("P9", ascending=False)

# Take the row labels of the 10 highest-ranked rows as influencers;
# raise this number to include more influencers.
influencers = sorted_GEP_df.head(10).index.tolist()
influencers

['PLCB1',
 'MAML2',
 'HDAC9',
 'LPHN3',
 'DOCK4',
 'SEMA3A',
 'LEF1',
 'WT1',
 'MAP3K1',
 'RPL27']

### 2.4 Connecting influencers with downstream effectors

In [34]:
# Collect every node that lies on at least one shortest path from an influencer
# to a downstream effector in the signaling network.
path_member_set = set()

for node1 in influencers:
    for node2 in downstream_effectors:
        try:
            # all_shortest_paths is lazy; list() forces evaluation so that any
            # error is raised inside this try block.
            paths = list(nx.all_shortest_paths(directed_signaling_pathways, node1, node2))
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # Only the two expected failures are handled here: the nodes are not
            # connected, or one of them is absent from the graph. Anything else
            # (including KeyboardInterrupt) now propagates instead of being hidden.
            print('There is no path from {0} to {1}'.format(node1, node2))
            continue

        # A set removes duplicates as we go and avoids repeatedly copying a growing list.
        for p in paths:
            path_member_set.update(p)

all_path_members = list(path_member_set)
len(all_path_members)

There is no path from PLCB1 to SLC2A1
There is no path from PLCB1 to VEGFA
There is no path from PLCB1 to PDGFB
There is no path from PLCB1 to TGFA
There is no path from PLCB1 to CCND1
There is no path from LPHN3 to SLC2A1
There is no path from LPHN3 to VEGFA
There is no path from LPHN3 to PDGFB
There is no path from LPHN3 to TGFA
There is no path from LPHN3 to CCND1
There is no path from SEMA3A to SLC2A1
There is no path from SEMA3A to VEGFA
There is no path from SEMA3A to PDGFB
There is no path from SEMA3A to TGFA
There is no path from SEMA3A to CCND1
There is no path from WT1 to SLC2A1
There is no path from WT1 to VEGFA
There is no path from WT1 to PDGFB
There is no path from WT1 to TGFA
There is no path from WT1 to CCND1
There is no path from RPL27 to SLC2A1
There is no path from RPL27 to VEGFA
There is no path from RPL27 to PDGFB
There is no path from RPL27 to TGFA
There is no path from RPL27 to CCND1


106

In [35]:
# Start from the part of the signaling network induced by the collected path members.
VHL_pathway_plus = directed_signaling_pathways.subgraph(all_path_members).copy()

# Overlay the manually curated pathway edges, carrying over each edge's 'effect'.
for upstream, downstream, edge_attrs in VHL_pathway.edges(data=True):
    VHL_pathway_plus.add_edge(upstream, downstream, effect=edge_attrs['effect'])

# Decide each node's class: 'driver', 'effector', 'influencer' or 'other'
# (checked in that order, so the first matching list wins).
node_classes = {}
for gene in VHL_pathway_plus.nodes():
    if gene in drivers:
        node_classes[gene] = 'driver'
    elif gene in downstream_effectors:
        node_classes[gene] = 'effector'
    elif gene in influencers:
        node_classes[gene] = 'influencer'
    else:
        node_classes[gene] = 'other'
nx.set_node_attributes(VHL_pathway_plus, node_classes, 'class')

# Persist the extended pathway as GML.
nx.write_gml(VHL_pathway_plus, 'VHL_pathway_plus.gml')