In [1]:
import warnings
warnings.filterwarnings('ignore')
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random as rd
import pickle
import statistics as st

In [2]:
# Load each interaction network written to '2 - output/' as a tab-delimited
# edge list, keyed by the name of its source database.
networks = {
    source: nx.read_edgelist('2 - output/' + source + '.edgelist', delimiter='\t')
    for source in ('HINT', 'IntAct', 'Reactome', 'STRING')
}

In [3]:
# Print one line per loaded network: its name followed by the graph's string form.
for name in networks:
    G = networks[name]
    print(name, G)

HINT Graph with 14763 nodes and 114588 edges
IntAct Graph with 13268 nodes and 90446 edges
Reactome Graph with 11873 nodes and 228447 edges
STRING Graph with 16582 nodes and 252801 edges


## Centrality Function

In [13]:
def averageNeighborDegree(G):
    """Return the mean degree of each node's neighbours.

    Parameters
    ----------
    G : graph
        Any object exposing ``nodes``, an indexable ``degree`` and
        ``neighbors(node)`` (for example a ``networkx.Graph``).

    Returns
    -------
    dict
        Maps every node of ``G`` to the sum of its neighbours' degrees divided
        by its number of neighbours. Nodes without neighbours map to ``0``.
        An empty graph yields an empty dict.
    """
    degrees = G.degree
    result = {}
    for node in list(G.nodes):
        # Materialise the neighbour list once; it serves the emptiness check,
        # the degree sum and the divisor.
        neighbors = list(G.neighbors(node))
        if not neighbors:
            result[node] = 0
        else:
            result[node] = sum(degrees[n] for n in neighbors) / len(neighbors)

    return result

def get_df_averageNeighbor_centrality(network, output_file_name):
    """Write the average neighbour degree of every node to a TSV file.

    The values come from ``averageNeighborDegree(network)``. The file is
    tab-separated with an index column named ``nodeName`` and a single value
    column named ``AverageNeighbor``.
    """
    scores = pd.Series(averageNeighborDegree(network), name="AverageNeighbor")
    table = scores.to_frame()
    table.index.name = 'nodeName'
    table.to_csv(output_file_name, sep="\t", index=True)
    
    
def get_df_degree_centrality(network, output_file_name):
    """Write the degree of every node to a TSV file.

    The values come from ``nx.degree(network)``. The file is tab-separated
    with an index column named ``nodeName`` and a single value column named
    ``Degree``.
    """
    degree_by_node = dict(nx.degree(network))
    table = pd.Series(degree_by_node, name="Degree").to_frame()
    table.index.name = 'nodeName'
    table.to_csv(output_file_name, sep="\t", index=True)

    
def get_df_clustering_coefficient(network, output_file_name):
    """Write the clustering coefficient of every node to a TSV file.

    The file is tab-separated with an index column named ``nodeName`` and a
    single value column named ``Clusteringcoeff``.
    """
    # One whole-graph call returns {node: coefficient} for every node, so no
    # per-node Python loop (and per-node library call) is needed.
    cc = nx.clustering(network)
    cc_df = pd.Series(cc, name="Clusteringcoeff").to_frame()
    cc_df.index.name = 'nodeName'
    cc_df.to_csv(output_file_name, sep="\t", index=True)
    
    
# Edge-attribute key passed as `weight=` to nx.pagerank and
# nx.eigenvector_centrality in the functions below.
# NOTE(review): the edge lists are loaded with read_edgelist's default data
# parsing; confirm the files really carry a 'weightInverse' attribute.
# Presumably networkx falls back to a weight of 1 for edges lacking the key,
# which would make both measures effectively unweighted; verify.
weight='weightInverse'
# Alternative settings kept for experimentation:
#weight='weight'
#weight=1
def get_df_pagerank_centrality(network, output_file_name):
    """Write the PageRank score of every node to a TSV file.

    Scores come from ``nx.pagerank`` using the module-level ``weight`` setting
    as the edge-weight key. The file is tab-separated with an index column
    named ``nodeName`` and a single value column named ``Pagerank``.
    """
    scores = dict(nx.pagerank(network, weight=weight))
    table = pd.Series(scores, name="Pagerank").to_frame()
    table.index.name = 'nodeName'
    table.to_csv(output_file_name, sep="\t", index=True)
    
def get_df_eigenvector_centrality(network, output_file_name):
    """Write the eigenvector centrality of every node to a TSV file.

    Scores come from ``nx.eigenvector_centrality`` using the module-level
    ``weight`` setting as the edge-weight key. The file is tab-separated with
    an index column named ``nodeName`` and a single value column named
    ``Eigenvector``.
    """
    scores = dict(nx.eigenvector_centrality(network, weight=weight))
    table = pd.Series(scores, name="Eigenvector").to_frame()
    table.index.name = 'nodeName'
    table.to_csv(output_file_name, sep="\t", index=True)



In [14]:
%%time
# Metric file prefix -> (value column inside that file, column name in the summary).
# The degree file is handled separately because it seeds the summary table.
metric_columns = {
    "averageNeighbor": ("AverageNeighbor", "AvgNeighbors"),
    "clusteringcoeff": ("Clusteringcoeff", "Clustering"),
    "pagerank": ("Pagerank", "PageRank"),
    "eigenvector": ("Eigenvector", "Eigenvector"),
}

for name, network in networks.items():
    # Build each per-metric path once so the writers and the readers below
    # cannot drift apart.
    paths = {
        metric: "3 - output/" + metric + "_" + name + ".tsv"
        for metric in ["degree", *metric_columns]
    }

    get_df_degree_centrality(network, paths["degree"])
    get_df_averageNeighbor_centrality(network, paths["averageNeighbor"])
    get_df_clustering_coefficient(network, paths["clusteringcoeff"])
    get_df_pagerank_centrality(network, paths["pagerank"])
    get_df_eigenvector_centrality(network, paths["eigenvector"])

    # Assemble the per-network summary from the files written above.
    resume = pd.read_csv(paths["degree"], sep='\t')
    for metric, (file_column, summary_column) in metric_columns.items():
        metric_df = pd.read_csv(paths[metric], sep='\t')
        # Columns are merged by row position, so fail loudly if a file does
        # not list the same nodes in the same order as the degree file.
        if not metric_df['nodeName'].equals(resume['nodeName']):
            raise ValueError(
                "Node order in " + paths[metric] + " does not match " + paths["degree"]
            )
        resume[summary_column] = metric_df[file_column]

    resume.to_csv('3 - output/resume_'+name+'.tsv', sep="\t", index=True)
    display(resume[['Degree','PageRank','Eigenvector']].corr())

Unnamed: 0,Degree,PageRank,Eigenvector
Degree,1.0,0.983299,0.821228
PageRank,0.983299,1.0,0.742443
Eigenvector,0.821228,0.742443,1.0


Unnamed: 0,Degree,PageRank,Eigenvector
Degree,1.0,0.979549,0.820481
PageRank,0.979549,1.0,0.739588
Eigenvector,0.820481,0.739588,1.0


Unnamed: 0,Degree,PageRank,Eigenvector
Degree,1.0,0.884938,0.487426
PageRank,0.884938,1.0,0.24521
Eigenvector,0.487426,0.24521,1.0


Unnamed: 0,Degree,PageRank,Eigenvector
Degree,1.0,0.924391,0.572829
PageRank,0.924391,1.0,0.319037
Eigenvector,0.572829,0.319037,1.0


CPU times: total: 1min 33s
Wall time: 1min 33s
