In [1]:
import pandas as pd
import networkx as nx

In [2]:
# read union dataset
union_df = pd.read_csv('results/union.csv', index_col=0)
union_df.head()

Unnamed: 0,interactor A gene symbol,interactor B gene symbol,interactor A Uniprot AC,interactor B Uniprot AC,database source
0,TCAP,TTN,O15273,Q8WZ42,IID
1,TCAP,CSRP3,O15273,P50461,IID
2,TCAP,ENO3,O15273,P13929,IID
3,TCAP,ENO1,O15273,P06733,IID
4,TCAP,MYOZ1,O15273,Q9NP98,IID


In [3]:
intersection_df = pd.read_csv('results/intersection.csv', index_col=0)

In [8]:
sgi_df = pd.read_csv('results/sgi.csv', index_col=0)

In [5]:
def global_measures(df, type_net):
    # undirected graph object
    graph = nx.from_pandas_edgelist(df, source = 'interactor A gene symbol', target='interactor B gene symbol')
    # check number of nodes
    if graph.number_of_nodes() >20:
        n_nodes = graph.number_of_nodes()
        # not consider the duplicates (union can have same edges with different sources: they will be considered just one time)
        n_edges = graph.number_of_edges()
        # number of connected components
        conn_components = nx.number_connected_components(graph)
        # number of isolates
        n_isolates = nx.number_of_isolates(graph)
        # total number of edges
        size_graph = graph.size()
        # average degree
        avg_degree = size_graph/n_nodes
        # avg clustering coefficient
        avg_cluster_coeff = nx.average_clustering(graph)
        
        print('%s'%type_net,'network has: \n')
        print(n_nodes, 'nodes')
        print(n_edges, 'edges')
        print(conn_components,'connected components')
        print(n_isolates, 'isolated nodes')
        print('average degree = ', avg_degree)
        print('average clustering coefficient = ', avg_cluster_coeff)
        
        # if graph is connected
        if conn_components ==1:
            
            # average shprtest path length
            avg_path = nx.average_shortest_path_length(graph)
            # diameter 
            diameter = nx.diameter(graph)
            # radius 
            radius = nx.radius(graph)
        
            print('shortest path length = ', avg_path)
            print('diameter = ', diameter)
            print('radius = ', radius)
            
        # if graph not connected  
        else:
            ll=[]
            #for each connected component computes the properties
            c = 1
            for g in nx.connected_component_subgraphs(graph): 
                print('Connected Component',c)
                print('average Shortest Path: ', nx.average_shortest_path_length(g))
                print('diameter', nx.diameter(g))
                print('radius', nx.radius(g))
                c +=1
    else:
        print('%s'%type_net,'network do not have a number of edges bigger than 20')

In [6]:
global_measures(union_df, 'Union')

Union network has: 

5513 nodes
10329 edges
2 connected components
0 isolated nodes
average degree =  1.8735715581353165
average clustering coefficient =  0.07321978067098309
Connected Component 1
average Shortest Path:  3.4924265343579193
diameter 7
radius 4
Connected Component 2
average Shortest Path:  1.5
diameter 2
radius 1


In [7]:
global_measures(intersection_df, 'Intersection')

Intersection network has: 

57 nodes
89 edges
10 connected components
0 isolated nodes
average degree =  1.5614035087719298
average clustering coefficient =  0.09359509885825675
Connected Component 1
average Shortest Path:  3.6659619450317127
diameter 9
radius 5
Connected Component 2
average Shortest Path:  0
diameter 0
radius 0
Connected Component 3
average Shortest Path:  0
diameter 0
radius 0
Connected Component 4
average Shortest Path:  1.0
diameter 1
radius 1
Connected Component 5
average Shortest Path:  1.3333333333333333
diameter 2
radius 1
Connected Component 6
average Shortest Path:  0
diameter 0
radius 0
Connected Component 7
average Shortest Path:  1.0
diameter 1
radius 1
Connected Component 8
average Shortest Path:  0
diameter 0
radius 0
Connected Component 9
average Shortest Path:  0
diameter 0
radius 0
Connected Component 10
average Shortest Path:  0
diameter 0
radius 0


In [9]:
global_measures(sgi_df, 'SGI')

SGI network has: 

74 nodes
176 edges
13 connected components
0 isolated nodes
average degree =  2.3783783783783785
average clustering coefficient =  0.15667418167418168
Connected Component 1
average Shortest Path:  3.23879781420765
diameter 7
radius 4
Connected Component 2
average Shortest Path:  0
diameter 0
radius 0
Connected Component 3
average Shortest Path:  0
diameter 0
radius 0
Connected Component 4
average Shortest Path:  0
diameter 0
radius 0
Connected Component 5
average Shortest Path:  0
diameter 0
radius 0
Connected Component 6
average Shortest Path:  1.0
diameter 1
radius 1
Connected Component 7
average Shortest Path:  0
diameter 0
radius 0
Connected Component 8
average Shortest Path:  0
diameter 0
radius 0
Connected Component 9
average Shortest Path:  0
diameter 0
radius 0
Connected Component 10
average Shortest Path:  0
diameter 0
radius 0
Connected Component 11
average Shortest Path:  0
diameter 0
radius 0
Connected Component 12
average Shortest Path:  0
diameter 0
rad