In [2]:
#Munik Shrestha
#May 8th 2018

import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd

import networkx as nx 
import igraph as ig

In [3]:
def xlogx(x):
    """Return x*log(x) (natural log), with the value at x == 0 taken to be 0."""
    # The zero case is short-circuited so np.log is never evaluated at 0.
    return 0 if x == 0 else x*np.log(x)
    
def vonEntropy(listL):
    """Return the normalized entropy of the weights in listL, rounded to 4 decimals.

    The weights are divided by their sum, the entropy -sum(p*log(p)) is
    accumulated with xlogx (natural log, 0*log(0) treated as 0), and the total
    is divided by log(len(listL)) so that equal weights score 1.

    Degenerate inputs return 0 instead of dividing by zero:
      * the weights sum to 0 (this includes an empty list);
      * there are fewer than two weights, where the normalizer log(1) is 0.
    """
    sumZ=sum(listL) # normalization constant
    if sumZ==0:
        return 0
    if len(listL)<2:
        # A single weight has zero entropy and log(1)==0, so the normalized
        # ratio would be 0/0 (nan); report 0 like the other degenerate case.
        return 0
    probs=[x/sumZ for x in listL] # normalized weights
    entropy=0
    for p in probs:
        entropy-= xlogx(p)
    return(round(entropy/np.log(len(probs)),4))

def gini(array): #imported from: https://github.com/oliviaguest/gini
    """Calculate the Gini coefficient of a numpy array, rounded to 3 decimals.

    array may be any array-like of numbers (ndarray of any numeric dtype,
    nested or flat list, ...). It is copied and flattened, so every value is
    treated equally and the caller's data is never modified.

    If the smallest value is negative, all values are shifted so the minimum
    becomes 0; a small offset (1e-7) is then added so that no value is
    exactly 0.
    """
    # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
    # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
    # Private float copy: the in-place updates below would fail on an integer
    # dtype, and ravel() yields a plain 1d array.
    values = np.array(array, dtype=float).ravel()
    lowest = np.amin(values)
    if lowest < 0:
        values -= lowest #values cannot be negative
    values += 0.0000001 #values cannot be 0
    values = np.sort(values) #values must be sorted
    n = values.shape[0] #number of array elements
    index = np.arange(1, n+1) #index per array element
    toReturn = np.sum((2 * index - n - 1) * values) / (n * np.sum(values)) #Gini coefficient
    return(round(toReturn,3))

In [4]:
homeFolder=''
nxGraphPath=homeFolder+'/nicu-analysis/data/graph-nicu.gml'
igGraphPath=homeFolder+'/nicu-analysis/data/nicuUGW.gml'

# networkx copy of the graph (directed and weighted, per the original note);
# GML labels are converted to int node ids on load.
g=nx.read_gml(nxGraphPath,destringizer=int)
# node-level centralities, both using the 'weight' edge attribute
pageRank=nx.pagerank(g,weight='weight')
betwRank=nx.betweenness_centrality(g,weight='weight')

# igraph copy of the graph, used only for the community partition
g_Igraph=ig.Graph.Read_GML(igGraphPath)
edgeWeights=g_Igraph.es["weight"]
# return_levels=True yields one clustering per level of the multilevel hierarchy
comm=g_Igraph.community_multilevel(weights=edgeWeights,return_levels=True)
nLevel=len(comm) # number of hierarchy levels

In [5]:
# Node ids come from the igraph 'label' vertex attribute; they are the same
# for every level, so they are read and converted once.
nodeIds=[int(nodeLabel) for nodeLabel in g_Igraph.vs['label']]

communityByLevel={} # communityByLevel[level] maps nodeID -> community id at that level
for level in range(len(comm)):
    # Fetch the membership vector once per level rather than once per vertex.
    membership=comm[level].membership
    communityByLevel[level]=dict(zip(nodeIds,membership))
    # NOTE(review): `label` is not read in this cell; kept in case a later cell uses it.
    label='level'+str(level)

# One row per node (index: nodeID), one column per hierarchy level.
community=pd.DataFrame(communityByLevel)
community.index.name = 'nodeID'

In [6]:
firstLevel=0
lastLevel=nLevel-1
groupBy=lastLevel # column of `community` (hierarchy level) that defines the groups

partionHere=community.groupby(groupBy) # grouping data by community
sizeOfGroup={}

#from normalized singular values
giniBygroup={}
vonEntropyByGroup={}

#from page rank
# These dicts must exist before the loop assigns into them; with the
# initializations commented out the cell raised NameError on a fresh kernel.
meanPageRank={}
varPageRank={}
vonPageRank={}

#from betweenness
meanBweenNess={}
varBweenNess={}
vonBweenNess={}

#Theil index (disabled): would be 1 - the corresponding normalized entropy
#thielPageRank={}
#thielBweenNess={}

#flow hierarchy networkx
flowHierarchy={}

for group in partionHere.groups:
    # node ids assigned to this community at the chosen level
    nodesHere=list(partionHere.get_group(group).index)
    subG=nx.subgraph(g,nodesHere)

    N=subG.number_of_nodes()
    sizeOfGroup[group]=N

    # nothing to measure if the community has no nodes in g
    if N <1:
        continue

    # Singular values of the community's adjacency matrix.
    # to_numpy_array is used because to_numpy_matrix was removed in networkx 3.0.
    A=nx.to_numpy_array(subG)
    u,s,v = np.linalg.svd(A)

    # centralities were computed on the full graph g, not on subG
    pRankListHere=[pageRank[i] for i in nodesHere]
    bWeenListHere=[betwRank[i] for i in nodesHere]

    meanPageRank[group]=round(np.mean(pRankListHere),3)
    varPageRank[group]=round(np.var(pRankListHere),3)
    vonPageRank[group]=vonEntropy(pRankListHere)

    meanBweenNess[group]=round(np.mean(bWeenListHere),3)
    varBweenNess[group]=round(np.var(bWeenListHere),3)
    vonBweenNess[group]=vonEntropy(bWeenListHere)

    #thielPageRank[group]=1-vonPageRank[group]
    #thielBweenNess[group]=1-vonBweenNess[group]

    # structure-based measures are only recorded for communities with at least one edge
    if subG.number_of_edges()>0:
        giniBygroup[group]=gini(s)
        vonEntropyByGroup[group]=vonEntropy(s)
        flowHierarchy[group]=nx.flow_hierarchy(subG)
        