# Read Dataset

In [1]:
import networkx as nx
import numpy as np
import operator

In [2]:
def create_list(filename):
    element_list = []
    with open(filename) as f:
        for line in f:
            line_data = line.split(',') 
            element_list.append(line_data[0])  
    return element_list

x = create_list(r"ca-GrQc.txt")
x[:10]

['19 1\n',
 '1751 1\n',
 '2408 1\n',
 '3078 1\n',
 '3609 1\n',
 '3958 1\n',
 '612 2\n',
 '2405 2\n',
 '3096 2\n',
 '3696 2\n']

In [3]:
def create_edgelist_from_list(x):
    ggg = nx.Graph()
    for i in range(len(x)):
        source = int(x[i].split()[0])
        target = int(x[i].split()[1])
        ggg.add_edge(source, target)
    return ggg

In [4]:
g = create_edgelist_from_list(x)
print(nx.info(g))

Name: 
Type: Graph
Number of nodes: 4158
Number of edges: 13422
Average degree:   6.4560


# Check connectedness

In [5]:
nx.is_connected(g)

True

In [6]:
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Get Macroscopic Characteristics

In [7]:
# Min degree, Max degree (Average degree already gotten)
def degrees(G):
    return [G.degree(u) for u in G]
x = degrees(g)

In [8]:
mink = min(x)
mink

1

In [9]:
def getmax(x_list):
    max = 0
    for i in x_list:
        if i > max:
            max=i
    return max
maxk = getmax(x)
maxk

81

In [10]:
# Topological characteristic = Average path length
nx.average_shortest_path_length(g)

6.049380016182999

In [11]:
# Topological characteristic = Density
nx.density(g)

0.0015530402828935425

In [12]:
# Topological characteristic = Transitivity
nx.transitivity(g)

0.6288944756689877

In [13]:
# Topological characteristic = Assortativity/Degree correlation
nx.degree_assortativity_coefficient(g)

0.6391861406753757

In [14]:
# Topological characteristic = Diameter: the maximum eccecntricity 
# --> eccentricity of node node v is the maximum distance from v to all other nodes in G
nx.diameter(g)

17

In [15]:
# Topological characteristic = Global efficiency
nx.global_efficiency(g)

0.17919243490342168

# Relabel nodes all if nodes are strings
- To be able to run all tests without any inconsistencies

In [16]:
from myTruss import mappingAndRelabeling

In [17]:
g_relabled = mappingAndRelabeling(g)

In [18]:
# Before 
print(nx.info(g))

Name: 
Type: Graph
Number of nodes: 4158
Number of edges: 13422
Average degree:   6.4560


In [19]:
# After 
print(nx.info(g_relabled))

Name: 
Type: Graph
Number of nodes: 4158
Number of edges: 13422
Average degree:   6.4560


In [20]:
g = g_relabled

# Get nodes in the key and their neighbors

In [21]:
list = g.nodes()
dict_graph = dict ()  # nodes in the key and their neighbors
for i in list:
    dict_graph[i] = []
for i in list:
    iteri = g.neighbors (i)
    for j in iteri:
        dict_graph[i].append (j)

# Getting Community Structure According to InfoMap

In [22]:
# Using 'community' library to calculate Modularity
# community API: This package implements community detection. 
# Package name is community but refer to python-louvain on pypi
import community
import infomap

In [23]:
im = infomap.Infomap("--two-level")

print("Building Infomap network from a NetworkX graph...")
for source, target in g.edges:
    im.add_link(source, target)

print("Find communities with Infomap...")
im.run()

print(f"Found {im.num_top_modules} modules with codelength: {im.codelength}")

communities = im.get_modules()

Building Infomap network from a NetworkX graph...
Find communities with Infomap...
Found 327 modules with codelength: 6.254582707534089


In [24]:
# Topological characteristic 7 = Modularity based on infomap
modularity_value = community.modularity(communities, g)
modularity_value

0.779518773357885

# Rename the communities

In [25]:
from organizeCommunities import *

# Input: dictionary of keys as nodes and values as community number
# Output: dictionary of keys as community number and values as nodes in that community

def flip_nodes_and_communities(dict_nodes_communities):
    # Step 1: initialize communities as keys
    new_dict = {}
    for k, v in dict_nodes_communities.items():
        new_dict[v]=[]
    
    # Step 2: Fill in nodes
    for kk,vv in new_dict.items():
        for k,v in dict_nodes_communities.items():
            if dict_nodes_communities[k] == kk: # If the community number (value) in `best` is the same as new_dict key (key), append the node (key) in `best`
            #print(k,v)
                new_dict[kk].append(k)
    
    return new_dict

In [26]:
# Step 1
infomap_communities = flip_nodes_and_communities(communities)
print(infomap_communities)

{190: [0, 1, 2, 3, 4, 5, 6, 4093, 4140, 4141], 136: [7, 8, 9, 10, 11, 722, 1823, 2860, 3166, 3239, 3975, 3976, 3978], 23: [12, 13, 14, 15, 16, 17, 593, 594, 595, 1289, 1894, 1895, 2329, 2516, 2517, 2519, 2565, 2566, 3247, 3357, 3428, 3711, 3791], 98: [18, 19, 20, 21, 22, 1872, 1873, 1874, 1875, 1876, 2630, 2752, 2753, 2754, 4076], 191: [23, 24, 992, 993, 1545, 1546, 2134, 2135, 2136, 2650, 3432, 3727, 4007], 1: [25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 670, 706, 708, 709, 711, 1315, 1316, 1318, 1320, 1321, 1505, 1613, 1614, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1858, 1859, 1860, 2061, 2062, 2150, 2347, 2397, 2399, 2400, 2484, 2512, 2712, 3031, 3032, 3140, 3631, 3776, 3816, 4041], 19: [28, 57, 58, 667, 686, 806, 807, 808, 810, 828, 829, 941, 946, 1044, 1045, 1046, 1047, 1048, 1251, 3105, 3106, 3201], 51: [35, 308, 656, 1451, 1452, 1453, 1

In [27]:
# Step 2
infomap_communities_organized = orderCommunities(infomap_communities)
print(infomap_communities_organized)

{0: [0, 1, 2, 3, 4, 5, 6, 4093, 4140, 4141], 1: [7, 8, 9, 10, 11, 722, 1823, 2860, 3166, 3239, 3975, 3976, 3978], 2: [12, 13, 14, 15, 16, 17, 593, 594, 595, 1289, 1894, 1895, 2329, 2516, 2517, 2519, 2565, 2566, 3247, 3357, 3428, 3711, 3791], 3: [18, 19, 20, 21, 22, 1872, 1873, 1874, 1875, 1876, 2630, 2752, 2753, 2754, 4076], 4: [23, 24, 992, 993, 1545, 1546, 2134, 2135, 2136, 2650, 3432, 3727, 4007], 5: [25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 670, 706, 708, 709, 711, 1315, 1316, 1318, 1320, 1321, 1505, 1613, 1614, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1858, 1859, 1860, 2061, 2062, 2150, 2347, 2397, 2399, 2400, 2484, 2512, 2712, 3031, 3032, 3140, 3631, 3776, 3816, 4041], 6: [28, 57, 58, 667, 686, 806, 807, 808, 810, 828, 829, 941, 946, 1044, 1045, 1046, 1047, 1048, 1251, 3105, 3106, 3201], 7: [35, 308, 656, 1451, 1452, 1453, 1454, 1455,

In [28]:
infomap_communities_organized[0]

[0, 1, 2, 3, 4, 5, 6, 4093, 4140, 4141]

In [29]:
communityInfo(infomap_communities, infomap_communities_organized)

Number of partitions:  327
Number of nodes in the communities detected:  4158
Number of repetitions:  0




In [30]:
# Take the raw communities outputted by infomap, will be used with `dict_internal_communities_and_sizes`
communities_flipped = flip_nodes_and_communities(communities)
print(communities_flipped)

{190: [0, 1, 2, 3, 4, 5, 6, 4093, 4140, 4141], 136: [7, 8, 9, 10, 11, 722, 1823, 2860, 3166, 3239, 3975, 3976, 3978], 23: [12, 13, 14, 15, 16, 17, 593, 594, 595, 1289, 1894, 1895, 2329, 2516, 2517, 2519, 2565, 2566, 3247, 3357, 3428, 3711, 3791], 98: [18, 19, 20, 21, 22, 1872, 1873, 1874, 1875, 1876, 2630, 2752, 2753, 2754, 4076], 191: [23, 24, 992, 993, 1545, 1546, 2134, 2135, 2136, 2650, 3432, 3727, 4007], 1: [25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 670, 706, 708, 709, 711, 1315, 1316, 1318, 1320, 1321, 1505, 1613, 1614, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1858, 1859, 1860, 2061, 2062, 2150, 2347, 2397, 2399, 2400, 2484, 2512, 2712, 3031, 3032, 3140, 3631, 3776, 3816, 4041], 19: [28, 57, 58, 667, 686, 806, 807, 808, 810, 828, 829, 941, 946, 1044, 1045, 1046, 1047, 1048, 1251, 3105, 3106, 3201], 51: [35, 308, 656, 1451, 1452, 1453, 1

# Get the size of each community

In [31]:
community_size_dict = {}
for i in communities_flipped:
    community_size_dict[i]=len(communities_flipped[i])

In [32]:
community_size_dict

{190: 10,
 136: 13,
 23: 23,
 98: 15,
 191: 13,
 1: 87,
 19: 22,
 51: 25,
 48: 23,
 206: 7,
 85: 16,
 24: 23,
 125: 16,
 151: 12,
 137: 9,
 183: 8,
 273: 7,
 29: 19,
 153: 12,
 119: 17,
 131: 12,
 56: 22,
 63: 23,
 15: 41,
 2: 46,
 135: 14,
 90: 17,
 291: 6,
 195: 10,
 66: 17,
 58: 19,
 57: 20,
 101: 17,
 20: 28,
 127: 11,
 42: 18,
 178: 8,
 53: 20,
 88: 19,
 208: 9,
 18: 37,
 162: 9,
 311: 4,
 36: 26,
 209: 9,
 37: 23,
 114: 11,
 11: 41,
 61: 23,
 9: 17,
 6: 26,
 286: 5,
 4: 34,
 28: 19,
 64: 10,
 112: 15,
 251: 7,
 141: 12,
 13: 20,
 41: 15,
 124: 13,
 47: 19,
 104: 18,
 199: 7,
 49: 10,
 75: 12,
 79: 15,
 73: 17,
 128: 11,
 211: 6,
 31: 15,
 59: 14,
 227: 8,
 180: 12,
 72: 22,
 117: 15,
 198: 10,
 97: 20,
 283: 5,
 164: 11,
 21: 33,
 193: 7,
 62: 20,
 38: 25,
 87: 19,
 270: 7,
 168: 14,
 298: 5,
 46: 17,
 174: 13,
 197: 7,
 157: 13,
 92: 17,
 7: 22,
 68: 15,
 224: 9,
 14: 19,
 139: 17,
 34: 13,
 186: 7,
 100: 15,
 243: 6,
 219: 9,
 126: 10,
 158: 13,
 295: 4,
 44: 22,
 110: 16,
 254

# Get community number, min size, max size

In [33]:
# community_size_dict: dictionary where key = community number and value = size of the community

In [34]:
len(community_size_dict)

327

In [35]:
maximum = max(community_size_dict.values())
maximum

87

In [36]:
minimum = min(community_size_dict.values())
minimum

3

# Get community graphs
Step 1: Get `dict_internal_communities_and_sizes` <br>
Step 2:  import  `get_community_graph_and_dict()`, and grant it as input `dict_internal_communities_and_sizes` and `communites_flipped` <br>

Note that the function will return 2 outputs: 
Outputs:
1. graph_community_dict: dict of subgraphs for each community
2. graph_community_dict_dict: dict of dicts where the each community as key has a dict as value. The dict as value has key as nodes and vlaue as neighbors of nodes

In [37]:
from communityUtilitiesMe import get_community_graph

In [38]:
# Step 1
dict_internal_communities_and_sizes = {} # node: tup(internal community, size)

for i in communities: # for each node
    community = communities[i] # get its community and put it in a variable
    dict_internal_communities_and_sizes[i]=[]
    for j in dict_graph[i]: # get neighbors of node i 
        if communities[j] == community: # check if the communities of the neighbors are the SAME as node i
            tup = ()
            tup = (communities[j], community_size_dict[communities[j]], j) # internal community, its size, the neighbor of node i in that external community
            dict_internal_communities_and_sizes[i].append(tup)

dict_internal_communities_and_sizes # node: [(community #, size of community, neighbor #),(community #, size of community, neighbor #)... ]

{0: [(190, 10, 1), (190, 10, 2), (190, 10, 3)],
 1: [(190, 10, 0),
  (190, 10, 2),
  (190, 10, 3),
  (190, 10, 4),
  (190, 10, 5),
  (190, 10, 6)],
 2: [(190, 10, 0), (190, 10, 1), (190, 10, 3)],
 3: [(190, 10, 0), (190, 10, 1), (190, 10, 2), (190, 10, 4)],
 4: [(190, 10, 1), (190, 10, 3), (190, 10, 4093)],
 5: [(190, 10, 1), (190, 10, 6), (190, 10, 4140), (190, 10, 4141)],
 6: [(190, 10, 1), (190, 10, 5), (190, 10, 4140), (190, 10, 4141)],
 7: [(136, 13, 8), (136, 13, 10), (136, 13, 11)],
 8: [(136, 13, 7), (136, 13, 9), (136, 13, 10), (136, 13, 11)],
 9: [(136, 13, 8)],
 10: [(136, 13, 7),
  (136, 13, 8),
  (136, 13, 722),
  (136, 13, 11),
  (136, 13, 2860)],
 11: [(136, 13, 7), (136, 13, 8), (136, 13, 10)],
 12: [(23, 23, 13), (23, 23, 14), (23, 23, 15), (23, 23, 16), (23, 23, 17)],
 13: [(23, 23, 12), (23, 23, 14), (23, 23, 15), (23, 23, 16), (23, 23, 17)],
 14: [(23, 23, 12),
  (23, 23, 13),
  (23, 23, 2329),
  (23, 23, 15),
  (23, 23, 2516),
  (23, 23, 16),
  (23, 23, 17),
  (23,

In [39]:
x,y = get_community_graph(dict_internal_communities_and_sizes, communities_flipped)
x

{190: <networkx.classes.graph.Graph at 0x234c8b45320>,
 136: <networkx.classes.graph.Graph at 0x234c8b45358>,
 23: <networkx.classes.graph.Graph at 0x234c8b45d30>,
 98: <networkx.classes.graph.Graph at 0x234c8b45208>,
 191: <networkx.classes.graph.Graph at 0x234c8b45ef0>,
 1: <networkx.classes.graph.Graph at 0x234c8b45908>,
 19: <networkx.classes.graph.Graph at 0x234c872b3c8>,
 51: <networkx.classes.graph.Graph at 0x234c8b45940>,
 48: <networkx.classes.graph.Graph at 0x234c8b45748>,
 206: <networkx.classes.graph.Graph at 0x234c8b45b00>,
 85: <networkx.classes.graph.Graph at 0x234c8b454e0>,
 24: <networkx.classes.graph.Graph at 0x234c8b45eb8>,
 125: <networkx.classes.graph.Graph at 0x234c8b456a0>,
 151: <networkx.classes.graph.Graph at 0x234c8b45c50>,
 137: <networkx.classes.graph.Graph at 0x234c8b45630>,
 183: <networkx.classes.graph.Graph at 0x234c8b451d0>,
 273: <networkx.classes.graph.Graph at 0x234c8b455f8>,
 29: <networkx.classes.graph.Graph at 0x234c8b45cc0>,
 153: <networkx.clas

In [40]:
y

{190: {0: [1, 2, 3],
  1: [0, 2, 3, 4, 5, 6],
  2: [0, 1, 3],
  3: [0, 1, 2, 4],
  4: [1, 3, 4093],
  5: [1, 6, 4140, 4141],
  6: [1, 5, 4140, 4141],
  4093: [4],
  4140: [5, 6, 4141],
  4141: [5, 6, 4140]},
 136: {7: [8, 10, 11],
  8: [7, 9, 10, 11],
  10: [7, 8, 722, 11, 2860],
  11: [7, 8, 10],
  9: [8],
  722: [10, 2860],
  2860: [10, 722, 1823, 3978],
  1823: [3166, 3239, 3975, 3976, 3978, 2860],
  3166: [1823],
  3239: [1823],
  3975: [1823],
  3976: [1823],
  3978: [1823, 2860]},
 23: {12: [13, 14, 15, 16, 17],
  13: [12, 14, 15, 16, 17],
  14: [12, 13, 2329, 15, 2516, 16, 17, 2517, 2519, 1289],
  15: [12, 13, 14, 16, 17, 3711],
  16: [12, 13, 14, 15, 3247, 3791, 17, 2517, 1894, 2519, 1289],
  17: [12, 13, 14, 15, 16, 593, 594, 3247, 3357, 595, 1289],
  2329: [14],
  2516: [14, 1289],
  2517: [14, 16, 1289, 1894, 2565, 2566, 2519],
  2519: [14, 16, 1289, 2517],
  1289: [14, 16, 17, 1895, 3428, 2516, 2517, 1894, 2519],
  3711: [15],
  3247: [16, 17],
  3791: [16],
  1894: [16, 12

In [41]:
print(nx.info(x[1]))

Name: 
Type: Graph
Number of nodes: 87
Number of edges: 1196
Average degree:  27.4943


In [42]:
y[1]

{25: [26,
  75,
  27,
  29,
  30,
  31,
  32,
  33,
  34,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74],
 26: [25,
  27,
  29,
  30,
  31,
  32,
  33,
  34,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74],
 75: [25,
  27,
  29,
  30,
  31,
  32,
  33,
  34,
  36,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  70,
  71,
  72,
  73,
  74],
 27: [25,
  26,
  75,
  29,
  706,
  30,
  31,
  32,
  33,
  34,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  708,
  43,
  44,
  45,
  46,
  47,
  49,
  50,
  709,
  51,
  52,
  53,
  54,


# Metric 1 - Internal Degree

In [43]:
def degrees(G):
    return [G.degree(u) for u in G]

In [44]:
total_communities = len(communities_flipped)
total_communities

327

- This is for 1 community, but we want the average of all communities to quantify the community structure as a whole

In [45]:
sum_deg = 0
avg = 0
avg_final = 0

for i in x: # for each communtiy
    deg = degrees(x[i])
    sumdeg = 0
    for i in deg:
        sumdeg = sumdeg + i
    avg = sumdeg/len(deg)
    avg_final = avg_final + avg
    avg = 0
    
average_internal_degree = avg_final/total_communities
average_internal_degree

3.7305221105923456

# Metric 2 - Internal Density

In [46]:
sum_dens= 0
avg = 0
avg_final = 0

for i in x: # for each communtiy
    dens = nx.density(x[i])
    sum_dens =  dens + sum_dens
    dens = 0

average_internal_density = sum_dens/total_communities
average_internal_density

0.39473152008950113

# Metric 4 - Maximum-Out Degree Fraction (Max-ODF)

## Get the total degree of each node

In [47]:
def degree_method(g):
    list_nodes = g.nodes ()  # list of nodes
    dict_degree = dict ()  # nodes and their degree centrality

    for i in list_nodes:
        dict_degree[i] = g.degree(i)

    return dict_degree

In [48]:
dict_degree = degree_method(g)
print(dict_degree)

{0: 4, 1: 6, 2: 3, 3: 5, 4: 3, 5: 4, 6: 5, 7: 4, 8: 4, 9: 1, 10: 8, 11: 4, 12: 6, 13: 5, 14: 21, 15: 11, 16: 22, 17: 28, 18: 4, 19: 4, 20: 4, 21: 4, 22: 8, 23: 4, 24: 1, 25: 48, 26: 49, 27: 55, 28: 17, 29: 56, 30: 47, 31: 65, 32: 45, 33: 49, 34: 54, 35: 6, 36: 45, 37: 43, 38: 46, 39: 68, 40: 45, 41: 56, 42: 45, 43: 68, 44: 49, 45: 45, 46: 77, 47: 48, 48: 2, 49: 57, 50: 45, 51: 46, 52: 60, 53: 62, 54: 45, 55: 66, 56: 45, 57: 22, 58: 44, 59: 47, 60: 63, 61: 45, 62: 45, 63: 45, 64: 47, 65: 81, 66: 79, 67: 67, 68: 48, 69: 77, 70: 47, 71: 53, 72: 51, 73: 56, 74: 51, 75: 42, 76: 33, 77: 7, 78: 11, 79: 2, 80: 11, 81: 18, 82: 4, 83: 7, 84: 4, 85: 7, 86: 2, 87: 8, 88: 9, 89: 4, 90: 4, 91: 8, 92: 4, 93: 2, 94: 5, 95: 2, 96: 1, 97: 6, 98: 1, 99: 13, 100: 4, 101: 7, 102: 3, 103: 14, 104: 3, 105: 2, 106: 13, 107: 10, 108: 2, 109: 5, 110: 42, 111: 42, 112: 42, 113: 34, 114: 42, 115: 42, 116: 31, 117: 42, 118: 33, 119: 42, 120: 49, 121: 42, 122: 28, 123: 42, 124: 33, 125: 31, 126: 42, 127: 41, 128: 4

## Get the outer degrees of nodes in `dict_out`

In [49]:
def inout_degree(g,partition):
    # partition is dictionarhy of node: community
    list = g.nodes ()
    dict_graph = dict ()  # nodes in the key and their neighbors
    for i in list:
        dict_graph[i] = []
    for i in list:
        iteri = g.neighbors (i)
        for j in iteri:
            dict_graph[i].append (j)

    dict_in = dict ()
    dict_out = dict ()
    d = degree_method (g)

    for i in list:
        community = partition[i] # get the community of node i 
        nbr_in = 0
        for j in dict_graph[i]: # get the neighbors of node i 
            if partition[j] == community: # check if community of node i is the same as community of neighbor
                nbr_in = nbr_in + 1
        idegree = d[i]
        nbr_out = idegree - nbr_in
        dict_in[i] = nbr_in
        dict_out[i] = nbr_out

    return dict_in,dict_out

In [50]:
in_dict, out_dict = inout_degree(g, communities)
print(in_dict)

{0: 3, 1: 6, 2: 3, 3: 4, 4: 3, 5: 4, 6: 4, 7: 3, 8: 4, 9: 1, 10: 5, 11: 3, 12: 5, 13: 5, 14: 10, 15: 6, 16: 11, 17: 11, 18: 4, 19: 4, 20: 4, 21: 4, 22: 8, 23: 4, 24: 1, 25: 46, 26: 45, 27: 49, 28: 9, 29: 50, 30: 47, 31: 51, 32: 45, 33: 47, 34: 50, 35: 2, 36: 45, 37: 43, 38: 46, 39: 47, 40: 45, 41: 51, 42: 45, 43: 56, 44: 47, 45: 45, 46: 53, 47: 45, 48: 2, 49: 54, 50: 45, 51: 45, 52: 48, 53: 56, 54: 45, 55: 46, 56: 45, 57: 11, 58: 15, 59: 46, 60: 46, 61: 45, 62: 45, 63: 45, 64: 47, 65: 50, 66: 49, 67: 58, 68: 48, 69: 47, 70: 46, 71: 47, 72: 47, 73: 51, 74: 45, 75: 42, 76: 12, 77: 2, 78: 8, 79: 2, 80: 6, 81: 4, 82: 3, 83: 4, 84: 4, 85: 5, 86: 1, 87: 6, 88: 3, 89: 4, 90: 4, 91: 4, 92: 4, 93: 2, 94: 4, 95: 2, 96: 1, 97: 5, 98: 1, 99: 11, 100: 3, 101: 5, 102: 3, 103: 6, 104: 3, 105: 2, 106: 9, 107: 7, 108: 1, 109: 3, 110: 42, 111: 42, 112: 42, 113: 31, 114: 42, 115: 42, 116: 31, 117: 42, 118: 33, 119: 42, 120: 44, 121: 42, 122: 25, 123: 42, 124: 33, 125: 31, 126: 42, 127: 41, 128: 42, 129: 

In [51]:
print(out_dict)

{0: 1, 1: 0, 2: 0, 3: 1, 4: 0, 5: 0, 6: 1, 7: 1, 8: 0, 9: 0, 10: 3, 11: 1, 12: 1, 13: 0, 14: 11, 15: 5, 16: 11, 17: 17, 18: 0, 19: 0, 20: 0, 21: 0, 22: 0, 23: 0, 24: 0, 25: 2, 26: 4, 27: 6, 28: 8, 29: 6, 30: 0, 31: 14, 32: 0, 33: 2, 34: 4, 35: 4, 36: 0, 37: 0, 38: 0, 39: 21, 40: 0, 41: 5, 42: 0, 43: 12, 44: 2, 45: 0, 46: 24, 47: 3, 48: 0, 49: 3, 50: 0, 51: 1, 52: 12, 53: 6, 54: 0, 55: 20, 56: 0, 57: 11, 58: 29, 59: 1, 60: 17, 61: 0, 62: 0, 63: 0, 64: 0, 65: 31, 66: 30, 67: 9, 68: 0, 69: 30, 70: 1, 71: 6, 72: 4, 73: 5, 74: 6, 75: 0, 76: 21, 77: 5, 78: 3, 79: 0, 80: 5, 81: 14, 82: 1, 83: 3, 84: 0, 85: 2, 86: 1, 87: 2, 88: 6, 89: 0, 90: 0, 91: 4, 92: 0, 93: 0, 94: 1, 95: 0, 96: 0, 97: 1, 98: 0, 99: 2, 100: 1, 101: 2, 102: 0, 103: 8, 104: 0, 105: 0, 106: 4, 107: 3, 108: 1, 109: 2, 110: 0, 111: 0, 112: 0, 113: 3, 114: 0, 115: 0, 116: 0, 117: 0, 118: 0, 119: 0, 120: 5, 121: 0, 122: 3, 123: 0, 124: 0, 125: 0, 126: 0, 127: 0, 128: 1, 129: 0, 130: 0, 131: 0, 132: 0, 133: 0, 134: 15, 135: 0, 136

## For each community, get the maximum node that has the outer degree in it

In [52]:
max_odf_dict = {}
for i in x:
    max_odf_dict[i]=[]
max_odf_dict

{190: [],
 136: [],
 23: [],
 98: [],
 191: [],
 1: [],
 19: [],
 51: [],
 48: [],
 206: [],
 85: [],
 24: [],
 125: [],
 151: [],
 137: [],
 183: [],
 273: [],
 29: [],
 153: [],
 119: [],
 131: [],
 56: [],
 63: [],
 15: [],
 2: [],
 135: [],
 90: [],
 291: [],
 195: [],
 66: [],
 58: [],
 57: [],
 101: [],
 20: [],
 127: [],
 42: [],
 178: [],
 53: [],
 88: [],
 208: [],
 18: [],
 162: [],
 311: [],
 36: [],
 209: [],
 37: [],
 114: [],
 11: [],
 61: [],
 9: [],
 6: [],
 286: [],
 4: [],
 28: [],
 64: [],
 112: [],
 251: [],
 141: [],
 13: [],
 41: [],
 124: [],
 47: [],
 104: [],
 199: [],
 49: [],
 75: [],
 79: [],
 73: [],
 128: [],
 211: [],
 31: [],
 59: [],
 227: [],
 180: [],
 72: [],
 117: [],
 198: [],
 97: [],
 283: [],
 164: [],
 21: [],
 193: [],
 62: [],
 38: [],
 87: [],
 270: [],
 168: [],
 298: [],
 46: [],
 174: [],
 197: [],
 157: [],
 92: [],
 7: [],
 68: [],
 224: [],
 14: [],
 139: [],
 34: [],
 186: [],
 100: [],
 243: [],
 219: [],
 126: [],
 158: [],
 295: []

In [53]:
for i in x: # for each communtiy
    max_temp = 0  # initialize max_temp
    max_node = -1 # initialize max_node
    for node in x[i]: # for each node in that community 
        #print("Community, node, value:", i, max_node, max_temp)
        
        if max_temp < out_dict[node]: #if there's a node with a greater value of out degrees, replace max_temp and max_node
            max_temp = out_dict[node]
            max_node = node
    
        tup = (max_node, max_temp)
        
    max_odf_dict[i].append(tup) # community number: tuple(max_node, max_outdegree value)
    #print(max_odf_dict)

In [54]:
max_odf_dict

{190: [(0, 1)],
 136: [(2860, 7)],
 23: [(17, 17)],
 98: [(2753, 5)],
 191: [(2650, 3)],
 1: [(65, 31)],
 19: [(58, 29)],
 51: [(308, 8)],
 48: [(76, 21)],
 206: [(77, 5)],
 85: [(78, 3)],
 24: [(81, 14)],
 125: [(716, 4)],
 151: [(1388, 3)],
 137: [(1956, 7)],
 183: [(1319, 6)],
 273: [(94, 1)],
 29: [(1627, 4)],
 153: [(499, 4)],
 119: [(2618, 3)],
 131: [(103, 8)],
 56: [(3072, 7)],
 63: [(107, 3)],
 15: [(1595, 15)],
 2: [(134, 15)],
 135: [(154, 1)],
 90: [(156, 4)],
 291: [(159, 1)],
 195: [(170, 3)],
 66: [(174, 7)],
 58: [(179, 11)],
 57: [(294, 4)],
 101: [(693, 10)],
 20: [(195, 16)],
 127: [(2297, 6)],
 42: [(1226, 31)],
 178: [(191, 5)],
 53: [(192, 16)],
 88: [(1465, 7)],
 208: [(200, 1)],
 18: [(1059, 7)],
 162: [(2071, 7)],
 311: [(211, 1)],
 36: [(213, 5)],
 209: [(216, 6)],
 37: [(219, 18)],
 114: [(223, 10)],
 11: [(564, 8)],
 61: [(877, 5)],
 9: [(239, 2)],
 6: [(250, 17)],
 286: [(3232, 3)],
 4: [(1531, 8)],
 28: [(260, 20)],
 64: [(261, 6)],
 112: [(265, 13)],
 251

## Get the final value by dividing over max_k of the node, and then average over all communities

In [55]:
# Sum all odf-normalized and divide them by the total number of communities
sum_odf_normalized= 0


for i in max_odf_dict: # for each community
    #print(max_odf_dict[i][0])
    node_in_max_odf = max_odf_dict[i][0][0]
    value_in_max_odf = max_odf_dict[i][0][1]
    sum_odf_normalized = sum_odf_normalized + value_in_max_odf/dict_degree[node_in_max_odf]


average_max_odf_normalized = sum_odf_normalized/total_communities
average_max_odf_normalized

0.46190040407789046

- Interpretation: On average, the nodes in their communities have ~46% their links outside their communities

# Metric 5: Average-Out Degree Fraction (Average-ODF)

## Get the total degree of each node

In [56]:
# print(dict_degree) # Already gotten

## Get the outer degrees of nodes in `dict_out`

In [57]:
# print(dict_degree) # Already gotten

## For each community, for each node in that community, get its inter edges / total edges then sum them up to divide them by the total community size in `community_size_dict`

In [58]:
average_odf_dict = {}
for i in x:
    average_odf_dict[i]=0

In [59]:
for i in x: # for each communtiy
    sum_odf_normalized_in_one_community = 0
    for node in x[i]: # for each node in that community
        #print("Node:", node)
        #print("Community: ", i)
        #print("Outer degree", out_dict[node])
        #print("Total degree", dict_degree[node])
        #print("-----------------------")
        sum_odf_normalized_in_one_community = sum_odf_normalized_in_one_community + (out_dict[node]/dict_degree[node])
        #print()
    average_odf_dict[i] = sum_odf_normalized_in_one_community/community_size_dict[i]
    

In [60]:
average_odf_dict

{190: 0.065,
 136: 0.20087412587412587,
 23: 0.19936006022962544,
 98: 0.10555555555555556,
 191: 0.06373626373626373,
 1: 0.08375696002417296,
 19: 0.22855260826517512,
 51: 0.15611057054400707,
 48: 0.16286467156032372,
 206: 0.4669758812615955,
 85: 0.047494172494172496,
 24: 0.23643892339544512,
 125: 0.13824404761904763,
 151: 0.19146825396825395,
 137: 0.3264550264550265,
 183: 0.3208333333333333,
 273: 0.028571428571428574,
 29: 0.013533834586466164,
 153: 0.16105769230769232,
 119: 0.18089851325145445,
 131: 0.19226190476190474,
 56: 0.16586191586191587,
 63: 0.10445134575569358,
 15: 0.11050303304890634,
 2: 0.03785762319776232,
 135: 0.08988095238095238,
 90: 0.2237556561085973,
 291: 0.11666666666666665,
 195: 0.05416666666666666,
 66: 0.2275326797385621,
 58: 0.198582995951417,
 57: 0.08407212885154061,
 101: 0.2889081506728565,
 20: 0.243571697453685,
 127: 0.23520923520923517,
 42: 0.21022927689594356,
 178: 0.41458333333333336,
 53: 0.15835164835164833,
 88: 0.1311194653

## Get the final value by summing up all average-odfs and divide them by total number of communities

In [61]:
sum_average_odf = 0


for i in average_odf_dict: # for each community
    sum_average_odf = sum_average_odf + average_odf_dict[i]


average_sum_average_odf = sum_average_odf/total_communities
average_sum_average_odf

0.15522576883202593

# Metric 6 - Flake-Out Degree Fraction (Flake-ODF)

## Get the inner and outer degrees of nodes in `in_dict` and `out_dict`

In [62]:
# print(in_dict) # Already gotten

In [63]:
# print(out_dict) # Already gotten

## For each community, for each node, if its `out_dict` > `in_dict`, increase counter then divide by the the total community size in `community_size_dict`

In [64]:
# Automatic 1
flake_odf_dict = {}
for i in x:
    flake_odf_dict[i]=0

In [65]:
for i in x: # for each communtiy
    counter_nodes_in_one_community = 0
    for node in x[i]: # for each node in that community
        if (out_dict[node] >= in_dict[node]):
            counter_nodes_in_one_community = counter_nodes_in_one_community + 1      
    flake_odf_dict[i] = counter_nodes_in_one_community/community_size_dict[i]
    

In [66]:
flake_odf_dict

{190: 0.0,
 136: 0.23076923076923078,
 23: 0.21739130434782608,
 98: 0.13333333333333333,
 191: 0.0,
 1: 0.0,
 19: 0.22727272727272727,
 51: 0.12,
 48: 0.13043478260869565,
 206: 0.5714285714285714,
 85: 0.0,
 24: 0.13043478260869565,
 125: 0.0625,
 151: 0.16666666666666666,
 137: 0.4444444444444444,
 183: 0.375,
 273: 0.0,
 29: 0.0,
 153: 0.16666666666666666,
 119: 0.17647058823529413,
 131: 0.25,
 56: 0.13636363636363635,
 63: 0.043478260869565216,
 15: 0.04878048780487805,
 2: 0.0,
 135: 0.07142857142857142,
 90: 0.23529411764705882,
 291: 0.16666666666666666,
 195: 0.0,
 66: 0.17647058823529413,
 58: 0.15789473684210525,
 57: 0.05,
 101: 0.29411764705882354,
 20: 0.25,
 127: 0.2727272727272727,
 42: 0.2222222222222222,
 178: 0.5,
 53: 0.1,
 88: 0.15789473684210525,
 208: 0.1111111111111111,
 18: 0.05405405405405406,
 162: 0.1111111111111111,
 311: 0.25,
 36: 0.0,
 209: 0.1111111111111111,
 37: 0.043478260869565216,
 114: 0.36363636363636365,
 11: 0.024390243902439025,
 61: 0.130434

## Get the final value by summing up all flake-odfs and divide them by total number of communities

In [67]:
# Automatic 1
sum_flake_odf = 0


for i in flake_odf_dict: # for each community
    sum_flake_odf = sum_flake_odf + flake_odf_dict[i]


average_sum_flake_odf = sum_flake_odf/total_communities
average_sum_flake_odf

0.1519485763059475

# Metric 7 -  Embeddedness 
__NB: It is opposite to Average-ODF__ <BR>
__NB: Originally, it is defined for each node__ <br>

## Get the total degree of each node

In [68]:
# print(dict_degree) # Already gotten

## Get the inner degrees of nodes in `dict_in`

In [69]:
# print(in_dict) # Already gotten

## For each community, for each node in that community, get its intra edges / total edges then sum them up to divide them by the total community size in `community_size_dict`

In [70]:
average_embeddedness_dict = {}
for i in x:
    average_embeddedness_dict[i]=0

In [71]:
for i in x: # for each communtiy
    sum_embeddedness_normalized_in_one_community = 0
    for node in x[i]: # for each node in that community
        sum_embeddedness_normalized_in_one_community = sum_embeddedness_normalized_in_one_community + (in_dict[node]/dict_degree[node])
    average_embeddedness_dict[i] = sum_embeddedness_normalized_in_one_community/community_size_dict[i]
    

In [72]:
for i in average_embeddedness_dict:
    print(average_embeddedness_dict[i]+average_odf_dict[i])
# --> Simply 1 - average_odf_dict we can get average embeddedness

1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0000000000000002
1.0
0.9999999999999999
0.9999999999999999
1.0
0.9999999999999999
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
0.9999999999999999
1.0
1.0000000000000002
1.0
1.0
1.0
1.0000000000000002
1.0
1.0
1.0000000000000002
1.0
1.0000000000000002
1.0
1.0000000000000002
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0000000000000002
0.9999999999999999
0.9999999999999999
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0000000000000002
1.0
1.0000000000000002
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
0.9999999999999999
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
1.0000000000000002
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1

## Get the final value by summing up all average-embededdness and divide them by total number of communities

In [73]:
# Solution 1
sum_average_embeddedness= 0


for i in average_embeddedness_dict: # for each community
    sum_average_embeddedness = sum_average_embeddedness + average_embeddedness_dict[i]


average_sum_average_embeddedness = sum_average_embeddedness/total_communities
average_sum_average_embeddedness

0.8447742311679748

In [74]:
# Steph: Dolphins average odf = 0.29 --> On average, the nodes have 29% of their edges to outside communities
# In other words, 70% have edges inside their communities

In [75]:
# Solution 2
way2 = 1 - average_sum_average_odf
way2

0.844774231167974

# Metric 8 - Average Distance

In [76]:
sum_sp= 0
avg = 0
avg_final = 0

for i in x: # for each communtiy, which doesn't have to be connected, and if so, take its LCC
    if nx.is_connected(x[i])==False:
            largestComponents = [x[i].subgraph(c).copy() for c in sorted(nx.connected_components(x[i]), key=len, reverse=True)] 
            #print(nx.info(largestComponents[0]))
            sp = nx.average_shortest_path_length(largestComponents[0])
            sum_sp =  sp + sum_sp
            #print(nx.info(x[i]))
            print("----------------")
    else:
        sp = nx.average_shortest_path_length(x[i])
        sum_sp =  sp + sum_sp   

average_average_distance = sum_sp/total_communities
average_average_distance

1.867777365915602

# Metric 9 - Hub Dominance

## Get the inner degrees of nodes in `dict_in`

In [77]:
# print(in_dict) # Already gotten 

## For each community, get the maximum node that has the inner degree in it

In [78]:
# Automatic 1
max_hub_dominance_dict = {}
for i in x:
    max_hub_dominance_dict[i]=[]

In [79]:
for i in x: # for each communtiy
    max_temp = 0  # initialize max_temp
    max_node = -1 # initialize max_node
    for node in x[i]: # for each node in that community 
        #print("Community, node, value:", i, max_node, max_temp)
        
        if max_temp < in_dict[node]: #if there's a node with a greater value of out degrees, replace max_temp and max_node
            max_temp = in_dict[node]
            max_node = node
    
        tup = (max_node, max_temp) # Hub dominance definition in action 
        
    max_hub_dominance_dict[i].append(tup) # community number: tuple(max_node, max_indegree value)
    #print(max_odf_dict)

In [80]:
max_hub_dominance_dict = {}
for i in x:
    max_hub_dominance_dict[i]=[]

In [81]:
for i in x: # for each communtiy
    max_temp = 0  # initialize max_temp
    max_node = -1 # initialize max_node
    for node in x[i]: # for each node in that community 
        #print("Community, node, value:", i, max_node, max_temp)
        
        if max_temp < in_dict[node]: #if there's a node with a greater value of out degrees, replace max_temp and max_node
            max_temp = in_dict[node]
            max_node = node
    
        tup = (max_node, max_temp/(community_size_dict[i]-1)) # Hub dominance definition in action 
        
    max_hub_dominance_dict[i].append(tup) # community number: tuple(max_node, max_indegree value)
    #print(max_odf_dict)

## Get the final value by dividing over max_k of the node, and then average over all communities

In [82]:
sum_average_hub_dominance = 0


for i in max_hub_dominance_dict: # for each community
    sum_average_hub_dominance = sum_average_hub_dominance + max_hub_dominance_dict[i][0][1]


average_sum_average_hub_dominance = sum_average_hub_dominance/total_communities
average_sum_average_hub_dominance

0.7651211255561088