In [43]:
import numpy as np
import pandas as pd
import networkx as nx

from scipy import stats

In [44]:
# Define functions
def graphStats(g):
    """Summarise the connectivity and local structure of a graph.

    Parameters
    ----------
    g : networkx graph
        Graph to analyse.  It must contain at least one node (taking the
        largest component of an empty graph raises ``ValueError``).
        NOTE(review): the connected-component routines are presumably only
        meaningful for undirected graphs here -- confirm against callers.

    Returns
    -------
    pandas.DataFrame
        A single-column frame (column label ``0``) indexed by metric name:

        * ``nComps``   -- number of connected components of ``g``
        * ``percSize`` -- fraction of ``g``'s nodes in its largest component
        * ``diameter`` -- diameter of the largest component
        * ``asp``      -- average shortest path length of the largest component
        * ``avgDeg``   -- mean node degree, taken over all of ``g``
        * ``cc``       -- average clustering coefficient, taken over all of ``g``
    """
    # Enumerate the components once, as node sets, and reuse the result for
    # both the count and the largest component.  nx.connected_components is
    # used because nx.connected_component_subgraphs was removed in
    # NetworkX 2.4.
    gComps = [set(c) for c in nx.connected_components(g)]
    nComps = len(gComps)

    # Find the largest component and determine the % size based on nodes.
    # .copy() yields a standalone graph rather than a view onto g.
    largestNodes = max(gComps, key=len)
    gLargest = g.subgraph(largestNodes).copy()
    percSize = float(len(largestNodes)) / len(g.nodes())

    # Diameter and average shortest path are computed on the largest
    # component only: both need a connected graph.
    d = float(nx.diameter(gLargest))
    asp = nx.average_shortest_path_length(gLargest)

    # Average degree over every node of the full graph
    deg = [float(g.degree(a)) for a in g.nodes()]
    avgDeg = np.mean(deg)

    # Clustering coefficient of the full graph
    cc = nx.average_clustering(g)

    # Return stats as one column, labelled by metric name
    return pd.DataFrame([nComps, percSize, d, asp, avgDeg, cc],
                        index=["nComps", "percSize", "diameter", "asp", "avgDeg", "cc"])

def confIntMean(a, conf=0.95):
    """Two-sided Student-t confidence interval for the mean of ``a``.

    Parameters
    ----------
    a : sequence of numbers
        Sample values.
    conf : float, optional
        Confidence level of the interval (default 0.95).

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds, i.e. the sample mean minus/plus the
        t critical value times the standard error of the mean.
    """
    centre = np.mean(a)
    stdErr = stats.sem(a)
    # Critical value for a two-sided interval with len(a) - 1 degrees of freedom
    tCrit = stats.t.ppf((1 + conf) / 2., len(a) - 1)
    lower = centre - tCrit * stdErr
    upper = centre + tCrit * stdErr
    return lower, upper

In [45]:
# Sample 1000 Erdos-Renyi graphs (100 nodes, edge probability 0.02) and
# collect the graphStats column produced for each sample
dataFrames = []
for i in range(1000):
    erGraph = nx.erdos_renyi_graph(n=100, p=0.02)
    graphMetrics = graphStats(erGraph)
    dataFrames += [graphMetrics]

# Put the per-sample columns side by side, then flip so that each row is
# one sampled graph and each column is one metric
finalResults = pd.concat(dataFrames, axis=1).transpose()

In [46]:
# Calculate the 95% confidence interval of the mean for every metric
ciDict = {}
graphMetrics = ["nComps", "percSize", "diameter", "asp", "avgDeg","cc"]
for g in graphMetrics:
    # .values gives the column as a NumPy array; Series.as_matrix() was
    # deprecated in pandas 0.23 and removed in pandas 1.0
    a = finalResults[g].values
    ci = confIntMean(a, conf=0.95)
    ciDict[g] = ci

# Parenthesised form prints the same text on Python 2 and also parses on Python 3
print(ciDict)

{'diameter': (13.437063254303331, 13.738936745696668), 'avgDeg': (1.960433061608168, 1.9846069383918321), 'asp': (5.6435460347269784, 5.7355809763078343), 'cc': (0.010675410155043159, 0.012093193019560012), 'nComps': (17.201275082993835, 17.774724917006164), 'percSize': (0.7805486677200949, 0.79017133227990499)}


In [47]:
# Load the Pajek file and rebuild it as a plain nx.Graph
lada = nx.read_pajek("LadaFacebookAnon.net")
lada = nx.Graph(lada)

#Nodes
print("Nodes:", len(lada.nodes()))

#Edges
print("Edges:", len(lada.edges()))

#Largest Component
# Components are enumerated once, as node sets ordered by size.
# nx.connected_components is used because nx.connected_component_subgraphs
# was removed in NetworkX 2.4; the largest component is rebuilt as a
# standalone graph with subgraph(...).copy().
ladaComponents = sorted((set(c) for c in nx.connected_components(lada)), key=len)
ladaLargest = lada.subgraph(max(ladaComponents, key=len)).copy()
print("# Components:", len(ladaComponents))
print("Largest Component Nodes:", len(ladaLargest.nodes()))
print("% Largest Component:", len(ladaLargest.nodes())/float(len(lada.nodes())))

# Graph Stats (computed on the largest component only)
print("Graph Stats", graphStats(ladaLargest))

('Nodes:', 388)
('Edges:', 3598)
('# Components:', 20)
('Largest Component Nodes:', 350)
('% Largest Component:', 0.9020618556701031)
('Graph Stats',                   0
nComps     1.000000
percSize   1.000000
diameter   8.000000
asp        2.783856
avgDeg    19.954286
cc         0.491352)


In [48]:
# Erdos-Renyi graph chosen to resemble the Lada graph:
# 350 nodes, edge probability 0.056
erGraph = nx.erdos_renyi_graph(n=350, p=0.056)
print("ER Graph Nodes", erGraph.number_of_nodes())
print("ER Graph Edges", erGraph.number_of_edges())

('ER Graph Nodes', 350)
('ER Graph Edges', 3327)


In [49]:
# Watts-Strogatz graph chosen to resemble the Lada graph:
# 350 nodes, k=20, p=0.2
swGraph = nx.watts_strogatz_graph(350, 20, 0.2)
print("SW Graph Nodes", swGraph.number_of_nodes())
print("SW Graph Edges", swGraph.number_of_edges())

('SW Graph Nodes', 350)
('SW Graph Edges', 3500)


In [50]:
# Barabasi-Albert graph chosen to resemble the Lada graph:
# 350 nodes, m=10
baGraph = nx.barabasi_albert_graph(350, 10)
print("BA Graph Nodes", baGraph.number_of_nodes())
print("BA Graph Edges", baGraph.number_of_edges())

('BA Graph Nodes', 350)
('BA Graph Edges', 3400)


In [51]:
# Sample 1000 Erdos-Renyi graphs (350 nodes, edge probability 0.056) and
# collect the graphStats column produced for each sample
erDF = []
for i in range(1000):
    erGraph = nx.erdos_renyi_graph(n=350, p=0.056)
    erMetrics = graphStats(erGraph)
    erDF += [erMetrics]

# Per-sample columns side by side, then flipped: one row per sampled graph,
# one column per metric
erResults = pd.concat(erDF, axis=1).transpose()

In [52]:
# Calculate the 95% confidence interval of the mean for every metric of the
# Erdos-Renyi samples
erCIDict = {}
col = ["nComps", "percSize", "diameter", "asp", "avgDeg","cc"]
for c in col:
    # .values gives the column as a NumPy array; Series.as_matrix() was
    # deprecated in pandas 0.23 and removed in pandas 1.0
    a = erResults[c].values
    ci = confIntMean(a, conf=0.95)
    erCIDict[c] = ci

# Parenthesised form prints the same text on Python 2 and also parses on Python 3
print(erCIDict)

{'diameter': (3.2793370925578689, 3.3366629074421308), 'avgDeg': (19.526434808329725, 19.56699376309885), 'asp': (2.2595430661491775, 2.2611472654103801), 'cc': (0.055862082378210373, 0.056085377775375492), 'nComps': (1.0, 1.0), 'percSize': (1.0, 1.0)}


In [53]:
# Sample 1000 Watts-Strogatz graphs (n=350, k=20, p=0.2) and tabulate stats
swDF = []
for i in range(0,1000):
    swGraph = nx.watts_strogatz_graph(n=350, k=20, p=0.2)
    swMetrics = graphStats(swGraph)
    swDF.append(swMetrics)

# One row per sampled graph, one column per metric
swResults = pd.concat(swDF, axis=1).T

# Calculate the 95% confidence interval of the mean for every metric
swCIDict = {}
col = ["nComps", "percSize", "diameter", "asp", "avgDeg","cc"]
for c in col:
    # .values gives the column as a NumPy array; Series.as_matrix() was
    # deprecated in pandas 0.23 and removed in pandas 1.0
    a = swResults[c].values
    ci = confIntMean(a, conf=0.95)
    swCIDict[c] = ci

# Parenthesised form prints the same text on Python 2 and also parses on Python 3
print(swCIDict)

{'diameter': (4.0, 4.0), 'avgDeg': (20.0, 20.0), 'asp': (2.5171327947801956, 2.5181753018223416), 'cc': (0.37923974374758829, 0.3803811619755223), 'nComps': (1.0, 1.0), 'percSize': (1.0, 1.0)}


In [54]:
# Sample 1000 Barabasi-Albert graphs (n=350, m=10) and tabulate stats
baDF = []
for i in range(0,1000):
    baGraph = nx.barabasi_albert_graph(n=350, m=10)
    baMetrics = graphStats(baGraph)
    baDF.append(baMetrics)

# One row per sampled graph, one column per metric
baResults = pd.concat(baDF, axis=1).T

# Calculate the 95% confidence interval of the mean for every metric
baCIDict = {}
col = ["nComps", "percSize", "diameter", "asp", "avgDeg","cc"]
for c in col:
    # .values gives the column as a NumPy array; Series.as_matrix() was
    # deprecated in pandas 0.23 and removed in pandas 1.0
    a = baResults[c].values
    ci = confIntMean(a, conf=0.95)
    baCIDict[c] = ci

# Parenthesised form prints the same text on Python 2 and also parses on Python 3
print(baCIDict)

{'diameter': (3.1315902311098172, 3.1764097688901827), 'avgDeg': (19.428571428571427, 19.428571428571427), 'asp': (2.2370793074619959, 2.2379871190627689), 'cc': (0.12507790425858767, 0.12575040221147787), 'nComps': (1.0, 1.0), 'percSize': (1.0, 1.0)}
