In [4]:
# Library imports
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Snap2Metis import metis_map
import pickle

# Pruning the YouTube Network for Analysis

In [None]:
# Read the raw YouTube edge list as an undirected graph with integer node ids.
youtubeG = nx.read_edgelist(
    "data/com-youtube.ungraph.txt",
    create_using=nx.Graph,
    nodetype=int,
)

The next cell makes the YouTube network much smaller by removing low-degree nodes, so that we can work with it more easily.

In [None]:

# Collect every node whose degree falls below the pruning threshold.
# Degrees are measured once, before any removal, so nodes that survive may
# still end up below the threshold afterwards (this is a single pass).
degree_Thres = 6
remove = [node for node, degree in youtubeG.degree() if degree < degree_Thres]

# Drop those nodes in place, then save the pruned graph as a plain edge list
# (data=False leaves edge attributes out of the file).
youtubeG.remove_nodes_from(remove)
nx.write_edgelist(youtubeG, 'youtube_sparse.txt', data=False)


# Cluster Analysis

Load in every graph (not YouTube yet). We'll look at some simple metrics to get some baseline understanding.

In [27]:
# Read each benchmark edge list as an undirected graph with integer node ids.
wikiG = nx.read_edgelist("input/wiki-Vote.txt", create_using=nx.Graph, nodetype=int)
collabG = nx.read_edgelist("input/ca-GrQc.txt", create_using=nx.Graph, nodetype=int)
p2pG = nx.read_edgelist("input/p2p-Gnutella08.txt", create_using=nx.Graph, nodetype=int)
fbG = nx.read_edgelist("input/facebook_combined.txt", create_using=nx.Graph, nodetype=int)

# Gather the sizes once, in the same order as the table rows below.
graphs = [wikiG, collabG, p2pG, fbG]
node_counts = [graph.number_of_nodes() for graph in graphs]
edge_counts = [graph.number_of_edges() for graph in graphs]

# Averages over the four graphs, shown as the final "Mean(G)" row.
mean_nodes = sum(node_counts) / 4
mean_edges = sum(edge_counts) / 4

# Last expression of the cell: rendered as the summary table.
pd.DataFrame({
    "Graph": ["Wikipedia", "Collaborations", "Gnutella", "Facebook", "Mean(G)"],
    "Node Count": node_counts + [mean_nodes],
    "Edge Count": edge_counts + [mean_edges],
})

Unnamed: 0,Graph,Node Count,Edge Count
0,Wikipedia,7115.0,100762.0
1,Collaborations,5242.0,14496.0
2,Gnutella,6301.0,20777.0
3,Facebook,4039.0,88234.0
4,Mean(G),5674.25,56067.25


These few lines load our clustering output and build the node-id mapping with the `metis_map` function.

In [28]:
# Load the clustering output as an integer array.
wikiCluster = np.loadtxt("output/wiki-Vote.metis.part.100", dtype=int)

# Build the node_id mapping; the result is read back from "wiki-Vote_map.obj" below.
metis_map("input/wiki-Vote.txt", "wiki-Vote_map.obj")

# Open the mapping file in a context manager so the handle is always closed,
# even if unpickling raises.
with open("wiki-Vote_map.obj", "rb") as objFile:
    # NOTE(review): pickle.load can execute arbitrary code from the file.
    # This presumably only reads the file produced by metis_map above; never
    # point it at an untrusted file.
    wikiMap = pickle.load(objFile)

# Materialise the mapping's iteration order as a list so entries can be
# looked up by position.
wikiList = list(wikiMap)

The graph is undirected.
7115 103689


Reorganizing the clustering to be suitable for networkx functions

In [37]:
# One empty bucket per cluster id (0..99).
clusters = [[] for _ in range(100)]

# Entry `position` of wikiCluster is the cluster id for entry `position` of
# wikiList (presumably the original node id; verify against metis_map).
for position, part in enumerate(wikiCluster):
    clusters[part].append(wikiList[position])

Loop through each of our clusters to pull metrics

In [41]:
# Values that are the same for every cluster are computed once, up front.
V = set(wikiG.nodes)
modularity = nx.algorithms.community.modularity(wikiG, clusters)

results = []
for i in range(100):
    C = set(clusters[i])

    # Edges leaving the cluster vs. edges counted between the cluster and the
    # whole node set.
    # NOTE(review): cut_size(wikiG, C, V) appears to count each edge incident
    # to C once (internal edges are not doubled), so this ratio is
    # cut / incident-edges rather than the textbook normalized cut; confirm
    # this is the intended metric.
    leaving = nx.cut_size(wikiG, C, V - C)
    incident = nx.cut_size(wikiG, C, V)

    results.append({
        "Modularity": modularity,  # whole-partition score, repeated on every row
        "n-cut": leaving / incident,
        "Conductance": nx.conductance(wikiG, C),
    })

# Last expression of the cell: one row of metrics per cluster.
pd.DataFrame(results)

Unnamed: 0,Modularity,n-cut,Conductance
0,0.004638,0.986629,0.973611
1,0.004638,0.982236,0.965093
2,0.004638,0.991280,0.982712
3,0.004638,0.977700,0.956374
4,0.004638,0.980661,0.962056
...,...,...,...
95,0.004638,0.998487,0.996979
96,0.004638,0.998440,0.996885
97,0.004638,0.999329,0.998660
98,0.004638,1.000000,1.000000
