In [1]:
# Library imports
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

from helper import get_map, metis_map, print_results

# Pruning the YouTube Network for Analysis

In [None]:
# Read the raw YouTube edge list as an undirected graph with integer node ids.
# NOTE(review): this cell reads from "data/" while the other cells in this
# notebook read from "input/" -- confirm which directory holds the file.
youtube_edge_file = "data/com-youtube.ungraph.txt"
youtubeG = nx.read_edgelist(youtube_edge_file, nodetype=int, create_using=nx.Graph)

The next cell makes the YouTube network much smaller, by removing low-degree nodes, so we can analyze it more easily.

In [None]:

# Collect every node whose degree is below the pruning threshold.
degree_Thres = 6
remove = [node for node, degree in youtubeG.degree() if degree < degree_Thres]

# Drop the low-degree nodes in place and save the pruned graph as a plain
# edge list (data=False: no edge-attribute column is written).
# NOTE: removing nodes lowers the degree of their neighbours, so re-running this
# cell on the already-pruned graph prunes it further; reload the graph first.
youtubeG.remove_nodes_from(remove)
nx.write_edgelist(youtubeG, 'youtube_sparse.txt', data=False)


# Cluster Analysis

### Data Imports
Load in every graph. We'll look at some simple metrics to get some baseline understanding.

In [2]:
def load_input_graph(stem):
    """Read ``input/<stem>.txt`` as an undirected graph with integer node ids."""
    return nx.read_edgelist("input/" + stem + ".txt", create_using=nx.Graph, nodetype=int)


wikiG = load_input_graph("wiki-Vote")
collabG = load_input_graph("ca-GrQc")
p2pG = load_input_graph("p2p-Gnutella08")
fbG = load_input_graph("facebook_combined")
# The YouTube graph is not loaded in this cell; `youtubeG` is created in the
# pruning section earlier in the notebook.

# Baseline size of each graph loaded above (displayed as the cell output).
loaded_graphs = {
    "Wikipedia": wikiG,
    "Collaborations": collabG,
    "Gnutella": p2pG,
    "Facebook": fbG,
}
pd.DataFrame({
    "Graph": list(loaded_graphs),
    "Node Count": [graph.number_of_nodes() for graph in loaded_graphs.values()],
    "Edge Count": [graph.number_of_edges() for graph in loaded_graphs.values()],
})

First, we'll create our METIS mappings and save them as pickle files (the code is in helper.py).

In [4]:
# Dataset file stems. The order matters: later cells index into this list.
names = ["wiki-Vote", "p2p-Gnutella08", "facebook_combined", "ca-GrQc", "com-youtube.ungraph"]

# Call helper.metis_map once per dataset with the edge-list path and the
# mapping-object path (presumably the file the mapping is saved to -- see helper.py).
for dataset in names:
    fname = f"input/{dataset}.txt"
    obj_fname = f"MetisAlgo/{dataset}_map.obj"
    metis_map(fname, obj_fname)

The graph is undirected.
7115 103689
The graph is undirected.
6301 20777
The graph is undirected.
The graph is undirected.
5242 28980
The graph is undirected.
1134890 2987624


### Wiki-Vote

In [7]:
# Show Modularity, n-cut and Conductance for the "metis" clustering of the
# wiki-Vote graph (presumably one row per cluster -- see helper.print_results).
print_results(wikiG, "wiki-Vote", "metis")

Unnamed: 0,Modularity,n-cut,Conductance
0,0.010801,0.864518,0.864518
1,0.010801,0.995062,0.995062
2,0.010801,0.824171,0.824171
3,0.010801,0.996769,0.996769
4,0.010801,0.997680,0.997680
...,...,...,...
360,0.010801,1.000000,1.000000
361,0.010801,1.000000,1.000000
362,0.010801,1.000000,1.000000
363,0.010801,1.000000,1.000000


In [None]:
# Align the labels in output/wiki_community.txt with the node ordering that
# get_map returns for wiki-Vote.
mmap, mlist = get_map("wiki-Vote")

# ndmin=2 keeps a single-row file as one 2-D row instead of a flat array,
# so the row indexing below works for any number of rows.
cluster_data = np.loadtxt('output/wiki_community.txt', dtype=int, ndmin=2)

# Column 0 is used as the key and column 1 as its value
# (presumably node id -> community label; verify against the file format).
cluster_map = {row[0]: row[1] for row in cluster_data}
clustering = [cluster_map[node] for node in mlist]

In [3]:
# Show Modularity, n-cut and Conductance for the "community" clustering of the
# wiki-Vote graph (presumably one row per cluster -- see helper.print_results).
print_results(wikiG, "wiki-Vote", "community")

Unnamed: 0,Modularity,n-cut,Conductance
0,0.350042,0.527057,0.527057
1,0.350042,0.318746,0.374815
2,0.350042,0.375265,0.375265
3,0.350042,0.808656,0.808656
4,0.350042,0.714286,0.714286
5,0.350042,0.719178,0.719178
6,0.350042,0.75,0.75
7,0.350042,0.2,0.2
8,0.350042,0.5,0.5
9,0.350042,0.833333,0.833333


### Peer-To-Peer Filesharing

In [5]:
# Show Modularity, n-cut and Conductance for the "metis" clustering of the
# Gnutella peer-to-peer graph (presumably one row per cluster -- see helper.print_results).
print_results(p2pG, "p2p-Gnutella08", "metis")

Unnamed: 0,Modularity,n-cut,Conductance
0,0.006187,0.869565,0.869565
1,0.006187,1.000000,1.000000
2,0.006187,1.000000,1.000000
3,0.006187,1.000000,1.000000
4,0.006187,0.981678,0.981678
...,...,...,...
649,0.006187,1.000000,1.000000
650,0.006187,1.000000,1.000000
651,0.006187,0.932432,0.932432
652,0.006187,1.000000,1.000000


### Facebook

In [6]:
# Show Modularity, n-cut and Conductance for the "mcl" clustering of the
# Facebook graph (presumably one row per cluster -- see helper.print_results).
print_results(fbG, "facebook_combined", "mcl")

Unnamed: 0,Modularity,n-cut,Conductance
0,0.022139,0.989519,0.989519
1,0.022139,0.988159,0.988159
2,0.022139,0.990840,0.990840
3,0.022139,0.988824,0.988824
4,0.022139,0.988095,0.988095
...,...,...,...
95,0.022139,0.991874,0.991874
96,0.022139,0.977131,0.977131
97,0.022139,0.979416,0.979416
98,0.022139,0.981105,0.981105


### Collaborations

In [None]:
# Pass the dataset name explicitly: collabG is loaded from ca-GrQc, whereas
# names[1] is "p2p-Gnutella08", which pairs this graph with another dataset's
# clustering (the likely reason this cell used to fail).
print_results(collabG, "ca-GrQc", "metis")

### Youtube

In [None]:
# Pass the YouTube dataset name explicitly; names[2] is "facebook_combined".
# NOTE(review): assumes an "mcl" clustering exists for com-youtube.ungraph and
# that it matches youtubeG as it stands here (it may have been pruned) -- confirm.
print_results(youtubeG, "com-youtube.ungraph", "mcl")