In [1]:
# Library imports
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from helper import print_results, metis_map

# Pruning the YouTube Network for Analysis

In [None]:
# Read the raw YouTube edge list into an undirected graph with integer node ids.
# NOTE(review): this path uses "data/" while every other dataset in this notebook
# is read from "input/" — confirm which directory holds the file.
youtubeG = nx.read_edgelist(
    "data/com-youtube.ungraph.txt",
    create_using=nx.Graph,
    nodetype=int,
)

The next cell makes the YouTube network much smaller, by removing its low-degree nodes, so that we can analyze it in a reasonable amount of time.

In [None]:

# Nodes whose degree is below this threshold are pruned from the graph.
degree_Thres = 6

# Collect the low-degree nodes up front (the degree view yields (node, degree) pairs).
# This is a single pass: nodes whose degree later falls below the threshold
# because a neighbour was removed are NOT pruned again.
remove = [n for n, deg in youtubeG.degree() if deg < degree_Thres]

# Drop the collected nodes in place, then write the remaining edges to disk as a
# plain edge list (data=False: endpoints only, no edge attributes).
# Re-running this cell prunes the already-pruned graph a second time.
youtubeG.remove_nodes_from(remove)
nx.write_edgelist(youtubeG, 'youtube_sparse.txt', data=False)


# Cluster Analysis

### Data Imports
Load in every graph. We'll look at some simple metrics to get some baseline understanding.

In [10]:
# Load each benchmark edge list as an undirected graph with integer node ids.
wikiG = nx.read_edgelist("input/wiki-Vote.txt", create_using=nx.Graph, nodetype=int)
collabG = nx.read_edgelist("input/ca-GrQc.txt", create_using=nx.Graph, nodetype=int)
p2pG = nx.read_edgelist("input/p2p-Gnutella08.txt", create_using=nx.Graph, nodetype=int)
fbG = nx.read_edgelist("input/facebook_combined.txt", create_using=nx.Graph, nodetype=int)
# The YouTube graph is deliberately not loaded here; it is read (and pruned) in the
# pruning section at the top of the notebook, so it is left out of the table below.

# Baseline size of every graph loaded in this cell. The table is the cell's last
# expression so the notebook renders it as rich output.
pd.DataFrame({
    "Graph": ["Wikipedia", "Collaborations", "Gnutella", "Facebook"],
    "Node Count": [
        wikiG.number_of_nodes(),
        collabG.number_of_nodes(),
        p2pG.number_of_nodes(),
        fbG.number_of_nodes(),
    ],
    "Edge Count": [
        wikiG.number_of_edges(),
        collabG.number_of_edges(),
        p2pG.number_of_edges(),
        fbG.number_of_edges(),
    ],
})

First, we'll create our METIS mappings and save them as pickle files (the code is in `helper.py`).

In [None]:
# File stems of the datasets; each one is expected at input/<stem>.txt.
names = [
    "wiki-Vote",
    "p2p-Gnutella08",
    "facebook_combined",
    "ca-GrQc",
    "com-youtube.ungraph",
]

# Build one METIS mapping per dataset; metis_map (from helper.py) receives the
# edge-list path and the path of the .obj file to write.
for dataset in names:
    metis_map("input/{}.txt".format(dataset), "MetisAlgo/{}_map.obj".format(dataset))

### Wiki-Vote

In [20]:
# Score the "metis" clustering of the Wiki-Vote graph. The rendered table lists
# Modularity, n-cut and Conductance per row
# (presumably one row per cluster — confirm in helper.py).
print_results(wikiG, "wiki-Vote", "metis")

Unnamed: 0,Modularity,n-cut,Conductance
0,0.004638,0.986629,0.986629
1,0.004638,0.982236,0.982236
2,0.004638,0.991280,0.991280
3,0.004638,0.977700,0.977700
4,0.004638,0.980661,0.980661
...,...,...,...
95,0.004638,0.998487,0.998487
96,0.004638,0.998440,0.998440
97,0.004638,0.999329,0.999329
98,0.004638,1.000000,1.000000


### Peer-To-Peer Filesharing

In [None]:
# Score the "metis" clustering of the Gnutella peer-to-peer graph with the
# print_results helper from helper.py (this cell has no saved output).
print_results(p2pG, "p2p-Gnutella08", "metis")

### Facebook

In [6]:
# Score the "mcl" clustering of the Facebook graph. The rendered table lists
# Modularity, n-cut and Conductance per row
# (presumably one row per cluster — confirm in helper.py).
print_results(fbG, "facebook_combined", "mcl")

Unnamed: 0,Modularity,n-cut,Conductance
0,0.022139,0.989519,0.989519
1,0.022139,0.988159,0.988159
2,0.022139,0.990840,0.990840
3,0.022139,0.988824,0.988824
4,0.022139,0.988095,0.988095
...,...,...,...
95,0.022139,0.991874,0.991874
96,0.022139,0.977131,0.977131
97,0.022139,0.979416,0.979416
98,0.022139,0.981105,0.981105


### Collaborations

In [18]:
# Node 12295 is dropped before scoring: the self-loop check in this notebook lists a
# [12295 12295] edge, and the connected-components check reports {12295} as a
# one-node component. remove_nodes_from mutates collabG in place.
collabG.remove_nodes_from([12295])
print_results(collabG, "ca-GrQc", "mcl") # NOTE(review): originally flagged as "does not work"; presumably the isolated node 12295 was the cause — confirm

Unnamed: 0,Modularity,n-cut,Conductance
0,0.001119,1.000000,1.000000
1,0.001119,1.000000,1.000000
2,0.001119,1.000000,1.000000
3,0.001119,0.991189,0.991189
4,0.001119,1.000000,1.000000
...,...,...,...
748,0.001119,1.000000,1.000000
749,0.001119,1.000000,1.000000
750,0.001119,1.000000,1.000000
751,0.001119,1.000000,1.000000


In [11]:
# Scan the raw collaboration edge list and print every self-loop
# (a row whose two endpoints are the same node).
collab = np.loadtxt("input/ca-GrQc.txt", dtype=int)
self_loops = [edge for edge in collab if edge[0] == edge[1]]
for edge in self_loops:
    print(edge)

[16703 16703]
[11372 11372]
[1343 1343]
[4442 4442]
[18314 18314]
[11318 11318]
[25777 25777]
[14840 14840]
[6648 6648]
[13 13]
[4685 4685]
[12295 12295]


In [10]:
# Print every connected component of collabG that consists of exactly one node.
singletons = (comp for comp in nx.connected_components(collabG) if len(comp) == 1)
for comp in singletons:
    print(comp)

{12295}


### Youtube

In [None]:
# Score the "mcl" clustering of the YouTube graph. youtubeG comes from the pruning
# section at the top of the notebook (its load in the Data Imports cell is commented out).
# The dataset name is spelled out because the previous names[2] resolved to
# "facebook_combined", i.e. the Facebook dataset, not YouTube.
# NOTE(review): if the MCL output was generated from the pruned 'youtube_sparse.txt',
# confirm which file stem print_results expects here.
print_results(youtubeG, "com-youtube.ungraph", "mcl")