In [1]:
import matplotlib
matplotlib.use("agg")
import snap
import matplotlib.pyplot as plt
import numpy as np


In [2]:
def _print_top_scores(label, scores, limit=3):
    """Print the highest-scoring entries of a snap node-id -> score hash.

    Args:
        label: Text printed at the start of every line (expected to end
            with "= " so the output lines up with the other reports).
        scores: snap hash table keyed by node id (e.g. ``snap.TIntFltH``).
            It is sorted in place by value, largest first.
        limit: Maximum number of entries to print.
    """
    # SortByDat(False) is the same descending-by-value sort the callers
    # relied on before this helper existed.
    scores.SortByDat(False)
    for position, node in enumerate(scores):
        if position >= limit:
            break
        # A single pre-formatted string prints identically whether `print`
        # is the Python 2 statement or the Python 3 function.
        print("%s %s  (score =  %s" % (label, node, scores[node]))


def stackoverflow():
    """Report basic structure and centrality of the Stack Overflow Java graph.

    Loads the directed edge list "stackoverflow-Java.txt" (source ids in
    column 0, destination ids in column 1) and prints:

    * the number of weakly connected components,
    * the edge and node counts of the largest weakly connected component,
    * the three nodes with the highest PageRank score,
    * the three nodes with the highest HITS hub and authority scores.

    Returns:
        None. All results are written to standard output.
    """
    g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(g, components)
    print("Num connected comp =  %s" % components.Len())

    mxwcc = snap.GetMxWcc(g)
    print("Num edges in largest =  %s" % mxwcc.GetEdges())
    print("Num nodes in largest =  %s" % mxwcc.GetNodes())

    # GetPageRank fills a caller-supplied hash, just like GetHits below, so
    # the table has to exist before the call. The previous code passed the
    # not-yet-assigned local `rank` and failed with UnboundLocalError.
    rank = snap.TIntFltH()
    snap.GetPageRank(g, rank)
    _print_top_scores("largest page rank score nodes = ", rank)

    hubs = snap.TIntFltH()
    auths = snap.TIntFltH()
    snap.GetHits(g, hubs, auths)
    _print_top_scores("largest hub score nodes = ", hubs)
    _print_top_scores("largest auth score nodes = ", auths)

In [9]:
def make_plots():
    """Plot the log-log out-degree distribution of the wiki-Vote graph.

    Loads the directed edge list "wiki-Vote.txt", counts how many nodes have
    each out-degree, and plots log10(count) against log10(out-degree)
    together with a least-squares straight-line fit. The figure is written
    to "out-degree-hist.png" and the fitted coefficients
    ``[slope, intercept]`` are printed, followed by a blank line.

    Returns:
        None.
    """
    gwiki = snap.LoadEdgeList(snap.PNGraph, "wiki-Vote.txt", 0, 1)

    # Histogram of out-degrees: out-degree -> number of nodes with it.
    out_deg_counts = {}
    for node in gwiki.Nodes():
        out_deg = node.GetOutDeg()
        out_deg_counts[out_deg] = out_deg_counts.get(out_deg, 0) + 1

    # Out-degree 0 has no logarithm, so it is left out of the plot and fit.
    # Every count stored above is at least 1, so no count needs filtering.
    degrees = sorted(deg for deg in out_deg_counts if deg != 0)
    counts = [out_deg_counts[deg] for deg in degrees]

    x = np.log10(degrees)
    y = np.log10(counts)

    # Degree-1 polynomial fit in log-log space: coef is [slope, intercept].
    coef = np.polyfit(x, y, 1)
    y_estimated = np.poly1d(coef)(x)

    # Use a dedicated figure and close it afterwards so that repeated calls
    # do not keep drawing on top of the same implicit pyplot figure.
    fig, ax = plt.subplots()
    ax.plot(x, y, linestyle="", marker="o", label="observed")
    ax.plot(x, y_estimated, label="least-squares fit")
    ax.set_xlabel("log10(out-degree)")
    ax.set_ylabel("log10(number of nodes)")
    ax.set_title("wiki-Vote out-degree distribution")
    ax.legend()
    fig.savefig('out-degree-hist.png')
    plt.close(fig)

    print(coef)
    print("")


In [4]:
def wiki_analysis(gwiki=None):
    """Print node-degree and edge-reciprocity statistics of a directed graph.

    Args:
        gwiki: Directed snap graph to analyse. When None, the edge list
            "wiki-Vote.txt" is loaded (source ids in column 0, destination
            ids in column 1).

    Prints, one per line:
        * number of nodes,
        * number of nodes with a self-loop,
        * number of nodes with out-degree 0 / in-degree 0,
        * number of nodes with out-degree > 10 / in-degree < 10,
        * number of directed edges excluding self-loops,
        * number of undirected edges (a reciprocated pair a->b, b->a counts
          once; self-loops are excluded),
        * number of reciprocated pairs,
    followed by a blank line.

    Returns:
        None.
    """
    if gwiki is None:
        gwiki = snap.LoadEdgeList(snap.PNGraph, "wiki-Vote.txt", 0, 1)

    # Each report line is one pre-formatted string, so the output is the same
    # plain text under the Python 2 print statement and the Python 3 print
    # function (a multi-argument print(...) shows a tuple under Python 2).
    print("n nodes =  %d" % gwiki.GetNodes())

    n_self_edges = 0
    n_zero_out_deg = 0
    n_zero_in_deg = 0
    n_large_out_deg = 0
    n_small_in_deg = 0
    for node in gwiki.Nodes():
        node_id = node.GetId()
        out_deg = node.GetOutDeg()
        in_deg = node.GetInDeg()
        if gwiki.IsEdge(node_id, node_id):
            n_self_edges += 1
        if out_deg == 0:
            n_zero_out_deg += 1
        if in_deg == 0:
            n_zero_in_deg += 1
        if out_deg > 10:
            n_large_out_deg += 1
        if in_deg < 10:
            n_small_in_deg += 1
    print("n self edges =  %d" % n_self_edges)
    print("n 0 out =  %d" % n_zero_out_deg)
    print("n 0 in =  %d" % n_zero_in_deg)
    print("n large out =  %d" % n_large_out_deg)
    print("n small in =  %d" % n_small_in_deg)

    n_directed = 0
    n_reciprocated = 0
    for edge in gwiki.Edges():
        src = edge.GetSrcNId()
        dst = edge.GetDstNId()
        if src == dst:
            # Self-loops are reported separately above.
            continue
        n_directed += 1
        # A reciprocated pair shows up twice in the edge list (a->b and
        # b->a); counting it only from the src < dst side keeps the tally an
        # integer instead of accumulating 0.5 per direction.
        if src < dst and gwiki.IsEdge(dst, src):
            n_reciprocated += 1
    # Each reciprocated pair merges two directed edges into one undirected edge.
    n_undirected = n_directed - n_reciprocated

    print("n directed =  %d" % n_directed)
    print("n undirected =  %d" % n_undirected)
    print("n recip =  %d" % n_reciprocated)
    print("")

In [5]:
# Run the Stack Overflow (Java) graph report; it prints the component sizes
# and the top PageRank / hub / authority nodes.
stackoverflow()

Num connected comp =  10143
Num edges in largest =  322486
Num nodes in largest =  131188
largest page rank score nodes =  992484  (score =  0.0139805404122
largest page rank score nodes =  135152  (score =  0.0100055388957
largest page rank score nodes =  22656  (score =  0.00710353269313
largest hub score nodes =  892029  (score =  0.0733638037339
largest hub score nodes =  1194415  (score =  0.0595507441879
largest hub score nodes =  359862  (score =  0.0568756458807
largest auth score nodes =  22656  (score =  0.604723973041
largest auth score nodes =  157882  (score =  0.298699348873
largest auth score nodes =  571407  (score =  0.283907193414


In [10]:
# Write out-degree-hist.png for wiki-Vote and print the fitted line's
# coefficients ([slope, intercept] in log10-log10 space).
make_plots()

[-1.28106471  3.1324547 ]



In [11]:
# TODO fix undirected count
# Sanity check on a hand-built graph before running on the real data.
# Edges: 1->2 and 2->1 (a reciprocated pair), 1->3, and the self-loop 1->1.
# Expected report: 1 self edge, 3 directed edges (self-loop excluded),
# 2 undirected edges ({1,2} and {1,3}) and 1 reciprocated pair.
gtest = snap.TNGraph.New()
gtest.AddNode(1)
gtest.AddNode(2)
gtest.AddNode(3)
gtest.AddEdge(1,2)
gtest.AddEdge(2,1)
gtest.AddEdge(1,3)
gtest.AddEdge(1,1)
wiki_analysis(gwiki=gtest)

# No argument: analyse the graph loaded from wiki-Vote.txt.
wiki_analysis()

('n nodes = ', 3)
('n self edges = ', 1)
('n 0 out = ', 1)
('n 0 in = ', 0)
('n large out = ', 0)
('n small in = ', 3)
('n directed = ', 3)
('n undirected = ', 2.0)
('n recip = ', 1.0)

('n nodes = ', 7115)
('n self edges = ', 0)
('n 0 out = ', 1005)
('n 0 in = ', 4734)
('n large out = ', 1612)
('n small in = ', 5165)
('n directed = ', 103689)
('n undirected = ', 100762.0)
('n recip = ', 2927.0)

