In [1]:
import gc
import operator
import pickle
import snap

In [2]:
def get_comm_info(comm_file):
    '''
    Parse a community file into two lookup maps.

    Each non-blank line of comm_file lists the integer ids of the users in
    one community, separated by whitespace (tabs in the original data).
    Communities are numbered 0, 1, 2, ... in the order they are read; blank
    lines are skipped and do not consume a community id.

    Returns the tuple (comm_map_usr, comm_map_comm):
    comm_map_usr: key: user id, value: list of community ids the user is in
    comm_map_comm: key: community id, value: list of user ids in the community
    '''
    comm_map_usr = {}
    comm_map_comm = {}
    comm_id = 0
    with open(comm_file, 'r') as cf:
        for line in cf:
            # split() without an argument drops the trailing newline and any
            # empty fields, so a blank line or a stray tab never reaches
            # int('') (which raises ValueError).
            members = [int(tok) for tok in line.split()]
            if not members:
                continue
            for usr_id in members:
                comm_map_usr.setdefault(usr_id, []).append(comm_id)
            comm_map_comm[comm_id] = members
            comm_id += 1
    return comm_map_usr, comm_map_comm

In [3]:
# Edge-list file for the graph. The trailing 0 and 1 passed to LoadEdgeList
# are presumably the source/destination column indices -- TODO confirm
# against the SNAP documentation.
gf_file = "data/com-lj.ungraph.txt"
# Load the edge list as a snap.PUNGraph; `gf` is used by the later cells.
gf = snap.LoadEdgeList(snap.PUNGraph, gf_file, 0, 1)

In [4]:
# Report the node and edge counts of the loaded graph (Python 2 print statement).
print "Load graph! With nodes ", gf.GetNodes(), " and edges ", gf.GetEdges()

Load graph! With nodes  3997962  and edges  34681189


In [5]:
# Community membership file, parsed by get_comm_info.
comm_file = 'data/com-lj.all.cmty.txt'

# comm_map_usr: user id -> community ids; comm_map_comm: community id -> user ids
comm_map_usr, comm_map_comm = get_comm_info(comm_file)

In [5]:
##--get eigenvector centrality
# NIdEigenH is the output container handed to GetEigenVectorCentr; later cells
# read it as a mapping from node id to centrality score.
NIdEigenH = snap.TIntFltH()
snap.GetEigenVectorCentr(gf, NIdEigenH)

In [6]:
##--copy the snap hash into a plain dict and pickle it
eignv_file = "eignv.pkl"

# A plain dict (node id -> centrality score) can be pickled directly.
eignv_map = dict((node_id, NIdEigenH[node_id]) for node_id in NIdEigenH)

with open(eignv_file, "wb") as out_fl:
    pickle.dump(eignv_map, out_fl)

In [7]:
##--load map from pkl file
eignv_file = "eignv.pkl"

# Pickle streams are binary: read with "rb", matching the "wb" used when the
# file was written. Text mode can corrupt the stream on platforms that
# translate newlines and fails outright under Python 3.
# NOTE: only unpickle files produced by this notebook; pickle.load can execute
# arbitrary code embedded in an untrusted file.
with open(eignv_file, "rb") as fl:
    eignv_map = pickle.load(fl)

In [10]:
##--rank nodes by eigenvector centrality, highest score first
sorted_eignv = sorted(eignv_map.items(), key=lambda pair: pair[1], reverse=True)

# Keep only the node ids, in ranked order.
sorted_id = [node_id for node_id, _score in sorted_eignv]

In [15]:
##--flag each ranked id: 1 if it belongs to at least one community, else 0
in_comm = [1 if node_id in comm_map_usr else 0 for node_id in sorted_id]

In [16]:
# Show the first 100 ranked ids followed by their community-membership flags.
print sorted_id[:100]
print in_comm[:100]

[15094, 15102, 15096, 619335, 15101, 619338, 619339, 15100, 15103, 619336, 619341, 185507, 15097, 1871938, 1871941, 1871943, 1889317, 1337762, 559255, 1871935, 180799, 1889144, 1871934, 1804132, 619369, 1889141, 1871939, 1889185, 642475, 1283053, 1889140, 15099, 619337, 1889138, 1334175, 1871936, 1889188, 1889137, 1334160, 15104, 1871940, 1415396, 1889186, 1889189, 1871937, 173441, 1871951, 1889172, 15098, 1871953, 1889143, 1889146, 1435609, 619342, 1889182, 1889192, 1889174, 1889139, 1871954, 1889167, 1889145, 15105, 11433, 1871952, 1035174, 1889184, 2241866, 1871942, 15106, 1607125, 619344, 1889183, 1415397, 619340, 2241864, 1589984, 2241891, 1889195, 2241869, 1889169, 2241867, 1889173, 1871944, 2241868, 1871960, 15122, 2050630, 2241857, 1610905, 1889199, 619374, 1283094, 2241865, 1871961, 1889142, 1215907, 1871945, 1889197, 1889198, 2241884]
[1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0

In [8]:
##--get community size (community id -> number of members)
comm_size = dict((c_id, len(members)) for c_id, members in comm_map_comm.items())

##--order the communities from largest to smallest
sorted_comm_size = sorted(comm_size.items(), key=lambda pair: pair[1], reverse=True)

# Keep only the community ids, in size order.
sorted_id_comm_size = [c_id for c_id, _size in sorted_comm_size]

In [18]:
##--we see a single community as a graph
def get_single_comm(gf, comm_map_comm, c_id):
    '''
    Build the subgraph of gf induced by one community.

    gf: graph that contains every member of the community as a node
    comm_map_comm: key: community id, value: user id array
    c_id: id of the community to extract

    Returns a new snap.TUNGraph whose nodes are the members of community
    c_id and whose edges are the edges of gf that have both endpoints in
    that community.
    '''
    node_id_list = comm_map_comm[c_id]
    # Membership is tested once per incident edge; a set makes each test
    # constant-time instead of scanning the whole member list.
    members = set(node_id_list)
    gf_comm = snap.TUNGraph.New()
    # Add every member first so AddEdge below never refers to a missing node.
    for node_id in node_id_list:
        gf_comm.AddNode(node_id)
    for node_id in node_id_list:
        ni = gf.GetNI(node_id)
        for nbr_i in xrange(ni.GetDeg()):
            nbr_id = ni.GetNbrNId(nbr_i)
            # Keep only edges that stay inside the community.
            if nbr_id in members:
                gf_comm.AddEdge(node_id, nbr_id)
    return gf_comm

In [19]:
##--rank the nodes of each community by eigenvector centrality, if we see each single community as a graph
comm_high_eignv_cnt = {}
for c_id in sorted_id_comm_size:
    comm_gf = get_single_comm(gf, comm_map_comm, c_id)
    tmp_NIdEigenH = snap.TIntFltH()
    snap.GetEigenVectorCentr(comm_gf, tmp_NIdEigenH)
    # Copy the scores into a plain dict (node id -> score) before sorting.
    tmp_eignv_map = dict((node_id, tmp_NIdEigenH[node_id]) for node_id in tmp_NIdEigenH)
    ranked = sorted(tmp_eignv_map.items(), key=lambda pair: pair[1], reverse=True)
    # Store only the node ids, highest score first.
    comm_high_eignv_cnt[c_id] = [node_id for node_id, _score in ranked]

In [20]:
# Pickle comm_high_eignv_cnt (community id -> node ids ordered by descending
# eigenvector centrality within that community's graph).
eignv_file = "eignv_commonly.pkl"

with  open(eignv_file, "wb") as fl:
    pickle.dump(comm_high_eignv_cnt, fl)

In [6]:
##--we see a single community plus its members' immediate friends as a graph
def get_single_comm1st(gf, comm_map_comm, c_id):
    '''
    Build a graph from one community plus the direct neighbours of its members.

    gf: graph that contains every member of the community as a node
    comm_map_comm: key: community id, value: user id array
    c_id: id of the community to extract

    Returns a new snap.TUNGraph containing every member of community c_id,
    every neighbour of a member in gf, and every edge of gf that touches a
    member.
    '''
    members = comm_map_comm[c_id]
    ego_gf = snap.TUNGraph.New()
    for member in members:
        ego_gf.AddNode(member)
    for member in members:
        node_it = gf.GetNI(member)
        for k in xrange(node_it.GetDeg()):
            friend = node_it.GetNbrNId(k)
            # Neighbours outside the community are added on first sight.
            if not ego_gf.IsNode(friend):
                ego_gf.AddNode(friend)
            ego_gf.AddEdge(member, friend)
    return ego_gf

In [9]:
##--rank the nodes of each community-plus-friends graph by eigenvector centrality
comm_high_eignv_cnt1st = {}
for c_id in sorted_id_comm_size:
    # Run the garbage collector before building the next graph.
    gc.collect()
    comm_gf = get_single_comm1st(gf, comm_map_comm, c_id)
    tmp_NIdEigenH = snap.TIntFltH()
    snap.GetEigenVectorCentr(comm_gf, tmp_NIdEigenH)
    # Copy the scores into a plain dict (node id -> score) before sorting.
    tmp_eignv_map = dict((node_id, tmp_NIdEigenH[node_id]) for node_id in tmp_NIdEigenH)
    ranked = sorted(tmp_eignv_map.items(), key=lambda pair: pair[1], reverse=True)
    # Store only the node ids, highest score first.
    comm_high_eignv_cnt1st[c_id] = [node_id for node_id, _score in ranked]

KeyboardInterrupt: 

In [None]:
# Pickle comm_high_eignv_cnt1st (community id -> node ids ordered by
# descending eigenvector centrality in the community-plus-neighbours graph).
eignv_file = "eignv_comm1st.pkl"

with  open(eignv_file, "wb") as fl:
    pickle.dump(comm_high_eignv_cnt1st, fl)