In [1]:
import operator
import pickle
import snap

In [2]:
def get_comm_info(comm_file):
    '''
    Parse a community file into two lookup maps.

    The file is read as text, one community per line, with the member
    user ids separated by tabs. Communities are numbered 0, 1, 2, ... in
    the order their (non-blank) lines appear in the file.

    comm_file: path of the community file to read.

    Returns a tuple (comm_map_usr, comm_map_comm):
    comm_map_usr:  key: user id, value: list of community ids it belongs to
    comm_map_comm: key: community id, value: list of member user ids

    Raises ValueError if a non-empty field is not an integer.
    '''
    comm_map_usr = {}
    comm_map_comm = {}
    comm_id = 0
    with open(comm_file, 'r') as cf:
        for line in cf:
            # Drop empty fields (trailing tab, bare newline) instead of
            # letting int('') blow up on them.
            node_list = [int(tok) for tok in line.split('\t') if tok.strip()]
            if not node_list:
                # Blank line: not a community, and it does not consume an id.
                continue
            for node_id in node_list:
                comm_map_usr.setdefault(node_id, []).append(comm_id)
            comm_map_comm[comm_id] = node_list
            comm_id += 1
    return comm_map_usr, comm_map_comm

In [3]:
# Edge-list file for the whole graph; loaded below as an undirected graph.
gf_file = 'data/com-lj.ungraph.txt'
gf = snap.LoadEdgeList(
    snap.PUNGraph,  # graph type to build
    gf_file,
    0,              # column index of the source node id
    1,              # column index of the destination node id
)

In [4]:
# Single-argument print(...) emits the same text on Python 2 and Python 3;
# joining with " " reproduces the separator the print statement inserted.
print(" ".join(["Load graph! With nodes ", str(gf.GetNodes()), " and edges ", str(gf.GetEdges())]))

Load graph! With nodes  3997962  and edges  34681189


In [5]:
# Community membership file, parsed into both lookup directions:
# user id -> community ids, and community id -> user ids.
comm_file = 'data/com-lj.all.cmty.txt'
comm_map_usr, comm_map_comm = get_comm_info(comm_file)

In [5]:
##--Degree centrality
# Map every node id of the full graph to its degree centrality.
degr_centr_map = {}
for node_it in gf.Nodes():
    node_id = node_it.GetId()
    degr_centr_map[node_id] = snap.GetDegreeCentr(gf, node_id)

In [6]:
##--dump degree centrality
# Persist the node-id -> centrality map so it can be reloaded instead of recomputed.
degr_centr_file = 'deg_ctr.pkl'
with open(degr_centr_file, 'wb') as out_fh:
    pickle.dump(degr_centr_map, out_fh)

In [7]:
##--reload degree centrality
degr_centr_file = "deg_ctr.pkl"

# The file was written with "wb"; a pickle stream must be read back in binary
# mode too ("r" can corrupt it on platforms that translate newlines and fails
# outright on Python 3).
# NOTE: pickle.load can execute arbitrary code -- only load files produced by
# this notebook.
with open(degr_centr_file, "rb") as fl:
    degr_centr_map = pickle.load(fl)

In [16]:
##--sorted degree centrality
# (node id, centrality) pairs, highest centrality first.
sorted_dgc = sorted(degr_centr_map.items(), key=lambda pair: pair[1], reverse=True)

# Node ids only, in the same order.
sorted_id = [node_id for node_id, _centr in sorted_dgc]

In [19]:
# For each node, in decreasing-centrality order, record 1 if it appears in at
# least one community and 0 otherwise. The loop variable is deliberately not
# called `id`, so the builtin id() is not clobbered in the notebook namespace.
in_comm = [int(node_id in comm_map_usr) for node_id in sorted_id]

In [20]:
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(sorted_id[:100])
print(in_comm[:100])

[9766, 9765, 54974, 6395, 11509, 52171, 53198, 56256, 52022, 57045, 13664, 42463, 53252, 4435, 7832, 2806, 6691, 52977, 53947, 55514, 6693, 52984, 52062, 33625, 23205, 53767, 58926, 50516, 5442, 6698, 53777, 9783, 55776, 53232, 52154, 6697, 52153, 170786, 54465, 12370, 52669, 6396, 53251, 146797, 63840, 208336, 58764, 54480, 10428, 52220, 6398, 4492, 7486, 13294, 78458, 6707, 2883, 52206, 9772, 54484, 77918, 2890632, 864248, 6732, 9779, 53220, 9261, 53959, 4128, 6431, 1656406, 1554534, 155706, 3389, 470862, 55732, 77451, 447075, 75598, 1489387, 3900700, 12368, 53236, 170465, 817235, 3269998, 12369, 52205, 6728, 2791103, 54190, 2181652, 53349, 6744, 57303, 53279, 1071911, 57449, 48478, 38254]
[0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0

In [8]:
##--get community size
# community id -> number of member ids listed for it. The loop variable is not
# named `id`, so the builtin id() stays usable in the notebook namespace.
comm_size = dict((c_id, len(comm_map_comm[c_id])) for c_id in comm_map_comm)

##--sorted community by its size
# (community id, size) pairs, largest community first.
sorted_comm_size = sorted(comm_size.items(), key=operator.itemgetter(1), reverse=True)

# Community ids only, in the same order.
sorted_id_comm_size = [c_id for c_id, _size in sorted_comm_size]

In [26]:
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(sorted_id_comm_size[:100])

[664006, 663287, 662541, 661354, 661646, 659019, 660448, 659444, 658401, 658204, 656578, 656690, 656062, 655618, 656293, 657769, 653762, 655297, 652794, 655187, 652773, 654692, 656993, 655724, 650822, 653769, 650830, 649876, 651333, 650282, 655378, 654480, 648906, 649277, 653852, 652810, 654792, 647361, 649472, 646601, 650692, 646614, 651867, 654102, 646467, 646457, 647536, 650839, 649199, 646443, 651075, 650517, 645207, 646633, 644803, 648913, 643217, 650292, 645070, 647231, 646669, 649549, 645193, 644280, 646805, 644289, 646919, 647921, 643516, 646745, 651391, 649020, 648031, 645395, 646141, 647076, 641906, 644238, 649321, 649884, 647371, 641437, 641679, 640634, 638559, 645436, 643522, 638781, 644419, 636069, 646476, 636074, 641446, 642258, 637719, 645993, 638888, 642763, 636050, 641642]


In [28]:
##--we see a single community as a graph
def get_single_comm(gf, comm_map_comm, c_id):
    '''
    Build the subgraph of gf induced by one community.

    gf:            the full graph; must contain every member id of the community.
    comm_map_comm: key: community id, value: list of member node ids.
    c_id:          id of the community to extract.

    Returns a new undirected snap graph whose nodes are the community
    members and whose edges are the edges of gf with both endpoints
    inside the community.
    '''
    node_id_list = comm_map_comm[c_id]
    # Set for O(1) membership tests; scanning the list for every neighbour
    # made this quadratic in the community size.
    members = set(node_id_list)
    gf_comm = snap.TUNGraph.New()
    for node_id in node_id_list:
        # Guard against an id listed twice; AddNode rejects an existing id.
        if not gf_comm.IsNode(node_id):
            gf_comm.AddNode(node_id)
    for node_id in node_id_list:
        ni = gf.GetNI(node_id)
        for nbr_i in range(ni.GetDeg()):
            nbr_id = ni.GetNbrNId(nbr_i)
            # Each edge is visited from both endpoints; the second AddEdge
            # call for an already-present edge does not add a duplicate.
            if nbr_id in members:
                gf_comm.AddEdge(node_id, nbr_id)
    return gf_comm

In [37]:
##--rank the nodes of each community by degree centrality, if we see each single community as a graph
# comm_high_deg_cnt: community id -> member node ids, highest centrality first.
comm_high_deg_cnt = {}
for c_id in sorted_id_comm_size:
    comm_gf = get_single_comm(gf, comm_map_comm, c_id)
    # Centrality is measured inside the community graph, not the full graph.
    deg_map = {}
    for node_it in comm_gf.Nodes():
        node_id = node_it.GetId()
        deg_map[node_id] = snap.GetDegreeCentr(comm_gf, node_id)
    ranked = sorted(deg_map.items(), key=operator.itemgetter(1), reverse=True)
    comm_high_deg_cnt[c_id] = [node_id for node_id, _centr in ranked]

In [38]:
##--dump the per-community node ranking (community graph only)
degr_centr_file = 'deg_ctr_comm_only.pkl'
with open(degr_centr_file, 'wb') as out_fh:
    pickle.dump(comm_high_deg_cnt, out_fh)

In [6]:
##--we see a single community and its immediate friend as a graph
def get_single_comm1st(gf, comm_map_comm, c_id):
    '''
    Build the graph of one community plus its members' direct neighbours.

    gf:            the full graph; must contain every member id of the community.
    comm_map_comm: key: community id, value: list of member node ids.
    c_id:          id of the community to extract.

    Returns a new undirected snap graph containing the community members,
    every neighbour of a member in gf, and every edge of gf that touches
    at least one member. Edges between two non-member neighbours are not
    included.
    '''
    node_id_list = comm_map_comm[c_id]
    gf_comm = snap.TUNGraph.New()
    for node_id in node_id_list:
        # Guard against an id listed twice; AddNode rejects an existing id.
        if not gf_comm.IsNode(node_id):
            gf_comm.AddNode(node_id)
    for node_id in node_id_list:
        ni = gf.GetNI(node_id)
        for nbr_i in range(ni.GetDeg()):
            nbr_id = ni.GetNbrNId(nbr_i)
            # Neighbours outside the community are pulled in on first sight.
            if not gf_comm.IsNode(nbr_id):
                gf_comm.AddNode(nbr_id)
            gf_comm.AddEdge(node_id, nbr_id)
    return gf_comm

In [9]:
# comm_high_deg_cnt_1st: community id -> node ids of the community-plus-neighbours
# graph, highest degree centrality first.
comm_high_deg_cnt_1st = {}
for c_id in sorted_id_comm_size:
    comm_gf = get_single_comm1st(gf, comm_map_comm, c_id)
    # Centrality is measured inside the extracted graph, not the full graph.
    deg_map = {}
    for node_it in comm_gf.Nodes():
        node_id = node_it.GetId()
        deg_map[node_id] = snap.GetDegreeCentr(comm_gf, node_id)
    ranked = sorted(deg_map.items(), key=operator.itemgetter(1), reverse=True)
    comm_high_deg_cnt_1st[c_id] = [node_id for node_id, _centr in ranked]

In [10]:
##--dump the per-community node ranking (community plus immediate neighbours)
degr_centr_file = 'deg_ctr_comm_1st.pkl'
with open(degr_centr_file, 'wb') as out_fh:
    pickle.dump(comm_high_deg_cnt_1st, out_fh)