In [1]:
import operator
import pickle
import snap

In [2]:
def get_comm_info(comm_file):
    '''
    get community information, two maps

    The file is read as one community per line, member node ids separated
    by tabs. Communities are numbered 0, 1, 2, ... in the order their lines
    appear. Blank lines and empty fields (e.g. left by a trailing tab) are
    skipped; a blank line does not consume a community id.

    comm_file: path of the community file to read

    returns (map1, map2)
    map1: key: user id, value: community id array
    map2: key: community id, value: user id array

    raises ValueError if a non-empty field is not an integer
    '''
    comm_map_usr = {}
    comm_map_comm = {}
    comm_id = 0
    with open(comm_file, 'r') as cf:
        for line in cf:
            # int('') raises, so filter out whitespace-only fields first
            fields = [tok for tok in line.split('\t') if tok.strip()]
            if not fields:
                continue
            node_list = [int(tok) for tok in fields]
            for node_id in node_list:
                comm_map_usr.setdefault(node_id, []).append(comm_id)
            # node_list is rebuilt for every line, so it can be stored directly
            comm_map_comm[comm_id] = node_list
            comm_id += 1
    return comm_map_usr, comm_map_comm

In [3]:
##--edge list file of the graph, relative to the notebook's working directory
gf_file = "data/com-lj.ungraph.txt"
##--load the edge list as a snap.PUNGraph; gf is the graph every later cell works on
##--NOTE(review): the trailing 0, 1 are presumably the source / destination column
##--indexes of the edge list -- confirm against the snap.py LoadEdgeList docs
gf = snap.LoadEdgeList(snap.PUNGraph, gf_file, 0, 1)

In [4]:
##--sanity check: report the node and edge counts of the loaded graph
print "Load graph! With nodes ", gf.GetNodes(), " and edges ", gf.GetEdges()

Load graph! With nodes  3997962  and edges  34681189


In [5]:
##--community membership file, parsed by get_comm_info
comm_file = 'data/com-lj.all.cmty.txt'

##--comm_map_usr: user id -> community id array
##--comm_map_comm: community id -> user id array
comm_map_usr, comm_map_comm = get_comm_info(comm_file)

In [5]:
##--Get hubs and authorities score
##--GetHits is handed two freshly created snap.TIntFltH hashes; the following
##--cells read per-node hub values from NIdHubH and authority values from NIdAuthH
NIdHubH = snap.TIntFltH()
NIdAuthH = snap.TIntFltH()
snap.GetHits(gf, NIdHubH, NIdAuthH)

In [6]:
##--copy both SNAP hashes into plain dicts (node id -> score) and dump them to a pkl file
NIdHub_map = {nid: NIdHubH[nid] for nid in NIdHubH}
NIdAuth_map = {nid: NIdAuthH[nid] for nid in NIdAuthH}

hit_file = "hit.pkl"

with open(hit_file, "wb") as fl:
    # two back-to-back pickles in one file: hub scores first, authority scores second
    pickle.dump(NIdHub_map, fl)
    pickle.dump(NIdAuth_map, fl)

In [7]:
##--reload hub and authority scores from pkl file
hit_file = "hit.pkl"

# The file was written in binary mode ("wb"), so it has to be read back in
# binary mode as well; text mode can corrupt or reject the pickle stream.
# NOTE: pickle.load can execute arbitrary code -- only load a hit.pkl that
# was produced by this notebook.
with open(hit_file, "rb") as fl:
    # same order as they were dumped: hub scores first, then authority scores
    NIdHub_map = pickle.load(fl)
    NIdAuth_map = pickle.load(fl)

In [6]:
##--get community size (community id -> number of members)
##--built with a comprehension so the builtin `id` is not rebound at notebook scope
comm_size = {c_id: len(members) for c_id, members in comm_map_comm.items()}

##--sorted community by its size, largest first, as (community id, size) pairs
sorted_comm_size = sorted(comm_size.items(), key=operator.itemgetter(1), reverse=True)

##--community ids only, in the same largest-first order
sorted_id_comm_size = [item[0] for item in sorted_comm_size]

In [14]:
##--we see a single community as a graph
def get_single_comm(gf, comm_map_comm, c_id):
    '''
    return the subgraph of gf induced by the members of one community

    gf: the full graph; queried through GetNI(node id)
    comm_map_comm: key: community id, value: user id array
    c_id: id of the community to extract

    returns a new snap.TUNGraph with one node per community member and an
    edge for every neighbour pair of gf whose two ends are both members
    '''
    node_id_list = comm_map_comm[c_id]
    # set lookup keeps the membership test below O(1); scanning the list for
    # every neighbour made large communities quadratic
    member_ids = set(node_id_list)
    gf_comm = snap.TUNGraph.New()
    # nodes are still added in list order so the resulting graph is unchanged
    for node_id in node_id_list:
        gf_comm.AddNode(node_id)
    for node_id in node_id_list:
        ni = gf.GetNI(node_id)
        deg = ni.GetDeg()
        for nbr_i in xrange(deg):
            nbr_id = ni.GetNbrNId(nbr_i)
            if nbr_id in member_ids:
                # NOTE(review): every in-community edge is reached from both of
                # its ends, so AddEdge is called twice per edge -- presumably
                # the repeat is a no-op for TUNGraph; confirm
                gf_comm.AddEdge(node_id, nbr_id)
    return gf_comm

In [15]:
##--for each community (largest first) run HITS on the community-only graph and
##--keep its node ids ordered by descending hub score and by descending authority score
comm_high_hub_cnt = {}
comm_high_auth_cnt = {}
for c_id in sorted_id_comm_size:
    comm_gf = get_single_comm(gf, comm_map_comm, c_id)
    tmp_NIdHubH, tmp_NIdAuthH = snap.TIntFltH(), snap.TIntFltH()
    snap.GetHits(comm_gf, tmp_NIdHubH, tmp_NIdAuthH)
    # plain dicts (node id -> score) so the ids can be sorted by their score
    hub_score = {nid: tmp_NIdHubH[nid] for nid in tmp_NIdHubH}
    auth_score = {nid: tmp_NIdAuthH[nid] for nid in tmp_NIdAuthH}
    # sorting the keys by their value yields the ids, highest score first
    comm_high_hub_cnt[c_id] = sorted(hub_score, key=hub_score.get, reverse=True)
    comm_high_auth_cnt[c_id] = sorted(auth_score, key=auth_score.get, reverse=True)

In [16]:
##--dump the per-community rankings: hub ordering first, then authority ordering
hit_file = "hit_commonly.pkl"

with open(hit_file, "wb") as fl:
    for ranking in (comm_high_hub_cnt, comm_high_auth_cnt):
        pickle.dump(ranking, fl)

In [None]:
##--we see a single community and its immediate friend as a graph
def get_single_comm1st(gf, comm_map_comm, c_id):
    '''
    return a graph made of one community plus the direct neighbours of its members

    gf: the full graph; queried through GetNI(node id)
    comm_map_comm: key: community id, value: user id array
    c_id: id of the community to extract

    returns a new snap.TUNGraph holding every member, every neighbour a
    member has in gf, and one edge per (member, neighbour) pair
    '''
    members = comm_map_comm[c_id]
    gf_comm = snap.TUNGraph.New()
    for member_id in members:
        gf_comm.AddNode(member_id)
    for member_id in members:
        node_it = gf.GetNI(member_id)
        # collect the neighbour ids from gf first, then mirror them into gf_comm
        nbr_ids = [node_it.GetNbrNId(k) for k in xrange(node_it.GetDeg())]
        for nbr_id in nbr_ids:
            # neighbours outside the community are not in gf_comm yet
            if not gf_comm.IsNode(nbr_id):
                gf_comm.AddNode(nbr_id)
            gf_comm.AddEdge(member_id, nbr_id)
    return gf_comm

In [None]:
##--for each community (largest first) run HITS on the graph of the community plus
##--its members' immediate friends, and keep the node ids ordered by descending
##--hub score and by descending authority score
comm_high_hub_cnt1st = {}
comm_high_auth_cnt1st = {}
for c_id in sorted_id_comm_size:
    comm_gf = get_single_comm1st(gf, comm_map_comm, c_id)
    tmp_NIdHubH, tmp_NIdAuthH = snap.TIntFltH(), snap.TIntFltH()
    snap.GetHits(comm_gf, tmp_NIdHubH, tmp_NIdAuthH)
    # plain dicts (node id -> score) so the ids can be sorted by their score
    hub_score = {nid: tmp_NIdHubH[nid] for nid in tmp_NIdHubH}
    auth_score = {nid: tmp_NIdAuthH[nid] for nid in tmp_NIdAuthH}
    # sorting the keys by their value yields the ids, highest score first
    comm_high_hub_cnt1st[c_id] = sorted(hub_score, key=hub_score.get, reverse=True)
    comm_high_auth_cnt1st[c_id] = sorted(auth_score, key=auth_score.get, reverse=True)

In [None]:
##--dump the community-plus-friends rankings: hub ordering first, then authority ordering
hit_file = "hit_comm1st.pkl"

with open(hit_file, "wb") as fl:
    for ranking in (comm_high_hub_cnt1st, comm_high_auth_cnt1st):
        pickle.dump(ranking, fl)