In [19]:
import operator
import pandas as pd
import pickle
import random
import snap

In [2]:
def get_comm_info(comm_file):
    '''
    get community information, two maps
    map1: key: user id, value: community id array
    map2: key: community id, value: user id array
    '''
    comm_map_usr = {}
    comm_map_comm = {}
    comm_id = 0
    with open(comm_file, 'r') as cf:
        for line in cf:
            node_list = line.split('\t')
            node_list = [int(id) for id in node_list]
            for id in node_list:
                if id in comm_map_usr:
                    comm_map_usr[id].append(comm_id)
                else:
                    comm_map_usr[id] = [comm_id]
            comm_map_comm[comm_id] = node_list[:]
            comm_id += 1
    return comm_map_usr, comm_map_comm

In [3]:
gf_file = "data/com-lj.ungraph.txt"
gf = snap.LoadEdgeList(snap.PUNGraph, gf_file, 0, 1)

In [4]:
print "Load graph! With nodes ", gf.GetNodes(), " and edges ", gf.GetEdges()

Load graph! With nodes  3997962  and edges  34681189


In [42]:
##--get graph map
def get_graph_info(gf_file):
    '''
    get graph information as a map
    key: user id, value: list of friend id
    '''
    gf_map = {}
    with open(gf_file, 'r') as cf:
        for line in cf:
            if line[0] != '#':
                node_list = line.split('\t')
                if len(node_list) == 2:
                    node_list = [int(id) for id in node_list]
                    if node_list[0] in gf_map:
                        gf_map[node_list[0]].add(node_list[1])
                    else:
                        gf_map[node_list[0]] = set([node_list[1]])
                    if node_list[1] in gf_map:
                        gf_map[node_list[1]].add(node_list[0])
                    else:
                        gf_map[node_list[1]] = set([node_list[0]])                    
                else:
                    print "wrong format line: ", line
    return gf_map

In [43]:
gf_map = get_graph_info(gf_file)

In [44]:
len(gf_map)

3997962

In [33]:
def ind_cas_mod_1step(gf, act_nod_list, new_act_nod, prob):
    '''
    perform 1 step independent cascade model
    '''
    new_pre_nod = []
    for id in new_act_nod:
        ni = gf.GetNI(id)
        for i in xrange(ni.GetDeg()):
            nbr_id = ni.GetNbrNId(i)
            if nbr_id not in act_nod_list:
                lim = random.random()
                if lim < prob:
                    new_pre_nod.append(nbr_id)
                    act_nod_list.append(nbr_id)
    return act_nod_list, new_pre_nod

In [36]:
##--get run the model with random initial
prob = 0.01
mean_list = {}
std_list = {}

for total_iter in [10, 100, 1000, 10000, 100000, 1000000]:
    total_influence = [0] * total_iter
    for i in xrange(total_iter):
        init_set = 1
        new_act_nod = []
        for j in xrange(init_set):
            new_act_nod.append(gf.GetRndNId())
        act_nod_list = new_act_nod[:]
        while len(new_act_nod) != 0:
            act_nod_list, new_act_nod = ind_cas_mod_1step(gf, act_nod_list, new_act_nod, prob)
        total_influence[i] = len(act_nod_list)
    total_influence = pd.DataFrame(total_influence)
    mean_list[total_iter] = total_influence.mean()[0]
    std_list[total_iter] = total_influence.std()[0]
    print total_iter, mean_list[total_iter], std_list[total_iter]

10 6676.2 21100.4043237
100 668.1 6667.66680149


KeyboardInterrupt: 

In [45]:
def ind_cas_mod_1step_v2(gf_map, act_nod_list, new_act_nod, prob):
    '''
    perform 1 step independent cascade model
    '''
    new_pre_nod = set()
    for id in new_act_nod:
        for nbr_id in gf_map[id]:
            if nbr_id not in act_nod_list:
                lim = random.random()
                if lim < prob:
                    new_pre_nod.add(nbr_id)
                    act_nod_list.add(nbr_id)
    return act_nod_list, new_pre_nod

In [None]:
##--get run the model with random initial
prob = 0.01
mean_list = {}
std_list = {}

for total_iter in [10, 100, 1000, 10000, 100000, 1000000]:
    total_influence = [0] * total_iter
    for i in xrange(total_iter):
        init_set = 1
        new_act_nod = set()
        for j in xrange(init_set):
            new_act_nod.add(gf.GetRndNId())
        act_nod_list = new_act_nod.copy()
        while len(new_act_nod) != 0:
            act_nod_list, new_act_nod = ind_cas_mod_1step_v2(gf_map, act_nod_list, new_act_nod, prob)
            if i % (total_iter / 10) == 0:
                print total_iter, i, count, len(act_nod_list), len(new_act_nod)
        total_influence[i] = len(act_nod_list)
    total_influence = pd.DataFrame(total_influence)
    mean_list[total_iter] = total_influence.mean()[0]
    std_list[total_iter] = total_influence.std()[0]
    print total_iter, mean_list[total_iter], std_list[total_iter]

10 0 2 1 0
10 1 2 1 0
10 2 2 1 0
10 3 2 1 0
10 4 2 1 0
10 5 2 1 0
10 6 2 1 0
10 7 2 1 0
10 8 2 1 0
10 9 2 1 0
10 1.0 0.0
100 0 2 1 0
100 10 2 2 1
100 10 2 2 0
100 20 2 2 1
100 20 2 3 1
100 20 2 3 0
100 30 2 1 0
100 40 2 1 0
100 50 2 1 0
100 60 2 1 0
100 70 2 1 0
100 80 2 1 0
100 90 2 3 2
100 90 2 4 1
100 90 2 4 0
100 673.41 6715.41506211
1000 0 2 1 0
1000 100 2 1 0
1000 200 2 1 0
1000 300 2 1 0
1000 400 2 1 0
1000 500 2 1 0
1000 600 2 1 0
1000 700 2 1 0
1000 800 2 1 0
1000 900 2 1 0
1000 1336.065 9350.91794484
10000 0 2 1 0
10000 1000 2 1 0
10000 2000 2 1 0
10000 3000 2 1 0


In [28]:
total_influence.mean()[0]

1.2