# In [3]:
import numpy as np
import networkx as nx
import random

# IC model

# In [None]:
def IC(g,S,P):
    """
    Run one simulation of the Independent Cascade (IC) model.

    Input:  graph object exposing g.neighbors(node), iterable of seed nodes,
            propagation probabilities indexed as P[u][v] for the edge u -> v
    Output: number of nodes influenced by the seed nodes (seeds included,
            duplicates in S counted once)

    The seed container S is copied, so the caller's object is never modified.
    """

    # Copy the seeds: aliasing S here would let the simulation grow the
    # caller's seed list in place.
    A = set(S)
    new_active = list(A)

    # Keep spreading until a round activates nobody new. Looping on the
    # frontier (rather than on a count of remaining live edges) guarantees
    # termination even when some edges can never be reached from the seeds.
    while new_active:
        new_ones = set()

        for node in new_active:
            for ngbr in g.neighbors(node):
                # A node that is already active cannot be activated again
                if ngbr in A or ngbr in new_ones:
                    continue

                # Flip the coin for edge (node, ngbr). Every node is in the
                # frontier exactly once, so each edge gets at most one coin
                # flip -- the same as deciding all "live" edges up front, but
                # without touching edges the cascade never reaches.
                if np.random.binomial(1, P[node][ngbr]):
                    new_ones.add(ngbr)

        # Add newly activated nodes to the set of activated nodes
        A |= new_ones
        new_active = list(new_ones)

    return len(A)

# Greedy

# In [None]:
def greedy(g,k,P):
    """
    Greedily pick seed nodes by the spread IC reports for each candidate.

    Input:  graph object exposing g.nodes() (plus whatever IC needs),
            number of seed nodes k, propagation probabilities P passed to IC
    Output: selected seed set (list, in selection order), and the spread
            recorded after each selection

    Each candidate is scored with a single call to IC. If k exceeds the
    number of nodes, selection stops once every node has been chosen, so the
    returned lists may be shorter than k.
    """

    S, spread = [], []
    chosen = set()

    # Find k nodes with largest marginal gain
    for _ in range(k):

        # Loop over nodes that are not yet in seed set to find biggest gain.
        # Candidates come from g.nodes() so node labels need not be 0..n-1.
        best_spread, best_node = 0, None
        for j in g.nodes():
            if j in chosen:
                continue

            # Get the spread
            s = IC(g,S + [j],P)

            # Update the winning node and spread so far
            if s > best_spread:
                best_spread, best_node = s, j

        # No candidate left (or none improved on zero): stop instead of
        # re-appending the winner of a previous round.
        if best_node is None:
            break

        # Add the selected node to the seed set
        S.append(best_node)
        chosen.add(best_node)

        # Record the spread achieved with the enlarged seed set
        spread.append(best_spread)

    return(S,spread)

# CELF

# In [None]:
def celf(g,k,P):
    """
    Select seed nodes with the CELF lazy-evaluation variant of greedy.

    Input:  graph object exposing g.nodes() (plus whatever IC needs),
            number of seed nodes k, propagation probabilities P passed to IC
    Output: selected seed set (list, in selection order), and the cumulative
            spread recorded after each selection

    Returns ([], []) when k <= 0 or the graph has no nodes. If k exceeds the
    number of nodes, selection stops once every node has been chosen.
    """

    nodes = list(g.nodes())
    if k <= 0 or not nodes:
        return([],[])

    # --------------------
    # Find the first node with greedy algorithm
    # --------------------

    # Score every node on its own and sort by that score, best first
    Q = sorted(((node, IC(g,[node],P)) for node in nodes),
               key=lambda x: x[1], reverse=True)

    # Select the first node and remove from candidate list
    S, spread, SPREAD = [Q[0][0]], Q[0][1], [Q[0][1]]
    Q = Q[1:]

    # --------------------
    # Find the next k-1 nodes using the list-sorting procedure
    # --------------------

    for _ in range(k-1):

        # Every node has already been selected
        if not Q:
            break

        check = False

        while not check:

            # Recalculate spread of top node
            current = Q[0][0]

            # Evaluate the spread function and store the marginal gain
            Q[0] = (current,IC(g,S+[current],P) - spread)

            # Re-sort the list
            Q = sorted(Q, key=lambda x: x[1], reverse=True)

            # Check if previous top node stayed on top after the sort;
            # if so its freshly computed gain is the one to accept
            check = (Q[0][0] == current)

        # Select the next node
        spread += Q[0][1]
        S.append(Q[0][0])
        SPREAD.append(spread)

        # Remove the selected node from the list
        Q = Q[1:]

    return(S,SPREAD)