In [3]:
import json
import numpy as np
import networkx as nx
import gurobipy as gp
from gurobipy import *
import random
import copy
np.random.seed(0)

In [4]:
print(gp.gurobi.version())

(9, 5, 1)


## Construct network graph from Reddit data

Refer to http://snap.stanford.edu/data/web-RedditNetworks.html

In [5]:
# read in networks for subreddit /r/politics
with open("politics.json") as fp:
    month_nets = json.load(fp)

# see who the user sn00gan replied to in the first month
print(month_nets[0]["sn00gan"])

['TedTheGreek_Atheos', 'ptwonline', 'sn00gan', 'TiiziiO', 'caged_raptor']


In [6]:
# Create graph
G = nx.DiGraph()
for i in range(len(month_nets)):
    for k,v in month_nets[i].items():
        for vs in v:
            G.add_edge(k, vs)

In [7]:
len(G.edges)

1688750

In [8]:
len(G.nodes)

119780

## Cascade Functions

In [9]:
def simulate_cascade(G, p_lb=0, p_ub=1):
    """
    Simulate a cascade in graph by flipping coins for all edges in G to construct a subgraph G'
    
    Parameters:
    G (networkx.Graph): The graph on which to run the model.
    
    Returns:
    G_prime (networkx.Graph): A subgraph of G
    """
    
    G_prime = copy.deepcopy(G)
    
    removed_edges = []
    for e in G.edges:
        p = np.random.uniform(p_lb, p_ub)
        u = np.random.uniform()
        if p < u:
            removed_edges.append(e)
    
    G_prime.remove_edges_from(removed_edges)
    
    
    return G_prime

In [11]:
# G_p = simulate_cascade(DAG2)
# len(G_p.edges)

In [100]:
def simulate_fixed_y_cascade(G, ys, p_lb=0, p_ub=1):
    '''
    Simulate a cascade with fixed initial set of nodes
    '''
    # Set all nodes as not activated
    activated = set()
    # Add the seed nodes to the activated set
    activated.update(ys)
    # Initialize the newly activated nodes list with the seed nodes
    newly_activated = set(ys)
    
    # Run the model until there are no more newly activated nodes
    while len(newly_activated) != 0:
        # Get the neighbors of all newly activated nodes
        neighbors = set()
        for node in newly_activated:
            neighbors.update(set(G.successors(node)))
        # Remove already activated neighbors
        neighbors -= activated
        # Check for each neighbor if it gets activated
        newly_activated = set()
        for node in neighbors:
            p = np.random.uniform(p_lb, p_ub)
            u = np.random.uniform()
            if p < u:
                newly_activated.add(node)
        # Add newly activated nodes to the activated set
        activated.update(newly_activated)
    
    return len(activated)

In [13]:
# simulate_fixed_y_cascade(DAG2, np.arange(10))

## MIP Model

In [58]:
def solve_max_spread(graphs, B, C):
    """
    Solves the MIP model for maximizing the spread of cascades using network design as presented in the paper
    "Maximizing the Spread of Cascades Using Network Design" by Sheldon et al. for the given graph and budget.
    
    Parameters:
    graphs (list [networkx.Graph]): List of training cascades graphs, which are subgraphs of the original network
    budget (int): The budget of edges that can be added to the graph.
    costs (list): Cost of each action
    """
    # Initialize the MIP model
    model = gp.Model('maximize_spread_cascades')
    
    # Number of training cascades
    N = len(graphs)

    # Initialize the decision variables
    x = {}
    y = {}
    
    nodes = graphs[0].nodes
    
    for k in range(N):
        for v in nodes:
            x[k, v] = model.addVar(vtype=GRB.CONTINUOUS, lb = 0, ub = 1, name=f'x_{k}_{v}')
    for v in nodes:
        y[v] = model.addVar(vtype=GRB.BINARY, name=f'y_{v}') # Asssuming each action corresponds to purchasing one node.

    # Initialize the objective function to maximize the expected spread
    obj = 1/N * gp.quicksum(x[k, v] for k in range(N) for v in nodes)
    model.setObjective(obj, GRB.MAXIMIZE)
    
    # Add the budget constraint
    model.addConstr(gp.quicksum(C[v]*y[v] for v in nodes) <= B, name='budget_constraint')
    
    # Add the edge constraints
    model.addConstrs((x[k, v] <= gp.quicksum(x[k, u] for u in graphs[k].predecessors(v)) for k in range(N) for v in nodes), name=f'edge_constraint_{k}_{v}')

    # Add the coverage constraint
    model.addConstrs((x[k, v] <= y[v] for v in nodes for k in range(N)), name=f'coverage_constraint_{k}_{v}')

    # Optimize the model
    model.optimize()
    
    # Get Objective value
    obj = model.getObjective().getValue()
    ys = [v for v in nodes if y[v].x == 1]
    
    return model, obj, ys

## SAA Algo

In [15]:
def SAA_phase1(input_graph, B, C, num_sample = 50, num_train = 10,):
    sol_lst = []
    obj_ub_lst = []
    for _ in range(num_sample):
        # For each sample, generate num_train of training cascades.
        graphs = [simulate_cascade(input_graph) for _ in range(num_train)]
        model, obj, ys = solve_max_spread(graphs, B, C)
        sol_lst.append(ys)
        obj_ub_lst.append(obj)
        
    return sol_lst, obj_ub_lst

In [16]:
def SAA_phase2(input_graph, sol_lst, num_sample = 50, num_train = 10, num_valid = 500, num_test = 500):
    # Choose the best solution by re-estimating using num_valid training cascades.
    obj_lst = []
    for i in range(num_sample):
        graphs = [simulate_cascade(input_graph) for _ in range(num_valid)]
        obj = simulate_fixed_y_cascade(graphs, sol_lst[i])
        obj_lst.append(obj)

    y_opt = sol_lst[np.argmax(obj_lst)]
    # Generate num_test training cascades fro testing the best solution.
    graphs = [simulate_cascade(input_graph) for _ in range(num_test)]
    obj_opt = simulate_fixed_y_cascade(graphs, y_opt)
    
    return y_opt, obj_opt

## Experiment

### Random DAG

In [19]:
# # Create a random graph
# G = nx.gnp_random_graph(30000, 0.01, seed=0, directed=True)
# # Create a directed acyclic graph by only keeping edges that point from lower (newer) indices to higher (older)
# # There's a directed edge between paper i and paper j if paper i cites paper j.
# DAG = nx.DiGraph([(u,v) for (u,v) in G.edges() if u<v])
# nx.is_directed_acyclic_graph(DAG)
# len(DAG.edges)

In [87]:
# Create a random graph (smaller instance)
G2 = nx.gnp_random_graph(500, 0.2, seed=0, directed=True)

# Create a directed acyclic graph by only keeping edges that point from lower (newer) indices to higher (older)
DAG2 = nx.DiGraph([(u,v) for (u,v) in G2.edges() if u<v])
nx.is_directed_acyclic_graph(DAG2)

len(DAG2.edges)

25077

In [89]:
B = 3000
np.random.seed(0)
C = [np.random.uniform(0, 100) for _ in range(len(DAG2))]

In [90]:
# SAA_phase1(DAG2, B, C, num_sample = 1, num_train = 10)

In [91]:
graphs = [simulate_cascade(DAG2) for _ in range(10)]
model, obj, ys = solve_max_spread(graphs, B, C)

Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (mac64[x86])
Thread count: 2 physical cores, 4 logical processors, using up to 4 threads
Optimize a model with 10001 rows, 5500 columns and 141146 nonzeros
Model fingerprint: 0x2b3d8f01
Variable types: 5000 continuous, 500 integer (500 binary)
Coefficient statistics:
  Matrix range     [5e-01, 1e+02]
  Objective range  [1e-01, 1e-01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e+03, 3e+03]
Found heuristic solution: objective -0.0000000
Presolve removed 10001 rows and 5500 columns
Presolve time: 0.27s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.33 seconds (0.17 work units)
Thread count was 1 (of 4 available processors)

Solution count 1: -0 
No other solutions better than -0

Optimal solution found (tolerance 1.00e-04)
Best objective -0.000000000000e+00, best bound -0.000000000000e+00, gap 0.0000%


In [101]:
print('len_ys', len(ys))
print('ys', ys)
simulate_fixed_y_cascade(DAG2, ys)

len_ys 49
ys [433, 451, 460, 475, 482, 488, 491, 494, 498, 499, 459, 461, 485, 495, 496, 497, 455, 458, 470, 471, 480, 493, 468, 472, 474, 477, 481, 487, 438, 462, 463, 478, 489, 450, 464, 465, 490, 448, 454, 469, 476, 442, 479, 483, 484, 456, 486, 492, 473]


54