In [286]:
import json
import numpy as np
import networkx as nx
import gurobipy as gp
from gurobipy import *
import random
import copy
np.random.seed(0)

In [287]:
print(gp.gurobi.version())

(9, 5, 1)


## Cascade Functions

In [288]:
def simulate_cascade(G, p_lb=0, p_ub=1):
    """
    Simulate a cascade in graph by flipping coins for all edges in G to construct a subgraph G'
    
    Parameters:
    G (networkx.Graph): The graph on which to run the model.
    
    Returns:
    G_prime (networkx.Graph): A subgraph of G
    """
    
    G_prime = copy.deepcopy(G)
    
    removed_edges = []
    for e in G.edges:
        u = np.random.uniform()
        if u < G[e[0]][e[1]]['weight']:
            removed_edges.append(e)
    
    G_prime.remove_edges_from(removed_edges)
    
    
    return G_prime

In [309]:
def simulate_fixed_y_cascade(G, ys):
    '''
    Simulate a cascade with fixed initial set of nodes
    '''
    # Set all nodes as not activated
    activated = set()
    # Add the seed nodes to the activated set
    activated.update(set(ys))
    # Initialize the newly activated nodes list with the seed nodes
    newly_activated = set(ys)
    
    # Run the model until there are no more newly activated nodes
    while len(newly_activated) != 0:
        temp_activated = set()
        for v in newly_activated:
            # Check for each successor if it gets activated
            for w in G.successors(v):
                if w not in activated:
                    u = np.random.uniform()
                    if u < G[v][w]['weight']:
                        temp_activated.add(w)
        # Add newly activated nodes to the activated set
        newly_activated = temp_activated
        activated.update(newly_activated)
    
    return activated

## MIP Model

In [361]:
def solve_max_spread(graphs, B, C):
    """
    Solves the MIP model for maximizing the spread of cascades using network design as presented in the paper
    "Maximizing the Spread of Cascades Using Network Design" by Sheldon et al. for the given graph and budget.
    
    Parameters:
    graphs (list [networkx.Graph]): List of training cascades graphs, which are subgraphs of the original network
    budget (int): The budget of edges that can be added to the graph.
    costs (list): Cost of each action
    """
    # Initialize the MIP model
    model = gp.Model('maximize_spread_cascades')
    
    # Number of training cascades
    N = len(graphs)

    # Initialize the decision variables
    x = {}
    y = {}
    
    nodes = np.sort(list(graphs[0].nodes))
    
    for k in range(N):
        for v in nodes:
            x[k, v] = model.addVar(vtype=GRB.CONTINUOUS, lb = 0, ub = 1, name=f'x_{k}_{v}')
    for v in nodes:
        y[v] = model.addVar(vtype=GRB.BINARY, name=f'y_{v}') # Asssuming each action corresponds to purchasing one node.

    # Initialize the objective function to maximize the expected spread
    obj = 1/N * gp.quicksum(x[k, v] for k in range(N) for v in nodes)
    model.setObjective(obj, GRB.MAXIMIZE)
    
    # Add the budget constraint
    model.addConstr(gp.quicksum(C[v]*y[v] for v in nodes) <= B, name='budget_constraint')
    
    # Add the edge constraints
    for k in range(N):
        for v in nodes:
            preds = set(graphs[k].predecessors(v))
            if len(preds) != 0:
                model.addConstr((x[k, v] <= gp.quicksum(x[k, u] for u in preds)), name=f'edge_constraint_{k}_{v}')

    # Add the coverage constraint
    model.addConstrs((x[k, v] <= y[v] for v in nodes for k in range(N)), name=f'coverage_constraint_{k}_{v}')
    
    model.update()

    # Optimize the model
    model.optimize()
    
    # Get Objective value
    obj = model.getObjective().getValue()
    ys = [v for v in nodes if y[v].x == 1]
    return model, obj, ys

## SAA Algo

In [292]:
def SAA_phase1(input_graph, B, C, num_sample = 50, num_train = 10):
    sol_lst = []
    obj_ub_lst = []
    for i in range(num_sample):
        np.random.seed(i)
        # For each sample, generate num_train of training cascades.
        graphs = [simulate_cascade(input_graph) for _ in range(num_train)]
        model, obj, ys = solve_max_spread(graphs, B, C)
        sol_lst.append(ys)
        obj_ub_lst.append(obj)
        
    return sol_lst, obj_ub_lst

In [293]:
def SAA_phase2(input_graph, sol_lst, num_sample = 50, num_train = 10, num_valid = 500, num_test = 500):
    # Choose the best solution by re-estimating using num_valid training cascades.
    obj_lst = []
    for i in range(num_sample):
        graphs = [simulate_cascade(input_graph) for _ in range(num_valid)]
        obj = simulate_fixed_y_cascade(graphs, sol_lst[i])
        obj_lst.append(obj)

    y_opt = sol_lst[np.argmax(obj_lst)]
    # Generate num_test training cascades fro testing the best solution.
    graphs = [simulate_cascade(input_graph) for _ in range(num_test)]
    obj_opt = simulate_fixed_y_cascade(graphs, y_opt)
    
    return y_opt, obj_opt

## Experiment

### Random DAG

In [294]:
# # Create a random graph
# G = nx.gnp_random_graph(30000, 0.01, seed=0, directed=True)
# # Create a directed acyclic graph by only keeping edges that point from lower (newer) indices to higher (older)
# # There's a directed edge between paper i and paper j if paper i cites paper j.
# DAG = nx.DiGraph([(u,v) for (u,v) in G.edges() if u<v])
# nx.is_directed_acyclic_graph(DAG)
# len(DAG.edges)

In [337]:
# Create a random graph (smaller instance)
G2 = nx.gnp_random_graph(5, 0.5, seed=0, directed=True)

# Create a directed acyclic graph by only keeping edges that point from lower (newer) indices to higher (older)
DAG2 = nx.DiGraph()
DAG2.add_nodes_from(G2.nodes)
DAG2.add_edges_from([(u,v) for (u,v) in G2.edges() if u<v])
print('Is the graph acyclic:', nx.is_directed_acyclic_graph(DAG2))
print('Num edges: ', len(DAG2.edges))

Is the graph acyclic: True
Num edges:  5


In [338]:
# Add random activation probabilities for each edge
np.random.seed(0)
for e in DAG2.edges():
    DAG2[e[0]][e[1]]['weight'] = np.random.uniform(0, 1)

In [339]:
# Input parameters - budget and action cost
B = 300
np.random.seed(0)
# C = [np.random.uniform(0, 100) for _ in range(len(DAG2))]
C = 100 * np.ones(len(DAG2))

In [341]:
# sol_lst, obj_ub_lst = SAA_phase1(DAG2, B, C, num_sample = 20, num_train = 10)

In [362]:
graphs = [simulate_cascade(DAG2) for _ in range(1)]
model, obj, ys = solve_max_spread(graphs, B, C)

Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (mac64[x86])
Thread count: 2 physical cores, 4 logical processors, using up to 4 threads
Optimize a model with 9 rows, 10 columns and 21 nonzeros
Model fingerprint: 0x5d976ded
Variable types: 5 continuous, 5 integer (5 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+02]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e+02, 3e+02]
Found heuristic solution: objective -0.0000000
Presolve removed 5 rows and 5 columns
Presolve time: 0.00s
Presolved: 4 rows, 5 columns, 11 nonzeros
Variable types: 0 continuous, 5 integer (5 binary)
Found heuristic solution: objective 3.0000000

Root relaxation: cutoff, 0 iterations, 0.00 seconds (0.00 work units)

Explored 1 nodes (0 simplex iterations) in 0.13 seconds (0.00 work units)
Thread count was 4 (of 4 available processors)

Solution count 2: 3 -0 

Optimal solution found (tolerance 1.00e-04)
Best objective 3.000000000000e+00, best bound 3.0000000000

In [363]:
print(model.display())

Maximize
  <gurobi.LinExpr: x_0_0 + x_0_1 + x_0_2 + x_0_3 + x_0_4>
Subject To
budget_constraint: <gurobi.LinExpr: 100.0 y_0 + 100.0 y_1 + 100.0 y_2 + 100.0 y_3 +
 100.0 y_4> <= 300
  edge_constraint_0_2: <gurobi.LinExpr: -1.0 x_0_1 + x_0_2> <= 0
  edge_constraint_0_3: <gurobi.LinExpr: -1.0 x_0_0 + x_0_3> <= 0
  edge_constraint_0_4: <gurobi.LinExpr: -1.0 x_0_3 + x_0_4> <= 0
  coverage_constraint_0_4[0,0]: <gurobi.LinExpr: x_0_0 + -1.0 y_0> <= 0
  coverage_constraint_0_4[1,0]: <gurobi.LinExpr: x_0_1 + -1.0 y_1> <= 0
  coverage_constraint_0_4[2,0]: <gurobi.LinExpr: x_0_2 + -1.0 y_2> <= 0
  coverage_constraint_0_4[3,0]: <gurobi.LinExpr: x_0_3 + -1.0 y_3> <= 0
  coverage_constraint_0_4[4,0]: <gurobi.LinExpr: x_0_4 + -1.0 y_4> <= 0
Bounds
  0 <= x_0_0 <= 1
  0 <= x_0_1 <= 1
  0 <= x_0_2 <= 1
  0 <= x_0_3 <= 1
  0 <= x_0_4 <= 1
Binaries
  ['y_0', 'y_1', 'y_2', 'y_3', 'y_4']
None


In [364]:
graphs[0].edges

OutEdgeView([(0, 3), (1, 2), (3, 4)])

In [369]:
ys

[0, 1, 2]

In [370]:
model.getVars()

[<gurobi.Var x_0_0 (value 1.0)>,
 <gurobi.Var x_0_1 (value 1.0)>,
 <gurobi.Var x_0_2 (value 1.0)>,
 <gurobi.Var x_0_3 (value 0.0)>,
 <gurobi.Var x_0_4 (value 0.0)>,
 <gurobi.Var y_0 (value 1.0)>,
 <gurobi.Var y_1 (value 1.0)>,
 <gurobi.Var y_2 (value 1.0)>,
 <gurobi.Var y_3 (value 0.0)>,
 <gurobi.Var y_4 (value 0.0)>]

In [368]:
# print('ys', sol_lst)
activated = simulate_fixed_y_cascade(DAG2, ys)
activated

{0, 1, 2, 3, 4}

### Construct network graph from Reddit data

Refer to http://snap.stanford.edu/data/web-RedditNetworks.html

In [5]:
# read in networks for subreddit /r/politics
with open("politics.json") as fp:
    month_nets = json.load(fp)

# see who the user sn00gan replied to in the first month
print(month_nets[0]["sn00gan"])

['TedTheGreek_Atheos', 'ptwonline', 'sn00gan', 'TiiziiO', 'caged_raptor']


In [6]:
# Create graph
G = nx.DiGraph()
for i in range(len(month_nets)):
    for k,v in month_nets[i].items():
        for vs in v:
            G.add_edge(k, vs)

In [7]:
len(G.edges)

1688750

In [8]:
len(G.nodes)

119780

In [124]:
G = nx.Graph()  # or DiGraph, etc
nx.add_path(G, [0, 1, 2, 3])
G.remove_edges_from([(0, 1)])
list(G.nodes)
list(G.edges)

[(1, 2), (2, 3)]