In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import dgl
from dgl.nn import ChebConv


import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import pandas as pd
import numpy.linalg as alg

import os
from tqdm import tqdm
from functools import reduce
import argparse

DGL backend not selected or invalid.  Assuming PyTorch for now.
  from .autonotebook import tqdm as notebook_tqdm


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [2]:
def node_connects_cluster(node):
    return set(map(lambda x: inverse_cluster_dict[x], list(g[node]))).union(set([inverse_cluster_dict[node]]))

def po_linear_model(graph, alpha=1, beta=1, sigma=0.1, gamma=2):    
    for i in graph.nodes:
        graph.nodes[i]["y"] = alpha + beta * graph.nodes[i]["z"] + sigma * np.random.normal() + gamma * sum([graph.nodes[ngbr]['z'] for ngbr in graph[i]])/graph.degree[i]  # 

def po_multiplicative_model(graph, alpha=1, sigma=0.1, delta=1, gamma=2): 
    for i in graph.nodes:
        graph.nodes[i]["y"] = ( (alpha + sigma * np.random.normal()) * graph.degree[i]/avg_deg )  *  (1 + delta * graph.nodes[i]["z"] + gamma * sum([graph.nodes[ngbr]['z'] for ngbr in graph[i]]) / len(graph[i]) )


def po_linear_model_square_expo(graph, alpha=1, beta=1, sigma=0.1, gamma=2):    
    for i in graph.nodes:
        graph.nodes[i]["y"] = alpha + beta * graph.nodes[i]["z"] + sigma * np.random.normal() +  gamma * (sum([graph.nodes[ngbr]['z'] for ngbr in graph[i]])/graph.degree[i])**2   




In [3]:
path = 'Dataset/socfb-Stanford3.mtx'

df = pd.read_table(path, skiprows=1, names = ["source", "target"], sep=" ")
g = nx.from_pandas_edgelist(df)

# calculate basic elements
num_nodes = g.number_of_nodes()
num_edges = g.number_of_edges()
degs = [g.degree[i] for i in g.nodes]
avg_deg = sum(degs)/len(degs)

# clustering
# generally, we fix the outcome of clustering
clusters = nx_comm.louvain_communities(g, seed = 10, resolution=5)
clusters = sorted(clusters, key = len, reverse=True)
cluster_sizes = list(map(len, clusters))
num_cluster = len(clusters)

# dict: from node to its cluster
inverse_cluster_dict = {
    node: cl for cl in range(num_cluster) for node in clusters[cl]
}

# dict: from node to its connected cluster
node_to_connected_clusters = {
    node: node_connects_cluster(node) for node in range(1, num_nodes + 1)
}

In [4]:
A = np.array(nx.adjacency_matrix(g).todense(), dtype = np.float64)
deg_array = np.array(list(dict(g.degree).values()))

D_inv_A = np.zeros_like(A)

for i in range(num_nodes):
    D_inv_A[i] = A[i] / deg_array[i]

multi_hop_A = torch.load("A_2hop.pkl")


In [5]:
# set diagonal of 2-hop adjacency to 0
for i in range(num_nodes):
    multi_hop_A[i, i] = 0

node_list = list(g.nodes.keys())

def po_2hop_linear_model(graph, z_vec, alpha=1, beta=1, sigma=0.1, gamma=1, r1=1, r2=0.5):        
    y_vec = alpha + beta * z_vec + sigma * np.random.normal(size=(num_nodes, 1)) + gamma * (
    r1 * np.matmul(D_inv_A, z_vec) + r2 * np.matmul(multi_hop_A, z_vec)
)
    for i in range(num_nodes):
        graph.nodes[node_list[i]]["y"] = y_vec[i][0]
        graph.nodes[node_list[i]]["z"] = z_vec[i][0]

In [6]:
# role of merge data

num_repeat = 1000
# ramps = [0.5, 0.5, 0.5, 0.5, 0.5]
ramps = [0.02, 0.05, 0.1, 0.25, 0.5]

In [None]:

ht_array = np.zeros((num_repeat, len(ramps)))

for seed in tqdm(range(num_repeat)):
    np.random.seed(seed)   

    rollout_index = np.random.uniform(0, 1, size=(num_nodes))   
    
    for num_step in range(len(ramps)):        
        p_list = ramps[num_step:]
        ht_list = []
        for p in p_list:
            z = (rollout_index < np.quantile(rollout_index, p))            
            nx.set_node_attributes(g, 0, "z")
            nx.set_node_attributes(g, {unit:1 for unit in range(num_nodes) if z[unit]}, "z")   
                
            po_linear_model(g, gamma = 1)
            
            # HT estimator
            mo1, mo0 = 0, 0
            for unit in g.nodes:
                if g.nodes[unit]['z'] == 1:
                    mo1 += g.nodes[unit]['y']
                else:
                    mo0 += g.nodes[unit]['y']
            HT = (mo1/p - mo0/(1-p))/num_nodes
            ht_list.append(HT)
        
        ht_array[seed, num_step] = sum(ht_list)/len(ht_list)
                                    
    torch.save(ht_array, "Result/ht_ramp_unit.pkl")

 14%|█▍        | 139/1000 [18:33<1:55:15,  8.03s/it]

# Incremental

In [13]:
ramps = [0.02, 0.05, 0.1, 0.25, 0.5]

In [15]:
ht_array = np.zeros((num_repeat, len(ramps)))

for seed in tqdm(range(num_repeat)):
    np.random.seed(seed)   

    rollout_index = np.random.uniform(0, 1, size=(num_nodes))   
    
    for num_step in range(len(ramps)):        
        p = ramps[num_step]        
        z = (rollout_index < np.quantile(rollout_index, p))
        
        nx.set_node_attributes(g, 0, "z")
        nx.set_node_attributes(g, {unit:1 for unit in range(num_nodes) if z[unit]}, "z")   

        po_linear_model(g, gamma = 1)
        
        # HT estimator
        mo1, mo0 = 0, 0
        for unit in g.nodes:
            if g.nodes[unit]['z'] == 1:
                mo1 += g.nodes[unit]['y']
            else:
                mo0 += g.nodes[unit]['y']
        HT = (mo1/p - mo0/(1-p))/num_nodes
        ht_list.append(HT)
    
    ht_array[seed, num_step] = HT
                                
    torch.save(ht_array, "Result/ht_incre_unit.pkl")

100%|██████████| 1000/1000 [45:38<00:00,  2.74s/it]


# Sqaure Exposure

In [9]:
ramps = [0.5] # single step

In [10]:

ht_array = np.zeros((num_repeat, len(ramps)))

for seed in tqdm(range(num_repeat)):
    np.random.seed(seed)   

    rollout_index = np.random.uniform(0, 1, size=(num_nodes))   
    
    for num_step in range(len(ramps)):        
        p_list = ramps[num_step:]
        ht_list = []
        for p in p_list:
            z = (rollout_index < np.quantile(rollout_index, p))            
            nx.set_node_attributes(g, 0, "z")
            nx.set_node_attributes(g, {unit:1 for unit in range(num_nodes) if z[unit]}, "z")                                   
            po_linear_model(g, gamma = 1)
            
            # HT estimator
            mo1, mo0 = 0, 0
            for unit in g.nodes:
                if g.nodes[unit]['z'] == 1:
                    mo1 += g.nodes[unit]['y']
                else:
                    mo0 += g.nodes[unit]['y']
            HT = (mo1/p - mo0/(1-p))/num_nodes
            ht_list.append(HT)
        
        ht_array[seed, num_step] = sum(ht_list)/len(ht_list)
                                    
    torch.save(ht_array, "Result/Square_ht_ramp_unit.pkl")

100%|██████████| 1000/1000 [09:10<00:00,  1.82it/s]


In [11]:
ht_array.mean(axis=0)

array([0.99952315])

In [12]:
ht_array.std(axis=0)

array([0.00371132])