In [1]:
# !pip install causalnex
# !pip install igraph
import utils, os, shutil
import numpy as np
from collections import defaultdict
from tqdm import tqdm
import matplotlib.pyplot as plt
from lingam.utils import make_dot

utils.set_random_seed(123)

In [2]:
def dataset_setting(n=2000, d=5, graph_type='ER', sem_type='gauss'):
    # d (int): num of nodes
    # e (int): expected num of edges 預期的邊數
    # graph_type (str): ER, SF, BP
    # gauss 常態分佈
    
    #e = d // 2
    e = round(d * 0.6)

    w_ranges = ((-0, -1.0), (0, 1))
    # noise_scale = 1
    # noise_scale = 0.15
    noise_scale = 1
    expt_name = "./datasets/nonlinear/n_{}_d_{}_e_{}_{}".format(n, d, e, graph_type)
    
    return create_dataset(n, d, e, graph_type, sem_type, w_ranges, noise_scale, expt_name)

In [3]:
def create_dataset(n, d, e, graph_type, sem_type, w_ranges, noise_scale, expt_name):
    
    if os.path.isdir(expt_name):
        shutil.rmtree(expt_name)

    os.mkdir(expt_name)
    os.chmod(expt_name, 0o777)
    perf = defaultdict(list)
    
    #Simulate random DAG with some expected number of edges.
    #Args:
    #    d (int): num of nodes
    #    e (int): expected num of edges
    #    graph_type (str): ER, SF, BP
    #Returns:
    #    B (np.ndarray): [d, d] binary adj matrix of DAG
    B_true = utils.simulate_dag(d, e, graph_type)


    #Simulate SEM parameters for a DAG.
    #Args:
    #    B (np.ndarray): [d, d] binary adj matrix of DAG
    #    w_ranges (tuple): disjoint weight ranges
    #Returns:
    #    W (np.ndarray): [d, d] weighted adj matrix of DAG
    W_true = utils.simulate_parameter(B_true, w_ranges=w_ranges)

    W_true_fn = os.path.join(expt_name, 'W_true.csv')
    np.savetxt(W_true_fn, W_true, delimiter=',')


    #Simulate samples from linear SEM with specified type of noise.
    #For uniform, noise z ~ uniform(-a, a), where a = noise_scale.
    #Args:
    #    W (np.ndarray): [d, d] weighted adj matrix of DAG
    #    n (int): num of samples, n=inf mimics population risk
    #    sem_type (str): gauss, exp, gumbel, uniform, logistic, poisson
    #    noise_scale (np.ndarray): scale parameter of additive noise, default all ones
#     X = utils.simulate_linear_sem(W_true, n, sem_type, noise_scale=noise_scale)

    """Simulate samples from nonlinear SEM.
    Args:
        B (np.ndarray): [d, d] binary adj matrix of DAG
        n (int): num of samples
        sem_type (str): mlp, mim, gp, gp-add
        noise_scale (np.ndarray): scale parameter of additive noise, default all ones
    Returns:
        X (np.ndarray): [n, d] sample matrix
    """
    X = utils.simulate_nonlinear_sem(W_true, n, sem_type, noise_scale=noise_scale)

    X_fn = os.path.join(expt_name, 'X.csv')
    np.savetxt(X_fn, X, delimiter=',')
    return B_true, W_true, X

In [4]:
## ER

# # samples 2000
# dataset_setting(n=2000, d=300, graph_type='ER', sem_type='gauss')
# dataset_setting(n=2000, d=100, graph_type='ER', sem_type='gauss')
# dataset_setting(n=2000, d=20, graph_type='ER', sem_type='gauss')

# # samples 500
# dataset_setting(n=500, d=300, graph_type='ER', sem_type='gauss')
# dataset_setting(n=500, d=100, graph_type='ER', sem_type='gauss')
# B_true, W_true, X = dataset_setting(n=500, d=20, graph_type='ER', sem_type='gauss')

#nonliner

# samples 2000
dataset_setting(n=2000, d=300, graph_type='ER', sem_type='mim')
dataset_setting(n=2000, d=100, graph_type='ER', sem_type='mim')
dataset_setting(n=2000, d=20, graph_type='ER', sem_type='mim')

# samples 500
dataset_setting(n=500, d=300, graph_type='ER', sem_type='mim')
dataset_setting(n=500, d=100, graph_type='ER', sem_type='mim')
B_true, W_true, X = dataset_setting(n=500, d=20, graph_type='ER', sem_type='mim')

# draw graph

dot = make_dot(W_true)

# Save png
dot.format = 'png'

dot.render('./dag')

from IPython.display import Image
Image(filename='dag.png') 

TypeError: 'int' object is not subscriptable

In [None]:
## SF

# # samples 2000
# dataset_setting(n=2000, d=300, graph_type='SF', sem_type='gauss')
# dataset_setting(n=2000, d=100, graph_type='SF', sem_type='gauss')
# dataset_setting(n=2000, d=20, graph_type='SF', sem_type='gauss')

# # samples 500
# dataset_setting(n=500, d=300, graph_type='SF', sem_type='gauss')
# dataset_setting(n=500, d=100, graph_type='SF', sem_type='gauss')
# B_true, W_true, X = dataset_setting(n=500, d=20, graph_type='SF', sem_type='gauss')


# samples 2000
dataset_setting(n=2000, d=300, graph_type='SF', sem_type='mim')
dataset_setting(n=2000, d=100, graph_type='SF', sem_type='mim')
dataset_setting(n=2000, d=20, graph_type='SF', sem_type='mim')

# samples 500
dataset_setting(n=500, d=300, graph_type='SF', sem_type='mim')
dataset_setting(n=500, d=100, graph_type='SF', sem_type='mim')
B_true, W_true, X = dataset_setting(n=500, d=20, graph_type='SF', sem_type='mim')

# draw graph

dot = make_dot(W_true)

# Save png
dot.format = 'png'

dot.render('./dag')

from IPython.display import Image
Image(filename='dag.png') 

In [None]:
B_true, W_true, X = dataset_setting(n=500, d=5, graph_type='ER', sem_type='gauss')

# draw graph

dot = make_dot(W_true)

# Save png
dot.format = 'png'

dot.render('./dag')

from IPython.display import Image
Image(filename='dag.png') 

In [None]:
B_true

In [None]:
W_true

In [None]:
X.shape