In [2]:
# Causal Discovery using PC algorithm on Tabular Data
from causalai.models.tabular.pc import PCSingle, PC
from causalai.models.common.CI_tests.partial_correlation import PartialCorrelation
from causalai.data.data_generator import DataGenerator # for generating data randomly
from causalai.models.common.prior_knowledge import PriorKnowledge
from causalai.data.tabular import TabularData # tabular data object
from causalai.data.transforms.time_series import StandardizeTransform

#### Generate a ground truth causal graph and random data from it, for illustration purposes
def fn(x):
    # Function applied to each parent term; it is the identity, so the SEM below is linear.
    return x

coef = 0.1 # weight shared by every edge of the graph

# Structural equation model (SEM) defining the ground truth causal graph.
# The table lists the parents of each variable; every parent contributes a
# (parent, coef, fn) term, e.g. b = coef* fn(a) + coef* fn(f) + noise.
sem_parents = {
    'a': [],
    'b': ['a', 'f'],
    'c': ['b', 'f'],
    'd': ['b', 'g'],
    'e': ['f'],
    'f': [],
    'g': [],
}
sem = {
    child: [(parent, coef, fn) for parent in parents]
    for child, parents in sem_parents.items()
}

T = 5000 # number of samples
data_array, var_names, graph_gt = DataGenerator(sem, T=T, seed=0, discrete=False)
# data_array: NumPy array of shape (T x 7)
# var_names:  ['a', 'b', 'c', 'd', 'e', 'f', 'g']
# graph_gt:   Python dictionary (the ground truth graph used for evaluation later)

### Standardize the data and create a CausalAI tabular data object
# NOTE(review): StandardizeTransform is imported from the time_series transforms
# module although the data here is tabular -- confirm this is the intended transform.
StandardizeTransform_ = StandardizeTransform()
StandardizeTransform_.fit(data_array) # fit the transform on the generated samples
data_trans = StandardizeTransform_.transform(data_array) # apply it to the same samples
data_obj = TabularData(data_trans, var_names=var_names) # object handed to PC below

### Run PC algorithm

# Prior knowledge is optional (pass None to run without any). Here it says that
# the link b->a is forbidden, i.e. the key is the child and the list holds the
# parents that are not allowed for it.
prior_knowledge = PriorKnowledge(forbidden_links={'a': ['b']}) 

pvalue_thres = 0.01 # p-value threshold passed to pc.run below
CI_test = PartialCorrelation() # CI test instance handed to PC
pc = PC(
        data=data_obj,
        prior_knowledge=prior_knowledge,
        CI_test=CI_test,
        use_multiprocessing=False
        )
# result is indexed by variable name; result[name]['parents'] is read below as
# the list of estimated parents of that variable.
result = pc.run(pvalue_thres=pvalue_thres, max_condition_set_size=2)

# Print the estimated causal graph and keep a copy of every parent list in
# graph_est (variable name -> list of estimated parents) for the evaluation step.
graph_est = {}
for key, info in result.items():
    parents = info['parents']
    graph_est[key] = list(parents)
    print(f'{key}: {parents}')

########### prints (the order of the parents inside each list may vary between runs)
# a: []
# b: ['c', 'a', 'd', 'f']
# c: ['b', 'f']
# d: ['g', 'b']
# e: ['f']
# f: ['c', 'b', 'e']
# g: ['d']
###########

### Evaluate the estimated causal graph; possible here because the ground truth is known
from causalai.misc.misc import plot_graph, get_precision_recall

# Compare the estimated parent lists (graph_est) with the ground truth graph (graph_gt).
precision, recall, f1_score = get_precision_recall(graph_est, graph_gt)
print(f'Precision {precision:.2f}, Recall: {recall:.2f}, F1 score: {f1_score:.2f}')
# Precision 0.64, Recall: 1.00, F1 score: 0.67
# NOTE(review): 0.67 is not the harmonic mean of 0.64 and 1.00 (that would be ~0.78).
# The three numbers are consistent with scores computed per variable and then
# averaged over the 7 variables -- confirm in get_precision_recall.

2024-07-12 12:52:48,751	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


a: []
b: ['c', 'a', 'd', 'f']
c: ['b', 'f']
d: ['g', 'b']
e: ['f']
f: ['c', 'b', 'e']
g: ['d']
Precision 0.64, Recall: 1.00, F1 score: 0.67
