In [1]:
# Imports
import numpy as np
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
# %matplotlib notebook
## use `%matplotlib notebook` for interactive figures
plt.style.use('ggplot')

import tigramite
from tigramite import data_processing as pp
from tigramite import plotting as tp
from tigramite.pcmci import PCMCI

from tigramite.independence_tests.parcorr import ParCorr

In [2]:
def get_precision(ground_truth_parents: dict, predicted_parents: dict) -> float:
    """Fraction of predicted causal links that appear in the ground truth.

    Precision = TP / (TP + FP).

    Args:
        ground_truth_parents: Maps each effect to the list of its true causes.
        predicted_parents: Maps each effect to the list of its predicted causes.

    Returns:
        The precision in [0, 1]. Returns 0.0 when nothing was predicted, since
        the ratio is undefined in that case and would otherwise divide by zero.
    """
    true_positives = 0
    predicted_positives = 0
    for effect, causes in predicted_parents.items():
        # Effects missing from the ground truth have no true causes at all.
        true_causes = ground_truth_parents.get(effect, [])
        true_positives += sum(1 for cause in causes if cause in true_causes)
        predicted_positives += len(causes)

    if predicted_positives == 0:
        return 0.0
    return true_positives / predicted_positives

def get_recall(ground_truth_parents: dict, predicted_parents: dict):
    # Recall = TP / (TP + FN)
    true_positives = 0
    for effect, causes in predicted_parents.items():
        true_positives += len([cause for cause in causes if cause in ground_truth_parents.get(effect, [])])
        
    ground_truth_positives = sum([len(causes) for causes in ground_truth_parents.values()])
    
    return true_positives / ground_truth_positives

In [3]:
def test_toy_data(name, parents_extractor):
    """Score a parent-extraction method on one toy dataset and print the results.

    Reads the ground-truth causal process from
    ``toy_data/causal_process{name}.txt`` (first line only) and the data from
    ``toy_data/data{name}.csv``, runs ``parents_extractor`` on the data, then
    prints the predicted parents, the ground truth, and precision/recall/F1.

    Args:
        name: Suffix identifying the toy dataset files to load.
        parents_extractor: Callable that receives the tigramite DataFrame and
            returns a dict mapping each effect to its list of predicted causes.
    """
    # Load causal process dictionary
    with open(f'toy_data/causal_process{name}.txt') as f:
        line = f.readline()
        # NOTE(review): eval executes arbitrary code from the file. Only use
        # with trusted toy data; if the file is always a plain literal,
        # ast.literal_eval would be the safe replacement.
        causal_process = eval(line)
    # Load example data
    df = pd.read_csv(f'toy_data/data{name}.csv', header=None)
    dataframe = pp.DataFrame(df.values, var_names=df.columns)

    parents = parents_extractor(dataframe)

    # Compare results
    print(f'Predicted parents: {parents}')
    print(f'Causal process: {causal_process}')

    # The metric helpers take (ground_truth, predicted), in that order;
    # passing them the other way round swaps precision and recall.
    precision = get_precision(causal_process, parents)
    recall = get_recall(causal_process, parents)
    # F1 is undefined when both scores are zero; report 0.0 instead of crashing.
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1: {f1}')

In [4]:
def extract_parents_pcmci(dataframe, tau_max=3, pc_alpha=None):
    """Estimate the parents of every variable with PCMCI and a ParCorr test.

    Args:
        dataframe: tigramite DataFrame holding the time series to analyse.
        tau_max: Maximum time lag passed to ``run_pcmci`` (``tau_min`` is
            fixed at 1). Defaults to 3.
        pc_alpha: Significance level passed to ``run_pcmci``. The default
            ``None`` is intended to let tigramite optimize it over a list of
            candidate values (see the tigramite docs for the exact list).

    Returns:
        The dict produced by ``PCMCI.return_parents_dict`` from the resulting
        graph and value matrix.
    """
    parcorr = ParCorr(significance='analytic')
    pcmci = PCMCI(
        dataframe=dataframe,
        cond_ind_test=parcorr,
        verbosity=0
    )

    results = pcmci.run_pcmci(tau_min=1, tau_max=tau_max, pc_alpha=pc_alpha)

    return pcmci.return_parents_dict(graph=results['graph'], val_matrix=results['val_matrix'])

In [5]:
# Evaluate PCMCI (pc_alpha=None) on toy dataset 1.
test_toy_data(1, extract_parents_pcmci)

Predicted parents: {0: [(0, -1), (11, -2), (7, -1)], 1: [(1, -1), (8, -3), (2, -3), (11, -3)], 2: [(7, -2)], 3: [(13, -1)], 4: [(4, -1), (2, -3), (9, -1), (9, -2), (8, -3), (3, -1)], 5: [(3, -2), (5, -1), (9, -1)], 6: [(6, -1), (19, -2), (12, -2), (19, -3), (6, -2)], 7: [(7, -1), (12, -2), (13, -2)], 8: [(8, -1), (2, -2), (13, -1), (14, -3), (0, -1), (18, -2), (10, -1), (2, -1)], 9: [(9, -1), (11, -2), (4, -1), (11, -3), (12, -1), (2, -1), (18, -2), (7, -1), (3, -1)], 10: [(10, -1), (16, -1), (15, -1)], 11: [(11, -1)], 12: [(12, -1), (19, -3), (14, -3), (19, -1), (6, -1)], 13: [(3, -1), (6, -3), (13, -1), (18, -1), (8, -1), (10, -3), (4, -1), (5, -1), (13, -3), (6, -2)], 14: [(0, -3), (13, -3), (16, -3), (11, -2), (4, -2), (2, -1)], 15: [(15, -1), (14, -1), (8, -3), (8, -1), (12, -3)], 16: [(16, -1), (7, -3), (1, -2)], 17: [(17, -1), (18, -3), (4, -2), (12, -1), (12, -3), (15, -1)], 18: [(17, -1), (7, -2), (9, -3), (18, -1), (0, -1)], 19: [(19, -1), (0, -1), (6, -2), (10, -1), (11, -1)

In [14]:
def extract_parents_pcmci_2(dataframe, tau_max=3, pc_alpha=0.05):
    """Estimate the parents of every variable with PCMCI at a fixed pc_alpha.

    Args:
        dataframe: tigramite DataFrame holding the time series to analyse.
        tau_max: Maximum time lag passed to ``run_pcmci`` (``tau_min`` is
            fixed at 1). Defaults to 3.
        pc_alpha: Significance level passed to ``run_pcmci``. Defaults to
            0.05.

    Returns:
        The dict produced by ``PCMCI.return_parents_dict`` from the resulting
        graph and value matrix.
    """
    parcorr = ParCorr(significance='analytic')
    pcmci = PCMCI(
        dataframe=dataframe,
        cond_ind_test=parcorr,
        verbosity=0
    )

    results = pcmci.run_pcmci(tau_min=1, tau_max=tau_max, pc_alpha=pc_alpha)

    return pcmci.return_parents_dict(graph=results['graph'], val_matrix=results['val_matrix'])

In [15]:
# Evaluate PCMCI with the fixed pc_alpha=0.05 on toy dataset 1.
test_toy_data(1, extract_parents_pcmci_2)

Predicted parents: {0: [(0, -1)], 1: [(1, -1), (8, -3), (5, -1), (11, -2), (2, -3)], 2: [(2, -1), (17, -3), (16, -2), (1, -1), (16, -1)], 3: [(3, -1), (4, -2), (14, -2)], 4: [(4, -1), (2, -3), (9, -1), (5, -2)], 5: [(5, -1), (13, -2), (3, -2), (3, -1)], 6: [(6, -1), (19, -2), (11, -2), (6, -2)], 7: [(7, -1), (12, -2), (0, -1)], 8: [(8, -1), (2, -2), (13, -1), (18, -1), (11, -2), (10, -3)], 9: [(9, -1), (11, -1), (5, -3), (10, -3), (3, -1)], 10: [(10, -1), (16, -1), (5, -1)], 11: [(11, -1), (14, -1)], 12: [(12, -1), (19, -1), (14, -3), (6, -1), (19, -2)], 13: [(13, -1), (4, -3), (6, -3), (18, -1), (8, -1), (5, -1), (19, -2), (3, -1), (2, -3), (17, -1)], 14: [(0, -3), (13, -3), (16, -3), (14, -1), (12, -3), (3, -2), (6, -2), (10, -1), (11, -2)], 15: [(15, -1), (8, -3), (19, -3)], 16: [(16, -1), (7, -3), (1, -2)], 17: [(17, -1), (18, -3), (6, -2), (1, -3)], 18: [(18, -1), (14, -3), (7, -2), (5, -1)], 19: [(19, -1), (10, -1), (6, -2), (11, -1), (9, -1)]}
Causal process: {0: [(0, -1)], 1: [