In [85]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [224]:
import utils
# Load the results stored under the two synthetic result directories.
input = utils.load_data(
    ["../results/synthetic/1000/set1/", "../results/synthetic/1000/set2/"]
)

# Keep the rows computed with the exact method in their own frame, and flag a
# row as identifiable when its upper and lower PNS bounds coincide up to a
# small tolerance.
is_exact = input.method == "exact"
exact = input[is_exact].copy()
bound_gap = (exact.pns_u - exact.pns_l).abs()
exact["identifiable"] = bound_gap < 0.00001

In [225]:
# Dimensions (rows, columns) of the loaded results table.
input.shape

(2255, 21)

In [226]:
def plot_box(data3, bins, by, key, value, l=-0.25, u=1):
    """Save a two-panel box plot of the PNS bound errors grouped by ``by``.

    The top panel shows ``r_pns_u`` (difference with the exact upper bound)
    and the bottom panel ``r_pns_l`` (difference with the exact lower bound).
    Both panels share the same y-range. The figure is written to
    ``plots/error_by_{bins}_{by}_{key}_{value}.jpg`` and then closed, so
    nothing is displayed inline.

    Parameters
    ----------
    data3 : pandas.DataFrame
        Rows to plot; must contain ``r_pns_u``, ``r_pns_l`` and the ``by`` column.
    bins
        Only used to build the output file name.
    by : str
        Column whose values define the boxes on the x-axis.
    key, value
        Label of the subset being plotted; used in the figure title and in
        the output file name.
    l, u : float
        Requested lower / upper y-limits. They are widened outwards to the
        nearest multiple of 0.2 before being applied.
    """
    import math
    import os

    fig, ax = plt.subplots(2, 1, figsize=(15, 15))

    axu = data3.boxplot("r_pns_u", by=by, ax=ax[0])
    axl = data3.boxplot("r_pns_l", by=by, ax=ax[1])

    axu.set_title("difference with exact upper")
    axl.set_title("difference with exact lower")

    # Snap the limits outwards to multiples of 0.2 so every figure gets tidy,
    # comparable axes.
    l = math.floor(l * 5) / 5
    u = math.ceil(u * 5) / 5

    axu.set_ylim(l, u)
    axl.set_ylim(l, u)

    fig.suptitle(f"Errors on Pns by discretized Ps1. {key} == {value}")

    # savefig does not create missing directories; make sure the target exists.
    os.makedirs("plots", exist_ok=True)
    fig.savefig(f"plots/error_by_{bins}_{by}_{key}_{value}.jpg")
    # Close this specific figure (not just whichever one is "current") so the
    # many figures produced in a loop are released.
    plt.close(fig)


In [227]:
# Subsets to plot separately: every observed value of the three descriptive
# columns, plus both outcomes of the identifiability flag.
groups = {
    column: input[column].unique()
    for column in ("num_endogenous", "markovianity", "network_type")
}
groups["identifiable"] = [True, False]

In [228]:
# Columns the box plots are grouped by: the quantile-based and the
# equal-width discretization of ps1.
bys = [
    "quant_ps1",
    "disc_ps1",
]
# Bin counts to iterate over when discretizing ps1.
binss = [5]

In [230]:
# Add the exact bounds (and the identifiability flag) to all rows of the same file.
# Columns present on both sides get the default merge suffixes:
# `_x` is the row's own method, `_y` is the exact result.
data = input.merge(exact[["file", "identifiable", "pns_l", "pns_u"]], on="file")
# Keep only the EMCC rows flagged by `selector`. Take a copy so the column
# assignments below write to an independent frame instead of a slice.
data = data[(data.method == "EMCC") & (data.selector)].copy()

# Signed difference between the EMCC bounds and the exact bounds.
data["r_pns_l"] = data.pns_l_x - data.pns_l_y
data["r_pns_u"] = data.pns_u_x - data.pns_u_y

# Both panels of every figure share one y-range, so the range has to cover
# both error columns (not just the minimum of one and the maximum of the other).
errors = data[["r_pns_l", "r_pns_u"]]
lower = errors.min().min()
upper = errors.max().max()
data2 = data

for bins in binss:
    # Discretize ps1 into equal-width intervals and into quantile intervals.
    data2["disc_ps1"] = pd.cut(data2.ps1, bins=bins)
    data2["quant_ps1"] = pd.qcut(data2.ps1, q=bins, duplicates="drop")

    for by in bys:
        # Overall plot, drawn with the same y-range as the per-group plots so
        # the figures can be compared directly.
        plot_box(data2, bins, by, "All", "All", lower, upper)

        for key, values in groups.items():
            for value in values:
                # `mask` rather than `filter`, to avoid shadowing the builtin.
                mask = data2[key] == value
                plot_box(data2[mask], bins, by, key, value, lower, upper)


# Time plots

In [143]:
# Compare the EMCC runs with and without the `selector` flag. Start from the
# full table in `input`: `data` is narrowed to selector-only rows by the
# error-plot cell, so filtering it with `~selector` selects nothing when the
# notebook is run top to bottom.
sel = input[(input.method == "EMCC") & (input.selector)]
ps1 = input[(input.method == "EMCC") & (~input.selector)]

# Pair the rows by file; after the merge the `_x` columns come from `sel`
# and the `_y` columns from `ps1`.
data2 = sel.merge(ps1[["file", "time_pns", "time_ace", "time_learn"]], on="file")
# Ratio of the selector run's time to the non-selector run's time, per stage.
data2["r_time_pns"] = data2["time_pns_x"] / data2["time_pns_y"]
data2["r_time_ace"] = data2["time_ace_x"] / data2["time_ace_y"]
data2["r_time_learn"] = data2["time_learn_x"] / data2["time_learn_y"]


In [164]:
# Scatter each time ratio against ps1 for the quasi-markovian models only,
# saving one image per ratio.
quasi = data2[data2.markovianity == "quasi-markovian"]
for column, target in (
    ("r_time_learn", "plots/r_time_learn.jpg"),
    ("r_time_pns", "plots/r_time_pns.jpg"),
):
    quasi.plot.scatter("ps1", column)
    plt.savefig(target)
    plt.close()

In [178]:
# Summary statistics of the learning-time ratio, one column per value of
# num_endogenous.
learn_ratio_by_size = data2.groupby("num_endogenous")["r_time_learn"]
learn_ratio_by_size.describe().transpose()


num_endogenous,5,7
count,459.0,369.0
mean,1.162287,0.958071
std,0.728536,0.631727
min,0.001639,0.000449
25%,0.535145,0.428117
50%,1.201763,0.970471
75%,1.807446,1.489225
max,2.464846,2.117224


In [156]:
# Inspect the columns of the raw loaded table. `data` is reassigned to the
# merged, filtered frame in the error-plot cell, so it no longer has this
# layout when the notebook is run top to bottom.
input.columns

Index(['index', 'method', 'ace_l', 'pns_u', 'selector', 'ace_u', 'pns_l',
       'model_path', 'ps1', 'time_pns', 'time_ace', 'time_learn',
       'n_convergence', 'file', 'network_type', 'markovianity', 'max_distance',
       'num_endogenous', 'reduction_k', 'max_iter', 's_parents'],
      dtype='object')

In [158]:
# Distinct values of num_endogenous; note they are stored as strings.
data["num_endogenous"].unique()

array(['5', '7'], dtype=object)