In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr 
import matplotlib.pyplot as plt
from pathlib import Path
from typing import Tuple

from src.constants import SEEDS
sns.set_theme()

In [2]:
# Directory that receives every figure written by this notebook.
dirpath_save = Path('/home/gaurang/bayesian_network') / 'figures'

In [9]:
# Inference CSVs of the seed-777 run, one file per Bayesian network.
_dirpath_seed_777 = Path('/home/gaurang/bayesian_network/experiments/thesis/seed_777')

fpath_sachs = _dirpath_seed_777 / 'sachs' / 'sachs_inference.csv'
fpath_asia = _dirpath_seed_777 / 'asia' / 'asia_inference.csv'
fpath_cancer = _dirpath_seed_777 / 'cancer' / 'cancer_inference.csv'
fpath_alarm = _dirpath_seed_777 / 'alarm' / 'alarm_inference.csv'
fpath_child = _dirpath_seed_777 / 'child' / 'child_inference.csv'
fpath_water = _dirpath_seed_777 / 'water' / 'water_inference.csv'

fpath_hailfinder = _dirpath_seed_777 / 'hailfinder' / 'hailfinder_inference.csv'
fpath_hepar2 = _dirpath_seed_777 / 'hepar2' / 'hepar2_inference.csv'
fpath_diabetes = _dirpath_seed_777 / 'diabetes' / 'diabetes_inference.csv'
fpath_munin = _dirpath_seed_777 / 'munin' / 'munin_inference.csv'

In [39]:
# Per-network result directories of the multi-seed run 20230329_032224_.
_dirpath_run = Path('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_')

dirpath_sachs = _dirpath_run / 'sachs'
dirpath_asia = _dirpath_run / 'asia'
dirpath_cancer = _dirpath_run / 'cancer'
dirpath_alarm = _dirpath_run / 'alarm'
dirpath_child = _dirpath_run / 'child'
dirpath_water = _dirpath_run / 'water'

dirpath_hailfinder = _dirpath_run / 'hailfinder'
dirpath_hepar2 = _dirpath_run / 'hepar2'
dirpath_diabetes = _dirpath_run / 'diabetes'
dirpath_munin = _dirpath_run / 'munin'

In [11]:
# Read the seed-777 inference tables for the six networks inspected below,
# in the same order as the paths are listed.
df_sachs, df_asia, df_cancer, df_alarm, df_child, df_water = [
    pd.read_csv(fpath)
    for fpath in (fpath_sachs, fpath_asia, fpath_cancer, fpath_alarm, fpath_child, fpath_water)
]


In [None]:
# Display the Sachs inference table: one row per noise level with test loss/accuracy,
# the perturbed and ground-truth scores, and the edge-difference counts.
df_sachs

Unnamed: 0,loss_test,acc_test,bif_perturbed,bif_gt,target_node,noise,num_edges_gt,num_edges_perturb,num_gt_edges_retained,num_extra_edges,num_total_diff_edges
0,0.656972,0.6785,-14865.177943,-14865.177943,Akt,0.0,17.0,17.0,17,0,0.0
1,0.656763,0.678,-16340.509102,-14865.177943,Akt,0.2,17.0,17.0,13,4,8.0
2,0.652959,0.678,-18102.823038,-14865.177943,Akt,0.4,17.0,17.0,10,7,14.0
3,0.712966,0.6465,-18559.165008,-14865.177943,Akt,0.6,17.0,17.0,6,11,22.0
4,0.713918,0.6475,-18012.643736,-14865.177943,Akt,0.8,17.0,17.0,3,14,28.0
5,0.699999,0.6525,-18766.433965,-14865.177943,Akt,1.0,17.0,17.0,0,17,34.0


In [43]:
def plot_results(dirpath_inference: Path, dirpath_save: Path) -> Tuple[object, object, object]:
    """
    Plot accuracy and BIC against noise for one network, pooled over all seeds, and compute the
    pairwise Pearson correlations between noise, BIC and accuracy.

    Every sub-directory of ``dirpath_inference`` is treated as one seed and must contain a file
    named ``<network>_inference.csv``, where ``<network>`` is the stem of ``dirpath_inference``.
    The per-seed tables are concatenated and drawn as two seaborn line plots on twin y-axes
    (``acc_test`` in red on the left axis, ``bif_perturbed`` in blue on the right axis, labelled
    as the BIC score). The figure is saved to ``dirpath_save/<network>.jpg`` and then closed.

    Args:
        dirpath_inference: the directory of one network, holding one sub-directory per seed
        dirpath_save: the directory where the plot will be saved; created if it does not exist

    Returns:
        corr_noise_acc: the ``pearsonr`` result (statistic and p-value) for noise and accuracy
        corr_noise_bic: the ``pearsonr`` result (statistic and p-value) for noise and bic
        corr_bic_acc: the ``pearsonr`` result (statistic and p-value) for bic and accuracy

    Raises:
        ValueError: if ``dirpath_inference`` contains no seed sub-directories
    """
    fig_name = dirpath_inference.stem
    fpath_save = dirpath_save.joinpath(fig_name + ".jpg")

    # Only sub-directories are seed runs, so stray files next to them are ignored.
    # iterdir() yields entries in arbitrary order; sorting keeps the row order reproducible.
    dirpaths_seed = sorted(dpath for dpath in dirpath_inference.iterdir() if dpath.is_dir())
    if not dirpaths_seed:
        raise ValueError(f"No seed directories found in {dirpath_inference}")

    # ignore_index avoids the repeated 0..n-1 row labels that each per-seed CSV brings along.
    df = pd.concat(
        [pd.read_csv(dpath.joinpath(f"{fig_name}_inference.csv")) for dpath in dirpaths_seed],
        ignore_index=True,
    )

    noise = df["noise"].to_numpy()
    bic = df["bif_perturbed"].to_numpy()
    acc = df["acc_test"].to_numpy()

    # Correlations are computed before any figure exists so a failure here cannot leak one.
    corr_noise_acc = pearsonr(noise, acc)
    corr_noise_bic = pearsonr(noise, bic)
    corr_bic_acc = pearsonr(bic, acc)

    fig, ax1 = plt.subplots(figsize=(6, 6), dpi=150)
    try:
        sns.lineplot(data=df, x="noise", y="acc_test", color="red", ax=ax1)
        ax1.set_xlabel("Noise")
        ax1.set_ylabel("Accuracy")

        # Second y-axis sharing the x-axis, because BIC lives on a very different scale.
        ax2 = ax1.twinx()
        sns.lineplot(data=df, x="noise", y="bif_perturbed", color="blue", ax=ax2)
        ax2.set_ylabel("BIC Score")
        # Explicitly off (rather than toggled) so only one grid is drawn behind the curves.
        ax2.grid(False)

        # Combine the line handles of both axes into a single legend on the primary axis.
        ax1.legend([*ax1.lines, *ax2.lines], ["Accuracy", "BIC"], loc="upper right")

        dirpath_save.mkdir(parents=True, exist_ok=True)
        fig.savefig(fpath_save, bbox_inches="tight", dpi=150)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    return corr_noise_acc, corr_noise_bic, corr_bic_acc


In [51]:
# Plot the hepar2 results pooled over seeds and report the three Pearson correlations.
corr_noise_acc, corr_noise_bic, corr_bic_acc = plot_results(
    dirpath_inference=dirpath_hepar2,
    dirpath_save=dirpath_save,
)

for label, corr in (
    ('Corr noise acc: ', corr_noise_acc),
    ('Corr noise bic: ', corr_noise_bic),
    ('Corr bic acc: ', corr_bic_acc),
):
    print(label, corr)

Corr noise acc:  PearsonRResult(statistic=0.0, pvalue=0.9999999999999994)
Corr noise bic:  PearsonRResult(statistic=0.009030581344075, pvalue=0.9514247656284788)
Corr bic acc:  PearsonRResult(statistic=0.1504812956321256, pvalue=0.30729646933116955)


In [30]:
# Root of the multi-seed Sachs results; each child directory is named after one seed.
dirpath_sachs = Path('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs')

# Path.iterdir() yields entries in arbitrary order, so sort to get a reproducible listing.
dirpath_seeds = sorted(dirpath_sachs.iterdir())
dirpath_seeds

[PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/100'),
 PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/200'),
 PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/300'),
 PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/400'),
 PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/500'),
 PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/600'),
 PosixPath('/home/gaurang/bayesian_network/experiments/thesis/20230329_032224_/sachs/700')]

In [33]:
# One inference table per seed directory. The directories are visited in sorted order because
# Path.iterdir() yields them in arbitrary order, which would make each frame's position in the
# list (and the row order after concatenation) differ between runs.
# NOTE: this rebinds `df_sachs` from the single seed-777 DataFrame to a list of DataFrames.
df_sachs = [
    pd.read_csv(dirpath.joinpath('sachs_inference.csv'))
    for dirpath in sorted(dirpath_sachs.iterdir())
]

In [None]:
# NOTE(review): this cell looks unfinished. `df['seed']` only reads a column and discards the
# result, and the inference tables shown in this notebook have no 'seed' column, so the lookup
# raises KeyError. Presumably the intent was to tag each per-seed frame with its seed (from
# SEEDS or from the seed directory name) before concatenating — TODO confirm and complete.
# SEEDS is already imported in the first cell, so the import below is redundant.
from src.constants import SEEDS
for i, df in enumerate(df_sachs):
    df['seed']


In [34]:
# Stack the per-seed tables row-wise; the original row labels are kept, so each
# seed's 0..n-1 index repeats in the combined frame.
df_res = pd.concat(df_sachs, axis=0, ignore_index=False)
df_res

Unnamed: 0,loss_test,acc_test,bif_perturbed,bif_gt,target_node,noise,num_edges_gt,num_edges_perturb,num_gt_edges_retained,num_extra_edges,num_total_diff_edges
0,0.658416,0.69,-14894.466861,-14894.466861,Akt,0.0,17.0,17.0,17,0,0.0
1,0.658416,0.69,-16229.359903,-14894.466861,Akt,0.2,17.0,17.0,13,4,8.0
2,0.490967,0.8025,-21713.976831,-14894.466861,Akt,0.4,17.0,17.0,10,7,14.0
3,0.505291,0.794,-18007.815109,-14894.466861,Akt,0.6,17.0,17.0,6,11,22.0
4,0.485302,0.8025,-17954.695359,-14894.466861,Akt,0.8,17.0,19.0,4,15,28.0
5,0.776083,0.6325,-20342.331546,-14894.466861,Akt,1.0,17.0,17.0,0,17,34.0
0,0.652602,0.6895,-14859.679381,-14859.679381,Akt,0.0,17.0,17.0,17,0,0.0
1,0.694247,0.6645,-15781.014816,-14859.679381,Akt,0.2,17.0,17.0,13,4,8.0
2,0.653362,0.689,-16686.92005,-14859.679381,Akt,0.4,17.0,17.0,10,7,14.0
3,0.473897,0.812,-18792.827971,-14859.679381,Akt,0.6,17.0,17.0,6,11,22.0
