In [1]:
import pandas as pd
import numpy as np
import os
import re
from collections import defaultdict

In [2]:
! ls ../experiments/networks/alarm/ges

ls: ../experiments/networks/alarm/ges: No such file or directory


In [3]:
for root, dirs, files in os.walk("."):
    for d in dirs:
        print(os.path.relpath(os.path.join(root, d), "."))
    for f in files:
        print(os.path.relpath(os.path.join(root, f), "."))

.ipynb_checkpoints
Experiment Cleaning.ipynb
.ipynb_checkpoints/Experiment Cleaning-checkpoint.ipynb


In [4]:
data_folder = "../experiments/local/raw/networks/"
networks = ["alarm"]#, "cancer", "earthquake", "mildew", "barley", "child", "insurance", "water"]
algorithms = ["ges", "hc", "pfhcbes", "pges", "phc"]
data_size = ["10k", "50k"]
threads = [2, 4, 6, 8]
interleaving = [5, 10, 15]
threads_str = list(map(lambda x: "T" + str(x), threads))
interleaving_str = list(map(lambda x: "I" + str(x), interleaving))

ending = "_global_results.csv"

In [5]:
comment = """
experiments = {}
for net in networks:
    experiments[net] = {}
    for alg in algorithms:
        net_paths = []
        experiments[net][alg] = {}
        
        for root, _, files in os.walk(data_folder + net + "/" + alg):
            for f in files:
                net_paths.append(os.path.relpath(os.path.join(root, f), "."))
        experiments[net][alg]["paths"] = net_paths
"""

In [6]:
experiments = {}
for net in networks:
    experiments[net] = {}
    net_paths = []
    for alg in algorithms:
        experiments[net][alg] = {}
        for size in data_size:
            experiments[net][alg][size] = {}
            if alg != "ges":
                for thread in threads_str:
                    experiments[net][alg][size][thread] = {}
                    for inter in interleaving_str:
                        experiments[net][alg][size][thread][inter] = {}
                        path = data_folder + net +"/"+ alg + "/" + net \
                                + size + str(thread) + "_" + str(inter) +  ending
                        experiments[net][alg][size][thread][inter]["path"] = path
                        experiments[net][alg][size][thread][inter]["data"] = pd.read_csv(path)
            else:
                path = data_folder + net +"/"+ alg + "/" + net \
                                + size + ending
                experiments[net][alg][size]["path"] = path
                experiments[net][alg][size]["data"] = pd.read_csv(path)

In [7]:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(experiments)

{   'alarm': {   'ges': {   '10k': {   'data':    SHD     BDeu Score      dfMM  dfMM plus  dfMM minus  Total time(s)
0   16 -110492.244849  1.027027       12.0        26.0              1,
                                       'path': '../experiments/local/raw/networks/alarm/ges/alarm10k_global_results.csv'},
                            '50k': {   'data':    SHD     BDeu Score      dfMM  dfMM plus  dfMM minus  Total time(s)
0    8 -529624.355066  0.378378        6.0         8.0              2,
                                       'path': '../experiments/local/raw/networks/alarm/ges/alarm50k_global_results.csv'}},
                 'hc': {   '10k': {   'T2': {   'I10': {   'data':    SHD     BDeu Score      dfMM  dfMM plus  dfMM minus  Total iterations  \
0   38 -111830.872655  1.783784       32.0        34.0                 6   

   Total time(s)  
0              0  ,
                                                           'path': '../experiments/local/raw/networks/alarm/hc/alarm10

In [8]:
df_control_alarm_10k = experiments["alarm"]["ges"]["10k"]["data"]
df_control_alarm_50k = experiments["alarm"]["ges"]["50k"]["data"]
        

In [9]:
df_control_alarm_50k

Unnamed: 0,SHD,BDeu Score,dfMM,dfMM plus,dfMM minus,Total time(s)
0,8,-529624.355066,0.378378,6.0,8.0,2


In [10]:
df_pges_alarm_50k_2T_5I = experiments["alarm"]["pges"]["50k"]["T2"]["I5"]["data"]
df_pges_alarm_50k_2T_5I

Unnamed: 0,SHD,BDeu Score,dfMM,dfMM plus,dfMM minus,Total iterations,Total time(s)
0,6,-528437.800533,0.216216,0.0,8.0,10,3


In [11]:
df_example = df_pges_alarm_50k_2T_5I - df_control_alarm_50k
df_example

Unnamed: 0,BDeu Score,SHD,Total iterations,Total time(s),dfMM,dfMM minus,dfMM plus
0,1186.554533,-2,,1,-0.162162,0.0,-6.0


In [12]:
df_example = df_example.add_suffix("_delta")
df_example

Unnamed: 0,BDeu Score_delta,SHD_delta,Total iterations_delta,Total time(s)_delta,dfMM_delta,dfMM minus_delta,dfMM plus_delta
0,1186.554533,-2,,1,-0.162162,0.0,-6.0


In [13]:
pd.concat([df_pges_alarm_50k_2T_5I, df_example], axis=1)

Unnamed: 0,SHD,BDeu Score,dfMM,dfMM plus,dfMM minus,Total iterations,Total time(s),BDeu Score_delta,SHD_delta,Total iterations_delta,Total time(s)_delta,dfMM_delta,dfMM minus_delta,dfMM plus_delta
0,6,-528437.800533,0.216216,0.0,8.0,10,3,1186.554533,-2,,1,-0.162162,0.0,-6.0


In [14]:
?pd.concat

In [25]:
from pathlib import Path
def save_experiment(df, output_path, output_file):    
    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_dir / output_file)  # can join path elements with / operator


In [26]:
def delta_score_control_ges(experiments):
    result = {}
    for net in experiments:
        result[net] = {}
        for alg in experiments[net]:
            result[net][alg] = {}
            for size in experiments[net][alg]:
                result[net][alg][size] = {}
                df_experiment = None
                df_control = experiments[net]["ges"][size]["data"]
                if alg != "ges":
                    for thread in experiments[net][alg][size]:
                        result[net][alg][size][thread] = {}
                        for inter in experiments[net][alg][size][thread]:
                            df_experiment = experiments[net][alg][size][thread][inter]["data"]
                            df_result = df_experiment - df_control
                            df_result.drop(["Total iterations"], axis=1, inplace=True)
                            df_result = df_result.add_suffix('_deltaGES')
                             
                            result[net][alg][size][thread][inter] = df_final
                            dir_path = "../experiments/local/clean/networks/" + net + "/" + alg
                            file_name = net + size + thread+ "_" + inter + "_" + "results.csv"
                            save_experiment(df_final, dir_path, file_name )
    return result

In [27]:
results_control_ges = delta_score_control_ges(experiments)

In [19]:
experiments["alarm"]["ges"]["10k"]["data"]

Unnamed: 0,SHD,BDeu Score,dfMM,dfMM plus,dfMM minus,Total time(s)
0,16,-110492.244849,1.027027,12.0,26.0,1


In [17]:
results_control_ges

{'alarm': {'ges': {'10k': {}, '50k': {}},
  'hc': {'10k': {'T2': {'I5':    SHD     BDeu Score      dfMM  dfMM plus  dfMM minus  Total iterations  \
     0   38 -111830.872655  1.783784       32.0        34.0                11   
     
        Total time(s)  BDeu Score_delta  SHD_delta  Total time(s)_delta  \
     0              1      -1338.627806         22                    0   
     
        dfMM_delta  dfMM minus_delta  dfMM plus_delta  
     0    0.756757               8.0             20.0  ,
     'I10':    SHD     BDeu Score      dfMM  dfMM plus  dfMM minus  Total iterations  \
     0   38 -111830.872655  1.783784       32.0        34.0                 6   
     
        Total time(s)  BDeu Score_delta  SHD_delta  Total time(s)_delta  \
     0              0      -1338.627806         22                   -1   
     
        dfMM_delta  dfMM minus_delta  dfMM plus_delta  
     0    0.756757               8.0             20.0  ,
     'I15':    SHD     BDeu Score      dfMM  dfMM pl

In [None]:
alarm10kT2_I5_global_results