EM size vs number of points
===========================

In [2]:
import os
import pandas as pd
import numpy as np
from pathlib import Path

import config
#from experiments import *

In [3]:
# IMPORTANT: set the following pahts and the JAVA_HOME environment variable

config.prj_path = Path("../").resolve()

config.exp_folder = f"{config.prj_path}/experiments/"
config.log_folder = f"{config.exp_folder}/logs/"
config.res_folder = f"{config.exp_folder}/results/"
config.model_folder = f"{config.exp_folder}/models/"
config.jarfile=f"{config.prj_path}/target/experiments.jar"

os.environ['JAVA_HOME'] = "/Library/Java/JavaVirtualMachines/openjdk-12.0.1.jdk/Contents/Home"

rebuild = True

In [4]:
# Set the project path as the working directory
if os.getcwd() != config.prj_path: os.chdir(config.prj_path)

In [15]:
# Rebuild the package if required
if rebuild:
    output = !mvn clean package | grep BUILD\ SUCCESS | wc -l 
if int(output[0])>0: print("Build success") 
else: print("error")

Build success


Note that all the cells below are disabled, just change the `if` condition in the desiered block of experiments 

In [93]:
import config
import os
import socket
import subprocess
import pandas as pd
import io
from contextlib import redirect_stdout
import itertools
import warnings
from datetime import datetime
import time
import timeout_decorator
import numpy as np
import matplotlib.pyplot as plt


def print_project():
    print(config.prj_path)



def strdate(): return datetime.today().strftime('%Y-%m-%d-%H-%M-%S')



@timeout_decorator.timeout(7*1*60)
def run_java(jarfile, javafile):
    cmd=f"java -cp {jarfile} {javafile}"
    if config.disable_java: cmd= "echo []"
    print(cmd)
    result = subprocess.run(cmd, universal_newlines = True, shell=True,stdout = subprocess.PIPE)
    print(result.stdout)
    return result


def run(model, numberPoints, numberEMiter=15, samples=2000):

    #--numberPoints 20 --numberEMiter 10 --samples 2000 --model ./models/scm2.uai
    # -N 20 -n 10 -s 2000 -m ./models/poly4_12181.uai
    javafile = f"{config.exp_folder}/RunExperiments.java -N {numberPoints} -n {numberEMiter}  -s {samples} -m {config.model_folder}/{model}"
    try:
        lines = run_java(config.jarfile, javafile).stdout.splitlines()
        j,k = [i for i in range(len(lines)) if lines[i] in ["<output>", "</output>"]]
        output = eval("".join([lines[i] for i in list(range(j+1, k))]))
    except:
        output = {[]}
    return output


In [94]:
output = run(model="poly4_12181.uai", numberPoints = 5)
output[0]


java -cp /Users/rcabanas/GoogleDrive/IDSIA/causality/dev/idsia-papers/2020-EM-causal/target/experiments.jar /Users/rcabanas/GoogleDrive/IDSIA/causality/dev/idsia-papers/2020-EM-causal/experiments//RunExperiments.java -N 5 -n 15  -s 2000 -m /Users/rcabanas/GoogleDrive/IDSIA/causality/dev/idsia-papers/2020-EM-causal/experiments//models//poly4_12181.uai

0 iterations .........
10 iterations ....
[P([6]) [0.24629316937383625, 0.006132544125294398, 0.4420742865008664, 0.24382926829268672, 0.06167073170731627], P([5]) [0.24244713530504383, 0.06665184312044503, 0.008133158638761658, 0.4004010215745073, 0.28236684136124224], P([4]) [0.4307093101551249, 0.06959000000593642, 0.45550240859061636, 0.021360903938913857, 0.022837377309408467], P([3, 6]) [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], P([0, 2, 3, 4]) [0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1

{'Pcn0_lbound': 0.8390320476219654,
 'Pcn0_ubound': 0.8390320476219654,
 'Pcn1_lbound': 0.16096795237803463,
 'Pcn1_ubound': 0.16096795237803463,
 'Ptrue0_lbound': 0.8072,
 'Ptrue0_ubound': 0.98,
 'Ptrue1_lbound': 0.02,
 'Ptrue1_ubound': 0.19280000000000003,
 'Puq0_lbound': 0.8390320476219654,
 'Puq0_ubound': 0.8390320476219654,
 'Puq1_lbound': 0.16096795237803463,
 'Puq1_ubound': 0.16096795237803463,
 'num_points': 1.0}

In [95]:
def run_experiments(f, args, fargs=None, verbose=False, lenght_dep_vars = None, non_evaluable=[]):


    print("=========")
    print(args)
    print("=========")
    
    outkeys = ["output"]
 
    result = None
    log_file = f"{config.log_folder}{strdate()}_{f.__name__}.txt"

    data = pd.DataFrame(list(itertools.product(*list(args.values()))), columns = args.keys())

    fargs = fargs or {}
    for k,v in fargs.items():
        data[k]=data.apply(v, axis=1)


    non_evaluable = non_evaluable or []

    lenght_dep_vars = lenght_dep_vars or ["N"]

    def is_evaluable(args):
        current = {k:v for (k,v) in args.items() if k not in lenght_dep_vars}
        previous = [{k:v for (k,v) in a.items() if k not in lenght_dep_vars} for a in non_evaluable]

        print(f"current: {current}")
        print(f"previous: {previous}")


        return not current in previous

    def single_experiment(argsv):
        if is_evaluable(argsv):
            outvals = f(**argsv)
            if outvals == [{}]:
                non_evaluable.append(argsv)
                print(f"setting as not evaluable: {argsv}")
        else:
            outvals=[{}]
        return outvals


    for argsv in data.to_dict(orient="row"):

        strio = io.StringIO()
        print(strdate())
        with open(log_file, 'a+') as logger:
            if verbose == False:
                with redirect_stdout(strio):
                    print(strdate())
                    outvals = single_experiment(argsv)
            else:
                outvals = single_experiment(argsv)

            logger.write(strio.getvalue())


        r = pd.DataFrame([{**argsv, ** v} for v in outvals])
        if result is None:
            result= r
        else:
            result = pd.concat([result, r], ignore_index=True)
        print("\n\n")

    return result

def get_args(**kwargs): return kwargs

In [96]:
# example
args = dict(model=["poly4_12181.uai", "tree6_47048.uai"], numberPoints = [2])
out = run_experiments(run, args)

{'model': ['poly4_12181.uai', 'tree6_47048.uai'], 'numberPoints': [2]}
2020-10-05-18-09-59







2020-10-05-18-10-08





In [110]:
label = "tree"
files = [f for f in os.listdir(config.model_folder) if f.startswith(label)]
args = dict(model=files, numberPoints = [10])
results = run_experiments(run, args)

res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)

{'model': ['tree4_48362.uai', 'tree4_54366.uai'], 'numberPoints': [2]}
2020-10-05-18-19-53



2020-10-05-18-20-00





In [111]:
label = "poly"
files = [f for f in os.listdir(config.model_folder) if f.startswith(label)]
args = dict(model=files, numberPoints = [10])
results = run_experiments(run, args)

res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)

{'model': ['poly4_52551.uai', 'poly4_52961.uai', 'poly4_73359.uai', 'poly6_11613.uai', 'poly4_58657.uai', 'poly4_77924.uai', 'poly4_33912.uai', 'poly4_82302.uai', 'poly4_58959.uai', 'poly6_39497.uai', 'poly6_28808.uai', 'poly6_87150.uai', 'poly6_59370.uai', 'poly6_98512.uai', 'poly6_63710.uai', 'poly6_56998.uai', 'poly4_70842.uai', 'poly6_42643.uai', 'poly4_12181.uai', 'poly6_46754.uai'], 'numberPoints': [10]}
2020-10-05-18-22-59


TypeError: unhashable type: 'list'