EM size vs number of points
===========================

In [None]:
import os
import pandas as pd
import numpy as np
from pathlib import Path

import config
#from experiments import *

In [None]:
# IMPORTANT: set the following paths and the JAVA_HOME environment variable

# Project root: the parent of the current working directory, made absolute.
config.prj_path = Path("../").resolve()

# Folder layout derived from the project root. Each folder value ends with "/"
# because later code appends file names directly to log_folder and res_folder.
config.exp_folder = f"{config.prj_path}/experiments/"
config.log_folder = f"{config.exp_folder}/logs/"
config.res_folder = f"{config.exp_folder}/results/"
config.model_folder = f"{config.exp_folder}/models/"
# Jar passed to `java -cp` when the experiments are launched.
config.jarfile=f"{config.prj_path}/target/experiments.jar"

# Machine-specific JDK location; adjust it to the local installation.
os.environ['JAVA_HOME'] = "/Library/Java/JavaVirtualMachines/openjdk-12.0.1.jdk/Contents/Home"

# When True, the Maven build cell below rebuilds the package.
rebuild = True

In [None]:
# Set the project path as the working directory.
# config.prj_path is a Path object, so it is compared with Path.cwd(): comparing
# it with the str returned by os.getcwd() is always unequal, which made the
# guard useless.
if Path.cwd() != config.prj_path:
    os.chdir(config.prj_path)

In [None]:
# Rebuild the package if required
if rebuild:
    output = !mvn clean package | grep BUILD\ SUCCESS | wc -l 
if int(output[0])>0: print("Build success") 
else: print("error")

Note that all the cells below are disabled; to run one, change the `if` condition in the desired block of experiments.

In [None]:
import config
import os
import socket
import subprocess
import pandas as pd
import io
from contextlib import redirect_stdout
import itertools
import warnings
from datetime import datetime
import time
import timeout_decorator
import numpy as np
import matplotlib.pyplot as plt


def print_project():
    """Print the project root stored in ``config.prj_path``."""
    project_root = config.prj_path
    print(project_root)



def strdate():
    """Return the current local date and time as ``YYYY-MM-DD-HH-MM-SS``."""
    timestamp_format = '%Y-%m-%d-%H-%M-%S'
    now = datetime.today()
    return now.strftime(timestamp_format)



#@timeout_decorator.timeout(8*1*60)
def run_java(jarfile, javafile):
    """Launch ``java -cp <jarfile> <javafile>`` in a shell and return the result.

    When ``config.disable_java`` is true, the command ``echo []`` is run
    instead. The command line and the captured standard output are both
    printed. The returned ``subprocess.CompletedProcess`` carries the
    standard output as text in ``.stdout``.
    """
    if config.disable_java:
        command = "echo []"
    else:
        command = f"java -cp {jarfile} {javafile}"
    print(command)

    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    print(completed.stdout)
    return completed


def run(model, numberPoints, numberEMiter=100, samples=2000, filter=True):
    """Run ``RunExperiments.java`` on one model and return its parsed output.

    The arguments are passed on the command line as
    ``-N <numberPoints> -n <numberEMiter> -s <samples> -m <model path>``,
    with ``-f`` added when ``filter`` is true, e.g.
    ``-N 20 -n 10 -s 2000 -m ./models/poly4_12181.uai``.

    Parameters
    ----------
    model : file name of the model, looked up under ``config.model_folder``.
    numberPoints : value passed as ``-N``.
    numberEMiter : value passed as ``-n``.
    samples : value passed as ``-s``.
    filter : when true, the ``-f`` flag is added.

    Returns
    -------
    The object obtained by evaluating the text printed between the
    ``<output>`` and ``</output>`` marker lines. When the run fails or the
    markers cannot be located, ``[{}]`` is returned; this is the value that
    ``run_experiments`` treats as "not evaluable".
    """
    flag = "-f" if filter else ""
    javafile = f"{config.exp_folder}/RunExperiments.java {flag} -N {numberPoints} -n {numberEMiter}  -s {samples} -m {config.model_folder}/{model}"
    try:
        lines = run_java(config.jarfile, javafile).stdout.splitlines()
        # Exactly one opening and one closing marker are expected; any other
        # count makes the unpacking fail and is handled as a failed run.
        start, end = [i for i, line in enumerate(lines) if line in ("<output>", "</output>")]
        # NOTE(security): eval() executes whatever the Java process printed.
        # Only use this with a trusted jar and trusted model files.
        output = eval("".join(lines[start + 1:end]))
    except Exception as err:
        # The original fallback `{[]}` raised TypeError (a list is unhashable),
        # so a failed run crashed instead of returning the failure sentinel.
        print(f"run failed for {model}: {err!r}")
        output = [{}]
    return output


In [None]:
def run_experiments(f, args, fargs=None, verbose=False, lenght_dep_vars = None, non_evaluable=None):
    """Call ``f`` on every combination of the values in ``args``.

    Parameters
    ----------
    f : callable
        Invoked as ``f(**row)`` for each combination. It is expected to
        return a list of dicts, one per output row; the value ``[{}]``
        marks the combination as not evaluable.
    args : dict
        Maps each argument name to the list of values to try. The Cartesian
        product of these lists defines the combinations.
    fargs : dict, optional
        Maps a column name to a function of a row. Each function is applied
        row-wise and its result is added as an extra argument of ``f``.
    verbose : bool
        When false, everything printed while an experiment runs is captured
        and appended to a log file in ``config.log_folder``. When true, it
        is printed directly and nothing is written to the log.
    lenght_dep_vars : list of str, optional
        Argument names ignored when a combination is compared with the ones
        already marked as not evaluable. Defaults to ``["N"]``.
    non_evaluable : list of dict, optional
        Combinations already known to be not evaluable. Newly found ones are
        appended to this list.

    Returns
    -------
    pandas.DataFrame or None
        One row per dict returned by ``f``, each merged with the arguments
        of its combination. Skipped combinations contribute one row holding
        only their arguments. ``None`` when there is no combination to run.
    """
    print("=========")
    print(args)
    print("=========")

    log_file = f"{config.log_folder}{strdate()}_{f.__name__}.txt"

    data = pd.DataFrame(list(itertools.product(*args.values())), columns=list(args))

    for name, func in (fargs or {}).items():
        data[name] = data.apply(func, axis=1)

    # A None default avoids sharing one list between calls; a list passed by
    # the caller (even an empty one) is extended in place.
    if non_evaluable is None:
        non_evaluable = []
    lenght_dep_vars = lenght_dep_vars or ["N"]

    def strip_dep_vars(argsv):
        """Drop the arguments that do not take part in the comparison."""
        return {k: v for k, v in argsv.items() if k not in lenght_dep_vars}

    def is_evaluable(argsv):
        """Tell whether no equivalent combination has already failed."""
        current = strip_dep_vars(argsv)
        previous = [strip_dep_vars(a) for a in non_evaluable]

        print(f"current: {current}")
        print(f"previous: {previous}")

        return current not in previous

    def single_experiment(argsv):
        """Run ``f`` on one combination, or skip it if known to fail."""
        if not is_evaluable(argsv):
            # An empty dict keeps the combination in the result with its
            # arguments only. The original `[None]` made the `**` merge
            # below raise TypeError.
            return [{}]
        outvals = f(**argsv)
        if outvals == [{}]:
            non_evaluable.append(argsv)
            print(f"setting as not evaluable: {argsv}")
        return outvals

    frames = []
    # "records" is the full name of the orientation; the abbreviation "row"
    # used originally is rejected by pandas 2.x.
    for argsv in data.to_dict(orient="records"):
        captured = io.StringIO()
        print(strdate())
        with open(log_file, 'a+') as logger:
            try:
                if verbose:
                    outvals = single_experiment(argsv)
                else:
                    with redirect_stdout(captured):
                        print(strdate())
                        outvals = single_experiment(argsv)
            finally:
                # Keep whatever was captured, even if the experiment raised.
                logger.write(captured.getvalue())

        frames.append(pd.DataFrame([{**argsv, **v} for v in outvals]))
        print("\n\n")

    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)

def get_args(**kwargs):
    """Return the keyword arguments received, collected in a dict."""
    collected = kwargs
    return collected

In [None]:
# Run the experiments on every model file whose name starts with `label`
label = "tree4"
files = [f for f in os.listdir(config.model_folder) if f.startswith(label)]
args = dict(model=files, numberPoints = [20])
results = run_experiments(run, args)

# Save the results under a timestamped name and report where they were
# written, as the other experiment cells of this notebook do
res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)
print(res_file)

In [None]:
# Run the experiments on every model file whose name starts with `label`
label = "poly4"
files = [f for f in os.listdir(config.model_folder) if f.startswith(label)]
args = dict(model=files, numberPoints = [20])
results = run_experiments(run, args)

# Save the results under a timestamped name and report where they were
# written, as the other experiment cells of this notebook do
res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)
print(res_file)

In [None]:
# Experiments on the model files whose name starts with `label`
label = "tree6"
files = [name for name in os.listdir(config.model_folder) if name.startswith(label)]
args = {"model": files, "numberPoints": [20]}
results = run_experiments(run, args)

# Write the results to a timestamped CSV file and show its path
res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)
print(res_file)

In [None]:
# Experiments on the model files whose name starts with `label`
label = "poly6"
files = [name for name in os.listdir(config.model_folder) if name.startswith(label)]
args = {"model": files, "numberPoints": [20]}
results = run_experiments(run, args)

# Write the results to a timestamped CSV file and show its path
res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)
print(res_file)

In [None]:
# Experiments on the model files whose name starts with `label`
label = "tree10"
files = [name for name in os.listdir(config.model_folder) if name.startswith(label)]
args = {"model": files, "numberPoints": [20]}
results = run_experiments(run, args)

# Write the results to a timestamped CSV file and show its path
res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)
print(res_file)

In [None]:
# Experiments on the model files whose name starts with `label`
label = "poly10"
files = [name for name in os.listdir(config.model_folder) if name.startswith(label)]
args = {"model": files, "numberPoints": [20]}
results = run_experiments(run, args)

# Write the results to a timestamped CSV file and show its path
res_file = f"{config.res_folder}{strdate()}_{label}.csv"
results.to_csv(res_file)
print(res_file)