# Precision 2

In [1]:
import os
import socket
import subprocess
import pandas as pd
import io
from contextlib import redirect_stdout
import itertools
import warnings
from datetime import datetime
import time
import timeout_decorator
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def islocal():
    """Return True when this machine's hostname is ``rc-idsia-macBook.local``."""
    hostname = socket.gethostname()
    return hostname == "rc-idsia-macBook.local"
def isserver():
    """Return True when this machine's hostname is ``758606aabdd8``."""
    hostname = socket.gethostname()
    return hostname == "758606aabdd8"
def strdate():
    """Return the current local time formatted as ``YYYY-mm-dd-HH-MM-SS``."""
    now = datetime.today()
    return now.strftime('%Y-%m-%d-%H-%M-%S')
def ishomeserver():
    """Return True when this machine's hostname is ``rc-aau-macBook``."""
    hostname = socket.gethostname()
    return hostname == 'rc-aau-macBook'
# Show the hostname so the branch taken below can be checked against the cell output.
print(socket.gethostname())

# Select the CreMA checkout for the machine we are running on; the first two
# hosts also point JAVA_HOME at a specific JDK 12 installation.
if islocal():
    prj_path = "/Users/rcabanas/GoogleDrive/IDSIA/causality/dev/crema/CreMA"
    os.environ['JAVA_HOME'] = "/Library/Java/JavaVirtualMachines/openjdk-12.0.1.jdk/Contents/Home"
elif ishomeserver():
    prj_path = "/Users/rcabanas/work_java_crema/crema/CreMA"
    os.environ['JAVA_HOME']="/Library/Java/JavaVirtualMachines/adoptopenjdk-12.jdk/Contents/Home"
elif isserver():
    # JAVA_HOME is left untouched on this host.
    prj_path="/home/rcabanas/crema/CreMA/"
else:
    # NOTE(review): prj_path is not assigned in this branch, so the os.chdir
    # below raises NameError unless prj_path already exists in the kernel.
    warnings.warn("Unknown server, set prj_path variable manually.")
# Work from the project root: the Maven commands in the rebuild cell use
# paths relative to it (./external/...).
os.chdir(prj_path)
!pwd

758606aabdd8
/home/rcabanas/crema/CreMA


In [3]:
# Experiment locations, all derived from prj_path.
exp_folder = f"{prj_path}/papers/pgm20/experiments/"
# exp_folder already ends with "/", so the two paths below contain a doubled slash.
log_folder = f"{exp_folder}/logs/"
res_folder = f"{exp_folder}/results/"
# Jar handed to `java -cp` by run_java.
jarfile=f"{prj_path}/target/CreMA-0.1.1-SNAPSHOT-jar-with-dependencies.jar"

# When True, the next cell installs the external jars and rebuilds the project with Maven.
rebuild = False

In [4]:
# Optional rebuild, run through IPython shell escapes from the current working directory.
if rebuild:
    # Register the two jars from ./external in the local Maven repository.
    !mvn install:install-file -Dfile=./external/lpsolve55j.jar -DgroupId=net.sf.lpsolve -DartifactId=lp_solve -Dversion=5.5.2 -Dpackaging=jar
    !mvn install:install-file -Dfile=./external/polco.jar -DgroupId=ch.javasoft.polco -DartifactId=polco -Dversion=4.7.1 -Dpackaging=jar
    # Compile and assemble a single jar (presumably the one `jarfile` points to — confirm).
    !mvn clean compile assembly:single

In [5]:
# Set to True to replace the Java call with an `echo` of six zeros (dry run).
disable_java = False
@timeout_decorator.timeout(7*1*60)
def run_java(jarfile, javafile):
    """Launch ``java -cp <jarfile> <javafile>`` and return the finished process.

    Parameters
    ----------
    jarfile : str
        Classpath entry passed to ``java -cp``.
    javafile : str
        Source file to run followed by its command-line arguments, as one
        space-separated string.

    Returns
    -------
    subprocess.CompletedProcess
        ``stdout`` holds the captured standard output as text.

    Notes
    -----
    The call is aborted by ``timeout_decorator`` after 7 minutes.  The command
    is tokenised with ``shlex.split`` and started without a shell, so the kill
    that ``subprocess.run`` issues when an exception interrupts it is delivered
    to the launched program itself rather than to an intermediate ``sh``.
    """
    import shlex  # local import: only this function needs it

    cmd = f"java -cp {jarfile} {javafile}"
    if disable_java:
        cmd = "echo 0,0,0,0,0,0"
    print(cmd)
    result = subprocess.run(
        shlex.split(cmd),
        universal_newlines=True,
        stdout=subprocess.PIPE,
    )
    print(result.stdout)
    return result

# ChainNonMarkovian 6 5 1 -1 0 CCALP 1234
def run_chain(model, N, endovarsize, exovarsize, target, obsvar, dovar, method, seed):
    """Run one ``RunExperiments.java`` measurement and parse its result line.

    Parameters
    ----------
    model : str
        Model name passed as the first program argument.
    N : int
        Length argument; also the base for resolving negative indices.
    endovarsize : int
        Value of ``-v``.
    exovarsize : int or None
        Value of ``-V``; a falsy value is replaced by ``endovarsize**2 + 1``.
    target, obsvar, dovar : int or None
        Values of ``-t``, ``-o`` and ``-d``.  A negative value ``k`` becomes
        ``N + k``.  ``None`` becomes ``N // 2`` for ``target`` and ``-1`` for
        ``obsvar`` and ``dovar``.
    method : str
        Value of ``-m``.
    seed : int
        Value of ``-s``.

    Returns
    -------
    list of float
        The comma-separated numbers on the last line of the program's stdout.
        If the run or the parsing fails (including a timeout raised by
        ``run_java``), returns ``[inf, inf]`` followed by ``2 * endovarsize``
        NaNs.
    """
    print(strdate())
    exovarsize = exovarsize or endovarsize*endovarsize + 1

    def resolve(index, default):
        # None -> default; negative -> counted from the end of the chain.
        if index is None:
            return default
        return N + index if index < 0 else index

    obsvar = resolve(obsvar, -1)
    dovar = resolve(dovar, -1)
    target = resolve(target, N//2)

    warmups = 0
    repetitions = 1
    eps = 0.0001
    #ChainNonMarkovian 4  -v 5 -V 9 -t 1 -o -1 -d 0 -m  CCALP --seed 12234 --warmups 0 --repetitions 1
    javafile = f"{exp_folder}/RunExperiments.java {model} {N} -v {endovarsize} -V {exovarsize} -t {target} -o {obsvar} -d {dovar} -m {method} -e {eps} -s {seed} -w {warmups} -r {repetitions} "
    try:
        result = run_java(jarfile, javafile)
        output = [float(x) for x in result.stdout.splitlines()[-1].split(",")]
    except Exception as err:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit can still stop a long sweep.
        print(f"run failed: {err!r}")
        output = [float("inf"), float("inf")] + [float("nan")] * (endovarsize*2)

    return output

def run_chain_markovian(N=4, endovarsize=2, exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"ChainMarkovian"``."""
    return run_chain(
        model="ChainMarkovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )

def run_chain_nonmarkovian(N=4, endovarsize=2,exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"ChainNonMarkovian"``."""
    return run_chain(
        model="ChainNonMarkovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )


def run_hmm_markovian(N=4, endovarsize=2, exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"HMM-Markovian"``."""
    return run_chain(
        model="HMM-Markovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )

def run_hmm_nonmarkovian(N=4, endovarsize=2,exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"HMM-NonMarkovian"``."""
    return run_chain(
        model="HMM-NonMarkovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )



def run_rhmm_markovian(N=4, endovarsize=2, exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"RHMM-Markovian"``."""
    return run_chain(
        model="RHMM-Markovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )

def run_rhmm_nonmarkovian(N=4, endovarsize=2,exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"RHMM-NonMarkovian"``."""
    return run_chain(
        model="RHMM-NonMarkovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )

def run_squares_markovian(N=4, endovarsize=2, exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"Squares-Markovian"``."""
    return run_chain(
        model="Squares-Markovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )

def run_squares_nonmarkovian(N=4, endovarsize=2,exovarsize=None, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` with the model fixed to ``"Squares-NonMarkovian"``."""
    return run_chain(
        model="Squares-NonMarkovian", N=N, endovarsize=endovarsize, exovarsize=exovarsize,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )



def run_chain_terbin_markovian(N=4, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` for ``"TerBinChainMarkovian"`` with ``endovarsize=2`` and no ``exovarsize``."""
    return run_chain(
        model="TerBinChainMarkovian", N=N, endovarsize=2, exovarsize=None,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )

def run_chain_terbin_nonmarkovian(N=4, target=None, obsvar=-1, dovar=0, method="CVE", seed=1234):
    """Call ``run_chain`` for ``"TerBinChainNonMarkovian"`` with ``endovarsize=2`` and no ``exovarsize``."""
    return run_chain(
        model="TerBinChainNonMarkovian", N=N, endovarsize=2, exovarsize=None,
        target=target, obsvar=obsvar, dovar=dovar, method=method, seed=seed,
    )



In [6]:
# Smoke test: a single HMM-NonMarkovian run with N=5 and the CCVE method.
run_hmm_nonmarkovian(N=5, method="CCVE")

2020-05-28-18-30-27
java -cp /home/rcabanas/crema/CreMA//target/CreMA-0.1.1-SNAPSHOT-jar-with-dependencies.jar /home/rcabanas/crema/CreMA//papers/pgm20/experiments//RunExperiments.java HMM-NonMarkovian 5 -v 2 -V 5 -t 2 -o 4 -d 0 -m CCVE -e 0.0001 -s 1234 -w 0 -r 1 

HMM-NonMarkovian
   N=5 endovarsize=2 exovarsize=5 target=2 obsvar=4 dovar=0 method=CCVE seed=1234
Running experiments...
deleted = [1, 3, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17]
Measurement #0 in 785.94 ms.
[0.9660574412532636, 0.9660574412532636]
785.94,38.22,0.9660574412532636,0.9660574412532636,0.033942558746736295,0.033942558746736295



[785.94,
 38.22,
 0.9660574412532636,
 0.9660574412532636,
 0.033942558746736295,
 0.033942558746736295]

In [7]:

def run_experiments(f, args, outkeys, fargs=None, verbose=False, lenght_dep_vars = None, non_evaluable=None):
    """Evaluate ``f`` on every combination of ``args`` and tabulate the results.

    Parameters
    ----------
    f : callable
        Called as ``f(**row)`` for each configuration; must return one number
        per entry of ``outkeys``.  ``f.__name__`` is used in the log file name.
    args : dict
        Maps each argument name to an iterable of values; the Cartesian
        product of the values defines the configurations.
    outkeys : iterable of str
        Column names for the values returned by ``f``.
    fargs : dict, optional
        Maps a name to a function of the configuration row; its value is
        added to the configuration as a derived argument.
    verbose : bool, optional
        If false (default), output printed during each run is captured and
        appended to the log file instead of being shown.
    lenght_dep_vars : list of str, optional
        Argument names ignored when comparing a configuration with failed
        ones.  Defaults to ``["N"]``.
    non_evaluable : list of dict, optional
        Configurations already known to fail.  Every configuration whose
        result contains a NaN is added to the list in use.

    Returns
    -------
    pandas.DataFrame
        One row per configuration: the columns of ``args``, then ``outkeys``,
        then the derived ``fargs`` columns.  A configuration matching a failed
        one (ignoring ``lenght_dep_vars``) is not run and gets NaN outputs.
    """
    print("=========")
    print(args)
    print("=========")

    outkeys = list(outkeys)
    log_file = f"{log_folder}{strdate()}_{f.__name__}.txt"
    # Make sure the log directory exists before the first append.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    data = pd.DataFrame(list(itertools.product(*list(args.values()))), columns=list(args.keys()))

    fargs = fargs or {}
    for k, v in fargs.items():
        data[k] = data.apply(v, axis=1)

    non_evaluable = non_evaluable or []
    lenght_dep_vars = lenght_dep_vars or ["N"]

    def without_length_vars(config):
        # Drop the size-related keys so configurations compare across sizes.
        return {k: v for (k, v) in config.items() if k not in lenght_dep_vars}

    def is_evaluable(args):
        current = without_length_vars(args)
        previous = [without_length_vars(a) for a in non_evaluable]

        print(f"current: {current}")
        print(f"previous: {previous}")

        return current not in previous

    def single_experiment(argsv):
        if is_evaluable(argsv):
            outvals = f(**argsv)
            if np.isnan(outvals).any():
                non_evaluable.append(argsv)
                print(f"setting as not evaluable: {argsv}")
        else:
            outvals = [float("nan")] * len(outkeys)
        return outvals

    # Rows are collected in a list and turned into a frame once at the end;
    # DataFrame.append is gone in pandas 2 and was quadratic anyway.
    records = []
    # "records" is the full orient name; the "row" abbreviation is rejected by pandas 2.
    for argsv in data.to_dict(orient="records"):
        strio = io.StringIO()

        print(strdate())
        try:
            if verbose:
                outvals = single_experiment(argsv)
            else:
                with redirect_stdout(strio):
                    print(strdate())
                    outvals = single_experiment(argsv)
        finally:
            # Persist whatever was captured, even if the experiment raised.
            with open(log_file, 'a+') as logger:
                logger.write(strio.getvalue())

        record = {**argsv, **dict(zip(outkeys, outvals))}
        records.append(record)
        print(record)
        print("\n\n")

    columns = list(args.keys()) + outkeys
    columns += [c for c in data.columns if c not in columns]
    return pd.DataFrame(records, columns=columns)

def get_args(**kwargs):
    """Return the keyword arguments it was called with, as a dict."""
    return kwargs

##




In [8]:
# Fifty seeds: one per odd i below 100, i.e. 1234 + 617*i.
SEED = [1234 + (i * 1234) // 2 for i in range(1, 100, 2)]
# Result frames are stored here under the experiment name.
res = dict()
#SEED = [1234+i*1234 for i in range(3)]
len(SEED)

50

In [9]:
# Precision experiment: non-Markovian chain with evidence.
# Disabled; switch the guard to True to run it.
if False:
    endovarsize = 2
    args = {
        "N": range(3, 11),
        "dovar": [0],
        "obsvar": [-1],
        "seed": SEED,
        "exovarsize": [6],
        "method": ["CCVE", "CCALPeps"],
    }
    # time, query_time, then a lower/upper bound pair per state.
    outkeys = ["time", "query_time"] + [
        name
        for i in range(0, endovarsize)
        for name in (f"lowerbound{i}", f"upperbound{i}")
    ]

    results1 = run_experiments(run_chain_nonmarkovian, args, outkeys)
    res_file = f"{res_folder}{strdate()}_chain_nonmarkovian_prec.csv"
    results1.to_csv(res_file)

    res["chain_nonmarkovian_prec"] = results1

In [10]:
# Precision experiment: non-Markovian HMM with evidence.
# Disabled; switch the guard to True to run it.
if False:
    endovarsize = 2
    args = {
        "N": range(3, 11),
        "dovar": [0],
        "seed": SEED,
        "exovarsize": [6],
        "method": ["CCVE", "CCALPeps"],
    }
    # time, query_time, then a lower/upper bound pair per state.
    outkeys = ["time", "query_time"] + [
        name
        for i in range(0, endovarsize)
        for name in (f"lowerbound{i}", f"upperbound{i}")
    ]
    # target and obsvar are derived from N for every configuration row.
    fargs = {
        "target": lambda t: int(((t["N"]-1)//2)*2),
        "obsvar": lambda t: int(t["N"]*2 - 1),
    }

    lenght_dep_vars = ["N", "target", "obsvar"]

    results1 = run_experiments(
        run_hmm_nonmarkovian, args, outkeys, fargs,
        lenght_dep_vars=lenght_dep_vars,
    )
    res_file = f"{res_folder}{strdate()}_hmm_nonmarkovian_prec.csv"
    results1.to_csv(res_file)

    res["hmm_nonmarkovian_prec"] = results1

In [None]:
# Precision experiment: non-Markovian RevHMM with evidence.
# This is the cell that is currently enabled.
if True:
    endovarsize = 2
    args = {
        "N": [3, 4, 5, 6],
        "dovar": [0],
        "seed": SEED,
        "exovarsize": [6],
        "method": ["CCVE", "CCALPeps"],
    }
    # time, query_time, then a lower/upper bound pair per state.
    outkeys = ["time", "query_time"] + [
        name
        for i in range(0, endovarsize)
        for name in (f"lowerbound{i}", f"upperbound{i}")
    ]
    # target and obsvar are derived from N for every configuration row.
    fargs = {
        "target": lambda t: int(((t["N"]-1)//2)*2),
        "obsvar": lambda t: int(t["N"]*2 - 2),
    }

    lenght_dep_vars = ["N", "target", "obsvar"]

    results1 = run_experiments(
        run_rhmm_nonmarkovian, args, outkeys, fargs,
        lenght_dep_vars=lenght_dep_vars,
    )
    res_file = f"{res_folder}{strdate()}_rhmm_nonmarkovian_prec.csv"
    results1.to_csv(res_file)

    res["rhmm_nonmarkovian_prec"] = results1

{'N': [3, 4, 5, 6], 'dovar': [0], 'seed': [1851, 3085, 4319, 5553, 6787, 8021, 9255, 10489, 11723, 12957, 14191, 15425, 16659, 17893, 19127, 20361, 21595, 22829, 24063, 25297, 26531, 27765, 28999, 30233, 31467, 32701, 33935, 35169, 36403, 37637, 38871, 40105, 41339, 42573, 43807, 45041, 46275, 47509, 48743, 49977, 51211, 52445, 53679, 54913, 56147, 57381, 58615, 59849, 61083, 62317], 'exovarsize': [6], 'method': ['CCVE', 'CCALPeps']}
2020-05-28-18-30-29
{'N': 3, 'dovar': 0, 'seed': 1851, 'exovarsize': 6, 'method': 'CCVE', 'time': 713.574, 'query_time': 96.545, 'lowerbound0': 0.2044636199320751, 'upperbound0': 0.2044636199320751, 'lowerbound1': 0.7955363800679248, 'upperbound1': 0.7955363800679248, 'obsvar': 4.0, 'target': 2.0}



2020-05-28-18-30-31
{'N': 3, 'dovar': 0, 'seed': 1851, 'exovarsize': 6, 'method': 'CCALPeps', 'time': 1942.604, 'query_time': 1876.008, 'lowerbound0': 0.20464914497150388, 'upperbound0': 0.20464914497150427, 'lowerbound1': 0.7953508550284957, 'upperbound1': 0.

In [None]:
# Precision experiment: non-Markovian Squares model with evidence.
# Disabled; switch the guard to True to run it.
if False:
    endovarsize = 2
    args = {
        "N": list(range(3, 11)),
        "dovar": [0],
        "seed": SEED,
        "exovarsize": [6],
        "method": ["CCVE", "CCALPeps"],
    }
    # time, query_time, then a lower/upper bound pair per state.
    outkeys = ["time", "query_time"] + [
        name
        for i in range(0, endovarsize)
        for name in (f"lowerbound{i}", f"upperbound{i}")
    ]
    # target and obsvar are derived from N for every configuration row.
    fargs = {
        "target": lambda t: int(((t["N"]-1)//2)*2),
        "obsvar": lambda t: int(t["N"]*2 - 1),
    }

    lenght_dep_vars = ["N", "target", "obsvar"]

    results1 = run_experiments(
        run_squares_nonmarkovian, args, outkeys, fargs,
        lenght_dep_vars=lenght_dep_vars,
    )
    res_file = f"{res_folder}{strdate()}_squares_nonmarkovian_prec.csv"
    results1.to_csv(res_file)

    res["squares_nonmarkovian_prec"] = results1


In [None]:
# NOTE(review): looks like a run-time estimate converted from seconds to hours —
# confirm what 25, 2 and 100 stand for.
(25 * 2 * 100)/60/60