In [5]:
import argparse
import json
import os
import pandas as pd
from pprint import pprint
import numpy as np
from vega import VegaLite
from ipywidgets import widgets, interact, interactive, fixed, interact_manual
from IPython.display import clear_output

# Directory whose sub-folders are scanned for experiment output; this is the
# current working directory (".").
OUTPUT_DIR = os.curdir
# Value stored as a run's "time" when its log reports none; a run whose time is
# at or above this value is flagged as unsolved. Presumably seconds, matching
# the "# time used (s)" log marker.
MAX_TIME = 600

def parse_log_content(exp_id, data_id, lines):
    """Summarise the lines of one experiment log as a status dictionary.

    Args:
        exp_id: Label of the experiment the log belongs to; stored unchanged.
        data_id: Label of the benchmark the log belongs to; stored unchanged.
        lines: The log's lines, as a list of strings.

    Returns:
        A dict with the keys ``exp_id``, ``data_id``, ``num_candidates``,
        ``table_prog``, ``vis_spec``, ``time`` and ``solved``:

        * ``num_candidates`` holds, for every "candidates before getting the
          correct solution" line, the reported integer plus one.
        * ``time`` is the value of the last "time used (s)" line, or
          ``MAX_TIME`` when the log has none.
        * ``solved`` is False exactly when ``time >= MAX_TIME``.
        * ``table_prog`` and ``vis_spec`` are always None.

    Raises:
        ValueError: If the text after the last ":" on a recognised line is not
            a valid int (candidates) or float (time).
    """
    candidates_marker = "# candidates before getting the correct solution: "
    time_marker = "# time used (s): "

    candidate_counts = []
    elapsed = MAX_TIME
    for line in lines:
        # The value of interest is whatever follows the final colon.
        if line.startswith(candidates_marker):
            candidate_counts.append(int(line.split(":")[-1].strip()) + 1)
        elif line.startswith(time_marker):
            elapsed = float(line.split(":")[-1].strip())
        # "# table_prog:" and "# vis_spec:" lines are deliberately not
        # captured; the corresponding fields stay None.

    return {
        "exp_id": exp_id,
        "data_id": data_id,
        "num_candidates": candidate_counts,
        "table_prog": None,
        "vis_spec": None,
        "time": elapsed,
        "solved": not (elapsed >= MAX_TIME),
    }

def read_log_result_list(log_dir_list, titles=None):
    """Parse every ``.log`` file in the given directories.

    Args:
        log_dir_list: Directories to scan (non-recursively) for files whose
            names end in ".log".
        titles: Optional labels, one per directory, used as the ``exp_id`` of
            the results from that directory. When None, the directory path
            itself is used.

    Returns:
        A list of the status dicts produced by ``parse_log_content``, one per
        log file, ordered by ascending ``time``. Each ``data_id`` is the file
        name up to its first ".".
    """
    collected = []
    for index, directory in enumerate(log_dir_list):
        for entry in os.listdir(directory):
            if entry.endswith(".log"):
                log_path = os.path.join(directory, entry)
                label = titles[index] if titles is not None else directory
                with open(log_path) as handle:
                    content = handle.readlines()
                    record = parse_log_content(label, entry.split(".")[0], content)
                    collected.append(record)
    # sorted() is stable, so ties keep their discovery order.
    return sorted(collected, key=lambda record: record["time"])

def print_solving_time(log_dir_list, titiles=None):
    """Print a per-benchmark comparison of "falx" and "morpheus" solving times.

    Reads all logs via ``read_log_result_list`` and prints a tab-separated
    table with one row per ``data_id`` (its falx time, its morpheus time and
    the speedup ``morpheus / falx``), followed by the speedup column on its
    own, one value per line. Rows appear in the order their ``data_id`` first
    occurs in the time-sorted results.

    Args:
        log_dir_list: Directories containing the ``.log`` files to compare.
        titiles: Labels for the directories, forwarded as the ``titles``
            argument of ``read_log_result_list``. The misspelled name is kept
            because callers pass it by keyword. The table looks results up
            under the labels "falx" and "morpheus", so the labels (or, when
            None, the directory paths) must be exactly those two strings.

    Raises:
        KeyError: If some ``data_id`` has no "falx" or no "morpheus" result.
    """
    all_result = read_log_result_list(log_dir_list, titiles)

    # data_id -> {exp_id: time}; a later result for the same pair overwrites
    # an earlier one.
    cases = {}
    for d in all_result:
        cases.setdefault(d["data_id"], {})[d["exp_id"]] = d["time"]

    print("{}\t\t{}\t{}\t{}".format("data_id", "falx", "morpheus", "speedup"))
    speedups = []
    for data_id, times in cases.items():
        falx_time = times["falx"]
        morpheus_time = times["morpheus"]
        if falx_time:
            ratio = morpheus_time / falx_time
        else:
            # A zero falx time would otherwise raise ZeroDivisionError; report
            # an unbounded (or undefined, for 0 / 0) speedup instead.
            ratio = float("inf") if morpheus_time else float("nan")
        speedups.append(ratio)
        print("{}\t{}\t{}\t{}".format(data_id, falx_time, morpheus_time, ratio))

    # Repeat the speedups as a bare column, in the same row order as the table.
    for ratio in speedups:
        print("{}".format(ratio))
    
# Names of the directories directly under OUTPUT_DIR that start with "exp",
# preceded by a "select a folder" entry (presumably a placeholder for a
# folder-selection widget; it is not used in the visible code).
log_folders = [
    "select a folder",
    *(
        entry
        for entry in os.listdir(OUTPUT_DIR)
        if entry.startswith("exp") and os.path.isdir(os.path.join(OUTPUT_DIR, entry))
    ),
]

# Compare the two experiment runs, labelling them with the names the report
# expects.
print_solving_time(
    ["trinity_exp_falx_20190918_150706", "trinity_exp_morpheus_20190918_151605"],
    titiles=["falx", "morpheus"],
)

data_id		falx	morpheus	speedup
trinity-025	0.1765	0.7799	4.418696883852692
trinity-006	0.4124	0.6848	1.6605237633365664
trinity-052	0.4153	0.6686	1.6099205393691307
trinity-010	0.4273	0.7135	1.6697870348701147
trinity-053	1.1871	1.35	1.1372251705837757
trinity-008	3.0247	527.9153	174.53476377822594
trinity-004	3.7885	600	158.37402665962782
trinity-003	4.7185	125.0648	26.505202924658263
trinity-040	42.0311	305.8659	7.277132884935202
trinity-014	50.7521	44.4793	0.8764031439093162
trinity-049	51.7927	184.2254	3.556976176179268
trinity-039	600	600	1.0
4.418696883852692
1.6605237633365664
1.6099205393691307
1.6697870348701147
1.1372251705837757
174.53476377822594
158.37402665962782
26.505202924658263
7.277132884935202
0.8764031439093162
3.556976176179268
1.0
