In [1]:
import warnings
warnings.filterwarnings('ignore')

from io import StringIO 
import sys

import json
import pandas as pd
import os
from vega import VegaLite
from ipywidgets import widgets
import numpy as np

from falx.chart import *
from eval_interface import FalxEvalInterface

# Directory containing the benchmark JSON files; the relative path is resolved
# against the kernel's current working directory.
DATA_DIR = os.path.join("..", "benchmarks")

# Seed NumPy's global RNG with a fixed value.
# NOTE(review): presumably the synthesizer's sampling draws from this RNG — confirm.
np.random.seed(2019)

class Capturing(list):
    """List that collects everything written to stdout/stderr in a ``with`` block.

    While the context is active, ``sys.stdout`` and ``sys.stderr`` both point at
    one shared in-memory buffer. On exit the buffered text is split into lines
    and appended to this list, and the original streams are put back.
    """

    def __enter__(self):
        # Keep the live streams so __exit__ can reinstate them.
        self._stdout, self._stderr = sys.stdout, sys.stderr
        # A single buffer for both streams keeps lines in emission order.
        self._stringio = StringIO()
        sys.stdout = self._stringio
        sys.stderr = self._stringio
        return self

    def __exit__(self, *args):
        captured_text = self._stringio.getvalue()
        self.extend(captured_text.splitlines())
        del self._stringio    # free up some memory
        sys.stdout, sys.stderr = self._stdout, self._stderr
        
def render_vegalite(vis):
    """Display ``vis`` by wrapping its Vega-Lite object in a ``VegaLite`` widget."""
    vl_spec = vis.to_vl_obj()
    VegaLite(vl_spec).display()
    
def print_log(output):
    """Echo the relevant lines of a captured synthesis log.

    Two kinds of lines are printed:

    * any line containing the "[info] #Candidates ..." marker, wherever it occurs;
    * every line that comes after the first line containing "====>"
      (the marker line itself is not echoed).

    Each line is printed at most once, even when it matches both rules.

    Args:
        output: iterable of log lines (strings), e.g. a ``Capturing`` list.
    """
    candidates_marker = "[info] #Candidates before getting the correct solution:"
    results_marker = "====>"

    in_results = False
    for line in output:
        # A single combined test avoids printing a candidates line twice when
        # it shows up after the results marker.
        if in_results or candidates_marker in line:
            print(line)
        # Switch the flag only after the check above, so the marker line that
        # opens the results section is not itself printed.
        if results_marker in line:
            in_results = True
        

def run_synthesis(fname, num_samples):
    """Run the synthesizer on one benchmark file and print each solution found.

    The benchmark JSON must provide the keys "input_data", "vl_spec" and
    "output_data". The target visualization is rebuilt from the spec and the
    output data, evaluated into a trace, and handed to
    ``FalxEvalInterface.synthesize`` together with the input table.

    A "====>" line is printed before the solutions; ``print_log`` uses it to
    find the start of the results section.

    Args:
        fname: benchmark file name, looked up inside ``DATA_DIR``.
        num_samples: forwarded unchanged to ``FalxEvalInterface.synthesize``.
    """
    with open(os.path.join(DATA_DIR, fname), "r") as f:
        benchmark = json.load(f)

    print("# run synthesize {}".format(fname))

    input_data = benchmark["input_data"]
    target_vis = VisDesign.load_from_vegalite(benchmark["vl_spec"], benchmark["output_data"])
    trace = target_vis.eval()
    result = FalxEvalInterface.synthesize(inputs=[input_data], full_trace=trace, num_samples=num_samples)
    print("====>")
    # Loop variables get their own names so the benchmark objects loaded
    # above are not rebound while iterating.
    for table_prog, synthesized_vis in result:
        print("# table_prog:")
        print("  {}".format(table_prog))
        print("# vis_spec:")
        vl_obj = synthesized_vis.to_vl_obj()
        # Strip the "data" entry so only the spec is printed; a spec without
        # one is printed as-is instead of raising.
        vl_obj.pop("data", None)
        print("    {}".format(vl_obj))
            
def run_wrapper(fname, num_samples=4):
    """Run ``run_synthesis`` quietly and print only the filtered log.

    Everything the synthesis writes to stdout/stderr is captured and then
    passed through ``print_log``. The log is replayed even when the synthesis
    raises, so what was captured before the failure is not lost; the exception
    still propagates to the caller.

    Args:
        fname: benchmark file name, passed to ``run_synthesis``.
        num_samples: passed to ``run_synthesis``; defaults to 4.
    """
    print("\n====> {}".format(fname))
    # Create the collector up front so it is always bound in the finally block.
    output = Capturing()
    try:
        with output:
            run_synthesis(fname, num_samples)
    finally:
        # Capturing restores the real streams on exit, so this prints normally.
        print_log(output)

# Run a single benchmark with the default num_samples and report its timing.
%time run_wrapper('001.json')


====> 001.json
[info] #Candidates before getting the correct solution: 2
[info] #Candidates before getting the correct solution: 4
# table_prog:
  [[gatherNeg(@param0, ['-1', '-2', '-3'])], [gatherNeg(@param0, ['-1', '-2', '-3'])]]
# vis_spec:
    {'layer': [{'mark': {'type': 'line', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Quarter', 'type': 'nominal'}, 'y': {'field': 'Number of Units', 'type': 'quantitative'}, 'order': {'field': 'Quarter', 'type': 'quantitative'}}, 'transform': [{'filter': 'datum.layer_id == 0'}]}, {'mark': {'type': 'bar', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Quarter', 'type': 'nominal', 'sort': None}, 'y': {'field': 'Actual Profits', 'type': 'quantitative'}}, 'transform': [{'filter': 'datum.layer_id == 1'}]}], 'resolve': {}}
CPU times: user 986 ms, sys: 38.5 ms, total: 1.02 s
Wall time: 1.03 s


In [2]:
# Simple cases (the author's grouping): each benchmark is run with the default
# num_samples and every call is timed separately with %time.
%time run_wrapper('001.json')
%time run_wrapper('002.json')
%time run_wrapper('003.json')
%time run_wrapper('005.json')
%time run_wrapper('006.json')
%time run_wrapper('007.json')
%time run_wrapper('009.json')
%time run_wrapper('010.json')
%time run_wrapper('011.json')
%time run_wrapper('012.json')
%time run_wrapper('022.json')
%time run_wrapper('024.json')
%time run_wrapper('025.json')
%time run_wrapper('027.json')
%time run_wrapper('031.json')
%time run_wrapper('033.json')
%time run_wrapper('035.json')
%time run_wrapper('040.json')
%time run_wrapper('041.json')
%time run_wrapper('042.json')
%time run_wrapper('045.json')
%time run_wrapper('046.json')
%time run_wrapper('047.json')
%time run_wrapper('053.json')
%time run_wrapper('056.json')
%time run_wrapper('058.json')


====> 001.json
[info] #Candidates before getting the correct solution: 6
[info] #Candidates before getting the correct solution: 8
# table_prog:
  [[gatherNeg(@param0, ['-1', '-2', '-3'])], [gatherNeg(@param0, ['-1', '-2', '-3'])]]
# vis_spec:
    {'layer': [{'mark': {'type': 'line', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Quarter', 'type': 'nominal'}, 'y': {'field': 'Number of Units', 'type': 'quantitative'}, 'order': {'field': 'Quarter', 'type': 'quantitative'}}, 'transform': [{'filter': 'datum.layer_id == 0'}]}, {'mark': {'type': 'bar', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Quarter', 'type': 'nominal', 'sort': None}, 'y': {'field': 'Actual Profits', 'type': 'quantitative'}}, 'transform': [{'filter': 'datum.layer_id == 1'}]}], 'resolve': {}}
CPU times: user 879 ms, sys: 29.4 ms, total: 909 ms
Wall time: 910 ms

====> 002.json
[info] #Candidates before getting the correct solution: 10
[info] #Candidates before getting the correct solution: 11
# table_prog:
  [[gather(@p

[info] #Candidates before getting the correct solution: 61
[info] #Candidates before getting the correct solution: 62
# table_prog:
  [[gatherNeg(@param0, ['-2', '-3', '-4'])], [gather(@param0, ['3', '4'])]]
# vis_spec:
    {'layer': [{'mark': {'type': 'bar', 'opacity': 0.7}, 'encoding': {'x': {'field': 'VALUE', 'type': 'nominal', 'sort': None}, 'y': {'field': 'Value', 'type': 'quantitative'}}, 'transform': [{'filter': 'datum.layer_id == 0'}]}, {'mark': {'type': 'point', 'opacity': 0.7}, 'encoding': {'x': {'field': 'DATA', 'type': 'nominal'}, 'y': {'field': 'VALUE', 'type': 'quantitative'}, 'color': {'field': 'KEY', 'type': 'nominal'}}, 'transform': [{'filter': 'datum.layer_id == 1'}]}], 'resolve': {}}
CPU times: user 755 ms, sys: 20.4 ms, total: 775 ms
Wall time: 776 ms

====> 040.json
[info] #Candidates before getting the correct solution: 64
[info] #Candidates before getting the correct solution: 85
# table_prog:
  [cumsum(@param0, 2), mutate(cumsum(@param0, 2), -, 4, 2)]
# vis_spec

In [3]:
# Longer-running benchmarks. The trailing comments are the author's rough
# runtime estimates; measured timings may differ.
%time run_wrapper('004.json') # ~1min
%time run_wrapper('013.json') # ~30s
%time run_wrapper('014.json') # ~2min
%time run_wrapper('015.json') # ~2min
%time run_wrapper('016.json') # ~30s
%time run_wrapper('017.json') # ~30s
%time run_wrapper('020.json') # ~1min
%time run_wrapper('029.json') # ~2min
%time run_wrapper('030.json') # ~2min
%time run_wrapper('034.json') # ~1min
%time run_wrapper('036.json') # ~1min
%time run_wrapper('043.json') # ~2min
%time run_wrapper('051.json') # ~1min
%time run_wrapper('055.json') # ~30s


====> 004.json
[info] #Candidates before getting the correct solution: 105
# table_prog:
  [gatherNeg(@param0, ['-1']), separate(gatherNeg(@param0, ['-1']), 2)]
# vis_spec:
    {'mark': 'line', 'encoding': {'x': {'field': 'VALUE', 'type': 'quantitative'}, 'y': {'field': 'Age Group', 'type': 'nominal'}, 'color': {'field': 'COL8588', 'type': 'nominal'}, 'column': {'field': 'COL8587', 'type': 'nominal'}, 'order': {'field': 'VALUE', 'type': 'quantitative'}}}
CPU times: user 21.6 s, sys: 701 ms, total: 22.3 s
Wall time: 22.3 s

====> 013.json
[info] #Candidates before getting the correct solution: 106
[info] #Candidates before getting the correct solution: 322
# table_prog:
  [[gatherNeg(@param0, ['-1', '-5'])], [mutate(@param0, +, 3, 2), gatherNeg(mutate(@param0, +, 3, 2), ['-1', '-3', '-4'])]]
# vis_spec:
    {'layer': [{'mark': {'type': 'bar', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Value', 'type': 'nominal'}, 'y': {'field': 'VALUE', 'type': 'quantitative'}, 'color': {'field': 'KE

In [4]:
# Benchmarks marked by the author as not solvable yet; the calls are left
# commented out and each trailing note gives the reason where one is known.
#%time run_wrapper('008.json') # cannot solve
#%time run_wrapper('018.json') # cannot solve, filter with two predicates
#%time run_wrapper('019.json') # cannot solve, cut
#%time run_wrapper('021.json') # cannot solve
#%time run_wrapper('028.json') # cannot solve, cut
#%time run_wrapper('037.json') # cannot solve
#%time run_wrapper('039.json') # cannot solve 3 steps
#%time run_wrapper('044.json') # cannot solve, floating point issue
#%time run_wrapper('048.json') # cannot solve, division
#%time run_wrapper('049.json') # cannot solve, 3 steps
#%time run_wrapper('050.json') # cannot solve
#%time run_wrapper('057.json') # cannot solve
#%time run_wrapper('059.json') # cannot solve

In [5]:
# Benchmarks tagged "new" by the author, each timed separately.
# NOTE(review): the recorded output for this cell contains MemoryError,
# UnboundLocalError and TypeError messages, so some of these runs fail — confirm
# which ones before relying on the results.
%time run_wrapper('023.json') # new
%time run_wrapper('026.json') # new
%time run_wrapper('052.json') # new
%time run_wrapper('test_1.json') # new
%time run_wrapper('test_2.json') # new
%time run_wrapper('test_3.json') # new
%time run_wrapper('test_4.json') # new
%time run_wrapper('test_5.json') # new
%time run_wrapper('test_6.json') # new
%time run_wrapper('test_7.json') # new
%time run_wrapper('test_8.json') # new


====> 023.json


MemoryError: 


====> 026.json
CPU times: user 6min 22s, sys: 4.32 s, total: 6min 27s
Wall time: 6min 28s

====> 052.json
[info] #Candidates before getting the correct solution: 485
[info] #Candidates before getting the correct solution: 487
# table_prog:
  [[gatherNeg(@param0, ['-1'])], [gather(@param0, ['3', '5'])]]
# vis_spec:
    {'layer': [{'mark': {'type': 'bar', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Label', 'type': 'nominal'}, 'y': {'field': 'VALUE', 'type': 'quantitative'}, 'color': {'field': 'KEY', 'type': 'nominal'}}, 'transform': [{'filter': 'datum.layer_id == 0'}]}, {'mark': {'type': 'line', 'opacity': 0.7}, 'encoding': {'x': {'field': 'Label', 'type': 'nominal'}, 'y': {'field': 'VALUE', 'type': 'quantitative'}, 'color': {'field': 'KEY', 'type': 'nominal'}, 'order': {'field': 'Label', 'type': 'quantitative'}}, 'transform': [{'filter': 'datum.layer_id == 1'}]}], 'resolve': {}}
CPU times: user 1.82 s, sys: 48.9 ms, total: 1.87 s
Wall time: 1.87 s

====> test_1.json
[info] #Candidate

UnboundLocalError: local variable 'chart' referenced before assignment


====> test_4.json
[info] #Candidates before getting the correct solution: 524
# table_prog:
  [gatherNeg(@param0, ['-1'])]
# vis_spec:
    {'mark': 'bar', 'encoding': {'x': {'field': 'VALUE', 'type': 'quantitative'}, 'y': {'field': 'Period', 'type': 'nominal'}, 'color': {'field': 'KEY', 'type': 'nominal'}}}
CPU times: user 1.68 s, sys: 42.4 ms, total: 1.72 s
Wall time: 1.72 s

====> test_5.json
[info] #Candidates before getting the correct solution: 526
CPU times: user 18min 4s, sys: 20.5 s, total: 18min 24s
Wall time: 3h 3min

====> test_6.json
[info] #Candidates before getting the correct solution: 528
# table_prog:
  [gatherNeg(@param0, ['-1', '-2', '-3'])]
# vis_spec:
    {'mark': {'type': 'area', 'opacity': 0.8}, 'encoding': {'x': {'field': 'time', 'type': 'nominal'}, 'y': {'field': 'VALUE', 'type': 'quantitative', 'stack': None}, 'color': {'field': 'KEY', 'type': 'nominal'}, 'column': {'field': 'Station', 'type': 'nominal'}}}
CPU times: user 6.51 s, sys: 202 ms, total: 6.71 s
Wa

TypeError: unsupported operand type(s) for +: 'int' and 'str'


====> test_8.json
CPU times: user 5min 15s, sys: 7.06 s, total: 5min 22s
Wall time: 5min 22s
