# Benchmark libSps with simulated data

This should give us a feeling for how everything scales with different data-space sizes, amounts of data, and numbers of dimensions.

## Basic imports and settings first

In [None]:
import os
os.chdir("../build_benchmark")
from sps import VERSION, make_sps_index, MemSimpleVector, CachedSimpleVector
os.chdir("../notebooks")
import random
import time
import os
import time
import pickle
from numpy import std

from bokeh.plotting import figure
from bokeh.palettes import viridis
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, LabelSet, Whisker
from bokeh.transform import jitter
from bokeh.layouts import column, row, gridplot
output_notebook()

# Global benchmark configuration: problem sizes, repeat counts, the parameter
# grid that is benchmarked, and plot styling.
print("libSps version:", VERSION)

# Set to True for a quick run: shrinks K (and therefore G, N_QUERY, FILLS and
# AREAS) and shortens the *_DETAIL ranges below.
FAST = False

K = 1000
if FAST:
    K = 100
# G is K squared (10**6 when K is 1000).
G = K * K

# Number of queries per timed batch (passed as n to query_index).
N_QUERY = 10 * K
# Number of timed query batches per built index (see query_index).
REPEAT_QUERY = 10
# Number of times each configuration is filled, built and measured.
REPEATS = 10


# Coarse grid: number of inserted entries and dataspace area, with the short
# labels used for them on the plot axes.
FILLS = [10 * K, 1 * G]
FILLS_DETAIL = [4**x for x in range(3, 11)]
FILL_NAMES = ["K", "G"]
AREAS = [10 * K, 1 * G]
AREAS_DETAIL = [4**x for x in range(3, 11)]
AREA_NAMES = ["K", "G"]

# Number of dimensions, point vs. rectangle data, and storage backend.
DIMS = [2, 3]
DIMS_DETAIL = [2, 3, 4, 5, 6]
RECTANGLES = [False, True]
STORAGE = ["Ram", "Cached"]

if FAST:
    FILLS_DETAIL = [4**x for x in range(3, 8)]
    AREAS_DETAIL = [4**x for x in range(3, 8)]
    DIMS_DETAIL = [2, 3, 4]

# File suffixes that are deleted (prefixed with ".benchmark_index") after an
# index object has been dropped.
FILES = [".prefix_sums", ".coords", ".overlays", ".datasets", ".corners"]

COLOR_PALETTE = ["#0072B2", "#D55E00", "#009E73", "#E69F00", "#CC79A7", "#56B4E9", "#F0E442"]
SCATTER_PALETTE = ["x", "cross", "circle", "dash"]

# Assigned to the output_backend attribute of every figure.
OUTPUT_BACKEND = "svg"

## Setup testing functions

In [None]:
def fill_index(n, index, dims, is_ort, area):
    """Fill *index* with *n* random entries, build it, and time both phases.

    The index is cleared first. Every coordinate is drawn with
    ``random.randrange(area)``.

    Args:
        n: number of entries to insert.
        index: object providing ``clear()``, ``add_point(...)`` and
            ``generate(verbosity=0)``.
        dims: number of coordinates per entry.
        is_ort: if true, insert rectangles: the first two coordinates are
            ``[start, end]`` intervals with ``start <= end`` and the remaining
            ``dims - 2`` coordinates are identical in start and end. If
            false, insert points with ``dims`` coordinates.
        area: exclusive upper bound for every coordinate.

    Returns:
        ``(fill_time, generate_time, dataset_id)`` where both times are in
        seconds (differences of ``time.perf_counter()``) and ``dataset_id``
        is whatever ``index.generate`` returned.
    """
    index.clear()
    t1 = time.perf_counter()
    for _ in range(n):
        if is_ort:
            pos_s = []
            pos_e = []
            # The two interval dimensions: order the two draws into start/end.
            for _ in range(0, 2):
                x = random.randrange(area)
                y = random.randrange(area)
                pos_s.append(min(x, y))
                pos_e.append(max(x, y))
            # Remaining dimensions are plain points: end equals start.
            for _ in range(2, dims):
                pos_s.append(random.randrange(area))
            pos_e += pos_s[2:]
            index.add_point(pos_s, pos_e)
        else:
            pos_s = [random.randrange(area) for _ in range(dims)]
            index.add_point(pos_s)
    t2 = time.perf_counter()
    dataset_id = index.generate(verbosity=0)
    t3 = time.perf_counter()
    fill_time = t2 - t1
    generate_time = t3 - t2
    # Both times are in seconds; callers convert to ms where needed.
    return fill_time, generate_time, dataset_id

def query_index(index, id, dims, genome_size, n):
    """Measure the query throughput of *index* over REPEAT_QUERY batches.

    Each batch consists of *n* random boxes with *dims* coordinates; per
    coordinate two values are drawn with ``random.randrange(genome_size)``
    and ordered into start and end. Only the ``index.count_multiple`` call is
    timed; generating the boxes is not.

    Returns:
        A list with one throughput value per batch, in queries per
        millisecond.
    """
    throughputs = []
    for _batch in range(REPEAT_QUERY):
        queries = []
        for _query in range(n):
            lower = []
            upper = []
            for _axis in range(dims):
                first = random.randrange(genome_size)
                second = random.randrange(genome_size)
                lower.append(min(first, second))
                upper.append(max(first, second))
            queries.append((id, lower, upper))
        started = time.perf_counter()
        index.count_multiple(queries)
        stopped = time.perf_counter()
        # queries per second, scaled down to queries per ms
        per_second = n / (stopped - started)
        throughputs.append(per_second / 1000)
    return throughputs


def itr_order(*itr):
    """Yield every tuple that takes one element from each given iterable.

    The first iterable varies slowest. With no arguments a single empty
    tuple is yielded. The remaining iterables are re-iterated for every
    element of the first one.
    """
    if not itr:
        yield ()
        return
    first, rest = itr[0], itr[1:]
    for head in first:
        for tail in itr_order(*rest):
            yield (head, *tail)


def mean(xs):
    """Return the arithmetic mean of the values in *xs*."""
    total = sum(xs)
    return total / len(xs)
def plus_std(xs):
    """Return the mean of *xs* plus its standard deviation (numpy ``std``)."""
    center = mean(xs)
    return center + std(xs)
def minus_std(xs):
    """Return the mean of *xs* minus its standard deviation (numpy ``std``)."""
    center = mean(xs)
    return center - std(xs)

## Compute data

In [None]:
# Benchmark results. Keys are tuples
# (dims, rectangles, storage, area, fill, metric); values are lists of raw
# measurements for that configuration.
data = {}

# Full grid over DIMS x RECTANGLES x STORAGE x AREAS x FILLS. One index object
# is created per (dims, rectangles, storage) and reused for all areas/fills.
for dims in DIMS:
    for rectangels in RECTANGLES:
        # Rectangle data uses 2 interval dimensions, point data none.
        num_ort_dims = 2 if rectangels else 0
        for storage in STORAGE:
            index = make_sps_index(".benchmark_index", dims, num_ort_dims, storage)
            for area in AREAS:
                # Side length such that side ** (dims + num_ort_dims) is about area.
                # NOTE(review): int() truncates the floating-point root, so the
                # result may land one below the exact integer root.
                area_size = int(area ** ( 1 / (dims + num_ort_dims)))
                for fill in FILLS:
                    print("dims:", dims, "rectangles:", rectangels, "storage:", storage, "area:", area, "fill:", fill)
                    fill_times = []
                    generate_times = []
                    queries_per_ms_list = []
                    index_sizes = []
                    # Rebuild and measure the same configuration REPEATS times.
                    for _ in range(REPEATS):
                        fill_time, generate_time, idx = fill_index(fill, index, dims, rectangels, area_size)
                        fill_times.append(fill_time)
                        generate_times.append(generate_time)
                        # query_index returns one value per query batch.
                        queries_per_ms = query_index(index, idx, dims, area_size, N_QUERY)
                        queries_per_ms_list.extend(queries_per_ms)
                        index_sizes.append(index.get_size(idx) / 10**9) # in GB
                        index.clear()
                    data[(dims, rectangels, storage, area, fill, "fill_time")] = fill_times
                    data[(dims, rectangels, storage, area, fill, "generate_time")] = generate_times
                    data[(dims, rectangels, storage, area, fill, "queries_per_ms")] = queries_per_ms_list
                    data[(dims, rectangels, storage, area, fill, "index_size")] = index_sizes
            # Drop the index object before deleting any files it left on disk.
            del index
            for file_suff in FILES:
                if os.path.isfile(".benchmark_index" + file_suff):
                    os.remove(".benchmark_index" + file_suff)

# Baseline: random lookups in a plain vector (in RAM or cached), for
# comparison with the index query throughput.
for area in AREAS:
    for storage in STORAGE:
        print("storage:", storage, "area:", area, "vector")
        vector_type = MemSimpleVector if storage == "Ram" else CachedSimpleVector
        vec_d = vector_type("benchmark_index_c", True)
        # Fill the vector with `area` random values below `area`.
        for _ in range(area):
            vec_d.add(random.randrange(area))
        ts = []
        for _ in range(REPEATS):
            bins = [random.randrange(area) for _ in range(N_QUERY)]
            started = time.perf_counter()
            vec_d.get_multiple(bins)
            elapsed = time.perf_counter() - started
            # lookups per second, scaled down to lookups per ms
            ts.append((N_QUERY / elapsed) / 1000)
        data[("simple_vec", area, storage, "queries_per_ms")] = ts

        # Drop the vector before removing the file of the cached variant.
        del vec_d
        if storage == "Cached":
            os.remove("benchmark_index_c.vals")

# Persist the results collected so far.
with open("benchmark.pickle", "wb") as out_file:
    pickle.dump(data, out_file)

In [None]:
def _remove_index_files(prefix=".benchmark_index"):
    """Delete every file ``prefix + suffix`` for the suffixes in FILES, if present."""
    for file_suff in FILES:
        if os.path.isfile(prefix + file_suff):
            os.remove(prefix + file_suff)


def _benchmark_config(index, dims, rectangels, storage, area, fill):
    """Measure one configuration REPEATS times and store the results in ``data``.

    Fills and builds *index* with *fill* random entries, queries it, and
    records fill time, build time, query throughput and index size (in GB)
    under the keys ``(dims, rectangels, storage, area, fill, metric)``.
    """
    num_ort_dims = 2 if rectangels else 0
    # Side length such that side ** (dims + num_ort_dims) is about area.
    area_size = int(area ** (1 / (dims + num_ort_dims)))
    print("dims:", dims, "rectangles:", rectangels, "storage:", storage, "area:", area, "fill:", fill)
    fill_times = []
    generate_times = []
    queries_per_ms_list = []
    index_sizes = []
    for _ in range(REPEATS):
        fill_time, generate_time, dataset_id = fill_index(fill, index, dims, rectangels, area_size)
        fill_times.append(fill_time)
        generate_times.append(generate_time)
        queries_per_ms_list.extend(query_index(index, dataset_id, dims, area_size, N_QUERY))
        index_sizes.append(index.get_size(dataset_id) / 10**9)  # in GB
        index.clear()
    config = (dims, rectangels, storage, area, fill)
    data[config + ("fill_time",)] = fill_times
    data[config + ("generate_time",)] = generate_times
    data[config + ("queries_per_ms",)] = queries_per_ms_list
    data[config + ("index_size",)] = index_sizes


# The detail runs pair the i-th entries of the coarse lists with zip (they do
# not form the full grid) and sweep one variable over its *_DETAIL range.

# Sweep the number of inserted entries.
for dims, rectangels, storage, area in zip(DIMS, RECTANGLES, STORAGE, AREAS):
    index = make_sps_index(".benchmark_index", dims, 2 if rectangels else 0, storage)
    for fill in FILLS_DETAIL:
        _benchmark_config(index, dims, rectangels, storage, area, fill)
    # Drop the index object before deleting any files it left on disk.
    del index
    _remove_index_files()

# Sweep the dataspace area.
for dims, rectangels, storage, fill in zip(DIMS, RECTANGLES, STORAGE, FILLS):
    index = make_sps_index(".benchmark_index", dims, 2 if rectangels else 0, storage)
    for area in AREAS_DETAIL:
        _benchmark_config(index, dims, rectangels, storage, area, fill)
    del index
    _remove_index_files()

# Sweep the number of dimensions; this needs a fresh index per dimensionality.
for area, rectangels, storage, fill in zip(AREAS, RECTANGLES, STORAGE, FILLS):
    for dims in DIMS_DETAIL:
        index = make_sps_index(".benchmark_index", dims, 2 if rectangels else 0, storage)
        _benchmark_config(index, dims, rectangels, storage, area, fill)
        del index
        _remove_index_files()

# Persist the coarse and detail results together.
with open("benchmark.pickle", "wb") as out_file:
    pickle.dump(data, out_file)

## Checkpoint

In [None]:
# Reload the benchmark results written by the compute cells, so the plots can
# be redrawn without rerunning the benchmark.
# NOTE: pickle must only be used on files from a trusted source.
with open("benchmark.pickle", "rb") as in_file:
    data = pickle.load(in_file)

## Plot query speed

In [None]:
# Bar chart of query throughput: one narrow panel per (area, dims) with bars
# for every (data type, storage, fill) combination, plus one final panel for
# the plain-vector baseline.
height = 300
# Zero-width placeholder figure: its x axis is hidden and the panels below
# reuse its y_range while hiding their own y axes.
f = figure(width=0, height=height, y_range=[0, 6000])#, y_axis_type="log")
f.output_backend = OUTPUT_BACKEND
f.xaxis.visible = False
f.yaxis.axis_label = "[Queries / ms]"
# Single marker at the origin; presumably only there so the figure has a renderer.
f.x(0,0)
f.toolbar_location = None
f.outline_line_color = None

idx = 0
fs = [f]

# order_names provides the axis labels, order the matching values used as data
# keys; both list the fills in reversed order.
order_names = [RECTANGLES, STORAGE, FILL_NAMES[::-1]]
order = [RECTANGLES, STORAGE, FILLS[::-1]]

for area, area_name in zip(AREAS, AREA_NAMES):
    for dims in DIMS:
        # Three-level categorical factors: data type, storage (R/C), fill name.
        cat_axis = [(("Rectangle" if data_type else "Point"), "R" if storage == "Ram" else "C", amount) for data_type, storage, amount in itr_order(*order_names)]

        f = figure(x_range=FactorRange(*cat_axis), y_range=fs[0].y_range, width=125, height=height)#, y_axis_type="log")
        f.xgrid.grid_line_color = None
        f.toolbar_location = None
        f.xaxis.axis_label = area_name + " - " + str(dims) + "D"
        f.yaxis.visible = False
        # Bar heights: mean throughput per configuration.
        tops = [mean(data[(dims, data_type, storage, area, amount, "queries_per_ms")])
                for data_type, storage, amount in itr_order(*order)]
        f.vbar(x=cat_axis, 
                top=tops,
                bottom=0,
                width=0.8,
                color=COLOR_PALETTE[idx % 4])
        # Disabled: scatter of the individual measurements on top of the bars.
        if False:
            f.x(x=[name for name, (data_type, storage, amount) in zip(cat_axis, itr_order(*order))
                        for _ in data[(dims, data_type, storage, area, amount, "queries_per_ms")]], 
                y=[x for data_type, storage, amount in itr_order(*order) 
                    for x in data[(dims, data_type, storage, area, amount, "queries_per_ms")]],
                color="grey", alpha=0.1)
        # Error bars from mean - std to mean + std.
        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": cat_axis,
            "u": [plus_std(data[(dims, data_type, storage, area, amount, "queries_per_ms")])
                for data_type, storage, amount in itr_order(*order)],
            "l": [minus_std(data[(dims, data_type, storage, area, amount, "queries_per_ms")])
                for data_type, storage, amount in itr_order(*order)],
        }), line_width=2, level="annotation")
        error.upper_head.size=10
        error.lower_head.size=10
        f.add_layout(error)
        # Rotated labels showing the rounded mean, placed above the upper whisker.
        f.add_layout(LabelSet(x="x", y="y", text="text", y_offset=5,
                              source=ColumnDataSource(data={"x": cat_axis, 
                                                            "y": [plus_std(data[(dims, data_type, storage, area, amount, "queries_per_ms")])
                for data_type, storage, amount in itr_order(*order)], 
                                                            "text": [f'{int(round(x, 0)):,}' for x in tops]}),
                              angle=90, angle_units="deg", text_baseline="middle", text_color="black",
                              text_font_size="10px"))
        f.output_backend = OUTPUT_BACKEND
        f.outline_line_color = None
        fs.append(f)
        idx += 1


# Final panel: plain-vector baseline, with its own y range and visible y axis.
cat_axis = [(area, " ", "R" if storage == "Ram" else "C") for area in AREA_NAMES for storage in STORAGE]
f = figure(x_range=FactorRange(*cat_axis), y_range=[0, 35000], width=100, height=height)#, y_axis_type="log")
f.xgrid.grid_line_color = None
f.toolbar_location = None
f.xaxis.axis_label = "Array"
#f.yaxis.axis_label = "[Queries / ms]"
#f.yaxis.visible = False
tops = [mean(data[("simple_vec", area, storage, "queries_per_ms")]) for area in AREAS for storage in STORAGE]
f.vbar(x=cat_axis, 
        top=tops,
        bottom=0,
        width=0.8,
        color=COLOR_PALETTE[4])
# Disabled: scatter of the individual measurements on top of the bars.
if False:
    f.x(x=[(area_name, " ", "R" if storage == "Ram" else "C") 
                for area, area_name in zip(AREAS, AREA_NAMES) for storage in STORAGE
                for _ in data[("simple_vec", area, storage, "queries_per_ms")]], 
        y=[x for area in AREAS for storage in STORAGE 
                for x in data[("simple_vec", area, storage, "queries_per_ms")]],
        color="grey", alpha=0.1)

# Error bars from mean - std to mean + std.
error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
    "b": cat_axis,
    "u": [plus_std(data[("simple_vec", area, storage, "queries_per_ms")]) for area in AREAS for storage in STORAGE],
    "l": [minus_std(data[("simple_vec", area, storage, "queries_per_ms")]) for area in AREAS for storage in STORAGE],
}), line_width=2, level="annotation")
error.upper_head.size=10
error.lower_head.size=10
f.add_layout(error)
f.add_layout(LabelSet(x="x", y="y", text="text", y_offset=5,
                        source=ColumnDataSource(data={"x": cat_axis, 
                                                    "y": [plus_std(data[("simple_vec", area, storage, "queries_per_ms")]) for area in AREAS for storage in STORAGE], 
                                                    "text": [f'{int(round(x, 0)):,}' for x in tops]}),
                        angle=90, angle_units="deg", text_baseline="middle", text_color="black",
                        text_font_size="10px"))
f.output_backend = OUTPUT_BACKEND
f.outline_line_color = None
fs.append(f)
idx += 1
# Lay out the axis placeholder and all panels in a single row.
show(gridplot([fs]))

In [None]:
def all_permutations(keys):
    """Yield every tuple that picks one element from each sequence in *keys*.

    *keys* is a sequence of iterables; the first one varies slowest. An
    empty *keys* yields a single empty tuple.
    """
    if len(keys) > 0:
        for choice in keys[0]:
            for remainder in all_permutations(keys[1:]):
                yield (choice, *remainder)
    else:
        yield ()

# Bar labels for permutations_plot, in the same order as the variables it
# compares: DIMS, RECTANGLES, STORAGE, AREAS, FILLS.
PERMUTATION_LABELS = ["higher dimension", "rectangles", "cached", "larger area", "more datapoints"]
# Kept for backward compatibility; other cells reassign this global.
cat_axis = list(PERMUTATION_LABELS)

def permutations_plot(key="queries_per_ms", y_axis="Speed increase", log_axis=False):
    """Plot the fold change of one metric when a single variable is switched.

    For each of the five variables (dimensions, rectangles, storage, area,
    fill) every configuration using the first value of that variable is
    paired with the configuration that differs only in using the second
    value. The plotted fold change is ``second / first`` over the paired raw
    measurements: a bar from 1 to the mean, grey markers for the individual
    ratios, a whisker at mean +/- std, and a label with the rounded mean.

    Args:
        key: metric name, i.e. the last element of the ``data`` keys.
        y_axis: label of the y axis.
        log_axis: if true, use a logarithmic y axis with range [1, 100].
    """
    # Use a private copy of the labels: the global ``cat_axis`` is overwritten
    # by the other plotting cells, which would give this plot the wrong factors.
    labels = list(PERMUTATION_LABELS)

    figure_kwargs = {"width": 200, "height": 300}
    if log_axis:
        figure_kwargs.update(y_axis_type="log", y_range=[1, 100])
    # The factor order here only determines the left-to-right bar order.
    f = figure(x_range=FactorRange("more datapoints", "rectangles", "cached", "higher dimension", "larger area"),
               **figure_kwargs)
    #f.toolbar_location = None
    f.yaxis.axis_label = y_axis

    all_variables = [DIMS, RECTANGLES, STORAGE, AREAS, FILLS]
    ys = []
    fold_change = []

    for idx, variable in enumerate(all_variables):
        # Pin the compared variable to its first (a) and second (b) value and
        # let all other variables range over the same values in the same order.
        var_a = [*all_variables]
        var_b = [*all_variables]
        var_a[idx] = [variable[0]]
        var_b[idx] = [variable[1]]

        ratios = []
        for a, b in zip(all_permutations(var_a), all_permutations(var_b)):
            for val_a, val_b in zip(data[a + (key,)], data[b + (key,)]):
                ratios.append(val_b / val_a)
        fold_change.append(ratios)
        ys.append(mean(ratios))

    # Bars span from 1 (no change) to the mean fold change, in either direction.
    f.vbar(x=labels,
            top=[max(1, y) for y in ys],
            bottom=[min(1, y) for y in ys],
            width=0.8,
            color=COLOR_PALETTE[:len(labels)])

    for label, ratios in zip(labels, fold_change):
        f.x(x=jitter("x", width=0.25, range=f.x_range),
            y="y",
            color="grey",
            source=ColumnDataSource({"x": [label] * len(ratios), "y": ratios}),
            alpha=0.1)

        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": [label],
            "u": [plus_std(ratios)],
            "l": [minus_std(ratios)],
        }), line_width=2, level="annotation")
        error.upper_head.size=10
        error.lower_head.size=10
        f.add_layout(error)

    # The "and False" keeps the alternative right-aligned placement switched off.
    f.add_layout(LabelSet(x="x", y="y", text="text", y_offset="y_offset",
                            source=ColumnDataSource(data={"x": labels,
                                                        "y": [plus_std(xx) for xx in fold_change],
                                                        "text": [f'{round(y, 3):,}' for y in ys],
                                                        "align": ["right" if y <= 1 and False else "left" for y in ys],
                                                        "y_offset": [-5 if y <= 1 and False else 5 for y in ys]}),
                            angle=90, angle_units="deg", text_baseline="middle", text_color="black",
                            text_align="align",
                            text_font_size="10px"))

    f.output_backend = OUTPUT_BACKEND
    f.outline_line_color = None
    show(f)
permutations_plot()

## Plot index size and build times

In [None]:
# Two rows of bar charts with one column per (area, data type): the top row
# shows the index build time in ms, the bottom row the index size in MB.
height_top = 130
height_bot = 200
text_font_size="8px"
width=100
# Zero-width placeholder figures: their x axes are hidden and the panels of
# each row reuse their y_range.
f = figure(width=0, height=height_top, y_range=[1, 1000000], y_axis_type="log")
f.output_backend = OUTPUT_BACKEND
f.xaxis.visible = False
f.yaxis.axis_label = "Build Time [ms]"
f.x(0,0)
f.toolbar_location = None
f.outline_line_color = None

idx = 0
fs = [f]

# Range start is larger than its end, so the size axis runs in reverse.
f2 = figure(width=0, height=height_bot, y_range=[1000, 0.01], y_axis_type="log")
f2.output_backend = OUTPUT_BACKEND
f2.xaxis.visible = False
f2.yaxis.axis_label = "Size [MB]"
f2.x(0,0)
f2.toolbar_location = None
f2.outline_line_color = None
fs2 = [f2]

# order_names provides the axis labels, order the matching data-key values.
order_names = [FILL_NAMES, DIMS, STORAGE]
order = [FILLS, DIMS, STORAGE]

for area, area_name in zip(AREAS, AREA_NAMES):
    for data_type in RECTANGLES:
        # Three-level categorical factors: fill name, dimensions, storage (R/C).
        cat_axis = [(amount_name, str(dims) + "D", "R" if storage == "Ram" else "C") for amount_name, dims, storage in itr_order(*order_names)]

        # --- top row: build time, stored in seconds, plotted in ms ---
        f = figure(x_range=FactorRange(*cat_axis), y_range=fs[0].y_range, width=width, height=height_top, y_axis_type="log")
        f.xgrid.grid_line_color = None
        f.toolbar_location = None
        f.xaxis.axis_label = area_name + " - " + ("Rectangle" if data_type else "Point")
        f.axis.visible = False
        tops = [mean(data[(dims, data_type, storage, area, amount, "generate_time")]) * 1000
                 for amount, dims, storage in itr_order(*order)]
        f.vbar(x=cat_axis,
                top=tops,
                bottom=0.001,
                width=0.8,
                color=COLOR_PALETTE[idx % 4])

        f.x(x=[name for name, (amount, dims, storage) in zip(cat_axis, itr_order(*order))
                    for _ in data[(dims, data_type, storage, area, amount, "generate_time")]],
            y=[x * 1000 for amount, dims, storage in itr_order(*order)
                 for x in data[(dims, data_type, storage, area, amount, "generate_time")]],
            color="grey", alpha=0.1)

        # One whisker per bar: the base column must have one factor per bar and
        # the bounds must use the same unit (ms) as the bars.
        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": cat_axis,
            "u": [plus_std(data[(dims, data_type, storage, area, amount, "generate_time")]) * 1000
                  for amount, dims, storage in itr_order(*order)],
            "l": [minus_std(data[(dims, data_type, storage, area, amount, "generate_time")]) * 1000
                  for amount, dims, storage in itr_order(*order)],
        }), line_width=2, level="annotation")
        error.upper_head.size=10
        error.lower_head.size=10
        f.add_layout(error)

        f.add_layout(LabelSet(x="x", y="y", text="text", y_offset=5,
                              source=ColumnDataSource(data={"x": cat_axis,
                                                            "y": tops,
                                                            "text": [f'{int(round(x, 0)):,}' for x in tops]}),
                              angle=90, angle_units="deg", text_baseline="middle", text_color="black",
                              text_font_size=text_font_size))
        f.output_backend = OUTPUT_BACKEND
        f.outline_line_color = None
        fs.append(f)

        # --- bottom row: index size, stored in GB, plotted in MB ---
        f = figure(x_range=FactorRange(*cat_axis), y_range=fs2[0].y_range, width=width, height=height_bot, y_axis_type="log")
        f.xgrid.grid_line_color = None
        f.toolbar_location = None
        f.xaxis.axis_label = area_name + " - " + ("Rect" if data_type else "Point")
        f.yaxis.visible = False
        tops = [sum(data[(dims, data_type, storage, area, amount, "index_size")]) /
                len(data[(dims, data_type, storage, area, amount, "index_size")]) * 1000
                for amount, dims, storage in itr_order(*order)]
        f.vbar(x=cat_axis,
                top=tops,
                bottom=0.001,
                width=0.8,
                color=COLOR_PALETTE[idx % 4])
        f.x(x=[name for name, (amount, dims, storage) in zip(cat_axis, itr_order(*order))
                    for _ in data[(dims, data_type, storage, area, amount, "index_size")]],
            y=[x * 1000 for amount, dims, storage in itr_order(*order)
                 for x in data[(dims, data_type, storage, area, amount, "index_size")]],
            color="grey", alpha=0.1)

        # Same as above: one factor per bar, bounds in the unit of the bars (MB).
        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": cat_axis,
            "u": [plus_std(data[(dims, data_type, storage, area, amount, "index_size")]) * 1000
                  for amount, dims, storage in itr_order(*order)],
            "l": [minus_std(data[(dims, data_type, storage, area, amount, "index_size")]) * 1000
                  for amount, dims, storage in itr_order(*order)],
        }), line_width=2, level="annotation")
        error.upper_head.size=10
        error.lower_head.size=10
        f.add_layout(error)

        f.add_layout(LabelSet(x="x", y="y", text="text", y_offset=-5, text_align="right",
                              source=ColumnDataSource(data={"x": cat_axis,
                                                            "y": tops,
                                                            "text": [f'{round(x, 3):,}' for x in tops]}),
                              angle=90, angle_units="deg", text_baseline="middle", text_color="black",
                              text_font_size=text_font_size))
        f.output_backend = OUTPUT_BACKEND
        f.outline_line_color = None
        fs2.append(f)

        idx += 1

idx += 1
# Top row: build times; bottom row: index sizes.
show(gridplot([fs, fs2]))

In [None]:
# Fold-change plots for the build time (linear y axis) and for the index size
# (logarithmic y axis).
permutations_plot("generate_time", "Build time increase", False)
permutations_plot("index_size", "Index size increase", True)

## More detailed plots

In [None]:
# Detail plots over the number of inserted entries: one log-log figure per
# metric, one line per zipped (dims, rectangles, storage, area) configuration.
lf = []
for key in ["queries_per_ms", "generate_time", "index_size"]:
    f = figure(x_axis_type="log", y_axis_type="log", title=key, width=300, height=300)
    f.xaxis.axis_label = "number of datapoints"
    for idx, (dims, rectangels, storage, area) in enumerate(zip(DIMS, RECTANGLES, STORAGE, AREAS)):
        # Line through the mean of the measurements at every fill size.
        xs = FILLS_DETAIL
        ys = [sum(data[(dims, rectangels, storage, area, fill, key)]) / 
              len(data[(dims, rectangels, storage, area, fill, key)]) 
              for fill in FILLS_DETAIL]
        legend = " ".join([str(x) for x in [dims, rectangels, storage, area]])
        c = COLOR_PALETTE[idx % len(COLOR_PALETTE)]
        f.line(x=xs, y=ys, color=c, legend_label=legend)
        
        # Individual measurements; only used by the disabled scatter call below.
        xs = [fill for fill in FILLS_DETAIL
              for _ in data[(dims, rectangels, storage, area, fill, key)]]
        ys = [x for fill in FILLS_DETAIL
              for x in data[(dims, rectangels, storage, area, fill, key)]]
        #f.x(x=xs, y=ys, color=c, legend_label=legend, alpha=0.1)
        
        # Error bars from mean - std to mean + std, in the colour of the line.
        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": FILLS_DETAIL,
            "u": [plus_std(data[(dims, rectangels, storage, area, fill, key)]) for fill in FILLS_DETAIL],
            "l": [minus_std(data[(dims, rectangels, storage, area, fill, key)]) for fill in FILLS_DETAIL],
        }), line_width=2, level="annotation", line_color=c)
        error.upper_head.size=10
        error.upper_head.line_color=c
        error.lower_head.size=10
        error.lower_head.line_color=c
        f.add_layout(error)
    lf.append(f)
show(gridplot([lf]), output_notebook=True)

# Detail plots over the dataspace area: one log-log figure per metric, one
# line per zipped (dims, rectangles, storage, fill) configuration.
lf = []
for key in ["queries_per_ms", "generate_time", "index_size"]:
    f = figure(x_axis_type="log", y_axis_type="log", title=key, width=300, height=300)
    f.xaxis.axis_label = "area size"
    for idx, (dims, rectangels, storage, fill) in enumerate(zip(DIMS, RECTANGLES, STORAGE, FILLS)):
        # Line through the mean of the measurements at every area.
        xs = AREAS_DETAIL
        ys = [sum(data[(dims, rectangels, storage, area, fill, key)]) /
              len(data[(dims, rectangels, storage, area, fill, key)]) 
              for area in AREAS_DETAIL]
        legend = " ".join([str(x) for x in [dims, rectangels, storage, fill]])
        c = COLOR_PALETTE[idx % len(COLOR_PALETTE)]
        f.line(x=xs, y=ys, color=c, legend_label=legend)

        # Individual measurements; only used by the disabled scatter call below.
        xs = [area for area in AREAS_DETAIL
              for _ in data[(dims, rectangels, storage, area, fill, key)]]
        ys = [x for area in AREAS_DETAIL
              for x in data[(dims, rectangels, storage, area, fill, key)]]
        #f.x(x=xs, y=ys, color=c, legend_label=legend, alpha=0.1)
        # Error bars from mean - std to mean + std, in the colour of the line.
        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": AREAS_DETAIL,
            "u": [plus_std(data[(dims, rectangels, storage, area, fill, key)]) for area in AREAS_DETAIL],
            "l": [minus_std(data[(dims, rectangels, storage, area, fill, key)]) for area in AREAS_DETAIL],
        }), line_width=2, level="annotation", line_color=c)
        error.upper_head.size=10
        error.upper_head.line_color=c
        error.lower_head.size=10
        error.lower_head.line_color=c
        f.add_layout(error)
    lf.append(f)
show(gridplot([lf]), output_notebook=True)

# Detail plots over the number of dimensions: one figure per metric (log y
# axis), one line per zipped (area, rectangles, storage, fill) configuration.
lf = []
for key in ["queries_per_ms", "generate_time", "index_size"]:
    f = figure(y_axis_type="log", title=key, width=300, height=300)
    f.xaxis.axis_label = "dimensions"
    for idx, (area, rectangels, storage, fill) in enumerate(zip(AREAS, RECTANGLES, STORAGE, FILLS)):
        # Line through the mean of the measurements at every dimensionality.
        xs = DIMS_DETAIL
        ys = [mean(data[(dims, rectangels, storage, area, fill, key)])
              for dims in DIMS_DETAIL]
        # The number of dimensions is the x axis here, so a line is identified
        # by its area (not by a dims value) together with the other settings.
        legend = " ".join([str(x) for x in [area, rectangels, storage, fill]])
        c = COLOR_PALETTE[idx % len(COLOR_PALETTE)]
        f.line(x=xs, y=ys, color=c, legend_label=legend)

        # Error bars from mean - std to mean + std, in the colour of the line.
        error = Whisker(base="b", upper="u", lower="l", source=ColumnDataSource({
            "b": DIMS_DETAIL,
            "u": [plus_std(data[(dims, rectangels, storage, area, fill, key)]) for dims in DIMS_DETAIL],
            "l": [minus_std(data[(dims, rectangels, storage, area, fill, key)]) for dims in DIMS_DETAIL],
        }), line_width=2, level="annotation", line_color=c)
        error.upper_head.size=10
        error.upper_head.line_color=c
        error.lower_head.size=10
        error.lower_head.line_color=c
        f.add_layout(error)
    lf.append(f)
show(gridplot([lf]), output_notebook=True)