# Analyze index size and build times

### load data first

In [10]:
import pickle


# --- experiment grid ---------------------------------------------------------
# All values are strings because they are used verbatim as keys into the
# nested `data` dictionary loaded further down.
RESO = ["5000", "10000", "50000"]
SUBSAMPLES = ["1", "0.1", "0.01", "0.001", "0.0001"]
SUBSAMPLES_2 = ["1", "0.8", "0.6", "0.4", "0.2"]
ORGANISMS = ["drosophila_m", "e_coli", "s_cerevisiae", "t_brucei"]
ORGA_SUBSAMPLE = "t_brucei"
SUBSAMPLE_ORGA = ["even_1"]
NO_PARAMS = [""]  # , "-c.-m", "-q.-c", "-q.-c.-m"]

# PARAMS[i] is the data key whose human-readable form is PARAMS_TUPLE[i].
PARAMS = ["", "-q", "-c", "-m", "-q.-c", "-q.-m", "-c.-m", "-q.-c.-m"]
PARAMS_TUPLE = [
    ("+m", "+c", "+q"),
    ("+m", "+c", "-q"),
    ("+m", "-c", "+q"),
    ("-m", "+c", "+q"),
    ("+m", "-c", "-q"),
    ("-m", "+c", "-q"),
    ("-m", "-c", "+q"),
    ("-m", "-c", "-q"),
]

# Re-order both lists together, descending by the tuple form, so that they
# stay index-aligned.
zip_list = sorted(zip(PARAMS, PARAMS_TUPLE), key=lambda pair: pair[1], reverse=True)
PARAMS = [flags for flags, _ in zip_list]
PARAMS_TUPLE = [labels for _, labels in zip_list]
REPLICATES = 100

# --- measurements ------------------------------------------------------------
with open("index_sizes_data.pickle", "rb") as in_file:
    data = pickle.load(in_file)

# Organisms are handled in order of increasing genome size from here on.
ORGANISMS.sort(key=lambda orga: data[orga]["genome_size"])
print(ORGANISMS)

# --- rendering ---------------------------------------------------------------
OUTPUT_BACKEND = "svg"
#OUTPUT_BACKEND = "canvas"

PLOT_SIZE = 6 * 100
#PLOT_SIZE = 25*6

['e_coli', 's_cerevisiae', 't_brucei', 'drosophila_m']


### bokeh imports

In [3]:

from bokeh.plotting import figure
from bokeh.palettes import viridis
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, LabelSet, Whisker
from bokeh.models.tickers import FixedTicker
from bokeh.transform import jitter
from bokeh.layouts import column, row, gridplot
from math import pi
from bokeh.layouts import row

# Colour and marker for each plotted series; both lists hold seven entries and
# are indexed by series position in the plotting functions below.
COLOR_PALETTE = [
    "#0072B2",
    "#D55E00",
    "#009E73",
    "#E69F00",
    "#CC79A7",
    "#56B4E9",
    "#F0E442",
]
SCATTER_PALETTE = [
    "x",
    "circle",
    "cross",
    "dash",
    "triangle",
    "square",
    "asterisk",
]

# Send bokeh output to the notebook.
output_notebook()

### plot index size & build time by organism

In [4]:
def conf_x_axis_ticker(f, subsample_orga):
    """Put labelled x-axis ticks at the genome size of every organism.

    Major ticks are the organisms' genome sizes plus the powers of ten
    10**0 .. 10**9; minor ticks are b * 10**m for b in 1..10 and m in 0..9.
    Each genome-size tick is relabelled as
    "<organism> (<genome size>; <number of unique interactions>)".

    f: the bokeh figure to configure.
    subsample_orga: key of the subsample whose "num_unique_interactions"
        is shown in the label.
    """
    def _abbreviate(value, suffixes):
        # suffixes = (plain, thousands, millions)
        value = int(value)
        if value < 1000:
            return str(value) + suffixes[0]
        if value < 1000000:
            return str(round(value / 1000, 2)) + suffixes[1]
        return str(round(value / 1000000, 2)) + suffixes[2]

    genome_sizes = [data[orga]["genome_size"] for orga in ORGANISMS]
    powers_of_ten = [10**m for m in range(10)]
    f.xaxis.ticker = FixedTicker(
        ticks=sorted(genome_sizes + powers_of_ten),
        minor_ticks=[b*10**m for m in range(10) for b in range(1, 11)],
    )
    # Possible extra reference ticks (currently disabled):
    #   430 * 1000 * 1000: "rice 430mbp",
    #   1000 * 1000 * 1000: "chicken 1000mbp",
    #   1412 * 1000 * 1000: "zebrafish 1412mbp",

    overrides = {}
    for orga in ORGANISMS:
        genome_size = data[orga]["genome_size"]
        interactions = data[orga][subsample_orga]["num_unique_interactions"]
        overrides[genome_size] = (
            orga
            + " (" + _abbreviate(genome_size, ("bp", "kbp", "mbp"))
            + "; " + _abbreviate(interactions, ("", "k", "m"))
            + ")"
        )
    f.xaxis.major_label_overrides = overrides


def plot_by_genome_size(key="index_size", parse_key=lambda x: x, conf_extra=lambda x: x,
                        subsample_orga=SUBSAMPLE_ORGA[0], y_range=(1000, 200000000), orgas=ORGANISMS,
                        param_and_reso_list=[(param, reso) for param in PARAMS for reso in RESO]): 
    """Plot one metric of the built indices against genome size (log-log).

    One scatter+line series is drawn per (params, resolution) pair, with one
    point per organism. Organisms whose index build did not finish with
    status "OK" contribute NaN for that series.

    key: name of the per-index entry to plot (also used as y-axis label).
    parse_key: converts the stored value of `key` into a number.
    conf_extra: callable receiving the figure, used to configure the y axis.
    subsample_orga: subsample key looked up under every organism.
    y_range: (min, max) of the y axis.
    orgas: organisms to include, in plotting order.
    param_and_reso_list: (params, resolution) pairs, one per series. The
        default list is only read, never modified.
    """
    nan = float("NaN")
    f = figure(x_axis_type="log", y_axis_type="log", x_axis_label="genome size", y_axis_label=key,
               title="subsample: " + subsample_orga, y_range=y_range,
               frame_width=PLOT_SIZE, frame_height=PLOT_SIZE)
    conf_x_axis_ticker(f, subsample_orga)
    f.xaxis.major_label_orientation = pi/2
    conf_extra(f)

    n_colors = len(COLOR_PALETTE)
    n_markers = len(SCATTER_PALETTE)
    for idx, (params, reso) in enumerate(param_and_reso_list):
        xs = []
        ys = []
        for orga in orgas:
            entry = data[orga][subsample_orga][params][reso]
            if entry["build_status"] == "OK":
                xs.append(data[orga]["genome_size"])
                ys.append(parse_key(entry[key]))
            else:
                xs.append(nan)
                ys.append(nan)

        # Wrap around the palettes so that more series than palette entries
        # (e.g. the 24 default pairs) no longer raise IndexError. The marker
        # is shifted by one on every wrap so colour/marker pairs stay
        # distinct; the first len(COLOR_PALETTE) series look as before.
        color = COLOR_PALETTE[idx % n_colors]
        marker = SCATTER_PALETTE[(idx + idx // n_colors) % n_markers]
        label = params + " | " + str(int(reso)//1000) + "kbp"

        f.scatter(xs, ys, marker=marker, line_color=color,
                  legend_label=label, size=10, fill_color=None)
        f.line(xs, ys, color=color, legend_label=label)

    f.legend.title = "params | min res"
    f.legend.location = "top_left"
    f.output_backend = OUTPUT_BACKEND
    show(row([f]), notebook_handle=True)

def conf_axis_bytes(f, y_axis=True, only_major=False):
    """Install fixed size ticks with kb/mb/gb/tb labels on one axis of `f`.

    The base unit is one kilobyte (KB = 1), so a value of 1000 is labelled
    "1 mb".

    f: the bokeh figure to configure.
    y_axis: configure the y axis when true, the x axis otherwise.
    only_major: when true, every 1-9 / 10-90 / 100-900 step of each unit
        becomes a labelled major tick and there are no minor ticks; when
        false, only a few round values are major ticks and the fine steps
        are minor ticks.
    """
    KB = 1
    MB = 1000 * KB
    GB = 1000 * MB
    TB = 1000 * GB
    units = [1, KB, MB, GB, TB]

    # 1..9, 10..90 and 100..900 multiples of every unit.
    minor_ticks = []
    for unit in units:
        minor_ticks.extend(range(unit, unit * 10, unit))
        minor_ticks.extend(range(10 * unit, unit * 100, 10 * unit))
        minor_ticks.extend(range(100 * unit, unit * 1000, 100 * unit))

    if only_major:
        tick = FixedTicker(ticks=minor_ticks, minor_ticks=[])
        multipliers = [*range(10), *range(10, 100, 10), *range(100, 1000, 100)]
        label_overrides = {}
        # Later units overwrite earlier ones for equal keys (1 and KB are
        # both 1, so the " kb" labels win over the suffix-less ones).
        for suffix, unit in zip(["", " kb", " mb", " gb", " tb"], units):
            for multiplier in multipliers:
                label_overrides[multiplier * unit] = str(multiplier) + suffix
    else:
        tick = FixedTicker(ticks=[KB, MB, 10*MB, 100*MB, GB, 10*GB, 100*GB, TB],
                           minor_ticks=minor_ticks)
        label_overrides = {
            KB: "1 kb",
            MB: "1 mb",
            10*MB: "10 mb",
            100*MB: "100 mb",
            GB: "1 gb",
            10*GB: "10 gb",
            100*GB: "100 gb",
            TB: "1 tb",
            10*TB: "10 tb",
            100*TB: "100 tb",
        }

    axis = f.yaxis if y_axis else f.xaxis
    axis.ticker = tick
    axis.major_label_overrides = label_overrides

def conf_y_axis_bytes(f):
    """Apply the coarse byte-size tick layout to the y axis of `f`."""
    conf_axis_bytes(f, y_axis=True, only_major=False)
def conf_y_axis_bytes_detailed(f):
    """Apply the fine-grained (all ticks labelled) byte-size layout to the y axis of `f`."""
    conf_axis_bytes(f, only_major=True, y_axis=True)
def conf_x_axis_bytes(f):
    """Apply the coarse byte-size tick layout to the x axis of `f`."""
    conf_axis_bytes(f, y_axis=False, only_major=False)

# Series to draw: the default parameter set at each of the three resolutions.
# (Previously also considered: (NO_PARAMS[1], RESO[1]), (NO_PARAMS[2], RESO[1]),
#  (NO_PARAMS[3], RESO[1]).)
param_and_reso_list = [(NO_PARAMS[0], reso) for reso in RESO[:3]]

plot_by_genome_size(
    conf_extra=conf_y_axis_bytes,
    subsample_orga="even_1",
    orgas=ORGANISMS,
    param_and_reso_list=param_and_reso_list,
)
#plot_by_genome_size(conf_extra=conf_y_axis_bytes, subsample_orga="even_2", orgas=ORGANISMS[1:], 
#                    param_and_reso_list=param_and_reso_list)


# Print genome size, index size and their ratio for every successfully built
# index at the first resolution.
for orga in ORGANISMS:
    x = data[orga]["even_1"][NO_PARAMS[0]][RESO[0]]
    if x["build_status"] != "OK":
        continue
    print(
        f"{round(int(data[orga]['genome_size']) / 1000000, 2)}mbp",
        f"{round(int(x['index_size']) / 1000000, 2)}gb",
        f"{int(x['index_size']) / int(data[orga]['genome_size'])}gb / mbp",
    )

4.64mbp 0.13gb 0.027385077554284553gb / mbp
12.16mbp 0.26gb 0.021566647651723003gb / mbp
50.08mbp 10.04gb 0.20049335655517087gb / mbp
143.73mbp 94.07gb 0.6545421057492437gb / mbp


In [5]:
def conf_y_axis_time(f):
    """Install fixed time ticks (values in seconds) on the y axis of `f`.

    Major ticks sit at 1 s, 10 s, 1 min, 10 min, 1 h, 10 h and 1 day and are
    labelled accordingly; minor ticks fill in single seconds, tens of
    seconds, minutes, tens of minutes and hours.
    """
    MIN = 60
    HOUR = 60 * MIN
    DAY = 24 * HOUR
    f.yaxis.ticker = FixedTicker(
        ticks=[1, 10, MIN, 10*MIN, HOUR, 10*HOUR, DAY],
        # range(10) rather than range(9): range(9) stops at 8 and silently
        # dropped the 9-second minor tick.
        minor_ticks=[*range(10), *range(10, MIN, 10), *range(2*MIN, 10*MIN, MIN),
                     *range(20*MIN, HOUR, 10*MIN), *range(2*HOUR, DAY, HOUR)])
    f.yaxis.major_label_overrides = {1: "1 sec", 10: "10 sec", MIN: "1 min", 10*MIN: "10 min",
                                     HOUR: "1 hour", 10*HOUR: "10 hours", DAY: "1 day"}
def conf_y_axis_time_detailed(f):
    """Install densely labelled time ticks (values in seconds) on the y axis of `f`.

    Every single second below 10 s, every 10 s below a minute, every minute
    below 10 min, every 10 min below an hour and every hour below a day is a
    major tick, labelled in sec / min / hour / day.
    """
    MIN = 60
    HOUR = 60 * MIN
    DAY = 24 * HOUR
    # range(10) rather than range(9): the label table below already covers
    # 0..9 seconds, but range(9) left the 9-second tick out.
    f.yaxis.ticker = FixedTicker(ticks=[*range(10), *range(10, MIN, 10), *range(MIN, 10*MIN, MIN), 
                                        *range(10*MIN, HOUR, 10*MIN), *range(1*HOUR, DAY, HOUR)])
    # Later dictionaries win for equal keys (only key 0 is shared).
    f.yaxis.major_label_overrides = {x: str(x) + " sec" for x in [*range(10), *range(10, MIN, 10)]} | \
                                    {x*60: str(x) + " min" for x in [*range(10), *range(10, MIN, 10)]} | \
                                    {x*60*60: str(x) + " hour" for x in [*range(24)]} | \
                                    {x*60*60*24: str(x) + " day" for x in [*range(10)]}


def parse_time(s):
    """Convert a "<minutes>m<seconds>s" string (e.g. "1m30.5s") to seconds.

    The text before the single "m" is the minutes; the text after it, minus
    its final character (the unit suffix), is the seconds. Returns a float.
    """
    minutes_text, seconds_text = s.split("m")
    minutes = float(minutes_text)
    seconds = float(seconds_text[:-1])
    return minutes * 60 + seconds

# Index build time versus genome size; y axis spans one second to one day.
plot_by_genome_size(
    key="index_build_time",
    parse_key=parse_time,
    conf_extra=conf_y_axis_time,
    subsample_orga="even_1",
    y_range=(1, 60*60*24),
    param_and_reso_list=param_and_reso_list,
)
#plot_by_genome_size(key="index_build_time", parse_key=parse_time, conf_extra=conf_y_axis_time,  subsample_orga="even_2",
#                    y_range=(1, 60*60*24), param_and_reso_list=param_and_reso_list)

In [6]:
def to_readable_num_interactions(x):
    """Format a count compactly: plain below 1000, then "k", then "m".

    `x` is converted with int() first; thousands and millions are rounded
    to two decimals. Returns a string.
    """
    count = int(x)
    if count >= 1000000:
        return str(round(count / 1000000, 2)) + "m"
    if count >= 1000:
        return str(round(count / 1000, 2)) + "k"
    return str(count)


def conf_y_axis_bytes_linear(f):
    """Install evenly spaced size ticks for a linear y axis of `f`.

    With one kilobyte as the base unit, major ticks are placed every 100 mb
    (0..900 mb) and every 10 gb (0..90 gb); minor ticks every gb up to 99 gb.
    """
    KB = 1
    MB = 1000 * KB
    GB = 1000 * MB

    hundred_mb_steps = [x*MB*100 for x in range(10)]
    ten_gb_steps = [x*GB*10 for x in range(10)]
    f.yaxis.ticker = FixedTicker(ticks=hundred_mb_steps + ten_gb_steps,
                                 minor_ticks=[x*GB for x in range(100)])

    # gb labels first, then mb labels, so the shared key 0 ends up as "0 mb".
    label_overrides = {}
    for x in range(10):
        label_overrides[x*GB*10] = str(x*10) + " gb"
    for x in range(10):
        label_overrides[x*MB*100] = str(x*100) + " mb"
    f.yaxis.major_label_overrides = label_overrides

def plot_by_genome_size(key="index_size", parse_key=lambda x: x, conf_extra=conf_y_axis_bytes,
                        param_and_reso_list=[(param, reso) for param in PARAMS for reso in RESO],
                        subsample_orga=ORGA_SUBSAMPLE, subsamples=SUBSAMPLES, y_range=(4000000, 25000000),
                        height=PLOT_SIZE, x_axis=True):
    """Plot one index metric against the number of unique interactions.

    All points belong to a single organism; each x position is one of its
    subsamples. One scatter+line series is drawn per (params, resolution)
    pair. Builds whose status is not "OK" contribute NaN.

    key: name of the per-index entry to plot (also used as y-axis label).
    parse_key: converts the stored value of `key` into a number.
    conf_extra: callable receiving the figure to configure the y axis, or
        None to skip that step.
    param_and_reso_list: (params, resolution) pairs, one per series. The
        default list is only read, never modified.
    subsample_orga: organism key into `data` (shown in the title).
    subsamples: subsample keys, one per x position.
    y_range: (min, max) of the y axis.
    height: frame height of the figure.
    x_axis: whether the x axis is visible.
    """
    nan = float("NaN")
    f = figure(x_axis_label="number of unique interactions", 
               y_axis_label=key, title="organism: "+ subsample_orga, frame_width=PLOT_SIZE, frame_height=height,
               y_range=y_range)
    if conf_extra is not None:
        conf_extra(f)

    # The x axis depends only on the subsamples, so it is configured once
    # instead of once per series.
    orga_data = data[subsample_orga]
    xs = [orga_data[samples]["num_unique_interactions"] for samples in subsamples]
    f.xaxis.ticker = FixedTicker(ticks=list(xs))
    f.xaxis.major_label_overrides = {
        orga_data[samples]["num_unique_interactions"]:
            str(int(float(samples)*100) if float(samples) >= 0.01 else float(samples)*100) + "%: "
            + to_readable_num_interactions(orga_data[samples]["num_unique_interactions"])
        for samples in subsamples
    }
    f.xaxis.major_label_orientation = pi/4

    n_colors = len(COLOR_PALETTE)
    n_markers = len(SCATTER_PALETTE)
    for idx, (params, reso) in enumerate(param_and_reso_list):
        ys = []
        for samples in subsamples:
            entry = orga_data[samples][params][reso]
            ys.append(parse_key(entry[key]) if entry["build_status"] == "OK" else nan)

        # The colour lookup used to be unwrapped and raised IndexError for
        # more than len(COLOR_PALETTE) series (the default list has 24).
        # The marker is shifted by one on every wrap so that colour/marker
        # pairs stay distinct; the first len(COLOR_PALETTE) series are
        # unchanged.
        color = COLOR_PALETTE[idx % n_colors]
        marker = SCATTER_PALETTE[(idx + idx // n_colors) % n_markers]
        label = params + " | " + str(int(reso)//1000) + "kb "

        f.scatter(xs, ys, marker=marker, line_color=color,
                  legend_label=label, size=10, fill_color=None)
        f.line(xs, ys, color=color, legend_label=label)

    f.legend.title = "params | min res"
    f.legend.location = "top_left"
    f.output_backend = OUTPUT_BACKEND
    f.toolbar_location = "below"
    f.xaxis.visible = x_axis
    show(row([f]), notebook_handle=True)
# Index size over subsample fraction, drawn as two stacked panels with
# different y ranges; only the lower panel shows the x axis.
plot_by_genome_size(
    param_and_reso_list=param_and_reso_list,
    subsamples=SUBSAMPLES_2,
    height=PLOT_SIZE*2//3,
    x_axis=False,
    conf_extra=conf_y_axis_bytes_linear,
)
plot_by_genome_size(
    param_and_reso_list=param_and_reso_list,
    subsamples=SUBSAMPLES_2,
    y_range=(500000, 650000),
    height=PLOT_SIZE//3,
    conf_extra=conf_y_axis_bytes_linear,
)
# Index build time over subsample fraction (one minute to one hour).
plot_by_genome_size(
    key="index_build_time",
    parse_key=parse_time,
    conf_extra=conf_y_axis_time,
    param_and_reso_list=param_and_reso_list,
    subsamples=SUBSAMPLES_2,
    y_range=(60, 60*60),
)


In [7]:
def plot_by_params(key="index_size", parse_key=lambda x: x, conf_extra=conf_y_axis_bytes,
                   y_range=(100000, 10000000), subsample_orga=SUBSAMPLE_ORGA[0]):
    """Draw a grouped bar chart of one index metric per parameter combination.

    The categorical x axis has one group per entry of PARAMS_TUPLE; inside a
    group there is one bar per resolution (RESO in reverse order). Only
    indices of organism ORGA_SUBSAMPLE whose build status is "OK" are drawn.

    key: name of the per-index entry to plot (also used as y-axis label).
    parse_key: converts the stored value of `key`; the result is passed
        through float().
    conf_extra: callable receiving the figure to configure the y axis, or
        None to skip that step.
    y_range: (min, max) of the log-scaled y axis.
    subsample_orga: subsample key looked up under ORGA_SUBSAMPLE.
    """
    space = .75
    n_reso = len(RESO)
    f = figure(x_axis_label="index parameters", y_axis_type="log",
               y_axis_label=key, x_range=FactorRange(*PARAMS_TUPLE),
               title="subsample:" + subsample_orga,
               frame_width=PLOT_SIZE, frame_height=PLOT_SIZE, y_range=y_range)
    # Axis configuration does not depend on the resolution, so it is applied
    # once here instead of once per loop iteration.
    if conf_extra is not None:
        conf_extra(f)

    for idx, reso in enumerate(RESO[::-1]):
        # NOTE(review): these offsets are the bar centres; with three
        # resolutions they are -0.375, -0.125 and 0.125, i.e. the group is
        # not centred on its factor. Kept as is - confirm whether intended.
        offset = space*idx/n_reso - space/2

        # Collect x and y in one pass so that both always apply the same
        # build-status filter.
        xs = []
        ys = []
        for params_tuple, params in zip(PARAMS_TUPLE, PARAMS):
            entry = data[ORGA_SUBSAMPLE][subsample_orga][params][reso]
            if entry["build_status"] != "OK":
                continue
            xs.append(list(params_tuple) + [offset])
            ys.append(float(parse_key(entry[key])))

        # Reversed colour order relative to RESO; equals the former
        # hard-coded `2 - idx` for three resolutions but also works for any
        # other number of resolutions.
        color = COLOR_PALETTE[(n_reso - 1 - idx) % len(COLOR_PALETTE)]
        f.vbar(x=xs, top=ys, bottom=1, color=color, legend_label=str(int(reso)//1000) + "kb", 
               width=space/n_reso)

    f.legend.title = "min res"
    f.legend.location = "top_left"
    f.output_backend = OUTPUT_BACKEND
    show(row([f]), notebook_handle=True)

# Index size per parameter combination (all defaults).
plot_by_params()
# Index build time per parameter combination (one to ten minutes).
plot_by_params(
    key="index_build_time",
    parse_key=parse_time,
    conf_extra=conf_y_axis_time_detailed,
    y_range=(60, 60*10),
)
#plot_by_params(subsample_orga=SUBSAMPLE_ORGA[1], y_range=(70000, 150000))

In [11]:
# The three categories that individual timing entries are aggregated into.
EXPORT = "export"
PROCESS = "process"
QUERY = "query"
# Category order; plot_runtime_orgas stacks the areas cumulatively in this order.
GROUP_LIST = [
    PROCESS,
    EXPORT,
    QUERY,
]

# Maps every timing key (the keys of one replicate's "query_times" entry, as
# consumed by avg_runtime_by_group / runtime_by_group) to its category.
# A key missing from this table makes those functions raise KeyError.
GROUPS = {
    "bin_size": PROCESS,
    "render_area": PROCESS,
    "active_chroms": PROCESS,
    "axis_coords": PROCESS,
    "symmetry_setting": PROCESS,
    "bin_coords": PROCESS,
    "decay_coords": PROCESS,
    "flat_decay": PROCESS,
    "intersection_type_setting": PROCESS,
    "active_replicates_setting": PROCESS,
    "active_coverage": PROCESS,
    "coverage_values": QUERY,
    "bin_values": QUERY,
    "decay_values": QUERY,
    "flat_bins": PROCESS,
    "in_group_setting": PROCESS,
    "normalized_bins": PROCESS,
    "dist_dep_dec_normalized_bins": PROCESS,
    "decay_cds": EXPORT,
    "color_palette": PROCESS,
    "between_group_setting": PROCESS,
    "combined_bins": PROCESS,
    "colored_bins": EXPORT,
    "active_annotation": PROCESS,
    "annotation_values": QUERY,
    "annotation_cds": EXPORT,
    "annotation_color_palette": EXPORT,
    "active_annotation_cds": EXPORT,
    "heatmap_cds": EXPORT,
    "heatmap_export": EXPORT,
    "track_export": EXPORT,
    "scaled_bins": PROCESS,
    "ticks": PROCESS,
    "coverage_tracks": PROCESS,
    "rendered_palette": PROCESS,
    "anno_filters": PROCESS,
    "longest_common_substring": PROCESS,
    "canvas_size": PROCESS,
    "mapping_quality_setting": PROCESS,
    "directionality_setting": PROCESS,
    "ranked_slices_cds": EXPORT,
    "grid_seq_coverage": PROCESS,
    "radicl_coverage": PROCESS,
    "rna_associated_genes_filter": PROCESS,
    "rna_associated_background": PROCESS,
    "grid_seq_samples": PROCESS,
    "radicl_seq_samples": PROCESS,
    "dataset_id_per_repl": PROCESS,
    "active_chroms_length": PROCESS,
    "virtual4c_coords": PROCESS,
    "flat_4c": PROCESS,
    "ploidy_corr": PROCESS,
    "ice_coords": PROCESS,
    # spelling of the next key must match the key in the recorded data
    "coverage_track_precurser": PROCESS,
    "sample_coords": PROCESS,
    # the empty key is counted as processing time
    "": PROCESS,
    "bin_coord_cds": EXPORT,
}

def avg_runtime_by_group(d, reps=range(REPLICATES)): #range(REPLICATES)):
    """Accumulate mean timings per category over the given replicates.

    d: indexable by replicate; d[r] maps a timing key to a list of
        measurements.
    reps: replicate indices to include.

    For every replicate and key, the mean of the measurement list is added
    to the total of the key's category (see GROUPS). Returns a dict with
    one total per entry of GROUP_LIST.

    NOTE(review): the per-replicate means are summed, not divided by the
    number of replicates - confirm that this is the intended "average".
    """
    totals = dict.fromkeys(GROUP_LIST, 0)
    for rep in reps:
        for name, samples in d[rep].items():
            group = GROUPS[name]
            if group not in totals:
                continue
            # alternative that was tried: the median of `samples`
            totals[group] += sum(samples) / len(samples)
    return totals

def runtime_by_group(d, reps=range(REPLICATES)): #range(REPLICATES)):
    """Return per-measurement timings summed by category.

    d: indexable by replicate; d[r] maps a timing key to a list of
        measurements. The i-th elements of all lists of one replicate are
        treated as belonging together.
    reps: replicate indices to include.

    Returns a list with one dict per (replicate, measurement index); each
    dict holds the summed time of every category in GROUP_LIST.
    """
    per_measurement = []
    for rep in reps:
        timings = d[rep]
        names = list(timings.keys())
        # zip stops at the shortest list, so only complete rows are used.
        for row_values in zip(*timings.values()):
            totals = {group: 0 for group in GROUP_LIST}
            for name, value in zip(names, row_values):
                group = GROUPS[name]
                if group in totals:
                    totals[group] += value
            per_measurement.append(totals)
    return per_measurement

def get_median(xs):
    """Return the middle element of `xs` in sorted order.

    For an even number of elements this is the upper of the two middle
    values (no averaging). Raises IndexError for an empty input.
    """
    ordered = sorted(xs)
    middle = len(xs) // 2
    return ordered[middle]

#print(runtime_by_group(data[ORGANISMS[0]][SUBSAMPLE_ORGA[0]][NO_PARAMS[0]][RESO[2]]["query_times"])[:10])

def total_runtimes(d, percentile_to_exclude=0.1, reps=[0]): # range(REPLICATES)):
    """Return sorted total runtimes with both tails trimmed.

    d: indexable by replicate; d[r] maps a timing key to a list of
        measurements.
    percentile_to_exclude: fraction (0..1) of the sorted totals dropped at
        each end.
    reps: replicate indices to include (only read, never modified).

    For every replicate, the i-th measurements of all keys are summed into
    one total. The totals are sorted ascending and the lowest and highest
    `percentile_to_exclude` share is removed.
    """
    totals = []
    for rep in reps:
        for row_values in zip(*d[rep].values()):
            totals.append(sum(row_values))
    totals = sorted(totals)

    count = len(totals)
    first_kept = int(count * percentile_to_exclude)
    end_kept = int(count * (1 - percentile_to_exclude))
    return totals[first_kept:end_kept]

def plot_runtime_orgas(data, subsample=SUBSAMPLE_ORGA[0], params=NO_PARAMS[0], reso=RESO[2]):
    """Draw the per-category runtime as stacked areas over genome size.

    data: nested measurement dictionary (organism -> subsample -> params ->
        resolution).
    subsample, params, reso: keys selecting the index to show per organism.

    Only organisms whose build and query status are both "OK" are included.
    The categories of GROUP_LIST are stacked cumulatively in list order;
    values are divided by 1000 before plotting.
    """
    f = figure(x_axis_label="genome size", y_axis_label="runtime (ms)",
               title=subsample + " " + params + " " + reso, x_axis_type="log")

    conf_x_axis_ticker(f, subsample)
    f.xaxis.major_label_orientation = pi/4

    def pick_data(orga):
        return data[orga][subsample][params][reso]

    orga_list = []
    for orga in ORGANISMS:
        if pick_data(orga)["build_status"] == "OK" and pick_data(orga)["query_status"] == "OK":
            orga_list.append(orga)

    avg_runtimes = [avg_runtime_by_group(pick_data(orga)["query_times"]) for orga in orga_list]

    # Draw from the last category down to the first.
    for idx in range(len(GROUP_LIST) - 1, -1, -1):
        group = GROUP_LIST[idx]
        xs = [data[orga]["genome_size"] for orga in orga_list]
        below = GROUP_LIST[:idx]
        upto = GROUP_LIST[:idx+1]
        lower = [sum(x[g] for g in below)/1000 for x in avg_runtimes]
        upper = [sum(x[g] for g in upto)/1000 for x in avg_runtimes]

        f.varea(xs, lower, upper, color=COLOR_PALETTE[idx], legend_label=group)

    f.legend.title = "min res | params"
    f.legend.location = "bottom_right"
    show(f)

# for reso in RESO:
#     for params in NO_PARAMS:
#         plot_runtime_orgas(data, params=params, reso=reso)

In [12]:
import pandas as pd
from bokeh.transform import dodge

def plot_runtime_subsample(pick_data, samples_list, y_range=(32.5, 36.5), height=PLOT_SIZE, x_axis=True, 
                           x_label="interactions", color_idx=1, granuality=0.1):
    """Draw one violin-style runtime distribution per sample on a categorical x axis.

    pick_data: callable mapping a sample key to its measurement dict (must
        provide "build_status", "query_status" and "query_times").
    samples_list: list of (sample key, display name) pairs; the display
        names become the x-axis factors in this order.
    y_range: (min, max) of the y axis.
    height: frame height of the figure.
    x_axis: whether the x axis is visible; when false the y-axis label is
        empty and minor y ticks are disabled.
    x_label: x-axis label.
    color_idx: index into COLOR_PALETTE for the fill colour.
    granuality: bin width of the histogram that shapes each violin, in the
        plotted y units.

    Samples whose build or query status is not "OK" are skipped.
    """
    # Categorical x range: one factor per display name.
    x_range_list = []
    for samples_key, samples_name in samples_list:
        x_range_list.append(samples_name)
    f = figure(x_axis_label=x_label, y_axis_label="runtime (ms)" if x_axis else "", 
               x_range=FactorRange(*x_range_list), frame_width=PLOT_SIZE, frame_height=height, y_range=y_range)#, y_axis_type="log")
    f.xaxis.major_label_orientation = pi/2
    f.xaxis.visible = x_axis
    f.yaxis.ticker.desired_num_ticks = 3
    if not x_axis:
        f.yaxis.ticker.num_minor_ticks = 0
    f.toolbar_location = "below"
    f.output_backend = OUTPUT_BACKEND

    for samples_key, samples_name in samples_list:
        if pick_data(samples_key)["build_status"] == "OK" and pick_data(samples_key)["query_status"] == "OK":
            # One total per measurement: sum over all categories, divided by 1000.
            ys = [sum([v/1000 for k, v in x.items() if k in GROUP_LIST]) for x in runtime_by_group(pick_data(samples_key)["query_times"])]
            x = samples_name
            #print(x, "N=", len(ys))
            c = COLOR_PALETTE[color_idx]

            # Histogram of ys with bins of width `granuality`, starting at
            # the multiple of `granuality` at or below the minimum.
            ys_min = min(ys)
            ys_max = max(ys)
            ys_start = int(ys_min / granuality) * granuality
            vs = [0] * (int((ys_max - ys_start) / granuality) + 1)
            for y in ys:
                idy = int((y-ys_start)/granuality)
                vs[idy] += 1
            #max_val = max(vs) * 2.25
            # Bin counts are scaled by half the number of measurements to get
            # the horizontal offset inside the factor.
            max_val = len(ys) / 2


            # Left (negative offsets) and right (positive offsets) outline,
            # both closed with a zero-width point at each end.
            xs = [(x, 0)] + [(x, -v/max_val) for v in vs] + [(x, 0)]
            xs2 = [(x, 0)] + [(x, v/max_val) for v in vs] + [(x, 0)]
            # Matching y values: ys_start, then the bin centres, plus one
            # extra centre for the closing point (len(vs) + 2 values in total).
            ys_2 = [ys_start] + [k*granuality+granuality/2 + ys_start for k in range(len(vs) + 1)]

            # Walk up the left outline and back down the right one.
            f.patch(xs + xs2[::-1], 
                    ys_2 + ys_2[::-1], 
                    fill_color=c, line_color="black", line_width=0.2)
            # Mark the median of the measurements.
            f.dot(x=[x], y=[get_median(ys)], color="black", size=20)

    show(f)

# Each section below rebinds the global `pick_data` and then draws the same
# distribution twice with different y ranges and heights (a short panel
# without x axis and a tall panel with it).

# --- subsamples of ORGA_SUBSAMPLE at RESO[1] ---
def pick_data(samples):
    return data[ORGA_SUBSAMPLE][samples][NO_PARAMS[0]][RESO[1]]
# (subsample key, "<percent>%: <number of unique interactions>") pairs, in
# reversed SUBSAMPLES_2 order. Only subsamples that built and queried OK at
# RESO[1] are kept; this list is reused for the other resolutions below.
samples_list = [(samples, str(int(float(samples)*100) if float(samples) >= 0.01 else float(samples)*100) + "%: " + to_readable_num_interactions(data[ORGA_SUBSAMPLE][samples]["num_unique_interactions"])) for samples in SUBSAMPLES_2 
                if pick_data(samples)["build_status"] == "OK" and pick_data(samples)["query_status"] == "OK"][::-1]
plot_runtime_subsample(pick_data, samples_list, y_range=(63, 250), height=PLOT_SIZE//6, x_axis=False)
plot_runtime_subsample(pick_data, samples_list, y_range=(58, 63), height=PLOT_SIZE*5//6)

# --- same subsamples at RESO[0] ---
def pick_data(samples):
    return data[ORGA_SUBSAMPLE][samples][NO_PARAMS[0]][RESO[0]]
plot_runtime_subsample(pick_data, samples_list, y_range=(66, 250), height=PLOT_SIZE//6, x_axis=False, color_idx=0)
plot_runtime_subsample(pick_data, samples_list, y_range=(58, 66), height=PLOT_SIZE*5//6, color_idx=0)

# --- same subsamples at RESO[2] ---
def pick_data(samples):
    return data[ORGA_SUBSAMPLE][samples][NO_PARAMS[0]][RESO[2]]
plot_runtime_subsample(pick_data, samples_list, y_range=(36, 250), height=PLOT_SIZE//6, x_axis=False, color_idx=2)
plot_runtime_subsample(pick_data, samples_list, y_range=(32, 36), height=PLOT_SIZE*5//6, color_idx=2)

# --- parameter combinations at RESO[1]; here the sample key is a PARAMS entry ---
def pick_data(samples):
    return data[ORGA_SUBSAMPLE][SUBSAMPLE_ORGA[0]][samples][RESO[1]]
# Display name: the sign characters of the three flags, e.g. "+   +   -".
samples = [(a, "   ".join([b_[0] for b_ in b]))for a,b in zip(PARAMS, PARAMS_TUPLE)]
plot_runtime_subsample(pick_data, samples, y_range=(65, 250), 
                       x_axis=False, height=PLOT_SIZE//6, x_label="active filters")
plot_runtime_subsample(pick_data, samples, y_range=(40, 65), 
                       height=PLOT_SIZE*5//6, x_label="active filters")

# --- organisms at RESO[1]; here the sample key is an (organism, subsample) pair ---
def pick_data(samples):
    """Return the measurements of the default-parameter index at RESO[1].

    samples: (organism key, subsample key) tuple.
    """
    return data[samples[0]][samples[1]][NO_PARAMS[0]][RESO[1]]
# ((organism key, subsample key), axis label) pairs. The label of
# s_cerevisiae was misspelled "S. cereviaiae" on the plot axis.
samples = [(("e_coli", "even_1"), "E. coli"), (("s_cerevisiae", "even_1"), "S. cerevisiae"), 
           (("t_brucei", "even_1"), "T. brucei"), (("drosophila_m", "even_1"), "D. melanogaster"), ]
# Two panels with different y ranges: a short one without x axis and a tall one with it.
plot_runtime_subsample(pick_data, samples, y_range=(150, 400), x_axis=False, height=PLOT_SIZE//6, x_label="genome size")
plot_runtime_subsample(pick_data, samples, y_range=(49, 150), height=PLOT_SIZE*5//6, x_label="genome size", 
                       granuality=0.5)