# Top-k upper bounds

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json

from typing import Dict, List

import numpy as np
import pandas as pd

from analysis import selection
from postgres import explain
from transform import db, mosp

In [3]:
def read_workload(path: str) -> pd.DataFrame:
    """Load a workload result CSV and prepare it for the bound comparison.

    Args:
        path: Location of the CSV file holding the workload results.

    Returns:
        A DataFrame indexed by ``label``. The ``query`` column is parsed with
        ``mosp.MospQuery.parse``, ``ues_bounds`` and ``query_result`` are decoded
        from JSON, ``query_rt_total`` is exposed as ``rt`` and
        ``ues_final_bound`` is cast to float.
    """
    converters = {
        "query": mosp.MospQuery.parse,
        "ues_bounds": json.loads,
        "query_result": json.loads,
    }
    raw = pd.read_csv(path, converters=converters)

    # Selection is keyed on "label" and ranked by total runtime; presumably this
    # keeps a single repetition per query -- the exact criterion and the ordering
    # applied by reorder() live in analysis.selection.
    selected = selection.best_query_repetition(raw, ["label"], performance_col="query_rt_total")
    ordered = selection.reorder(selected)

    # Chain non-mutating calls instead of inplace=True / attribute assignment so
    # the frame handed back by `selection` is never modified behind its back.
    return (
        ordered
        .rename(columns={"query_rt_total": "rt"})
        .set_index("label")
        .astype({"ues_final_bound": "float"})
    )

In [4]:
# Load both result sets through the same preparation steps (see read_workload).
df_top1, df_topk = map(
    read_workload,
    ("workloads/job-ues-results-top1.csv", "workloads/job-ues-results-topk.csv"),
)

In [5]:
# Columns carried over from each run; the suffixes mark which run a value came from.
compared_cols = ["query", "rt", "ues_bounds", "ues_final_bound"]

# Align the two runs on their shared index (the query label set by read_workload).
# merge defaults to an inner join, so only labels present in both runs survive.
bounds_df = df_top1[compared_cols].merge(
    df_topk[compared_cols],
    left_index=True,
    right_index=True,
    suffixes=("_top1", "_topk"),
)
bounds_df[["ues_final_bound_top1", "ues_final_bound_topk"]]

Unnamed: 0_level_0,ues_final_bound_top1,ues_final_bound_topk
label,Unnamed: 1_level_1,Unnamed: 2_level_1
1a,1.132360e+05,950.0
1b,9.151500e+06,950.0
1c,6.900800e+04,950.0
1d,9.151500e+06,950.0
2a,1.333058e+07,1522172.0
...,...,...
32a,7.480087e+06,7480087.0
32b,7.480087e+06,7480087.0
33a,9.404592e+09,3744966.0
33b,3.134864e+09,1806332.0


In [6]:
# Ratio of the top-1 final bound to the top-k final bound for each query;
# a value of 1 means both runs ended with the same bound.
bounds_df["bound_reduction"] = bounds_df["ues_final_bound_top1"].div(bounds_df["ues_final_bound_topk"])
bounds_df["bound_reduction"].describe()

count     113.000000
mean      543.461335
std      1428.227092
min         1.000000
25%         8.757606
50%        31.294728
75%       312.403167
max      9633.157895
Name: bound_reduction, dtype: float64