In [1]:
import numpy as np

import pandas as pd
import ast
from typing import List, Tuple

def load_val_lists(csv_path: str, row_idx: int = 0) -> Tuple[List[float], List[float]]:
    """Read the serialized label and prediction lists from one row of a CSV.

    The ``val_labels`` and ``val_preds`` cells of the selected row are parsed
    with ``ast.literal_eval`` and returned as Python objects.

    Args:
        csv_path: Path of the CSV file handed to ``pd.read_csv``.
        row_idx: Index label of the row to read (looked up with ``.loc``).

    Returns:
        ``(val_labels, val_preds)`` exactly as parsed from the two cells.
        They are presumably lists of floats, as the annotation states, but
        nothing here validates that.
    """
    results = pd.read_csv(csv_path)

    # Fetch both raw cells first so a missing column or row surfaces before
    # any parsing is attempted.
    raw_cells = [results.loc[row_idx, column] for column in ("val_labels", "val_preds")]

    parsed_labels, parsed_preds = (ast.literal_eval(cell) for cell in raw_cells)
    return parsed_labels, parsed_preds

def percentile_qerror(y_true, y_pred, percentile, min_runtime=1e-3):
    """Return the requested percentile of the Q-error between two series.

    The per-pair Q-error is
    ``max(true, pred) / max(min(true, pred), min_runtime)``.
    Pairs in which either value is not strictly positive (NaN included, since
    ``nan > 0`` is False) are dropped before the ratio is computed.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values, expected to have the same
            shape as ``y_true``.
        percentile: Percentile forwarded to ``np.percentile`` (0-100).
        min_runtime: Lower bound applied to the denominator only.

    Returns:
        The percentile as a Python ``float``; NaN when no pair survives the
        positivity filter (including empty input).
    """
    y_true = np.asarray(y_true, float)
    y_pred = np.asarray(y_pred, float)

    mask = (y_true > 0) & (y_pred > 0)
    y_true = y_true[mask]
    y_pred = y_pred[mask]

    # Make the "nothing to evaluate" case explicit rather than relying on how
    # np.percentile treats an empty array.
    if y_true.size == 0:
        return float("nan")

    # NOTE(review): only the denominator is clamped, so when both values are
    # below min_runtime the ratio can drop below 1 - confirm this is intended.
    qerr = np.maximum(y_true, y_pred) / np.maximum(
        np.minimum(y_true, y_pred),
        min_runtime,
    )

    return float(np.percentile(qerr, percentile))


def compute_first_three_columns(val_labels, val_preds):
    """Compute MAE, median Q-error and 99th-percentile Q-error.

    Pairs in which either the label or the prediction is not strictly
    positive are excluded from all three metrics.

    Args:
        val_labels: Array-like of reference values.
        val_preds: Array-like of predicted values, expected to have the same
            shape as ``val_labels``.

    Returns:
        A dict with keys ``"MAE"``, ``"Med_QErr"`` and ``"P99_QErr"``. All
        three are NaN when no pair survives the positivity filter.
    """
    y_true = np.asarray(val_labels, float)
    y_pred = np.asarray(val_preds, float)

    mask = (y_true > 0) & (y_pred > 0)
    y_true = y_true[mask]
    y_pred = y_pred[mask]

    # Nothing left to score: report NaN explicitly instead of triggering
    # NumPy's empty-slice warning.
    if y_true.size == 0:
        nan = float("nan")
        return {"MAE": nan, "Med_QErr": nan, "P99_QErr": nan}

    return {
        "MAE": float(np.mean(np.abs(y_pred - y_true))),
        "Med_QErr": percentile_qerror(y_true, y_pred, 50),
        "P99_QErr": percentile_qerror(y_true, y_pred, 99),
    }

In [2]:
Databases = ["TPCDS", "SSB", "IMDB"]
Types = ["Baseline", "Local"]

# Evaluate every (database, model type) combination and print its metrics.
for Database in Databases:
    for Type in Types:
        csv_path = f"./Results/{Type}_{Database}/test_{Type}_{Database}_MSELoss_3090_{Type}.csv"
        try:
            val_labels, val_preds = load_val_lists(csv_path)
            metrics = compute_first_three_columns(val_labels, val_preds)
        except Exception as exc:
            # Still best-effort (a missing or malformed result file must not
            # stop the loop), but say why the combination was skipped and let
            # KeyboardInterrupt / SystemExit propagate.
            print(f"Skipping {Database} {Type}: {type(exc).__name__}: {exc}")
            continue
        print(f"{Database} {Type} {metrics}")


TPCDS Baseline {'MAE': 26.849326768046932, 'Med_QErr': 4.324217709070258, 'P99_QErr': 239.79635472940865}
TPCDS Local {'MAE': 13.57109046753993, 'Med_QErr': 2.9016602831312257, 'P99_QErr': 102.84955683506543}
SSB Baseline {'MAE': 37.839113728055416, 'Med_QErr': 3.7848845237605335, 'P99_QErr': 324.9060228831072}
SSB Local {'MAE': 7.752025321594574, 'Med_QErr': 1.9815465920057969, 'P99_QErr': 38.21646919779795}
IMDB Baseline {'MAE': 4.241209208396089, 'Med_QErr': 8.546962274151381, 'P99_QErr': 33.74164515844489}
IMDB Local {'MAE': 3.980900615819642, 'Med_QErr': 8.21284045027615, 'P99_QErr': 28.502782521063764}
