In [1]:
import os
import random
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

sys.path.append("..")
import warnings

import wandb
from otc.metrics.metrics import effective_spread
from scipy.stats import wilcoxon
from tqdm.auto import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Global run configuration. These constants select which W&B artifacts are
# loaded below and are combined into the key used for table labels/captions.
EXCHANGE = "cboe"  # alternative: "ise"
MODELS = ["fttransformer", "classical"]  # one results artifact is loaded per entry
SUBSET = "test"  # alternative: "all"
STRATEGY = "transfer"  # alternative: "supervised"


In [3]:
# KEY identifies this run in captions, labels and file names of exported tables.
KEY = f"{EXCHANGE}_{STRATEGY}_{SUBSET}"
DATASET = f"fbv/thesis/{EXCHANGE}_{STRATEGY}_raw:latest"

os.environ["GCLOUD_PROJECT"] = "flowing-mantis-239216"

run = wandb.init(project="thesis", entity="fbv")

# unscaled data set
data_dir = run.use_artifact(DATASET).download()  # type: ignore

# one downloaded results directory per model, in the order of MODELS
result_dirs = [
    run.use_artifact(  # type: ignore
        f"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}:latest"
    ).download()
    for model in MODELS
]


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkarelze[0m ([33mfbv[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact ise_supervised_raw:latest, 2589.45MB. 3 files... 
[34m[1mwandb[0m:   3 of 3 files downloaded.  
Done. 0:0:3.4
[34m[1mwandb[0m: Downloading large artifact ise_fttransformer_supervised_test:latest, 72.26MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:1.8
[34m[1mwandb[0m: Downloading large artifact ise_classical_supervised_test:latest, 499.12MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:1.1


In [4]:
# p. 35-38
# Only the columns needed for the robustness checks and spreads are read.
COLUMNS = [
    "buy_sell",
    "EXPIRATION",
    "QUOTE_DATETIME",
    "TRADE_SIZE",
    "TRADE_PRICE",
    "ask_ex",
    "bid_ex",
    "myn",
    "OPTION_TYPE",
    "issue_type",
]


def _read_split(name):
    """Read one split (e.g. ``"test_set"``) of the raw data, limited to ``COLUMNS``."""
    return pd.read_parquet(
        Path(data_dir, name), engine="fastparquet", columns=COLUMNS
    )


if SUBSET == "all":
    # train, validation and test set stacked in chronological order
    eval_data = pd.concat(
        [_read_split(split) for split in ("train_set", "val_set", "test_set")]
    )
elif SUBSET == "test":
    eval_data = _read_split("test_set")
else:
    # fail here instead of with a NameError on `eval_data` further down
    raise ValueError(f"Unknown SUBSET {SUBSET!r}; expected 'all' or 'test'.")


results = []
# `zip` has no length, hence pass `total` so the progress bar shows a percentage
for model, result_dir in tqdm(zip(MODELS, result_dirs), total=len(MODELS)):
    result = pd.read_parquet(Path(result_dir, "results"), engine="fastparquet")
    # prefix every prediction column with the model it stems from
    result.columns = pd.MultiIndex.from_product([[model], result.columns])
    results.append(result)

# `names=` was dropped: it only applies together with `keys=` and had no effect
results_data = pd.concat(results, axis=1)

assert len(eval_data) == len(
    results_data
), "evaluation data and predictions must cover the same trades"

# NOTE: alias, not a copy -- later in-place edits of X_print also change eval_data
X_print = eval_data

del results


2it [00:04,  2.06s/it]


In [5]:
# FIXME: select a subset of results for testing.
# (artifact, predictor) column keys kept for the evaluation. The gbm predictors
# (classical, classical-size, ml, semi-classical and their "-retraining"
# variants) are deliberately left out here.
SELECTED_RESULTS = [
    ("fttransformer", "fttransformer(classical)"),
    ("fttransformer", "fttransformer(classical-size)"),
    ("fttransformer", "fttransformer(ml)"),
    ("classical", "tick(ex)"),
    ("classical", "quote(ex)"),
    ("classical", "lr(ex)"),
    ("classical", "emo(ex)"),
    ("classical", "clnv(ex)"),
    ("classical", "quote(best)->quote(ex)"),
    (
        "classical",
        "trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)",
    ),
]

results_data = results_data[SELECTED_RESULTS]


In [6]:
results_data


Unnamed: 0_level_0,fttransformer,fttransformer,fttransformer,classical,classical,classical,classical,classical,classical,classical
Unnamed: 0_level_1,fttransformer(classical),fttransformer(classical-size),fttransformer(ml),tick(ex),quote(ex),lr(ex),emo(ex),clnv(ex),quote(best)->quote(ex),trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
39342191,-1,-1,-1,1,-1,-1,-1,-1,-1,-1
39342190,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
39342189,1,1,1,1,1,1,1,1,1,1
39342188,1,-1,-1,-1,-1,-1,-1,-1,-1,-1
39342187,-1,-1,-1,1,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...
49203742,-1,-1,-1,-1,1,1,1,1,1,1
49203743,1,1,1,-1,1,1,-1,1,1,1
49203744,1,1,-1,-1,-1,-1,-1,-1,-1,-1
49203745,1,1,1,-1,1,1,1,1,1,1


In [7]:
# Ordered substitutions applied by `cell_str` to title-cased labels. Order
# matters: earlier replacements are visible to later ones. Raw strings are used
# so that LaTeX backslashes are not parsed as (invalid) escape sequences.
LUT = {
    # NOTE(review): title-casing yields "(Ex)" and a different rule order, so
    # this key does not appear to match any label shown in this notebook.
    "Trade_Size(ex)->Quote(Best)->Depth(Best)->Quote(Ex)->Depth(Ex)->Rev_Tick(All)": r"\gls{GBM}",
    "(Ex)": " (Ex)",
    "(Best)": " (Best)",
    "(Classical)": " (Classical)",
    "(Classical-Size)": " (Classical, Size)",
    "Rev_": "Rev. ",
    "Trade_Size": "Trade Size",
    "Depth": "Depth",
    "->": r" $\to$ ",
    "Lr": r"\gls{LR}",
    "Emo": r"\gls{EMO}",
    "Clnv": r"\gls{CLNV}",
    "OPTION_TYPE": "Option Type",
    "_": r"$\_",
    "Gbm": r"\gls{GBM}",
}

# Grouping column -> heading used for it in the exported tables.
LUT_INDEX = {
    "OPTION_TYPE": "Option Type",
    "issue_type": "Security Type",
    "TRADE_SIZE_binned": "Trade Size",
    "year_binned": "Year",
    "ttm_binned": "Time to Maturity",
    "myn_binned": "Moneyness",
    "prox_q_binned": "Location to Quote",
    "all": "All trades",
}


def cell_str(x):
    """Turn a raw row/column label into its LaTeX display form.

    Args:
        x (str): raw label, e.g. ``"quote(best)->quote(ex)"``.

    Returns:
        str: the title-cased label with every ``LUT`` substitution applied
        in dictionary order.
    """
    # title-case everything first; the LUT keys are written in title case
    x = x.title()
    for orig, sub in LUT.items():
        x = x.replace(orig, sub)
    return x


def highlight_max(s, props=""):
    """Return ``props`` for every entry equal to the NaN-ignoring maximum of ``s``.

    Args:
        s: series of one row/column, as handed over by ``Styler.apply``.
        props (str): CSS string assigned to the maximal entries.

    Returns:
        np.ndarray: ``props`` where ``s`` is maximal, ``""`` elsewhere.
    """
    peak = np.nanmax(s.values)
    is_peak = s == peak
    return np.where(is_peak, props, "")


In [8]:
def set_tex_style(styler, caption, label, bold_axis=1):
    """Format a styler and write it as a LaTeX table to ``"{label}.tex"``.

    Args:
        styler: pandas ``Styler`` to export.
        caption: caption passed to both ``set_caption`` and ``to_latex``.
        label (str): file stem of the output; also used as ``"tab:" + label``.
        bold_axis (int): axis along which the maximum is set in bold.

    Returns:
        The return value of ``Styler.to_latex``.
    """
    styled = styler.set_caption(caption)
    # maximum per row / column in bold
    styled = styled.apply(highlight_max, props="font-weight:bold;", axis=bold_axis)
    styled = styled.format(
        precision=4, decimal=".", thousands=",", escape=False, hyperlinks=None
    )
    # prettify row labels (axis 0) and column labels (axis 1)
    for axis in (0, 1):
        styled = styled.format_index(cell_str, axis=axis)

    return styled.to_latex(
        f"{label}.tex",
        siunitx=True,
        position_float="centering",
        hrules=True,
        clines="skip-last;data",
        label="tab:" + label,
        caption=caption,
        convert_css=True,
    )


In [9]:
# column keys of all prediction columns in results_data
classifiers = results_data.columns.tolist()
# keys of LUT_INDEX; each one is used as a grouping column in the accuracy calculation
criterions = list(LUT_INDEX)


## Unclassified by method

In [10]:
results_data.head()


Unnamed: 0_level_0,fttransformer,fttransformer,fttransformer,classical,classical,classical,classical,classical,classical,classical
Unnamed: 0_level_1,fttransformer(classical),fttransformer(classical-size),fttransformer(ml),tick(ex),quote(ex),lr(ex),emo(ex),clnv(ex),quote(best)->quote(ex),trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
39342191,-1,-1,-1,1,-1,-1,-1,-1,-1,-1
39342190,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
39342189,1,1,1,1,1,1,1,1,1,1
39342188,1,-1,-1,-1,-1,-1,-1,-1,-1,-1
39342187,-1,-1,-1,1,-1,-1,-1,-1,-1,-1


In [11]:
# share of trades per predictor whose prediction equals 0, largest share first
zero_share = results_data.eq(0.0).sum(axis=0).div(len(results_data.index))
unclassified = zero_share.sort_values(ascending=False).to_frame(name="unclassified")


In [12]:
# Export the share of unclassified trades as a LaTeX table.
unclassified.style.pipe(
    set_tex_style,
    # (long, short) caption; the short one previously lacked the f-prefix and
    # referenced `{key}`, so the braces ended up verbatim in the caption
    caption=(f"{KEY}-unclassified-long", f"{KEY}-unclassified-short"),
    # NOTE(review): "unclassfied" is misspelled, but the label doubles as file
    # name and LaTeX reference, so it is left unchanged here.
    label=f"{KEY.lower()}-unclassfied",
    bold_axis=0,
)
unclassified


Unnamed: 0,Unnamed: 1,unclassified
classical,tick(ex),0.124762
classical,quote(ex),0.060195
classical,quote(best)->quote(ex),0.037738
classical,emo(ex),0.023059
classical,clnv(ex),0.019444
classical,lr(ex),0.001618
classical,trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all),4.3e-05
fttransformer,fttransformer(classical),0.0
fttransformer,fttransformer(classical-size),0.0
fttransformer,fttransformer(ml),0.0


## Fill in unclassified

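Unclassified trades are encoded as `0`. They are replaced below with a random draw from `{-1, 1}`, using a seeded generator so the result is reproducible.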

In [13]:
rng = np.random.default_rng(42)

# random -1 / 1 draw for every cell; only consumed where a prediction is missing
filler = pd.DataFrame(
    rng.choice(a=[-1, 1], size=results_data.shape),
    index=results_data.index,
    columns=results_data.columns,
)

# 0 -> nan -> random draw from [-1, 1]
results_data = results_data.replace(0, np.nan).fillna(filler)


### Robustness Checks

In [14]:
# prepare columns for printing
X_print["ttm"] = (
    X_print["EXPIRATION"].dt.to_period("M")
    - X_print["QUOTE_DATETIME"].dt.to_period("M")
).apply(lambda x: x.n)

X_print["year"] = X_print["QUOTE_DATETIME"].dt.year

bins_tradesize = [-1, 1, 3, 5, 11, np.inf]
trade_size_labels = ["(0,1]", "(1,3]", "(3,5]", "(5,11]", ">11"]
X_print["TRADE_SIZE_binned"] = pd.cut(
    X_print["TRADE_SIZE"], bins_tradesize, labels=trade_size_labels
)

# p. 38
bins_years = [2004, 2007, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017]
year_labels = [
    "2005-2007",
    "2008-2010",
    "2011",
    "2012",
    "2013",
    "2014",
    "2015",
    "2016",
    "2017",
]
X_print["year_binned"] = pd.cut(X_print["year"], bins_years, labels=year_labels)

# p. 37
bins_ttm = [-1, 1, 2, 3, 6, 12, np.inf]
ttm_labels = [
    "<= 1",
    "(1-2]",
    "(2-3]",
    "(3-6]",
    "(6-12]",
    "> 12",
]
X_print["ttm_binned"] = pd.cut(X_print["ttm"], bins_ttm, labels=ttm_labels)

# Security type
# see 3.0a-mb-explanatory-data-analysis.ipynb
X_print["issue_type"] = X_print["issue_type"].map(
    {
        "0": "Stock option",
        "A": "Index option",
        "7": "Others",
        "F": "Others",
        "%": "Others",
        " ": "Others",
    }
)

# Moneyness p. 38
bins_myn = [-1, 0.7, 0.9, 1.1, 1.3, np.inf]
myn_labels = [
    "<= 0.7",
    "(0.7-0.9]",
    "(0.9-1.1]",
    "(1.1-1.3]",
    "> 1.3",
]
X_print["myn_binned"] = pd.cut(X_print["myn"], bins_myn, labels=myn_labels)

# mid p. 31 + extra category for unknowns
ask = X_print["ask_ex"]
bid = X_print["bid_ex"]
trade_price = X_print["TRADE_PRICE"]

# midpoint is only defined for ask >= bid, NaN otherwise
mid = np.where(ask >= bid, (ask + bid) * 0.5, np.nan)

# location of the trade price relative to the quotes; the first match wins
is_at_mid = trade_price == mid
is_inside = (bid < trade_price) & (trade_price < ask)
is_at_quotes = (trade_price == bid) | (ask == trade_price)
is_outside = (trade_price < bid) | (ask < trade_price)

prox_quotes = np.where(
    is_at_mid,
    0,
    np.where(
        is_inside,
        1,
        np.where(is_at_quotes, 2, np.where(is_outside, 3, 4)),
    ),
)  # 3 = outside, 4 = unclassifiable

bins_prox = [-np.inf, 0, 1, 2, 3, 4]
prox_labels = [
    "at mid",
    "inside",
    "at quotes",
    "outside",
    "unknown",
]

X_print["prox_q_binned"] = pd.cut(prox_quotes, bins_prox, labels=prox_labels)
X_print["mid"] = mid

# clean up empty buckets, as it causes empty grouping in result set generation
for binned_col in ("year_binned", "myn_binned", "ttm_binned", "prox_q_binned"):
    X_print[binned_col] = X_print[binned_col].cat.remove_unused_categories()

# constant column, so that "all trades" can be handled like any other grouping
X_print["all"] = "all"

# raw inputs of the binned columns are no longer needed
raw_columns = ["EXPIRATION", "QUOTE_DATETIME", "TRADE_SIZE", "ttm", "myn", "year"]
X_print.drop(columns=raw_columns, inplace=True)


In [15]:
# Append the prediction columns to the trade attributes (column-wise concat).
# NOTE(review): X_print is rebound from itself, so re-running this cell appends
# the prediction columns a second time.
X_print = pd.concat([X_print, results_data], axis=1)


In [16]:
X_print.head().T


index,39342191,39342190,39342189,39342188,39342187
buy_sell,-1,-1,1,-1,-1
TRADE_PRICE,3.5,6.38,0.13,0.04,0.4
ask_ex,3.85,6.8,0.13,0.1,0.55
bid_ex,3.5,6.35,0.07,0.04,0.31
OPTION_TYPE,P,C,P,C,C
issue_type,Stock option,Stock option,Others,Others,Others
TRADE_SIZE_binned,"(3,5]","(0,1]","(0,1]","(1,3]","(3,5]"
year_binned,2015,2015,2015,2015,2015
ttm_binned,<= 1,<= 1,<= 1,<= 1,<= 1
myn_binned,(0.9-1.1],(0.9-1.1],(0.7-0.9],(0.7-0.9],(0.9-1.1]


## Accuracy Calculation

In [17]:
# FIXME: Find better approach
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

result_dfs = []

for criterion in tqdm(criterions):
    results = []
    for classifier in tqdm(classifiers):
        res = (
            X_print.groupby([criterion])[["buy_sell", classifier]]
            .apply(
                lambda x: accuracy_score(x["buy_sell"].astype("int8"), x[classifier])
            )
            .mul(100)
            .rename(classifier)
        )
        #         acc_tot = accuracy_score(
        #             X_print["buy_sell"].astype("int8"), X_print[classifier]
        #         )

        #         res.loc["all"] = acc_tot * 100

        res.index.name = LUT_INDEX.get(criterion)
        results.append(res)

    # save aggregated results
    result_df = pd.concat(results, axis=1).T
    result_df.style.pipe(
        set_tex_style,
        caption=(f"long-tbd", "short-tbd"),
        label=f"{KEY.lower()}-{criterion.lower()}",
    )

    # store all result sets for later use
    result_dfs.append(result_df)


  0%|          | 0/8 [00:00<?, ?it/s]
  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:01<00:13,  1.50s/it][A
 20%|██        | 2/10 [00:02<00:10,  1.29s/it][A
 30%|███       | 3/10 [00:03<00:08,  1.22s/it][A
 40%|████      | 4/10 [00:05<00:07,  1.30s/it][A
 50%|█████     | 5/10 [00:06<00:06,  1.35s/it][A
 60%|██████    | 6/10 [00:08<00:05,  1.38s/it][A
 70%|███████   | 7/10 [00:09<00:04,  1.40s/it][A
 80%|████████  | 8/10 [00:10<00:02,  1.41s/it][A
 90%|█████████ | 9/10 [00:12<00:01,  1.43s/it][A
100%|██████████| 10/10 [00:13<00:00,  1.39s/it][A
 12%|█▎        | 1/8 [00:13<01:37, 13.87s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:01<00:13,  1.51s/it][A
 20%|██        | 2/10 [00:03<00:12,  1.51s/it][A
 30%|███       | 3/10 [00:04<00:10,  1.51s/it][A
 40%|████      | 4/10 [00:06<00:09,  1.62s/it][A
 50%|█████     | 5/10 [00:08<00:08,  1.70s/it][A
 60%|██████    | 6/10 [00:09<00:06,  1.74s/it][A
 70%|███████   | 7/10 [00:11<00

In [18]:
# Combine the per-criterion accuracy tables into one frame. The outer key of
# each table is the display name from LUT_INDEX, whose order matches `criterions`
# and therefore `result_dfs`.
master = pd.concat(result_dfs, axis=1, keys=list(LUT_INDEX.values())).T


In [19]:
master


Unnamed: 0_level_0,Unnamed: 1_level_0,fttransformer,fttransformer,fttransformer,classical,classical,classical,classical,classical,classical,classical
Unnamed: 0_level_1,Unnamed: 1_level_1,fttransformer(classical),fttransformer(classical-size),fttransformer(ml),tick(ex),quote(ex),lr(ex),emo(ex),clnv(ex),quote(best)->quote(ex),trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)
Option Type,C,61.552726,70.755864,72.199883,50.348015,56.338657,56.41652,53.500982,54.338435,58.881009,66.995
Option Type,P,62.942856,71.67292,72.934734,50.101214,57.74841,57.79798,54.118193,55.227019,60.772429,67.442413
Security Type,Index option,53.741246,56.59739,57.421532,50.893608,53.728014,53.788502,51.280161,51.725311,57.797688,58.524483
Security Type,Others,67.140345,75.306743,76.250359,49.868438,62.402034,62.49408,57.510196,59.066922,65.242783,70.13741
Security Type,Stock option,60.345998,69.743643,71.277467,50.369423,54.870101,54.923804,52.328385,53.061654,57.588565,66.150581
Trade Size,"(0,1]",60.574816,71.734785,72.884153,50.291338,55.288035,55.41833,51.93023,52.957108,58.219535,68.780466
Trade Size,"(1,3]",61.065922,72.174763,73.517439,50.207361,55.312322,55.398067,51.948962,52.864432,58.172238,68.845596
Trade Size,"(3,5]",61.223988,71.912851,73.24191,49.630289,55.723949,55.768923,52.821732,53.604825,58.488028,68.958994
Trade Size,"(5,11]",64.294364,70.025028,71.693434,50.145873,59.893257,59.899878,57.168629,57.977015,62.2928,63.295412
Trade Size,>11,65.643297,69.348202,70.844035,50.757787,60.71469,60.695406,57.231189,58.461494,63.427558,64.547913


In [20]:
# Export the combined accuracy table via set_tex_style (written to
# "{KEY}-master.tex"); the maximum is set in bold along axis 0.
master.style.pipe(
    set_tex_style,
    caption=("master-long", "master-short"),
    label=f"{KEY}-master",
    bold_axis=0,
)


## Effective Spread 💴

In [21]:
X_print.head()

Unnamed: 0_level_0,buy_sell,TRADE_PRICE,ask_ex,bid_ex,OPTION_TYPE,issue_type,TRADE_SIZE_binned,year_binned,ttm_binned,myn_binned,...,"(gbm, gbm(classical))","(gbm, gbm(classical-size))","(gbm, gbm(ml))","(classical, tick(ex))","(classical, quote(ex))","(classical, lr(ex))","(classical, emo(ex))","(classical, clnv(ex))","(classical, quote(best)->quote(ex))","(classical, trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all))"
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
39342191,-1,3.5,3.85,3.5,P,Stock option,"(3,5]",2015,<= 1,(0.9-1.1],...,-1,-1,-1,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
39342190,-1,6.38,6.8,6.35,C,Stock option,"(0,1]",2015,<= 1,(0.9-1.1],...,-1,-1,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
39342189,1,0.13,0.13,0.07,P,Others,"(0,1]",2015,<= 1,(0.7-0.9],...,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0
39342188,-1,0.04,0.1,0.04,C,Others,"(1,3]",2015,<= 1,(0.7-0.9],...,-1,-1,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
39342187,-1,0.4,0.55,0.31,C,Others,"(3,5]",2015,<= 1,(0.9-1.1],...,-1,-1,-1,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [22]:
def _per_trade_relative_spread(direction, trade_price, mid):
    """Per-trade effective spread divided by the midpoint.

    Args:
        direction: trade direction (true labels or predictions).
        trade_price: trade prices.
        mid: quote midpoints.

    Returns:
        Array of ``effective_spread(..., mode="none") / mid``. Entries with
        ``mid == 0`` are NaN, so that ``nan_policy="omit"`` drops them. The
        buffer was previously allocated with ``np.empty`` and those entries
        held uninitialised memory.
    """
    es = effective_spread(direction, trade_price, mid, mode="none")
    rel = np.full(es.shape, np.nan)
    np.divide(es, mid, out=rel, where=mid != 0)
    return rel


results = []

ask = X_print["ask_ex"]
bid = X_print["bid_ex"]
mid = X_print["mid"]
trade_price = X_print["TRADE_PRICE"]
buy_sell = X_print["buy_sell"]

# true relative effective spread per trade (not aggregated)
eps_true = _per_trade_relative_spread(buy_sell, trade_price, mid)

nom_true = effective_spread(buy_sell, trade_price, mid, mode="nominal")
rel_true = effective_spread(buy_sell, trade_price, mid, mode="relative")

# quoted spreads require ask >= bid: `mid` is NaN otherwise (see its
# construction), which nanmean skips; the nominal spread is masked explicitly
rel_quoted = np.nanmean((ask - bid) / mid)
nom_quoted = np.nanmean(np.where(ask >= bid, (ask - bid), np.nan))

for classifier in tqdm(classifiers):
    predicted = X_print[classifier]
    nom_pred = effective_spread(predicted, trade_price, mid, mode="nominal")
    rel_pred = effective_spread(predicted, trade_price, mid, mode="relative")

    # predicted relative effective spread per trade (not aggregated)
    eps_pred = _per_trade_relative_spread(predicted, trade_price, mid)

    # paired test of predicted vs. true per-trade relative spreads
    wilcoxon_res = wilcoxon(
        eps_pred, eps_true, nan_policy="omit", zero_method="zsplit"
    )

    res = pd.Series(
        {
            "nom_pred": nom_pred * 100,
            "rel_pred": rel_pred * 100,
            "statistic": wilcoxon_res.statistic,
            "pvalue": wilcoxon_res.pvalue,
        },
        name=classifier,
    )
    results.append(res)

# reference rows: true effective spread and quoted spread (no test statistic).
# `np.nan` is used because the alias `np.NaN` was removed in NumPy 2.0.
true_eff = pd.Series(
    {
        "nom_pred": nom_true * 100,
        "rel_pred": rel_true * 100,
        "statistic": np.nan,
        "pvalue": np.nan,
    },
    name="true_eff",
)
true_quoted = pd.Series(
    {
        "nom_pred": nom_quoted * 100,
        "rel_pred": rel_quoted * 100,
        "statistic": np.nan,
        "pvalue": np.nan,
    },
    name="true_quoted",
)

results.append(true_eff)
results.append(true_quoted)

results = pd.concat(results, axis=1)

  0%|          | 0/10 [00:00<?, ?it/s]

In [23]:
results.T.style.format("{:.3f}")


Unnamed: 0,nom_pred,rel_pred,statistic,pvalue
"('gbm', 'gbm(classical)')",7.414,9.186,15449198918565.0,0.0
"('gbm', 'gbm(classical-size)')",4.247,6.983,18996716578607.5,0.0
"('gbm', 'gbm(ml)')",3.741,6.513,19613712891116.5,0.0
"('classical', 'tick(ex)')",1.557,1.073,22593375152463.5,0.0
"('classical', 'quote(ex)')",16.333,16.207,9330692653233.0,0.0
"('classical', 'lr(ex)')",16.333,16.207,9330692653233.0,0.0
"('classical', 'emo(ex)')",4.647,8.443,21566549392574.5,0.0
"('classical', 'clnv(ex)')",11.626,13.284,18544330865464.0,0.0
"('classical', 'quote(best)->quote(ex)')",6.271,9.419,15830405879950.5,0.0
"('classical', 'trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)')",1.415,4.293,20692020255975.0,0.0


In [24]:
# Write the effective-spread table to the report directory as LaTeX.
# NOTE(review): unlike the tables exported through set_tex_style, no label
# prettifying or number formatting is applied here.
results.style.to_latex(
    f"../reports/Content/{KEY}-eff-spread.tex",
    siunitx=True,
    position_float="centering",
    hrules=True,
    clines="skip-last;data",
    label=f"tab:eff-{KEY}",
    caption=(f"long-eff-{KEY}", f"short-eff-{KEY}"),
    convert_css=True,
)


## Diffs 🔄️

In [21]:
# classical baselines

base = master[
    [
        ("classical", "quote(best)->quote(ex)"),
        (
            "classical",
            "trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)",
        ),
        (
            "classical",
            "trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)",
        ),
    ]
]

# my ml models
revised = master[
    [(MODELS[0], f"{MODELS[0]}(classical)"), (MODELS[0], f"{MODELS[0]}(classical-size)"), (MODELS[0], f"{MODELS[0]}(ml)")]
]


In [22]:
def combine_results(revised: pd.DataFrame, base: pd.DataFrame) -> pd.DataFrame:
    """
    Generate print layout like in Grauer et al.

    Every column of ``revised`` is paired position-wise with the column of
    ``base`` at the same position. The result holds, per revised column, the
    value itself (``"nom"``) and its difference to the baseline (``"pm"``,
    rounded to two decimals).

    https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290

    # see p. https://texdoc.org/serve/siunitx/0

    Args:
        revised (pd.DataFrame): results of the revised models; columns are a
            two-level index of which the second level is used as name.
        base (pd.DataFrame): baseline results with at least as many columns as
            ``revised``; rows are aligned on the index.

    Returns:
        pd.DataFrame: frame with columns ``(name, "nom")`` and ``(name, "pm")``,
        sorted by column label.
    """
    # first, second layer of column index
    names = revised.columns.get_level_values(1)
    nom_columns = pd.MultiIndex.from_product([names, ["nom"]])

    # copy data from revised, add as (column, "nom")
    combo = pd.DataFrame(revised.values, index=revised.index, columns=nom_columns)

    for position, (name, _) in enumerate(nom_columns):
        # difference to the baseline at the same column position
        combo[(name, "pm")] = (combo[(name, "nom")] - base.iloc[:, position]).round(2)

    # sort once at the end, instead of after every insertion
    return combo.sort_index(axis=1)


In [23]:
# Pair each ml variant with its classical baseline (position-wise) and write
# the resulting table to the report directory as LaTeX.
diff = combine_results(revised, base)

diff.style.to_latex(
    f"../reports/Content/diff-{KEY}.tex",
    siunitx=True,
    position_float="centering",
    hrules=True,
    clines="skip-last;data",
    label=f"tab:diff-{KEY}",
    caption=(f"long-diff-{KEY}", f"short-diff-{KEY}"),
    convert_css=True,
)


In [24]:
diff


Unnamed: 0_level_0,Unnamed: 1_level_0,fttransformer(classical),fttransformer(classical),fttransformer(classical-size),fttransformer(classical-size),fttransformer(ml),fttransformer(ml)
Unnamed: 0_level_1,Unnamed: 1_level_1,nom,pm,nom,pm,nom,pm
Option Type,C,61.552726,2.67,70.755864,3.76,72.199883,5.2
Option Type,P,62.942856,2.17,71.67292,4.23,72.934734,5.49
Security Type,Index option,53.741246,-4.06,56.59739,-1.93,57.421532,-1.1
Security Type,Others,67.140345,1.9,75.306743,5.17,76.250359,6.11
Security Type,Stock option,60.345998,2.76,69.743643,3.59,71.277467,5.13
Trade Size,"(0,1]",60.574816,2.36,71.734785,2.95,72.884153,4.1
Trade Size,"(1,3]",61.065922,2.89,72.174763,3.33,73.517439,4.67
Trade Size,"(3,5]",61.223988,2.74,71.912851,2.95,73.24191,4.28
Trade Size,"(5,11]",64.294364,2.0,70.025028,6.73,71.693434,8.4
Trade Size,>11,65.643297,2.22,69.348202,4.8,70.844035,6.3
