In [69]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

sys.path.append("..")
from otc.metrics.metrics import effective_spread

import wandb

from tqdm.auto import tqdm

from scipy.stats import wilcoxon
import warnings


In [2]:
# Global notebook configuration. Later cells derive the W&B artifact names,
# the data subset to load and the labels of the exported tables from these.
exchange = "cboe"
models = ["gbm", "classical"]
subset = "test"  # alternative: "all" (train + val + test)
strategy = "transfer"  # alternative noted by the author: "supervised"


In [3]:
# `key` is the prefix of the labels / file names of the exported LaTeX tables;
# `dataset` is the W&B artifact holding the raw (unscaled) data.
key = f"{exchange}_all_{strategy}_{subset}"
dataset = f"fbv/thesis/{exchange}_{strategy}_raw:latest"

os.environ["GCLOUD_PROJECT"] = "flowing-mantis-239216"

run = wandb.init(project="thesis", entity="fbv")

# load unscaled data
artifact = run.use_artifact(dataset)  # type: ignore
data_dir = artifact.download()

# load results: one artifact per model, download directories are kept in the
# same order as `models` (later cells index `result_dirs` by position)
result_dirs = []
for model in models:
    results = f"fbv/thesis/{exchange}_{model}_{strategy}_{subset}:latest"
    artifact = run.use_artifact(results)  # type: ignore
    result_dir = artifact.download()
    result_dirs.append(result_dir)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkarelze[0m ([33mfbv[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact cboe_transfer_raw:latest, 663.29MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.0
[34m[1mwandb[0m: Downloading large artifact cboe_gbm_transfer_test:latest, 67.57MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.0
[34m[1mwandb[0m: Downloading large artifact cboe_classical_transfer_test:latest, 365.80MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:8.2


In [30]:
# Columns read from the raw data set: the true label (`buy_sell`) plus the
# inputs of the robustness buckets built further below.
# p. 35-38 (page reference kept from the author; presumably the paper whose
# robustness tables are replicated -- TODO confirm)
columns = [
    "buy_sell",
    "EXPIRATION",
    "QUOTE_DATETIME",
    "TRADE_SIZE",
    "TRADE_PRICE",
    "ask_ex",
    "bid_ex",
    "myn",
    "OPTION_TYPE",
    "issue_type",
]


# Load the evaluation frame for the configured `subset`. Only `columns` are
# read from disk.
if subset == "all":
    # full sample = train, validation and test split, concatenated in this order
    eval_data = pd.concat(
        [
            pd.read_parquet(
                Path(data_dir, split), engine="fastparquet", columns=columns
            )
            for split in ("train_set", "val_set", "test_set")
        ]
    )

elif subset == "test":
    eval_data = pd.read_parquet(
        Path(data_dir, "test_set"), engine="fastparquet", columns=columns
    )

else:
    # fail here instead of with a NameError on `eval_data` further down
    raise ValueError(f"Unknown subset {subset!r}; expected 'all' or 'test'.")


# Read the predictions of every model and prefix the columns with the model
# name, so that the column index becomes (model, classifier).
results = []
for i, model in tqdm(enumerate(models)):
    result = pd.read_parquet(Path(result_dirs[i], "results"), engine="fastparquet")
    result.columns = pd.MultiIndex.from_product([[model], result.columns])
    results.append(result)

# column-wise concatenation of all models' predictions
results_data = pd.concat(results, axis=1, names=models)

# sanity check: predictions and evaluation data must have the same number of rows
assert len(eval_data) == len(results_data)

# NOTE: plain assignment, i.e. an alias and not a copy -- the in-place edits
# applied to `X_print` below also modify `eval_data`.
X_print = eval_data

del results


2it [00:02,  1.28s/it]


In [31]:
results_data


Unnamed: 0_level_0,gbm,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical
Unnamed: 0_level_1,gbm(classical-size),tick(all),tick(ex),quote(best),quote(ex),lr(ex),lr(best),rev_lr(ex),rev_lr(best),emo(ex),...,rev_emo(best),clnv(ex),clnv(best),rev_clnv(ex),rev_clnv(best),trade_size(ex)->tick(all),trade_size(ex)->quote(best),trade_size(ex)->quote(best)->quote(ex),quote(best)->quote(ex),trade_size(ex)->quote(best)->depth(best)->quote(ex)->depth(ex)->rev_tick(ex)
index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
24700819,-1,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,...,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0
24700261,-1,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
24700842,-1,1.0,1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,1.0,...,-1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0
24695039,1,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,-1.0,...,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0
24700841,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37155409,1,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
37155410,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0
37104659,-1,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0
37108365,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0


### Robustness Checks

In [32]:
# prepare columns for printing
# Time to maturity in calendar months: difference of the monthly periods of
# expiration and quote time; `.n` extracts the integer number of months.
X_print["ttm"] = (
    X_print["EXPIRATION"].dt.to_period("M")
    - X_print["QUOTE_DATETIME"].dt.to_period("M")
).apply(lambda x: x.n)

X_print["year"] = X_print["QUOTE_DATETIME"].dt.year

# Trade size buckets. `pd.cut` uses right-closed intervals by default, so the
# first bucket is technically (-1, 1] although it is labelled "(0,1]".
bins_tradesize = [-1, 1, 3, 5, 11, np.inf]
trade_size_labels = ["(0,1]", "(1,3]", "(3,5]", "(5,11]", ">11"]
X_print["TRADE_SIZE_binned"] = pd.cut(
    X_print["TRADE_SIZE"], bins_tradesize, labels=trade_size_labels
)

# p. 38
# Year buckets: (2004, 2007] -> "2005-2007", (2007, 2010] -> "2008-2010",
# afterwards one bucket per year.
bins_years = [2004, 2007, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017]
year_labels = [
    "2005-2007",
    "2008-2010",
    "2011",
    "2012",
    "2013",
    "2014",
    "2015",
    "2016",
    "2017",
]
X_print["year_binned"] = pd.cut(X_print["year"], bins_years, labels=year_labels)

# p. 37
# Time-to-maturity buckets in months (right-closed intervals).
bins_ttm = [-1, 1, 2, 3, 6, 12, np.inf]
ttm_labels = [
    "ttm <= 1 month",
    "ttm (1-2] month",
    "ttm (2-3] month",
    "ttm (3-6] month",
    "ttm (6-12] month",
    "ttm > 12 month",
]
X_print["ttm_binned"] = pd.cut(X_print["ttm"], bins_ttm, labels=ttm_labels)

# Security type
# see 3.0a-mb-explanatory-data-analysis.ipynb
# Raw codes are collapsed into three groups; codes missing from the mapping
# become NaN (behaviour of `Series.map` with a dict).
X_print["issue_type"] = X_print["issue_type"].map(
    {
        "0": "Stock options",
        "A": "Index option",
        "7": "Others",
        "F": "Others",
        "%": "Others",
        " ": "Others",
    }
)

# Moneyness p. 38
# Moneyness buckets (right-closed intervals).
bins_myn = [-1, 0.7, 0.9, 1.1, 1.3, np.inf]
myn_labels = [
    "myn <= 0.7",
    "myn (0.7-0.9]",
    "myn (0.9-1.1]",
    "myn (1.1-1.3]",
    "myn > 1.3",
]
X_print["myn_binned"] = pd.cut(X_print["myn"], bins_myn, labels=myn_labels)

# mid
# Midpoint of the exchange bid and ask; NaN whenever either quote is missing.
mid_ex = 0.5 * (X_print["ask_ex"] + X_print["bid_ex"])
X_print["mid"] = mid_ex

# p. 31
def map_quotes(x):
    """Classify the location of a trade price relative to the quotes.

    Args:
        x: row-like mapping with the keys "TRADE_PRICE", "bid_ex", "ask_ex"
            and "mid".

    Returns:
        One of "at quote", "outside", "at mid", "inside" or "other". "other" is
        returned when none of the comparisons holds, e.g. for missing (NaN)
        quotes.
    """
    price = x["TRADE_PRICE"]
    bid, ask = x["bid_ex"], x["ask_ex"]

    # trade exactly at the bid or at the ask
    if price == bid or price == ask:
        return "at quote"

    # trade below the bid or above the ask
    if price < bid or price > ask:
        return "outside"

    mid = x["mid"]
    if price == mid:
        return "at mid"

    # TODO: See my mail to C Grauer. How to handle unknowns?
    strictly_inside = bid < price < mid or mid < price < ask
    return "inside" if strictly_inside else "other"


# Row-wise classification of the trade price relative to the quotes.
X_print["prox_q_binned"] = X_print[["TRADE_PRICE", "mid", "bid_ex", "ask_ex"]].apply(
    map_quotes, axis=1
)

# clean up empty buckets, as they cause empty groups in the result set generation
X_print["year_binned"] = X_print["year_binned"].cat.remove_unused_categories()
X_print["myn_binned"] = X_print["myn_binned"].cat.remove_unused_categories()
X_print["ttm_binned"] = X_print["ttm_binned"].cat.remove_unused_categories()


# Drop the raw inputs of the buckets. NOTE: in-place, so this cell cannot be
# re-run without re-loading the data first.
X_print.drop(
    columns=[
        "EXPIRATION",
        "QUOTE_DATETIME",
        "TRADE_SIZE",
        "ttm",
        "myn",
        "ask_ex",
        "bid_ex",
        "year",
    ],
    inplace=True,
)


In [33]:
X_print.head(20)


Unnamed: 0_level_0,buy_sell,TRADE_PRICE,OPTION_TYPE,issue_type,TRADE_SIZE_binned,year_binned,ttm_binned,myn_binned,mid,prox_q_binned
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
24700819,-1,1.73,C,Others,"(0,1]",2015,ttm (1-2] month,myn (0.9-1.1],,other
24700261,-1,0.6,C,Stock options,"(1,3]",2015,ttm <= 1 month,myn (0.9-1.1],0.8,at quote
24700842,-1,1.3,C,Others,"(1,3]",2015,ttm <= 1 month,myn (0.9-1.1],,other
24695039,1,1.28,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],,other
24700841,-1,1.04,C,Others,>11,2015,ttm <= 1 month,myn (0.9-1.1],1.0,at quote
182536,-1,3.3,P,Others,"(0,1]",2015,ttm (3-6] month,myn (0.7-0.9],,other
24700164,1,0.25,C,Stock options,>11,2015,ttm <= 1 month,myn (0.9-1.1],,other
24323781,1,64.239998,P,Index option,"(0,1]",2015,ttm (3-6] month,myn (0.9-1.1],,other
24700851,1,8.95,C,Stock options,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],8.5,inside
24698285,1,0.45,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],0.47,at quote


In [34]:
# Append the predictions column-wise (rows are aligned on the index). The
# prediction columns keep their (model, classifier) tuples as column labels.
# NOTE: re-running this cell appends the prediction columns a second time.
X_print = pd.concat([X_print, results_data], axis=1)


In [35]:
X_print.head()


Unnamed: 0_level_0,buy_sell,TRADE_PRICE,OPTION_TYPE,issue_type,TRADE_SIZE_binned,year_binned,ttm_binned,myn_binned,mid,prox_q_binned,...,"(classical, rev_emo(best))","(classical, clnv(ex))","(classical, clnv(best))","(classical, rev_clnv(ex))","(classical, rev_clnv(best))","(classical, trade_size(ex)->tick(all))","(classical, trade_size(ex)->quote(best))","(classical, trade_size(ex)->quote(best)->quote(ex))","(classical, quote(best)->quote(ex))","(classical, trade_size(ex)->quote(best)->depth(best)->quote(ex)->depth(ex)->rev_tick(ex))"
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
24700819,-1,1.73,C,Others,"(0,1]",2015,ttm (1-2] month,myn (0.9-1.1],,other,...,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0
24700261,-1,0.6,C,Stock options,"(1,3]",2015,ttm <= 1 month,myn (0.9-1.1],0.8,at quote,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
24700842,-1,1.3,C,Others,"(1,3]",2015,ttm <= 1 month,myn (0.9-1.1],,other,...,-1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0
24695039,1,1.28,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],,other,...,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0
24700841,-1,1.04,C,Others,>11,2015,ttm <= 1 month,myn (0.9-1.1],1.0,at quote,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Results Set Generation

In [36]:
# Replacement table used by `cell_str` to turn raw labels into LaTeX. The
# entries are applied one after another in insertion order, so the order
# matters. Backslashes are written as explicit `\\` because `\g` and `\_` are
# invalid escape sequences in regular string literals.
# NOTE(review): the "(" rule runs after the "(ex)" / "(best)" rules and also
# matches the parenthesis they produce (e.g. "tick(ex)" -> "Tick $(ex)");
# confirm that this is intended.
LUT = {
    "(ex)": " (ex)",
    "(best)": " (best)",
    "rev_": "Rev. ",
    "tick": "Tick",
    "quote": "Quote",
    "trade_size": "Trade Size",
    "depth": "Depth",
    "->": " $\\to$ ",
    "lr": "\\gls{LR}",
    "emo": "\\gls{EMO}",
    "clnv": "\\gls{CLNV}",
    "OPTION_TYPE": "Option Type",
    "(": "$(",  # put interval start in math env
    "]": "]$",  # put interval end in math env
    "_": "$\\_",
}

# Grouping column -> row-group heading in the exported tables. The keys also
# define which criteria are evaluated, and in which order.
LUT_INDEX = {
    "OPTION_TYPE": "Option Type",
    "issue_type": "Security Type",
    "TRADE_SIZE_binned": "Trade Size",
    "year_binned": "Year",
    "ttm_binned": "Time to Maturity",
    "myn_binned": "Moneyness",
    "prox_q_binned": "Location to Quote",
}


def cell_str(x):
    """Translate a raw label into its LaTeX representation.

    Every key of `LUT` is replaced by its value, one key after the other in the
    insertion order of `LUT`; later replacements therefore see the output of
    earlier ones.

    Args:
        x: label to translate (must support `.replace`, i.e. a string).

    Returns:
        The label with all replacements applied.
    """
    for needle in LUT:
        x = x.replace(needle, LUT[needle])
    return x


def highlight_max(s, props=""):
    """Return `props` for the maximum entries of `s` and "" for all others.

    Intended for `Styler.apply`. NaNs are ignored when determining the maximum;
    ties are all highlighted.

    Args:
        s: series of values.
        props: CSS string assigned to the maximum entries.

    Returns:
        Array with one CSS string per entry of `s`.
    """
    peak = np.nanmax(s.values)
    is_peak = s == peak
    return np.where(is_peak, props, "")


In [37]:
def set_tex_style(styler, caption, label):
    """Style a table and export it as LaTeX to `<label>.tex`.

    The maximum of every column is set in bold, index and column labels are
    translated with `cell_str`, and numbers are formatted with four decimals.

    Args:
        styler: pandas `Styler` of the table to export.
        caption: caption; a string or a tuple as accepted by the `Styler`.
        label: file stem of the output and, prefixed with "tab:", LaTeX label.

    Returns:
        The return value of `Styler.to_latex` (nothing useful when writing to
        a file -- the table is written to disk as a side effect).
    """
    styled = styler.set_caption(caption)
    # .hide(axis="index")
    # optionally set subset....
    styled = styled.apply(highlight_max, props="font-weight:bold;", axis=0)

    # prettify row and column labels
    styled = styled.format_index(cell_str, axis=0)
    styled = styled.format_index(cell_str, axis=1)

    styled = styled.format(
        precision=4, decimal=".", thousands=",", escape=False, hyperlinks=None
    )

    return styled.to_latex(
        f"{label}.tex",
        siunitx=True,
        position_float="centering",
        hrules=True,
        clines="skip-last;data",
        label="tab:" + label,
        caption=caption,
        convert_css=True,
    )


In [38]:
# All prediction columns, as (model, classifier) tuples.
classifiers = results_data.columns.tolist()
# Grouping columns to evaluate, in the order of `LUT_INDEX`.
criterions = list(LUT_INDEX)


In [48]:
results_data.columns.tolist()


[('gbm', 'gbm(classical-size)'),
 ('classical', 'tick(all)'),
 ('classical', 'tick(ex)'),
 ('classical', 'quote(best)'),
 ('classical', 'quote(ex)'),
 ('classical', 'lr(ex)'),
 ('classical', 'lr(best)'),
 ('classical', 'rev_lr(ex)'),
 ('classical', 'rev_lr(best)'),
 ('classical', 'emo(ex)'),
 ('classical', 'emo(best)'),
 ('classical', 'rev_emo(ex)'),
 ('classical', 'rev_emo(best)'),
 ('classical', 'clnv(ex)'),
 ('classical', 'clnv(best)'),
 ('classical', 'rev_clnv(ex)'),
 ('classical', 'rev_clnv(best)'),
 ('classical', 'trade_size(ex)->tick(all)'),
 ('classical', 'trade_size(ex)->quote(best)'),
 ('classical', 'trade_size(ex)->quote(best)->quote(ex)'),
 ('classical', 'quote(best)->quote(ex)'),
 ('classical',
  'trade_size(ex)->quote(best)->depth(best)->quote(ex)->depth(ex)->rev_tick(ex)')]

## Accuracy Calculation

In [55]:
X_print.columns


Index([                                                                                   'buy_sell',
                                                                                       'TRADE_PRICE',
                                                                                       'OPTION_TYPE',
                                                                                        'issue_type',
                                                                                 'TRADE_SIZE_binned',
                                                                                       'year_binned',
                                                                                        'ttm_binned',
                                                                                        'myn_binned',
                                                                                               'mid',
                                                                                  

In [70]:
# FIXME: Find better approach
# `VisibleDeprecationWarning` lives in `np.exceptions` (NumPy >= 1.25) and was
# removed from the top-level namespace in NumPy 2.0; fall back to the old
# location for older versions.
warnings.filterwarnings(
    "ignore",
    category=getattr(np, "exceptions", np).VisibleDeprecationWarning,
)

result_dfs = []

# The accuracy on the whole sample does not depend on the grouping criterion,
# so it is computed once per classifier instead of once per (criterion,
# classifier) pair.
y_true = X_print["buy_sell"].astype("int8")
overall_accuracy = {
    classifier: accuracy_score(y_true, X_print[classifier])
    for classifier in classifiers
}

for criterion in tqdm(criterions):
    results = []
    for classifier in tqdm(classifiers):
        acc_tot = overall_accuracy[classifier]

        # accuracy (in percent) within every bucket of the criterion
        res = (
            X_print.groupby([criterion])[["buy_sell", classifier]]
            .apply(
                lambda x: accuracy_score(x["buy_sell"].astype("int8"), x[classifier])
            )
            .mul(100)
            .rename(classifier)
        )
        # extra row with the accuracy on the whole sample
        res.loc["all"] = acc_tot * 100

        res.index.name = LUT_INDEX.get(criterion)
        results.append(res)

    # save aggregated results: one row per classifier, one column per bucket
    result_df = pd.concat(results, axis=1).T
    result_df.style.pipe(
        set_tex_style,
        caption=("long-tbd", "short-tbd"),
        label=f"{key.lower()}-{criterion.lower()}",
    )

    # store all result sets for later use
    result_dfs.append(result_df)


  0%|          | 0/7 [00:00<?, ?it/s]
  0%|          | 0/22 [00:00<?, ?it/s][A
  5%|▍         | 1/22 [00:02<00:53,  2.53s/it][A
  9%|▉         | 2/22 [00:05<01:01,  3.06s/it][A
 14%|█▎        | 3/22 [00:09<01:01,  3.23s/it][A
 18%|█▊        | 4/22 [00:12<00:59,  3.32s/it][A
 23%|██▎       | 5/22 [00:16<00:57,  3.36s/it][A
 27%|██▋       | 6/22 [00:19<00:54,  3.38s/it][A
 32%|███▏      | 7/22 [00:23<00:51,  3.41s/it][A
 36%|███▋      | 8/22 [00:26<00:47,  3.42s/it][A
 41%|████      | 9/22 [00:30<00:44,  3.42s/it][A
 45%|████▌     | 10/22 [00:33<00:41,  3.43s/it][A
 50%|█████     | 11/22 [00:36<00:37,  3.43s/it][A
 55%|█████▍    | 12/22 [00:40<00:34,  3.44s/it][A
 59%|█████▉    | 13/22 [00:43<00:30,  3.44s/it][A
 64%|██████▎   | 14/22 [00:47<00:27,  3.43s/it][A
 68%|██████▊   | 15/22 [00:50<00:24,  3.44s/it][A
 73%|███████▎  | 16/22 [00:54<00:20,  3.44s/it][A
 77%|███████▋  | 17/22 [00:57<00:17,  3.44s/it][A
 82%|████████▏ | 18/22 [01:00<00:13,  3.43s/it][A
 86%|██████

In [71]:
# Stack the per-criterion tables: after transposing, rows are (criterion
# heading, bucket) and columns are the (model, classifier) pairs.
master = pd.concat(result_dfs, axis=1, keys=list(LUT_INDEX.values())).T


In [72]:
master


Unnamed: 0_level_0,Unnamed: 1_level_0,gbm,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical,classical
Unnamed: 0_level_1,Unnamed: 1_level_1,gbm(classical-size),tick(all),tick(ex),quote(best),quote(ex),lr(ex),lr(best),rev_lr(ex),rev_lr(best),emo(ex),...,rev_emo(best),clnv(ex),clnv(best),rev_clnv(ex),rev_clnv(best),trade_size(ex)->tick(all),trade_size(ex)->quote(best),trade_size(ex)->quote(best)->quote(ex),quote(best)->quote(ex),trade_size(ex)->quote(best)->depth(best)->quote(ex)->depth(ex)->rev_tick(ex)
Option Type,C,71.726305,50.068165,48.639452,59.216378,61.855015,61.529399,58.942468,61.811671,59.181008,48.509719,...,52.577056,53.209126,53.629622,54.7993,55.050791,53.317029,64.006189,64.530429,59.71315,64.873171
Option Type,P,72.263779,50.885849,49.446618,60.486601,62.281154,62.011341,60.267345,62.209134,60.424101,48.803783,...,53.645393,53.296756,54.725847,54.279688,55.818674,54.005699,65.307006,65.93398,61.087352,66.048256
Option Type,all,71.970823,50.440161,49.006663,59.794251,62.048882,61.748653,59.545206,61.992492,59.746539,48.6435,...,53.063084,53.248992,54.128337,54.562908,55.40013,53.630332,64.597981,65.168958,60.338327,65.407762
Security Type,Index option,67.418516,48.293046,48.174157,53.302115,53.865554,53.648287,53.092002,53.84886,53.278027,42.012213,...,43.742599,43.576012,43.524736,44.947943,44.899767,55.215306,65.074845,65.093686,53.322625,65.723069
Security Type,Others,74.140903,50.500341,48.745063,63.366657,66.06476,65.669597,63.073521,65.92937,63.271074,50.246585,...,55.944851,55.151702,56.937947,56.66885,58.265498,52.849903,66.670364,67.526332,64.20409,67.427641
Security Type,Stock options,71.52169,50.628366,49.197934,58.955429,61.193841,60.924802,58.720834,61.166324,58.925248,48.636117,...,52.791816,53.419253,54.014448,54.64301,55.252876,53.797034,63.690007,64.197477,59.429686,64.537641
Security Type,all,71.970823,50.440161,49.006663,59.794251,62.048882,61.748653,59.545206,61.992492,59.746539,48.6435,...,53.063084,53.248992,54.128337,54.562908,55.40013,53.630332,64.597981,65.168958,60.338327,65.407762
Trade Size,"(0,1]",70.805402,49.300227,48.80638,56.12913,58.840355,58.62647,55.945127,58.81492,56.112292,46.875427,...,50.347241,50.730336,50.843193,52.383491,52.281691,53.681652,62.283829,62.794485,56.617879,62.976265
Trade Size,"(1,3]",71.384306,49.681809,48.943375,58.705402,62.018777,61.732442,58.466148,61.975068,58.674435,48.552952,...,52.041166,52.889873,52.810937,54.463642,54.203829,52.647729,63.065882,63.655184,59.292037,63.859694
Trade Size,"(3,5]",71.967362,50.049784,48.731932,59.453704,62.431819,62.139861,59.151767,62.387721,59.409662,48.989046,...,53.106561,53.488609,53.738755,55.014406,55.223074,53.293432,64.319036,64.920123,60.037675,65.150868


In [74]:
# Export the combined accuracy table. The caption tuple is ordered
# (full caption, short caption), matching the per-criterion exports.
master.style.pipe(
    set_tex_style, caption=("master-long", "master-short"), label=f"{key}-master"
)


## Effective Spread 🚧

In [None]:
classifiers


In [None]:
# collects one effective-spread table per grouping criterion
eff_dfs = []


def stats(x, classifier):
    """Compute effective-spread statistics of one classifier for one group.

    Args:
        x: frame with the columns "TRADE_PRICE", "mid" and `classifier`.
        classifier: column holding the predicted trade direction.

    Returns:
        Series with the entries "nominal" (result of `effective_spread` in
        mode "nominal") and "rel" (result in mode "relative", times 100).
    """

    def _spread(mode):
        # same inputs for both modes; only the mode differs
        return effective_spread(x[classifier], x["TRADE_PRICE"], x["mid"], mode=mode)

    summary = {
        "nominal": _spread("nominal"),
        "rel": _spread("relative") * 100,
    }

    # TODO (disabled by the author): Wilcoxon signed-rank test between the
    # spreads based on predicted and on true labels, e.g.
    # eff_spread_pred = effective_spread(x[classifier], x["TRADE_PRICE"], x["mid"], mode="none")
    # eff_spread_true = effective_spread(x["buy_sell"], x["TRADE_PRICE"], x["mid"], mode="none")
    # wilcoxon_res  = wilcoxon(eff_spread_pred, eff_spread_true, nan_policy="omit", zero_method="zsplit")
    # and report wilcoxon_res.statistic / wilcoxon_res.pvalue in the summary.

    return pd.Series(summary)


# For every grouping criterion, compute the effective-spread statistics of
# every classifier within each bucket and export the table as LaTeX.
for criterion in tqdm(criterions):
    results = []

    for classifier in tqdm(classifiers):
        # `stats` is called once per bucket with the bucket's rows
        res = X_print.groupby([criterion])[
            ["TRADE_PRICE", "mid", classifier, "buy_sell"]
        ].apply(stats, classifier)
        results.append(res)

    # save aggregated results
    # NOTE(review): `set_tex_style` sets the column maximum in bold, which was
    # written for accuracies -- confirm that this is wanted for spreads.
    result_df = pd.concat(results, axis=1, keys=classifiers).T
    result_df.style.pipe(
        set_tex_style,
        caption=(f"long-tbd", "short-tbd"),
        label=f"{key.lower()}-{criterion.lower()}-eff-spread",
    )

    # store all result sets for later use
    eff_dfs.append(result_df)


In [None]:
# Stack the per-criterion effective-spread tables. `keys` is passed as a list
# (a sequence, as documented for `pd.concat`), consistent with the accuracy
# master table.
master = pd.concat(eff_dfs, axis=1, keys=list(LUT_INDEX.values())).T


In [None]:
master


In [None]:
master.iloc[:, 0:20]


In [None]:
# columns 20-39; slice ends are exclusive, so the view has to start at 20
# (starting at 21 silently skipped column 20)
master.iloc[:, 20:40]


In [None]:
# column 40 up to and including the last one; `41:-1` skipped column 40 and
# dropped the last column
master.iloc[:, 40:]


In [None]:
# Export the combined effective-spread table. The caption tuple is ordered
# (full caption, short caption).
master.style.pipe(
    set_tex_style,
    caption=("master-long", "master-short"),
    label=f"{key}-master-eff-spread",
)


## Change in Parentheses 🅾️

```latex
# https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290
\begin{table}
    \centering
    \caption{test of combination with change}
    \label{tab:combo}
    \begin{tabular}{lSSSSSSSS}
        \toprule
        {} & \multicolumn{2}{l}{Index option} & \multicolumn{2}{l}{Others} & \multicolumn{2}{l}{Stock options} & \multicolumn{2}{l}{all} \\
        \midrule
        classical-size & 1.0 & \parl-56.42\parr & 2.0 & \parl-74.35\parr & -73.5 & \parl-143.93\parr & 5.0 & \parl-67.33\parr \\
        \bottomrule
        \end{tabular}
\end{table}
```

In [None]:
# Toy data for trying out `combine_results`: `foo` serves as the base table,
# `bar` as the revised table with identical index and columns.
foo = pd.DataFrame([[4, 3, 8, 5]])
bar = pd.DataFrame([[1.1, 2, 73, 5]], columns=foo.columns, index=foo.index)


In [None]:
def combine_results(revised: pd.DataFrame, base: pd.DataFrame) -> pd.DataFrame:
    """
    Generate print layout like in Grauer et al.

    Every column of `revised` is expanded into two sub-columns: "nom" holds the
    revised value and "pm" the change relative to `base` (rounded to two
    decimals), wrapped in the custom LaTeX bracket macros parl / parr.

    https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290

    # see p. https://texdoc.org/serve/siunitx/0

    Args:
        revised (pd.DataFrame): table with the new values.
        base (pd.DataFrame): reference table; must contain the columns of
            `revised` and share its index.

    Returns:
        pd.DataFrame: table with a two-level column index (column, "nom"/"pm"),
        sorted so that both sub-columns of a column are adjacent.
    """
    # first, second layer of column index
    midx = pd.MultiIndex.from_product([revised.columns, ["nom"]])

    # copy data from revised, add as (column, "nom")
    combo = pd.DataFrame(revised.values, index=revised.index, columns=midx)

    # compute all changes before adding columns, so that `combo` is only read
    # while its column index is still in its initial state
    changes = {
        col: (combo[(col, "nom")] - base[col]).round(2).astype(str)
        for col in revised.columns
    }

    for col, change in changes.items():
        # define custom brackets that are not parsed by siunitx; assign via the
        # tuple key -- `combo[[key]] = series` only works for single-row frames
        combo[(col, "pm")] = "\\parl" + change + "\\parr"

    # sort once to group together the sub-columns of every column
    return combo.sort_index(axis=1)


In [None]:
# Build the demo table (revised = bar, base = foo) and export it to
# `combo.tex` in the current working directory.
combo = combine_results(bar, foo)

# manually replace S with S[table-format=1.4(5)] if needed
combo.style.to_latex(
    f"combo.tex",
    siunitx=True,
    position_float="centering",
    hrules=True,
    clines="skip-last;data",
    label="tab:combo",
    caption="test of combination with change",
    multicol_align="l",
)


In [None]:
combo
