In [1]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

sys.path.append("..")
from otc.metrics.metrics import effective_spread

import wandb

from tqdm.notebook import tqdm

from scipy.stats import wilcoxon

In [2]:
# set here globally
exchange = "ise"
models = ["gbm", "classical"]
subset = "test" # "all"
strategy = "supervised"

key = f"{exchange}_all_{strategy}_{subset}"

In [12]:
os.environ["GCLOUD_PROJECT"] = "flowing-mantis-239216"

run = wandb.init(project="thesis", entity="fbv")

dataset = f"fbv/thesis/{exchange}_{strategy}_raw:latest"

# load unscaled data
artifact = run.use_artifact(dataset) # type: ignore
data_dir = artifact.download()

# load results
result_dirs = []
for model in models:
    results = f"fbv/thesis/{exchange}_{model}_{strategy}_{subset}:latest"
    artifact = run.use_artifact(results) # type: ignore
    result_dir = artifact.download()
    result_dirs.append(result_dir)


[34m[1mwandb[0m: Downloading large artifact ise_supervised_raw:latest, 3391.53MB. 3 files... 
[34m[1mwandb[0m:   3 of 3 files downloaded.  
Done. 0:0:0.2
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m: Downloading large artifact ise_classical_supervised_test:latest, 63.63MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.0


In [13]:
# p. 35-38
columns = [
    "buy_sell",
    "EXPIRATION",
    "QUOTE_DATETIME",
    "TRADE_SIZE",
    "TRADE_PRICE",
    "ask_ex",
    "bid_ex",
    "myn",
    "OPTION_TYPE",
    "issue_type"
]


if subset == "all":
    train = pd.read_parquet(
        Path(data_dir, "train_set_extended_60"), engine="fastparquet", columns=columns
    )
    val = pd.read_parquet(
        Path(data_dir, "val_set_extended_20"), engine="fastparquet", columns=columns
    )
    test = pd.read_parquet(
        Path(data_dir, "test_set_extended_20"), engine="fastparquet", columns=columns
    )
    eval_data = pd.concat([train,val,test])
    del train, val, test
    
elif subset == "test":
    eval_data = pd.read_parquet(
        Path(data_dir, "test_set_extended_20"), engine="fastparquet", columns=columns
    )

results_data = pd.read_parquet(
    Path(results_dir, "results"), engine="fastparquet"
)

results = []
for i, model in enumerate(models):
    result = pd.read_parquet(
    Path(result_dirs[i], "results"), engine="fastparquet"
    )
    results.append(result)

results_data = pd.concat(results, axis=1)

assert len(eval_data) == len(results_data)

X_print = eval_data

del results

### Robustness Checks

In [15]:
X_print.head()

Unnamed: 0,buy_sell,EXPIRATION,QUOTE_DATETIME,TRADE_SIZE,TRADE_PRICE,ask_ex,bid_ex,myn,OPTION_TYPE,issue_type
39342171,-1,2015-12-18,2015-11-06 09:30:00,2,0.52,0.6,0.52,0.921659,P,%
39342172,-1,2015-11-27,2015-11-06 09:30:00,1,7.82,8.15,7.6,1.001696,C,0
39342173,-1,2017-01-20,2015-11-06 09:30:00,1,28.889999,32.049999,28.799999,0.886115,C,0
39342174,1,2015-11-20,2015-11-06 09:30:00,1,2.25,2.25,1.85,1.009261,C,%
39342175,-1,2015-12-18,2015-11-06 09:30:00,1,1.7,1.95,1.7,0.99188,C,%


In [17]:

# prepare columns for printing
X_print["ttm"] = (
    X_print["EXPIRATION"].dt.to_period("M")
    - X_print["QUOTE_DATETIME"].dt.to_period("M")
).apply(lambda x: x.n)

X_print["year"] = X_print["QUOTE_DATETIME"].dt.year

bins_tradesize = [-1, 1, 3, 5, 11, np.inf]
trade_size_labels = ["(0,1]", "(1,3]", "(3,5]", "(5,11]", ">11"]
X_print["TRADE_SIZE_binned"] = pd.cut(
    X_print["TRADE_SIZE"], bins_tradesize, labels=trade_size_labels
)

# p. 38
bins_years = [2004, 2007, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017]
year_labels = [
    "2005-2007",
    "2008-2010",
    "2011",
    "2012",
    "2013",
    "2014",
    "2015",
    "2016",
    "2017",
]
X_print["year_binned"] = pd.cut(X_print["year"], bins_years, labels=year_labels)

# p. 37
bins_ttm = [-1, 1, 2, 3, 6, 12, np.inf]
ttm_labels = [
    "ttm <= 1 month",
    "ttm (1-2] month",
    "ttm (2-3] month",
    "ttm (3-6] month",
    "ttm (6-12] month",
    "ttm > 12 month",
]
X_print["ttm_binned"] = pd.cut(X_print["ttm"], bins_ttm, labels=ttm_labels)

# Security type
# see 3.0a-mb-explanatory-data-analysis.ipynb
X_print["issue_type"] = X_print["issue_type"].map({'0': 'Stock options', 'A': 'Index option', '7': 'Others', 'F':'Others', '%': 'Others', ' ': 'Others'})

# Moneyness p. 38
bins_myn = [-1, 0.7, 0.9, 1.1, 1.3, np.inf]
myn_labels = [
    "myn <= 0.7",
    "myn (0.7-0.9]",
    "myn (0.9-1.1]",
    "myn (1.1-1.3]",
    "myn > 1.3",
]
X_print["myn_binned"] = pd.cut(X_print["myn"], bins_myn, labels=myn_labels)

# mid
mid_ex = 0.5 * (X_print["ask_ex"] + X_print["bid_ex"])
X_print["mid"] = mid_ex

# p. 31
def map_quotes(x):
    if x["TRADE_PRICE"] == x["bid_ex"] or x["TRADE_PRICE"] == x["ask_ex"]:
        return "at quote"
    elif x["TRADE_PRICE"] < x["bid_ex"] or  x["TRADE_PRICE"] > x["ask_ex"]:
        return "outside"
    elif x["TRADE_PRICE"] == x["mid"]:
        return "at mid"
    return "inside"
    # TODO: How to handle unknowns?
    # elif x["bid_ex"] < x["TRADE_PRICE"] < x["mid"]  or x["mid"] < x["TRADE_PRICE"] < x["ask_ex"]:
    #     return "inside"   
    # return "unknown"
    
X_print["prox_q_binned"] = X_print[["TRADE_PRICE", "mid", "bid_ex", "ask_ex"]].apply(map_quotes, axis=1)

# clean up empty buckets, as it causes empty grouping in result set generatio
X_print["year_binned"] = X_print["year_binned"].cat.remove_unused_categories()
X_print["myn_binned"] = X_print["myn_binned"].cat.remove_unused_categories()
X_print["ttm_binned"] = X_print["ttm_binned"].cat.remove_unused_categories()


X_print.drop(columns=["EXPIRATION","QUOTE_DATETIME", "TRADE_SIZE", "ttm", "myn", "ask_ex", "bid_ex", "year"], inplace=True)


In [None]:
X_print.head(20)

Unnamed: 0,buy_sell,TRADE_PRICE,OPTION_TYPE,issue_type,TRADE_SIZE_binned,year_binned,ttm_binned,myn_binned,mid,prox_q_binned
39342171,-1,0.52,P,Others,"(1,3]",2015,ttm <= 1 month,myn (0.9-1.1],0.56,at quote
39342172,-1,7.82,C,Stock options,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],7.875,inside
39342173,-1,28.889999,C,Stock options,"(0,1]",2015,ttm > 12 month,myn (0.7-0.9],30.424999,inside
39342174,1,2.25,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],2.05,at quote
39342175,-1,1.7,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],1.825,at quote
39342176,1,0.35,P,Others,"(3,5]",2015,ttm <= 1 month,myn (0.9-1.1],0.28,inside
39342177,1,0.44,P,Others,"(3,5]",2015,ttm <= 1 month,myn (0.9-1.1],0.36,inside
39342178,-1,0.72,P,Others,"(3,5]",2015,ttm <= 1 month,myn (0.9-1.1],0.81,inside
39342179,1,11.3,C,Stock options,"(5,11]",2015,ttm > 12 month,myn > 1.3,,inside
39342180,1,2.19,P,Stock options,"(1,3]",2015,ttm > 12 month,myn (0.7-0.9],1.98,inside


In [None]:
X_print = pd.concat([X_print, results_data], axis=1)

In [None]:
X_print.head()

Unnamed: 0,buy_sell,TRADE_PRICE,OPTION_TYPE,issue_type,TRADE_SIZE_binned,year_binned,ttm_binned,myn_binned,mid,prox_q_binned,...,rev_emo(best),clnv(ex),clnv(best),rev_clnv(ex),rev_clnv(best),trade_size(ex)->tick(all),trade_size(ex)->quote(best),trade_size(ex)->quote(best)->quote(ex),quote(best)->quote(ex),trade_size(ex)->depth(ex)->quote(best)->rev_lr(ex)
39342171,-1,0.52,P,Others,"(1,3]",2015,ttm <= 1 month,myn (0.9-1.1],0.56,at quote,...,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0
39342172,-1,7.82,C,Stock options,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],7.875,inside,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
39342173,-1,28.889999,C,Stock options,"(0,1]",2015,ttm > 12 month,myn (0.7-0.9],30.424999,inside,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
39342174,1,2.25,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],2.05,at quote,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
39342175,-1,1.7,C,Others,"(0,1]",2015,ttm <= 1 month,myn (0.9-1.1],1.825,at quote,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


## Results Set Generation

In [None]:
LUT = {"(ex)": " (ex)", 
       "(best)": " (best)",
       "rev_": "Rev. ",
       "tick": "Tick",
       "quote": "Quote",
       "trade_size": "Trade Size",
       "depth": "Depth",
       "->": " $\\to$ ",
       "lr": "\gls{LR}",
       "emo": "\gls{EMO}", 
       "clnv": "\gls{CLNV}",
      "OPTION_TYPE": "Option Type",
      "(":"$(", # put interval start in math env
      "]":"]$", # put interval end in math env
      "_":"$\_"
      }

LUT_INDEX = {"OPTION_TYPE": "Option Type", 
             "issue_type": "Security Type", 
             "TRADE_SIZE_binned": "Trade Size", 
             "year_binned": "Year", 
             "ttm_binned": "Time to Maturity", 
             "myn_binned": "Moneyness",
             "prox_q_binned": "Location to Quote"
}



def cell_str(x):
    for orig, sub in LUT.items():
        x = x.replace(orig,sub)
    return x


def highlight_max(s, props=''):
    return np.where(s == np.nanmax(s.values), props, '')


In [None]:
def set_tex_style(styler, caption, label):
    res = (
    styler.set_caption(caption)
    # .hide(axis="index")
    .apply(highlight_max, props="font-weight:bold;", axis=0) # optionally set subset....    
    .format_index(cell_str, axis = 0)
    .format_index(cell_str, axis = 1)
    .format(precision=4, decimal='.', thousands=",", escape=False, hyperlinks=None)
    .to_latex(f"{label}.tex", siunitx=True, position_float="centering", hrules=True, clines="skip-last;data",
                      label="tab:"+label, caption=caption, convert_css=True)
             )
    return res

In [None]:
classifiers = results_data.columns.tolist()
criterions = list(LUT_INDEX)

## Accurcay Calculation

In [29]:
result_dfs = []

for criterion in criterions:
    results = []
    for classifier in classifiers:
        acc_tot = accuracy_score(X_print["buy_sell"], X_print[classifier])
        res = (
            X_print.groupby([criterion])[["buy_sell", classifier]]
            .apply(lambda x: accuracy_score(x["buy_sell"], x[classifier]))
            .mul(100)
            .rename(classifier)
            )
        res.loc["all"] = acc_tot * 100
        
        res.index.name = LUT_INDEX.get(criterion)
        results.append(res)

    # save aggregated results
    result_df = pd.concat(results, axis=1).T
    result_df.style.pipe(set_tex_style, caption=(f"long-tbd", "short-tbd"), label=f"{key.lower()}-{criterion.lower()}")

    # store all result sets for later use
    result_dfs.append(result_df)

Exception ignored in: <function tqdm.__del__ at 0x1551096df3a0>
Traceback (most recent call last):
  File "/pfs/data5/home/kit/stud/uloak/thesis/thesis/lib/python3.8/site-packages/tqdm/std.py", line 1145, in __del__
    self.close()
  File "/pfs/data5/home/kit/stud/uloak/thesis/thesis/lib/python3.8/site-packages/tqdm/notebook.py", line 283, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'
Exception ignored in: <function tqdm.__del__ at 0x1551096df3a0>
Traceback (most recent call last):
  File "/pfs/data5/home/kit/stud/uloak/thesis/thesis/lib/python3.8/site-packages/tqdm/std.py", line 1145, in __del__
    self.close()
  File "/pfs/data5/home/kit/stud/uloak/thesis/thesis/lib/python3.8/site-packages/tqdm/notebook.py", line 283, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


In [None]:
result_dfs[0]

Option Type,C,P,all
classical-size,71.871219,72.845956,72.326249
tick(all),53.5739,53.078335,53.342559
tick(ex),50.372016,50.097673,50.243947
quote(best),58.711418,60.582925,59.585081
quote(ex),56.344838,57.740199,56.996225
lr(ex),56.418422,57.801998,57.064307
lr(best),58.774523,60.594004,59.623898
rev_lr(ex),56.486528,57.888561,57.141029
rev_lr(best),58.792914,60.650285,59.659977
emo(ex),53.505205,54.11172,53.78834


In [None]:
result_dfs[1]

Security Type,Index option,Others,Stock options,all
classical-size,57.421532,76.345621,70.932783,72.326249
tick(all),51.554245,53.257312,53.40457,53.342559
tick(ex),50.87187,49.870976,50.384689,50.243947
quote(best),57.8147,64.962501,57.447408,59.585081
quote(ex),53.720453,62.409791,54.866364,56.996225
lr(ex),53.782831,62.501336,54.925098,57.064307
lr(best),57.820371,64.982376,57.494341,59.623898
rev_lr(ex),53.667527,62.607179,54.993031,57.141029
rev_lr(best),57.820371,65.030275,57.526223,59.659977
emo(ex),51.285832,57.505227,52.329176,53.78834


In [None]:
result_dfs[2]

Trade Size,"(0,1]","(1,3]","(3,5]","(5,11]",>11,all
classical-size,72.534139,72.950857,72.748349,71.768243,71.297236,72.326249
tick(all),52.848691,52.830617,52.4113,53.630853,55.550232,53.342559
tick(ex),50.28974,50.22292,49.648294,50.186182,50.750727,50.243947
quote(best),58.064698,57.950477,58.330811,62.096424,63.258972,59.585081
quote(ex),55.290135,55.30764,55.736591,59.89012,60.706061,56.996225
lr(ex),55.423031,55.397594,55.774439,59.902085,60.697694,57.064307
lr(best),58.131272,57.984292,58.363067,62.118031,63.271784,59.623898
rev_lr(ex),55.480578,55.452455,55.932575,59.987698,60.765154,57.141029
rev_lr(best),58.176407,58.033288,58.418537,62.14864,63.260737,59.659977
emo(ex),51.933208,51.950476,52.81453,57.157071,57.237072,53.78834


In [None]:
result_dfs[3]

Year,2015,2016,2017,all
classical-size,69.011751,72.517449,72.998745,72.326249
tick(all),52.788691,53.281666,53.670502,53.342559
tick(ex),50.381045,50.18311,50.339021,50.243947
quote(best),56.015922,59.729647,60.451627,59.585081
quote(ex),54.878241,57.578845,56.356885,56.996225
lr(ex),54.887934,57.639197,56.462584,57.064307
lr(best),56.014491,59.765951,60.509837,59.623898
rev_lr(ex),54.995435,57.719699,56.520163,57.141029
rev_lr(best),56.094015,59.811811,60.508576,59.659977
emo(ex),52.835172,54.268153,52.995354,53.78834


In [None]:
result_dfs[4]

Time to Maturity,ttm <= 1 month,ttm (1-2] month,ttm (2-3] month,ttm (3-6] month,ttm (6-12] month,ttm > 12 month,all
classical-size,72.732527,72.815921,71.803625,71.275752,71.226768,68.59547,72.326249
tick(all),53.070524,53.404782,53.689628,53.882093,54.278666,54.903416,53.342559
tick(ex),50.124143,50.234919,50.465958,50.823732,50.631747,50.270019,50.243947
quote(best),60.417557,60.309049,59.459459,57.685187,57.463633,50.800908,59.585081
quote(ex),57.307837,57.732111,57.093101,55.978766,56.419706,52.670582,56.996225
lr(ex),57.366438,57.765969,57.177345,56.112714,56.47325,52.850764,57.064307
lr(best),60.43845,60.311838,59.50221,57.790397,57.546824,51.028867,59.623898
rev_lr(ex),57.469937,57.832292,57.172525,56.099023,56.494991,52.86652,57.141029
rev_lr(best),60.505662,60.322195,59.447095,57.750558,57.490944,51.003962,59.659977
emo(ex),54.413938,53.550872,52.892041,52.026183,51.948605,51.190243,53.78834


In [None]:
result_dfs[5]

Moneyness,myn <= 0.7,myn (0.7-0.9],myn (0.9-1.1],myn (1.1-1.3],myn > 1.3,all
classical-size,71.865108,74.258422,72.949282,66.248435,63.042252,72.326249
tick(all),54.477316,55.496913,52.948661,51.638894,52.034053,53.342559
tick(ex),49.538611,50.675156,50.186938,50.100022,50.042366,50.243947
quote(best),61.25958,63.39487,60.04545,50.023348,48.772913,59.585081
quote(ex),60.365344,60.374359,57.084807,49.931615,48.710672,56.996225
lr(ex),60.428423,60.574909,57.115398,49.995855,48.819439,57.064307
lr(best),61.388021,63.54531,60.040269,50.099607,48.903247,59.623898
rev_lr(ex),60.572848,60.548998,57.214293,50.109692,48.858878,57.141029
rev_lr(best),61.458235,63.472991,60.103392,50.166887,48.882295,59.659977
emo(ex),58.223516,57.563791,53.144117,49.876078,50.041134,53.78834


In [None]:
result_dfs[6]

Location to Quote,at mid,at quote,inside,outside,all
classical-size,72.120396,86.63901,68.313459,61.383614,72.326249
tick(all),51.026223,57.729733,52.409884,62.376703,53.342559
tick(ex),49.21332,51.380862,50.057898,55.478763,50.243947
quote(best),55.901058,60.20631,59.924963,65.852513,59.585081
quote(ex),49.98518,59.973691,57.138567,66.375897,56.996225
lr(ex),50.625876,59.973793,57.143744,66.375897,57.064307
lr(best),56.239301,60.366086,59.886519,65.812253,59.623898
rev_lr(ex),51.205073,59.973588,57.170209,66.375897,57.141029
rev_lr(best),56.682999,60.228822,59.913159,65.872643,59.659977
emo(ex),51.026223,59.973793,52.409884,62.376703,53.78834


In [None]:
master = pd.concat(result_dfs, axis=1, keys=list(LUT_INDEX.values())).T

In [None]:
master.iloc[:,0:10]

Unnamed: 0,Unnamed: 1,classical-size,tick(all),tick(ex),quote(best),quote(ex),lr(ex),lr(best),rev_lr(ex),rev_lr(best),emo(ex)
Option Type,C,71.871219,53.5739,50.372016,58.711418,56.344838,56.418422,58.774523,56.486528,58.792914,53.505205
Option Type,P,72.845956,53.078335,50.097673,60.582925,57.740199,57.801998,60.594004,57.888561,60.650285,54.11172
Option Type,all,72.326249,53.342559,50.243947,59.585081,56.996225,57.064307,59.623898,57.141029,59.659977,53.78834
Security Type,Index option,57.421532,51.554245,50.87187,57.8147,53.720453,53.782831,57.820371,53.667527,57.820371,51.285832
Security Type,Others,76.345621,53.257312,49.870976,64.962501,62.409791,62.501336,64.982376,62.607179,65.030275,57.505227
Security Type,Stock options,70.932783,53.40457,50.384689,57.447408,54.866364,54.925098,57.494341,54.993031,57.526223,52.329176
Security Type,all,72.326249,53.342559,50.243947,59.585081,56.996225,57.064307,59.623898,57.141029,59.659977,53.78834
Trade Size,"(0,1]",72.534139,52.848691,50.28974,58.064698,55.290135,55.423031,58.131272,55.480578,58.176407,51.933208
Trade Size,"(1,3]",72.950857,52.830617,50.22292,57.950477,55.30764,55.397594,57.984292,55.452455,58.033288,51.950476
Trade Size,"(3,5]",72.748349,52.4113,49.648294,58.330811,55.736591,55.774439,58.363067,55.932575,58.418537,52.81453


In [None]:
master.iloc[:,11:-1]

Unnamed: 0,Unnamed: 1,rev_emo(ex),rev_emo(best),clnv(ex),clnv(best),rev_clnv(ex),rev_clnv(best),trade_size(ex)->tick(all),trade_size(ex)->quote(best),trade_size(ex)->quote(best)->quote(ex),quote(best)->quote(ex)
Option Type,C,53.649405,55.606796,54.339938,56.597578,54.463541,56.778181,59.054555,66.385867,66.645778,58.874904
Option Type,P,54.727324,56.918757,55.220002,57.624942,55.789794,58.177595,57.932244,66.888761,67.167868,60.773841
Option Type,all,54.152602,56.219249,54.750772,57.077175,55.082666,57.431459,58.530634,66.620629,66.889501,59.761371
Security Type,Index option,50.218795,50.22541,51.741378,51.565586,50.663945,50.566598,52.560795,58.58119,58.611434,57.836438
Security Type,Others,58.159445,60.691923,59.06131,61.574413,59.616119,62.156604,57.473985,69.492664,69.833893,65.229307
Security Type,Stock options,52.598969,54.510022,53.060187,55.350035,53.324088,55.633455,59.045155,65.582245,65.825705,57.589686
Security Type,all,54.152602,56.219249,54.750772,57.077175,55.082666,57.431459,58.530634,66.620629,66.889501,59.761371
Trade Size,"(0,1]",52.443638,54.52717,52.960619,55.325992,53.402249,55.845825,60.571775,68.253518,68.538024,58.226337
Trade Size,"(1,3]",52.531057,54.604379,52.860507,55.22795,53.345081,55.772635,60.061444,68.216728,68.51293,58.165759
Trade Size,"(3,5]",53.643976,55.583435,53.603599,55.942535,54.31927,56.576993,59.553527,68.348517,68.618666,58.500746


In [None]:
master.style.pipe(set_tex_style, caption=("master-short","master-long"), label=f"{key}-master")

## Effective Spread

In [61]:
classifiers

['classical-size',
 'tick(all)',
 'tick(ex)',
 'quote(best)',
 'quote(ex)',
 'lr(ex)',
 'lr(best)',
 'rev_lr(ex)',
 'rev_lr(best)',
 'emo(ex)',
 'emo(best)',
 'rev_emo(ex)',
 'rev_emo(best)',
 'clnv(ex)',
 'clnv(best)',
 'rev_clnv(ex)',
 'rev_clnv(best)',
 'trade_size(ex)->tick(all)',
 'trade_size(ex)->quote(best)',
 'trade_size(ex)->quote(best)->quote(ex)',
 'quote(best)->quote(ex)',
 'trade_size(ex)->depth(ex)->quote(best)->rev_lr(ex)']

In [88]:
eff_dfs = []

def stats(x, classifier):

    nom = effective_spread(x[classifier], x["TRADE_PRICE"], x["mid"], mode="nominal")
    rel = effective_spread(x[classifier], x["TRADE_PRICE"], x["mid"], mode="relative") * 100

    # eff_spread_pred = effective_spread(x[classifier], x["TRADE_PRICE"], x["mid"], mode="none")
    # eff_spread_true = effective_spread(x["buy_sell"], x["TRADE_PRICE"], x["mid"], mode="none")
    # wilcoxon_res  = wilcoxon(eff_spread_pred, eff_spread_true, nan_policy="omit", zero_method="zsplit")
    
    return pd.Series({'nominal':  nom, 
             'rel': rel,
             # 'statistic':wilcoxon_res.statistic,
             # 'pvalue':wilcoxon_res.pvalue
            })

for criterion in criterions:
    results = []
    
    for classifier in classifiers:
        res = (X_print.groupby([criterion])[["TRADE_PRICE", "mid", classifier, "buy_sell"]]
            .apply(stats, classifier)
              )
        results.append(res)

    # save aggregated results
    result_df = pd.concat(results, axis=1, keys=classifiers).T
    result_df.style.pipe(set_tex_style, caption=(f"long-tbd","short-tbd"), label=f"{key.lower()}-{criterion.lower()}-eff-spread")

    # store all result sets for later use
    eff_dfs.append(result_df)

In [106]:
eff_dfs[0]

Unnamed: 0,OPTION_TYPE,C,P
classical-size,nominal,0.045034,0.03935998
classical-size,rel,6.663309,9.775967e+302
tick(all),nominal,0.02369,0.01967647
tick(all),rel,2.787034,9.775967e+302
tick(ex),nominal,0.016413,0.01472416
tick(ex),rel,1.216119,9.775967e+302
quote(best),nominal,0.062775,0.05130346
quote(best),rel,8.687065,9.775967e+302
quote(ex),nominal,0.171481,0.1544656
quote(ex),rel,15.908595,9.775967e+302


In [107]:
eff_dfs[1]

Unnamed: 0,issue_type,Index option,Others,Stock options
classical-size,nominal,0.128587,0.03054,0.045819
classical-size,rel,0.602381,10.922618,5.448746
tick(all),nominal,0.034653,0.013705,0.024881
tick(all),rel,0.705226,0.99682,3.132711
tick(ex),nominal,0.024063,0.008241,0.018466
tick(ex),rel,0.602702,-0.785548,1.842353
quote(best),nominal,0.102625,0.042595,0.062661
quote(best),rel,2.356139,12.796188,7.564179
quote(ex),nominal,0.563742,0.104951,0.180986
quote(ex),rel,7.288216,17.468798,15.880001


In [108]:
eff_dfs[2]

Unnamed: 0,TRADE_SIZE_binned,"(0,1]","(1,3]","(3,5]","(5,11]",>11
classical-size,nominal,0.038047,0.04227,0.040207,0.043894,0.051746
classical-size,rel,4.853544,5.728606,5.859333,9.884575,10.646104
tick(all),nominal,0.025029,0.022638,0.019875,0.019467,0.018281
tick(all),rel,2.853949,2.633597,2.188958,2.045184,2.326401
tick(ex),nominal,0.01796,0.015399,0.015507,0.014679,0.012231
tick(ex),rel,1.615596,1.345637,0.934363,0.510324,0.354463
quote(best),nominal,0.066373,0.058722,0.053381,0.050948,0.04768
quote(best),rel,7.540989,8.071728,8.450977,11.484538,10.954882
quote(ex),nominal,0.187541,0.165961,0.159121,0.148846,0.130451
quote(ex),rel,14.93481,15.454604,15.897607,18.745894,17.558562


In [109]:
eff_dfs[3]

Unnamed: 0,year_binned,2015,2016,2017
classical-size,nominal,0.049976,0.04309619,0.038179
classical-size,rel,6.16432,3.596854e+302,7.150329
tick(all),nominal,0.016895,0.02227762,0.022402
tick(all),rel,2.362699,3.596854e+302,3.008716
tick(ex),nominal,0.011205,0.01526681,0.017943
tick(ex),rel,1.274827,3.596854e+302,1.450287
quote(best),nominal,0.061677,0.05713752,0.056638
quote(best),rel,8.207709,3.596854e+302,8.568844
quote(ex),nominal,0.147433,0.1573853,0.183234
quote(ex),rel,13.74512,3.596854e+302,18.155461


In [110]:
eff_dfs[4]

Unnamed: 0,ttm_binned,ttm <= 1 month,ttm (1-2] month,ttm (2-3] month,ttm (3-6] month,ttm (6-12] month,ttm > 12 month
classical-size,nominal,0.032901,0.044063,0.051212,0.063586,0.067209,0.108283
classical-size,rel,8.813669,4.057045,3.140338,3.015841,2.466502,2.077753
tick(all),nominal,0.019678,0.017443,0.021077,0.027972,0.028432,0.047843
tick(all),rel,2.947513,1.870065,1.636938,1.593565,1.329861,1.120096
tick(ex),nominal,0.017074,0.009775,0.011682,0.014216,0.012935,0.017647
tick(ex),rel,1.344642,0.721835,0.574159,0.556112,0.453916,0.156663
quote(best),nominal,0.045934,0.050152,0.05873,0.080552,0.093681,0.168799
quote(best),rel,11.231114,5.249123,4.406634,4.465686,3.69061,3.333307
quote(ex),nominal,0.147437,0.143851,0.161277,0.199127,0.226251,0.325578
quote(ex),rel,19.91433,9.794847,8.330565,9.060307,7.890216,6.973403


In [111]:
eff_dfs[5]

Unnamed: 0,myn_binned,myn <= 0.7,myn (0.7-0.9],myn (0.9-1.1],myn (1.1-1.3],myn > 1.3
classical-size,nominal,0.019004,0.02725,0.04135865,0.075467,0.098973
classical-size,rel,13.281575,13.418302,3.3890029999999998e+302,0.92736,0.951555
tick(all),nominal,0.038055,0.02478,0.01860807,0.028592,0.038125
tick(all),rel,6.737995,4.708344,3.3890029999999998e+302,0.447292,0.655988
tick(ex),nominal,0.034122,0.022565,0.01216843,0.015886,0.027049
tick(ex),rel,2.995119,2.348643,3.3890029999999998e+302,0.224926,0.470962
quote(best),nominal,0.064909,0.043103,0.04686678,0.131244,0.178501
quote(best),rel,21.297551,17.69129,3.3890029999999998e+302,1.914659,1.986869
quote(ex),nominal,0.201324,0.148623,0.147541,0.245477,0.350267
quote(ex),rel,34.937756,29.090731,3.3890029999999998e+302,3.772796,4.321214


In [112]:
eff_dfs[6]

Unnamed: 0,prox_q_binned,at mid,at quote,inside,outside
classical-size,nominal,0.0,-0.025131,0.06753,0.128042
classical-size,rel,0.0,17.109239,5.033986,-671129.513198
tick(all),nominal,0.0,0.039821,0.018993,0.410488
tick(all),rel,0.0,2.462868,2.834164,-671130.687109
tick(ex),nominal,0.0,0.021083,0.01585,0.233708
tick(ex),rel,0.0,-3.655351,2.570458,-671141.581771
quote(best),nominal,0.0,0.159269,0.03505,0.819263
quote(best),rel,0.0,31.905514,3.710756,-671114.273781
quote(ex),nominal,0.0,0.164077,0.184796,1.161184
quote(ex),rel,0.0,32.362553,13.912788,-671105.862486


In [113]:
master = pd.concat(eff_dfs, axis=1, keys = LUT_INDEX.values()).T

In [114]:
master

Unnamed: 0_level_0,Unnamed: 1_level_0,classical-size,classical-size,tick(all),tick(all),tick(ex),tick(ex),quote(best),quote(best),quote(ex),quote(ex),...,trade_size(ex)->tick(all),trade_size(ex)->tick(all),trade_size(ex)->quote(best),trade_size(ex)->quote(best),trade_size(ex)->quote(best)->quote(ex),trade_size(ex)->quote(best)->quote(ex),quote(best)->quote(ex),quote(best)->quote(ex),trade_size(ex)->depth(ex)->quote(best)->rev_lr(ex),trade_size(ex)->depth(ex)->quote(best)->rev_lr(ex)
Unnamed: 0_level_1,Unnamed: 1_level_1,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,...,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel
Option Type,C,0.045034,6.663309,0.02369,2.787034,0.016413,1.216119,0.062775,8.687065,0.171481,15.90859,...,-0.007608,0.1570495,0.010157,3.987762,0.013925,4.309501,0.067512,9.072596,0.013925,4.309501
Option Type,P,0.03936,9.775967e+302,0.019676,9.775967e+302,0.014724,9.775967e+302,0.051303,9.775967e+302,0.154466,9.775967e+302,...,-0.004571,9.775967e+302,0.009893,9.775967e+302,0.013647,9.775967e+302,0.055965,9.775967e+302,0.013647,9.775967e+302
Security Type,Index option,0.128587,0.6023815,0.034653,0.705226,0.024063,0.6027022,0.102625,2.356139,0.563742,7.288216,...,0.024238,0.5877154,0.071541,1.651301,0.085599,1.74699,0.122452,2.479553,0.085599,1.74699
Security Type,Others,0.03054,10.92262,0.013705,0.9968202,0.008241,-0.7855478,0.042595,12.79619,0.104951,17.4688,...,-0.00804,-2.004831,0.007652,6.346037,0.010012,6.633266,0.04542,13.12378,0.010012,6.633266
Security Type,Stock options,0.045819,5.448746,0.024881,3.132711,0.018466,1.842353,0.062661,7.564179,0.180986,15.88,...,-0.005909,0.5147985,0.010031,3.018342,0.0142,3.373216,0.067888,7.997564,0.0142,3.373216
Trade Size,"(0,1]",0.038047,4.853544,0.025029,2.853949,0.01796,1.615596,0.066373,7.540989,0.187541,14.93481,...,-0.007324,1.062228,0.012231,4.334164,0.01643,4.672034,0.071797,7.944337,0.01643,4.672034
Trade Size,"(1,3]",0.04227,5.728606,0.022638,2.633597,0.015399,1.345637,0.058722,8.071728,0.165961,15.4546,...,-0.003089,0.8189863,0.013641,4.60033,0.017759,4.975017,0.063732,8.495901,0.017759,4.975017
Trade Size,"(3,5]",0.040207,5.859333,0.019875,2.188958,0.015507,0.934363,0.053381,8.450977,0.159121,15.89761,...,-0.007262,0.1552519,0.007476,4.572227,0.011428,4.911585,0.057983,8.84336,0.011428,4.911585
Trade Size,"(5,11]",0.043894,9.884575,0.019467,2.045184,0.014679,0.5103243,0.050948,11.48454,0.148846,18.74589,...,-0.006029,-2.430099,0.007444,3.063872,0.010799,3.377553,0.055269,11.88705,0.010799,3.377553
Trade Size,>11,0.051746,10.6461,0.018281,2.326401,0.012231,0.3544626,0.04768,10.95488,0.130451,17.55856,...,-0.007379,-2.059201,0.005567,2.710777,0.008216,2.991968,0.050965,11.31866,0.008216,2.991968


In [115]:
master.iloc[:,0:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,classical-size,classical-size,tick(all),tick(all),tick(ex),tick(ex),quote(best),quote(best),quote(ex),quote(ex),lr(ex),lr(ex),lr(best),lr(best),rev_lr(ex),rev_lr(ex),rev_lr(best),rev_lr(best),emo(ex),emo(ex)
Unnamed: 0_level_1,Unnamed: 1_level_1,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel
Option Type,C,0.045034,6.663309,0.02369,2.787034,0.016413,1.216119,0.062775,8.687065,0.171481,15.90859,0.171481,15.90859,0.063281,8.781871,0.171481,15.90859,0.06299,8.741008,0.0504,8.211
Option Type,P,0.03936,9.775967e+302,0.019676,9.775967e+302,0.014724,9.775967e+302,0.051303,9.775967e+302,0.154466,9.775967e+302,0.154466,9.775967e+302,0.051788,9.775967e+302,0.154466,9.775967e+302,0.051578,9.775967e+302,0.042124,9.775967e+302
Security Type,Index option,0.128587,0.6023815,0.034653,0.705226,0.024063,0.6027022,0.102625,2.356139,0.563742,7.288216,0.563742,7.288216,0.104647,2.367328,0.563742,7.288216,0.104487,2.356349,0.060771,1.724806
Security Type,Others,0.03054,10.92262,0.013705,0.9968202,0.008241,-0.7855478,0.042595,12.79619,0.104951,17.4688,0.104951,17.4688,0.04289,12.89739,0.104951,17.4688,0.042688,12.86359,0.03107,11.79115
Security Type,Stock options,0.045819,5.448746,0.024881,3.132711,0.018466,1.842353,0.062661,7.564179,0.180986,15.88,0.180986,15.88,0.063215,7.660478,0.180986,15.88,0.06294,7.615463,0.052511,7.207397
Trade Size,"(0,1]",0.038047,4.853544,0.025029,2.853949,0.01796,1.615596,0.066373,7.540989,0.187541,14.93481,0.187541,14.93481,0.066955,7.641033,0.187541,14.93481,0.0666,7.601823,0.051456,7.015676
Trade Size,"(1,3]",0.04227,5.728606,0.022638,2.633597,0.015399,1.345637,0.058722,8.071728,0.165961,15.4546,0.165961,15.4546,0.059287,8.186965,0.165961,15.4546,0.058986,8.12922,0.047535,7.527817
Trade Size,"(3,5]",0.040207,5.859333,0.019875,2.188958,0.015507,0.934363,0.053381,8.450977,0.159121,15.89761,0.159121,15.89761,0.05385,8.55043,0.159121,15.89761,0.053591,8.505075,0.043196,7.79511
Trade Size,"(5,11]",0.043894,9.884575,0.019467,2.045184,0.014679,0.5103243,0.050948,11.48454,0.148846,18.74589,0.148846,18.74589,0.051438,11.58085,0.148846,18.74589,0.051231,11.53441,0.044633,10.96764
Trade Size,>11,0.051746,10.6461,0.018281,2.326401,0.012231,0.3544626,0.04768,10.95488,0.130451,17.55856,0.130451,17.55856,0.047933,11.01836,0.130451,17.55856,0.04791,11.00368,0.039891,10.40028


In [116]:
master.iloc[:,21:40]

Unnamed: 0_level_0,Unnamed: 1_level_0,emo(best),rev_emo(ex),rev_emo(ex),rev_emo(best),rev_emo(best),clnv(ex),clnv(ex),clnv(best),clnv(best),rev_clnv(ex),rev_clnv(ex),rev_clnv(best),rev_clnv(best),trade_size(ex)->tick(all),trade_size(ex)->tick(all),trade_size(ex)->quote(best),trade_size(ex)->quote(best),trade_size(ex)->quote(best)->quote(ex),trade_size(ex)->quote(best)->quote(ex)
Unnamed: 0_level_1,Unnamed: 1_level_1,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel,nominal,rel
Option Type,C,7.327593,0.04578,7.694177,0.043009,7.00073,0.12216,13.0334,0.055455,7.958045,0.12071,12.9107,0.053844,7.822577,-0.007608,0.1570495,0.010157,3.987762,0.013925,4.309501
Option Type,P,9.775967e+302,0.036976,9.775967e+302,0.034225,9.775967e+302,0.109473,9.775967e+302,0.044824,9.775967e+302,0.108574,9.775967e+302,0.042855,9.775967e+302,-0.004571,9.775967e+302,0.009893,9.775967e+302,0.013647,9.775967e+302
Security Type,Index option,1.712909,0.055739,1.212132,0.055242,1.217619,0.181988,4.133225,0.066934,1.702426,0.188071,4.160702,0.070783,1.307506,0.024238,0.5877154,0.071541,1.651301,0.085599,1.74699
Security Type,Others,11.27714,0.029192,11.5072,0.027514,11.08161,0.077548,15.12437,0.034969,11.92205,0.07702,15.04213,0.035042,11.85674,-0.00804,-2.004831,0.007652,6.346037,0.010012,6.633266
Security Type,Stock options,6.180699,0.046446,6.527171,0.043213,5.740075,0.130772,12.67889,0.056451,6.820008,0.129202,12.53539,0.053844,6.628785,-0.005909,0.5147985,0.010031,3.018342,0.0142,3.373216
Trade Size,"(0,1]",6.138223,0.045795,6.432262,0.042839,5.747699,0.128571,11.93176,0.05703,6.779406,0.126906,11.80095,0.054783,6.613555,-0.007324,1.062228,0.012231,4.334164,0.01643,4.672034
Trade Size,"(1,3]",6.611263,0.041968,6.910765,0.039394,6.215395,0.116847,12.3803,0.051806,7.260238,0.115585,12.25575,0.049528,7.087151,-0.003089,0.8189863,0.013641,4.60033,0.017759,4.975017
Trade Size,"(3,5]",6.96502,0.039507,7.286308,0.037071,6.621546,0.113159,12.74641,0.047296,7.636632,0.112252,12.66125,0.046171,7.515149,-0.007262,0.1552519,0.007476,4.572227,0.011428,4.911585
Trade Size,"(5,11]",10.07952,0.040073,10.37978,0.0368,9.699466,0.109913,15.77868,0.046514,10.69955,0.109258,15.66177,0.045045,10.53762,-0.006029,-2.430099,0.007444,3.063872,0.010799,3.377553
Trade Size,>11,9.598867,0.036302,9.965578,0.033982,9.334917,0.099423,14.94715,0.042248,10.18701,0.098465,14.84351,0.041237,10.07679,-0.007379,-2.059201,0.005567,2.710777,0.008216,2.991968


In [117]:
master.iloc[:,41:-1]

Unnamed: 0_level_0,Unnamed: 1_level_0,quote(best)->quote(ex),trade_size(ex)->depth(ex)->quote(best)->rev_lr(ex)
Unnamed: 0_level_1,Unnamed: 1_level_1,rel,nominal
Option Type,C,9.072596,0.013925
Option Type,P,9.775967e+302,0.013647
Security Type,Index option,2.479553,0.085599
Security Type,Others,13.12378,0.010012
Security Type,Stock options,7.997564,0.0142
Trade Size,"(0,1]",7.944337,0.01643
Trade Size,"(1,3]",8.495901,0.017759
Trade Size,"(3,5]",8.84336,0.011428
Trade Size,"(5,11]",11.88705,0.010799
Trade Size,>11,11.31866,0.008216


In [118]:
master.style.pipe(set_tex_style, caption=("master-short","master-long"), label=f"{key}-master-eff-spread")

## Change in Parenthesis

```latex
# https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290
\begin{table}
    \centering
    \caption{test of combination with change}
    \label{tab:combo}
    \begin{tabular}{lSSSSSSSS}
        \toprule
        {} & \multicolumn{2}{l}{Index option} & \multicolumn{2}{l}{Others} & \multicolumn{2}{l}{Stock options} & \multicolumn{2}{l}{all} \\
        \midrule
        classical-size & 1.0 & \parl-56.42\parr & 2.0 & \parl-74.35\parr & -73.5 & \parl-143.93\parr & 5.0 & \parl-67.33\parr \\
        \bottomrule
        \end{tabular}
\end{table}
```

In [119]:
foo = pd.DataFrame([[4,3 ,8, 5]])
bar = pd.DataFrame([[1.1,2 ,73, 5]], columns=foo.columns, index=foo.index)

In [120]:
def combine_results(revised: pd.DataFrame, base: pd.DataFrame) -> pd.DataFrame:
    """
    Generate print layout like in Grauer et al.
    
    https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290
    
    # see p. https://texdoc.org/serve/siunitx/0
    """
    # first, second layer of colum index
    c_1 = revised.columns
    c_2 = ["nom"]
    midx = pd.MultiIndex.from_product([c_1, c_2])
    
    # copy data from revised add as (column, "nom")
    combo = pd.DataFrame(revised.values, index=revised.index, columns=midx)
    
    for i, mul_col in enumerate(combo.columns):
        
        # define custom brackets that are not parsed by sunitx
        combo[[(mul_col[0], "pm")]] = (
            "\parl" + (combo[mul_col] - base[mul_col[0]]).round(2).astype(str) + "\parr"
            )
        # sort to group together columns
        combo.sort_index(axis=1, inplace=True)
    return combo

In [121]:
combo = combine_results(bar, foo)

# manually replace S with S[table-format=1.4(5)] if needed
combo.style.to_latex(f"combo.tex", siunitx=True, position_float="centering", hrules=True, clines="skip-last;data", label="tab:combo", caption="test of combination with change", multicol_align="l")
             

In [122]:
combo

Unnamed: 0_level_0,0,0,1,1,2,2,3,3
Unnamed: 0_level_1,nom,pm,nom,pm,nom,pm,nom,pm
0,1.1,\parl-2.9\parr,2.0,\parl-1.0\parr,73.0,\parl65.0\parr,5.0,\parl0.0\parr
