In [None]:
import gcsfs
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import DateFormatter
from matplotlib.ticker import PercentFormatter
from sklearn.metrics import accuracy_score

from tclf.classical_classifier import ClassicalClassifier

In [None]:
# Columns holding the trade label and the quote timestamp; they are kept
# apart from the classifier inputs.
features_meta = ["QUOTE_DATETIME", "buy_sell"]

# Trade and quote size columns.
features_size = ["TRADE_SIZE", "bid_size_ex", "ask_size_ex"]

# Price and quote columns.
features_classical = [
    "TRADE_PRICE",
    "bid_ex",
    "ask_ex",
    "BEST_ASK",
    "BEST_BID",
    "price_all_lead",
    "price_ex_lead",
]

# Full projection requested from the parquet files: prices/quotes first,
# then sizes, then metadata.
columns = features_classical + features_size + features_meta

In [None]:
# Handle to Google Cloud Storage, used for both globbing and reading.
fs = gcsfs.GCSFileSystem()

# Resolve every object whose name starts with the `matched_ise_quotes` prefix.
gcs_loc = fs.glob(
    "gs://thesis-bucket-option-trade-classification/data/preprocessed/matched_ise_quotes*"
)
# Read only the columns needed for classification and evaluation.
X = pd.read_parquet(gcs_loc, engine="pyarrow", columns=columns, filesystem=fs)

# Split off timestamp and true label. The label is renamed to `y_true` here
# because the evaluation further down reads `y_true` from `X_meta`; renaming it
# on `X` after the column has been dropped would have no effect. `rename`
# returns a new frame, so columns added to `X_meta` later are not written to a
# selection of `X` (avoids SettingWithCopyWarning).
X_meta = X[features_meta].rename(columns={"buy_sell": "y_true"})

# Classifier input: drop the metadata and rename the remaining upper-case
# columns to lower-case names.
# NOTE(review): presumably these are the names ClassicalClassifier expects --
# confirm against the tclf documentation.
X = (
    X.drop(columns=features_meta)
    .rename(
        columns={
            "TRADE_PRICE": "trade_price",
            "TRADE_SIZE": "trade_size",
            "BEST_ASK": "ask_best",
            "BEST_BID": "bid_best",
        }
    )
    # The `*_size_best` columns are filled with the `*_size_ex` values.
    .assign(
        ask_size_best=lambda frame: frame["ask_size_ex"],
        bid_size_best=lambda frame: frame["bid_size_ex"],
    )
)

In [None]:
# Preview the first five rows of the classifier input.
X.head(n=5)

In [None]:
# Rule stack of the grauer benchmark (benchmark 2). Each entry is a pair of
# rule name and a suffix ("ex", "best", "all").
# NOTE(review): presumably the suffix selects which quote/price columns the
# rule reads -- confirm against the tclf documentation.
layers = [  # grauer (benchmark 2)
    ("trade_size", "ex"),
    ("quote", "best"),
    ("quote", "ex"),
    ("depth", "best"),
    ("depth", "ex"),
    ("rev_tick", "all"),
]
clf = ClassicalClassifier(layers=layers, strategy="random")

# Attach the predictions via `assign` instead of in-place column assignment:
# `X_meta` may originate from a column selection of `X`, where item assignment
# can raise SettingWithCopyWarning. `assign` returns a new frame, so re-running
# this cell is idempotent.
X_meta = X_meta.assign(y_pred=clf.fit(X).predict(X))

In [None]:
# Display the metadata frame, which at this point also carries the `y_pred`
# column produced by the classifier.
X_meta

In [None]:
def _daily_accuracy(trades_of_day):
    """Return the accuracy of `y_pred` against `y_true` for one group of trades."""
    return accuracy_score(trades_of_day["y_true"], trades_of_day["y_pred"])


# Group trades by the calendar date of their quote timestamp and score each
# day separately; the result is indexed by date.
# Requires `X_meta` to contain the columns `y_true` and `y_pred`.
trade_date = X_meta["QUOTE_DATETIME"].dt.date
df_plot = X_meta.groupby(trade_date).apply(_daily_accuracy)

In [None]:
# Serif font for all figure text (global matplotlib setting).
plt.rcParams["font.family"] = "serif"

# One wide, flat panel showing the daily accuracy as a percentage.
fig, ax = plt.subplots(figsize=(9, 3))
ax.plot(df_plot * 100, color="tab:orange", linewidth=1.5, label="ISE")

# Fixed 0-100 % scale on the y-axis.
ax.set_ylim(0, 100)
ax.set_ylabel("Overall success rate")
ax.yaxis.set_major_formatter(PercentFormatter(xmax=100, decimals=0))

# Abbreviated month and two-digit year on the x-axis, e.g. "Jan-10".
ax.xaxis.set_major_formatter(DateFormatter("%b-%y"))

ax.set_title(
    "C: Performance of trade classification based on\n trade size rule + depth rule + reverse LR (NBBO, exchange)",
    loc="left",
)
# Horizontal grid lines only.
ax.grid(True, axis="y")
ax.legend(loc="lower left", frameon=False)
plt.show()