In [None]:
# change notebook cell width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

%cd "/Users/simon/code/thesis/"

%load_ext autoreload
%autoreload 2
from IPython.display import display

In [None]:
import gzip
import math
from pathlib import Path
import pickle

from lifelines import KaplanMeierFitter
from lifelines.plotting import rmst_plot
from lifelines.statistics import pairwise_logrank_test, logrank_test
from lifelines.utils import restricted_mean_survival_time
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import pandas as pd
import plotly_express as px
import plotly.graph_objects as go
import seaborn as sns
from scipy.stats import ranksums
from tqdm.notebook import tqdm

# Notebook-wide display defaults: 20x10 inch matplotlib figures and
# two-decimal rendering of floats in pandas output.
matplotlib.rcParams.update({"figure.figsize": (20.0, 10.0)})
pd.set_option("display.float_format", "{:.2f}".format)

In [None]:
# Load every gzip-compressed, pickled model whose file name contains `measure`
# and index the models by the month encoded in the file name.
measure = "time_to_fill"
# measure = "time_to_removal"
dir_path = Path.home() / "data/thesis_statistics/models"
# sorted() already materialises the glob generator into a list.
model_paths = sorted(dir_path.glob(f"*{measure}*.gz"))
if not model_paths:
    # Fail early with a clear message instead of a ZeroDivisionError in the
    # summary cell further down.
    raise FileNotFoundError(f"No '*{measure}*.gz' model files found in {dir_path}")

models = dict()
for filepath in tqdm(model_paths):
    # assumes the 4th "_"-separated token of the file name is the month as an
    # integer (later cells compare it against 201907) -- TODO confirm naming scheme
    month = int(filepath.name.split("_")[3])
    with gzip.open(filepath, "rb") as handle:
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that were produced by a trusted source.
        model = pickle.load(handle)
        models[month] = model

In [None]:
# Total number of observations over all loaded models, in millions, plus the
# average per model (one model per month).
# Row counts are summed as integers first and scaled once.
num_obs = sum(model.durations.shape[0] for model in models.values()) / 1e6
print(f"Number of orders: {round(num_obs, 2)}mn")
print(f"Per month: {round(num_obs/len(models), 2)}mn")

# Survival Functions

In [None]:
# Upper bound of the survival-function timeline that is kept for plotting:
# each model's survival function is sliced with `.loc[:upper_limit]`.
upper_limit = 100 # milliseconds

In [None]:
# for yearmonth, model in models.items():
#     below_limit = len(model.durations[model.durations <= upper_limit])
#     total = len(model.durations)
#     percentage =  below_limit / total
#     print(f"{yearmonth} \t {round(100*percentage)}%")

In [None]:
# Stack the (truncated) survival function of every month into one long frame.
# Per month: keep the timeline up to `upper_limit`, name the single estimate
# column "percentage" and tag the rows with the month they belong to.
survivals = pd.concat(
    [
        model.survival_function_.loc[:upper_limit]
        .rename(columns=lambda _: "percentage")
        .assign(yearmon=yearmon)
        for yearmon, model in models.items()
    ]
).reset_index()

In [None]:
# Turn the survival estimate S(t) into the probability of execution/deletion,
# i.e. 1 - S(t).
# NOTE: this overwrites the column in place, so running the cell a second time
# flips the values back -- run it exactly once after building `survivals`.
survivals["percentage"] = survivals["percentage"].rsub(1)

In [None]:
# Label every row by whether its month lies before or from 201907 onwards,
# then order the rows by label and timeline (both descending).
mask = survivals["yearmon"].ge(201907)
survivals["timespan"] = mask.map(
    {
        True: "Months after non-equivalence",
        False: "Months before non-equivalence",
    }
)
survivals = survivals.sort_values(["timespan", "timeline"], ascending=False)

# months = [yearmon*100 + 1 for yearmon in survivals["yearmon"]]
# months = pd.Series([pd.Timestamp(str(month)) for month in months])

# survivals["yearmon"] = pd.to_datetime((100 * survivals["yearmon"] + 1).astype(str))
# # months = months.dt.strftime("%b %y")

# # create labels for 2018 H2, 2019 H1 and H2
# mask = (pd.Timestamp("2019-07-01") <= survivals["yearmon"]) & (survivals["yearmon"] < pd.Timestamp("2020-01-01"))
# survivals.loc[mask, "half_year"] = "2019 H2"  # pd.Timestamp("20190701")

# mask = (pd.Timestamp("2019-01-01") <= survivals["yearmon"]) & (survivals["yearmon"] < pd.Timestamp("2019-07-01"))
# survivals.loc[mask, "half_year"] = "2019 H1"  # pd.Timestamp("20190101")

# mask = (pd.Timestamp("2018-01-01") <= survivals["yearmon"]) & (survivals["yearmon"] < pd.Timestamp("2019-01-01"))
# survivals.loc[mask, "half_year"] = "2018 H2"  # pd.Timestamp("20180701")

# survivals["yearmon"] = survivals["yearmon"].dt.strftime("%b %y")

In [None]:
# Column whose values split the data into separately coloured traces.
color = "timespan"

# First two colours of plotly's "Safe" qualitative palette, one per group.
group_colors = px.colors.qualitative.Safe[:2]

# Line chart of 1 - S(t) over the timeline; hovering shows the month.
# NOTE(review): all months of a group share a single trace -- confirm that
# this is intended rather than one line per month.
fig = px.line(
    data_frame=survivals,
    x="timeline",
    y="percentage",
    color=color,
    # symbol=color,
    hover_name="yearmon",
    template="plotly_white",
    color_discrete_sequence=group_colors,
)

In [None]:
# format figure
# Title settings shared by both axes.
axis_title_style = dict(title_standoff=40, title_font=dict(size=35))
base_font = dict(family="STIX Two Text", size=35, color="black")

fig.update_layout(
    font=base_font,
    xaxis=dict(title="Time in book t (ms)", **axis_title_style),
    yaxis=dict(title="1 - Ŝ(t)", **axis_title_style),
    legend=dict(itemsizing="constant", x=0.60, y=0.1),
    margin=dict(r=0.0),
)


def _tidy_trace(trace):
    """Strip the "<color column>=" prefix from the trace name and set the marker size."""
    return trace.update(
        name=trace.name.replace(f"{color}=", ""),
        marker={"size": 4},
    )


# for_each_trace returns the figure, so the cell still renders it.
fig.for_each_trace(_tidy_trace)

In [None]:
# export it
# Write the figure as a PDF into the thesis figure folder, after an explicit
# interactive confirmation.
# NOTE(review): the file name says "60s" while the plotted timeline is capped
# at `upper_limit` milliseconds -- confirm the name is still accurate.
plot_name = f"{measure}_60s.pdf"

# Export size in pixels; the height is the width divided by 1.618.
specs = dict(width=1600, height=1600/1.618)

# NOTE(review): absolute, machine-specific target directory.
plot_path = Path("/Users/simon/Library/Mobile Documents/com~apple~CloudDocs/Academia/01_UniSG/00_Thesis/01_Thesis/01_draft/my_print_style/figures")
file_path = plot_path / plot_name

while True:
    # Normalise the reply so that "Y", " y " or "yes" are accepted as well.
    answer = input(f"Are you sure to overwrite {plot_name}? [y/n]\n").strip().lower()

    if answer in ("y", "yes"):
        fig.write_image(str(file_path), **specs)
        print(f"Exported file {plot_name} to {plot_path}")
        break
    elif answer in ("n", "no"):
        print("Aborted")
        break
    else:
        # Tell the user why the question is asked again.
        print("Please answer 'y' or 'n'.")