# LaTeX metric tables for cross-validation

This notebook generates the LaTeX code for the comparison tables of the cross-validation experiments.

In [47]:
import os

import pandas as pd
from dotenv import load_dotenv

import utilities.latex_tables as latex_tabs

# Pull variables from a local .env file into the process environment, then
# read the two settings this notebook uses. Either value is None when the
# corresponding variable is not set.
load_dotenv()
EXPERIMENT = os.getenv("EXPERIMENT_NUMBER")
LATEX_RESOURCES = os.getenv("LATEX_RESOURCES")

## Survival Models

In [48]:
# Cross-validation results of the survival models, one row per run.
surv_cv_df = pd.read_csv("./data/cross_validation/surv_cross_validation.csv")

In [49]:
# Column by which the comparison tables are grouped.
group_by = "params.n_dev"

# Metrics to compare, as (column name, flag) pairs.
# NOTE(review): the flag presumably marks "lower is better" (the rendered
# tables show a down arrow for the metric flagged True) - confirm against
# utilities.latex_tables.
by_metrics = [
    ("metrics.c_index_ipcw", False),
    ("metrics.ibs", True),
]

# Maps the model class name found in the results to the short key used in
# the \gls{...} caption and in the output file name.
model_class_to_short = dict(
    kaplan_meier_estimator="km",
    CoxPHFitter="cph",
    RandomSurvivalForest="rsf",
)

In [50]:
# Build one comparison table per survival model class, print it, and write it
# to "{LATEX_RESOURCES}/../best_metrics_<abbr>.tex".
for model_class, model_group in surv_cv_df.groupby(by="params.model_class"):
    print(model_class)
    model_abbr = model_class_to_short[model_class]
    caption = f"Vergleich der besten Metriken für \\gls{{{model_abbr}}}-Modelle."
    label = f"best_metrics_{model_abbr}"

    # Choose the metrics in a per-iteration local instead of rebinding the
    # notebook-level `by_metrics`. Rebinding it would leak the narrowed list
    # into every later iteration and into any re-run of this cell.
    if model_class == "kaplan_meier_estimator":
        # Only the IBS is compared for the Kaplan-Meier estimator.
        model_metrics = [("metrics.ibs", True)]
    else:
        model_metrics = by_metrics

    tab = latex_tabs.LatexTable(model_group)
    latex_str = tab.compare_best_by_metrics_and_n_aug(
        model_metrics,
        group_by,
        caption=caption,
        label=label,
    )
    print(latex_str)

    # The caption contains non-ASCII characters ("ü"); fix the encoding so the
    # written file does not depend on the platform's default locale.
    out_path = f"{LATEX_RESOURCES}/../best_metrics_{model_abbr}.tex"
    with open(out_path, "w", encoding="utf-8") as file:
        file.write(latex_str)

CoxPHFitter
\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \textit{n\_dev} & \textit{n\_aug} & \textit{\gls{c_index_ipcw}} $\uparrow$ & \textit{n\_aug} & \textit{\gls{ibs}} $\downarrow$ \\
        \hline
        \hline
        10 & 10 & \textbf{0,903} (+6,2~\%) & 3 & \textbf{0,073} (-40,0~\%)  \\
        10 & 0 & 0,850 & 0 & 0,122  \\
        \hline
        20 & 10 & \textbf{0,908} (+1,7~\%) & 3 & \textbf{0,080} (-24,0~\%)  \\
        20 & 0 & 0,894 & 0 & 0,106  \\
        \hline
        40 & 10 & 0,894 (-0,7~\%) & 3 & \textbf{0,075} (-29,4~\%)  \\
        40 & 0 & \textbf{0,900} & 0 & 0,106  \\
        \hline
        63 & 3 & 0,940 ($\pm$0,0~\%) & 3 & \textbf{0,068} (-38,5~\%)  \\
        63 & 0 & 0,940 & 0 & 0,110  \\
        \hline
        \end{tabular}
    \caption{Vergleich der besten Metriken für \gls{cph}-Modelle.}
    \label{tab:best_metrics_cph}
\end{table}

RandomSurvivalForest
\begin{table}[H]
    \centering
        \begi

In [51]:
# Build one "overall best survival model" table per metric, concatenate them,
# print the result, and write it to "{LATEX_RESOURCES}/../best_metrics_surv.tex".
latex_str = ""

# The loop variable is deliberately NOT called `by_metrics`: reusing that name
# would overwrite the notebook-level metric list with a single tuple and break
# a re-run of the per-model cell.
for metric_spec in [("metrics.c_index_ipcw", False), ("metrics.ibs", True)]:
    # "metrics.c_index_ipcw" -> "c_index_ipcw"; used as glossary key and label.
    metric_name = metric_spec[0].split(".")[1]
    caption = f"Vergleich der insgesamt besten Survival-Modelle nach dem \\gls{{{metric_name}}}"
    label = f"best_{metric_name}_surv"
    tab = latex_tabs.LatexTable(surv_cv_df)
    latex_str += tab.compare_best_by_metrics_and_n_aug(
        [metric_spec],
        group_by,
        caption=caption,
        label=label,
        add_model_type=True,
    )
print(latex_str)

# Explicit encoding keeps the written file independent of the platform's
# default locale encoding.
with open(f"{LATEX_RESOURCES}/../best_metrics_surv.tex", "w", encoding="utf-8") as file:
    file.write(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | l  c  l |}
        \hline
        \textit{n\_dev} & Modelltyp & \textit{n\_aug} & \textit{\gls{c_index_ipcw}} $\uparrow$ \\
        \hline
        \hline
        10 & CoxPHFitter & 10 & \textbf{0,903} (+6,2~\%)  \\
        10 & CoxPHFitter & 0 & 0,850  \\
        \hline
        20 & CoxPHFitter & 10 & \textbf{0,908} (+1,7~\%)  \\
        20 & CoxPHFitter & 0 & 0,894  \\
        \hline
        40 & RandomSurvivalForest & 10 & \textbf{0,906} (+0,7~\%)  \\
        40 & CoxPHFitter & 0 & 0,900  \\
        \hline
        63 & CoxPHFitter & 3 & 0,940 ($\pm$0,0~\%)  \\
        63 & CoxPHFitter & 0 & 0,940  \\
        \hline
        \end{tabular}
    \caption{Vergleich der insgesamt besten Survival-Modelle nach der \gls{c_index_ipcw}}
    \label{tab:best_c_index_ipcw_surv}
\end{table}
\begin{table}[H]
    \centering
        \begin{tabular}{| c | l  c  l |}
        \hline
        \textit{n\_dev} & Modelltyp & \textit{n\_aug} & \texti

## Regression Models

In [52]:
# Cross-validation results of the regression models, one row per run.
reg_cv_df = pd.read_csv("./data/cross_validation/reg_cross_validation.csv")

In [53]:
# Column by which the comparison tables are grouped.
group_by = "params.n_dev"

# Metrics to compare for the regression models, as (column name, flag) pairs.
# NOTE(review): the flag's meaning is defined in utilities.latex_tables;
# both metrics flagged False render with an up arrow in the saved output.
by_metrics = [
    ("metrics.mdt", False),
    ("metrics.med_dt", False),
]

# Maps the model class name found in the results to the short key used in
# the \gls{...} caption and in the output file name.
model_class_to_short = dict(
    DecisionTreeRegressor="dt",
    LinearRegression="lr",
    XGBRegressor="xgb",
)

In [54]:
# Build one comparison table per regression model class, print it, and write
# it to "{LATEX_RESOURCES}/../best_metrics_<abbr>.tex".
for model_class, model_group in reg_cv_df.groupby(by="params.model_class"):
    model_abbr = model_class_to_short[model_class]
    caption = f"Vergleich der besten Metriken für \\gls{{{model_abbr}}}-Modelle."
    label = f"best_metrics_{model_abbr}"
    tab = latex_tabs.LatexTable(model_group)
    latex_str = tab.compare_best_by_metrics_and_n_aug(
        by_metrics,
        group_by,
        caption=caption,
        label=label,
        model_class=model_class,
    )

    print(latex_str)

    # The caption contains non-ASCII characters ("ü"); fix the encoding so the
    # written file does not depend on the platform's default locale.
    out_path = f"{LATEX_RESOURCES}/../best_metrics_{model_abbr}.tex"
    with open(out_path, "w", encoding="utf-8") as file:
        file.write(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \textit{n\_dev} & \textit{n\_aug} & \textit{\gls{mdt}} $\uparrow$ & \textit{n\_aug} & \textit{\gls{med_dt}} $\uparrow$ \\
        \hline
        \hline
        10 & 1 & 41,7 (-2,1~\%) & 5 & \textbf{26,9} (+1,4~\%)  \\
        10 & 0 & \textbf{42,6} & 0 & 26,5  \\
        \hline
        20 & 1 & \textbf{45,1} (+7,1~\%) & 1 & \textbf{27,4} (+10,6~\%)  \\
        20 & 0 & 42,1 & 0 & 24,8  \\
        \hline
        40 & 1 & 43,9 (-0,2~\%) & 1 & 27,5 (-8,7~\%)  \\
        40 & 0 & \textbf{44,0} & 0 & \textbf{30,1}  \\
        \hline
        63 & 1 & 43,2 (-3,4~\%) & 1 & 26,4 (-1,4~\%)  \\
        63 & 0 & \textbf{44,7} & 0 & \textbf{26,8}  \\
        \hline
        \end{tabular}
    \caption{Vergleich der besten Metriken für \gls{dt}-Modelle.}
    \label{tab:best_metrics_dt}
\end{table}

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \tex

In [57]:
# Build one "overall best regression model" table per metric, concatenate
# them, print the result, and write it to
# "{LATEX_RESOURCES}/../best_metrics_reg.tex".
latex_str = ""

# The loop variable is deliberately NOT called `by_metrics`: reusing that name
# would overwrite the notebook-level metric list with a single tuple and break
# a re-run of the per-model cell.
for metric_spec in [("metrics.mdt", False), ("metrics.med_dt", False)]:
    # "metrics.mdt" -> "mdt"; used as glossary key and label.
    metric_name = metric_spec[0].split(".")[1]
    caption = f"Vergleich der insgesamt besten Regressions-Modelle nach der \\gls{{{metric_name}}}."
    label = f"best_{metric_name}_reg"
    tab = latex_tabs.LatexTable(reg_cv_df)
    latex_str += tab.compare_best_by_metrics_and_n_aug(
        [metric_spec],
        group_by,
        caption=caption,
        label=label,
        add_model_type=True,
    )
print(latex_str)

# Explicit encoding keeps the written file independent of the platform's
# default locale encoding.
with open(f"{LATEX_RESOURCES}/../best_metrics_reg.tex", "w", encoding="utf-8") as file:
    file.write(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | l  c  l |}
        \hline
        \textit{n\_dev} & Modelltyp & \textit{n\_aug} & \textit{\gls{mdt}} $\uparrow$ \\
        \hline
        \hline
        10 & LinearRegression & 1 & \textbf{44,5} (+4,5~\%)  \\
        10 & DecisionTreeRegressor & 0 & 42,6  \\
        \hline
        20 & DecisionTreeRegressor & 1 & \textbf{45,1} (+7,1~\%)  \\
        20 & DecisionTreeRegressor & 0 & 42,1  \\
        \hline
        40 & LinearRegression & 1 & \textbf{46,6} (+6,0~\%)  \\
        40 & DecisionTreeRegressor & 0 & 44,0  \\
        \hline
        63 & LinearRegression & 1 & 44,0 (-1,6~\%)  \\
        63 & DecisionTreeRegressor & 0 & \textbf{44,7}  \\
        \hline
        \end{tabular}
    \caption{Vergleich der insgesamt besten Regressions-Modelle nach der \gls{mdt}.}
    \label{tab:best_mdt_reg}
\end{table}
\begin{table}[H]
    \centering
        \begin{tabular}{| c | l  c  l |}
        \hline
        \textit{n\_dev} & Modelltyp 