# Latex metric tables unvalidated models

This notebook creates the comparison tables latex code for the unvalidated experiments. These tables are not used in the thesis anymore.

In [1]:
import os

import pandas as pd
from dotenv import load_dotenv

import utilities.latex_tables as latex_tabs

load_dotenv()
EXPERIMENT = os.environ.get("EXPERIMENT_NUMBER")
LATEX_RESOURCES = os.environ.get("LATEX_RESOURCES")

## Regression Models

In [2]:
runs_df = pd.read_csv(f"./data/runs/reg_runs_{EXPERIMENT}.csv")

In [3]:
by_metrics = [("metrics.mdt", False), ("metrics.med_dt", False)]
group_by = "params.n_dev"

model_class_to_short = {
    "DecisionTreeRegressor": "dt",
    "LinearRegression": "lr",
    "XGBRegressor": "xgb",
}

In [4]:
for model_class, model_group in runs_df.groupby(by="params.model_class"):
    model_abbr = model_class_to_short[model_class]
    caption = f"Vergleich der besten Metriken für \\gls{{{model_abbr}}}-Modelle."
    label = f"best_metrics_{model_abbr}"
    tab = latex_tabs.LatexTable(model_group)
    latex_str = tab.compare_best_by_metrics_and_n_aug(
        by_metrics,
        group_by,
        caption=caption,
        label=label,
        model_class=model_class,
    )

    print(latex_str)
    with open(f"{LATEX_RESOURCES}/../best_metrics_{model_abbr}.tex", "w") as file:
        file.write(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \textit{n\_dev} & \textit{n\_aug} & \textit{\gls{mdt}} $\uparrow$ & \textit{n\_aug} & \textit{\gls{med_dt}} $\uparrow$ \\
        \hline
        \hline
        10 & 5 & \textbf{37,7} (+5,0~\%) & 5 & \textbf{25,0} (+4,2~\%)  \\
        10 & 0 & 35,9 & 0 & 24,0  \\
        \hline
        20 & 1 & \textbf{36,0} (+3,2~\%) & 1 & 25,0 ($\pm$0,0~\%)  \\
        20 & 0 & 34,9 & 0 & 25,0  \\
        \hline
        40 & 1 & \textbf{38,2} (+19,9~\%) & 1 & \textbf{25,0} (+4,2~\%)  \\
        40 & 0 & 31,8 & 0 & 24,0  \\
        \hline
        63 & 1 & \textbf{36,8} (+3,1~\%) & 1 & 25,0 (-3,8~\%)  \\
        63 & 0 & 35,7 & 0 & \textbf{26,0}  \\
        \hline
        \end{tabular}
    \caption{Vergleich der besten Metriken für \gls{dt}-Modelle.}
    \label{tab:best_metrics_dt}
\end{table}

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \textit{n

In [5]:
caption = "Vergleich der insgesamt besten Regressions-Modelle."
label = "best_metrics_reg"
tab = latex_tabs.LatexTable(runs_df)
latex_str = tab.compare_best_by_metrics_and_n_aug(
    by_metrics,
    group_by,
    caption=caption,
    label=label,
    add_model_type=True,
)
print(latex_str)
with open(f"{LATEX_RESOURCES}/../best_metrics_reg.tex", "w") as file:
    file.write(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \textit{n\_dev} & \textit{n\_aug} & \textit{\gls{mdt}} $\uparrow$ & \textit{n\_aug} & \textit{\gls{med_dt}} $\uparrow$ \\
        \hline
        \hline
        10 & LinearRegression & 3 & \textbf{39,2} (+6,9~\%) & LinearRegression & 1 & \textbf{27,5} (+10,0~\%)  \\
        10 & XGBRegressor & 0 & 36,7 & XGBRegressor & 0 & 25,0  \\
        \hline
        20 & LinearRegression & 1 & \textbf{52,1} (+28,3~\%) & LinearRegression & 1 & 26,0 (-1,9~\%)  \\
        20 & LinearRegression & 0 & 40,6 & LinearRegression & 0 & \textbf{26,5}  \\
        \hline
        40 & LinearRegression & 1 & \textbf{46,0} (+24,4~\%) & XGBRegressor & 5 & \textbf{27,0} (+3,8~\%)  \\
        40 & LinearRegression & 0 & 37,0 & LinearRegression & 0 & 26,0  \\
        \hline
        63 & LinearRegression & 1 & \textbf{44,8} (+22,8~\%) & XGBRegressor & 1 & \textbf{28,5} (+5,6~\%)  \\
        63 & LinearRegression & 0 & 3

## Survival Models

In [6]:
runs_df = pd.read_csv(f"./data/runs/surv_runs_{EXPERIMENT}.csv")

In [7]:
by_metrics = [("metrics.mdt", False), ("metrics.med_dt", False)]
group_by = "params.n_dev"

model_class_to_short = {
    "kaplan_meier_estimator": "km",
    "CoxPHFitter": "cph",
    "RandomSurvivalForest": "rsf",
}

In [8]:
for model_class, model_group in runs_df.groupby(by="params.model_class"):
    model_abbr = model_class_to_short[model_class]
    caption = f"Vergleich der besten Metriken für \\gls{{{model_abbr}}}-Modelle."
    label = f"best_metrics_{model_abbr}"
    latex_tabs.LatexTable(model_group)
    latex_str = tab.compare_best_by_metrics_and_n_aug(
        by_metrics,
        group_by,
        caption=caption,
        label=label,
        add_model_type=True,
    )
    print(latex_str)
    with open(f"{LATEX_RESOURCES}/../best_metrics_{model_abbr}.tex", "w") as file:
        file.write(latex_str)
    print(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l || c  l |}
        \hline
        \textit{n\_dev} & \textit{n\_aug} & \textit{\gls{mdt}} $\uparrow$ & \textit{n\_aug} & \textit{\gls{med_dt}} $\uparrow$ \\
        \hline
        \hline
        10 & LinearRegression & 3 & \textbf{39,2} (+6,9~\%) & LinearRegression & 1 & \textbf{27,5} (+10,0~\%)  \\
        10 & XGBRegressor & 0 & 36,7 & XGBRegressor & 0 & 25,0  \\
        \hline
        20 & LinearRegression & 1 & \textbf{52,1} (+28,3~\%) & LinearRegression & 1 & 26,0 (-1,9~\%)  \\
        20 & LinearRegression & 0 & 40,6 & LinearRegression & 0 & \textbf{26,5}  \\
        \hline
        40 & LinearRegression & 1 & \textbf{46,0} (+24,4~\%) & XGBRegressor & 5 & \textbf{27,0} (+3,8~\%)  \\
        40 & LinearRegression & 0 & 37,0 & LinearRegression & 0 & 26,0  \\
        \hline
        63 & LinearRegression & 1 & \textbf{44,8} (+22,8~\%) & XGBRegressor & 1 & \textbf{28,5} (+5,6~\%)  \\
        63 & LinearRegression & 0 & 3

In [9]:
latex_str  = ""

for by_metrics in [("metrics.c_index_ipcw", False), ("metrics.ibs", True)]:
    caption = f"Vergleich der insgesamt besten Survival-Modelle nach der \\gls{{{by_metrics[0].split('.')[1]}}}"
    label = f"best_{by_metrics[0].split('.')[1]}_surv"
    tab = latex_tabs.LatexTable(runs_df)
    latex_str += tab.compare_best_by_metrics_and_n_aug(
        [by_metrics],
        group_by,
        caption=caption,
        label=label,
        add_model_type=True,
    )
print(latex_str)
with open(f"{LATEX_RESOURCES}/../best_metrics_surv.tex", "w") as file:
    file.write(latex_str)

\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l |}
        \hline
        \textit{n\_dev} & \textit{n\_aug} & \textit{\gls{c_index_ipcw}} $\uparrow$ \\
        \hline
        \hline
        10 & RandomSurvivalForest & 5 & \textbf{0,948} (+8,1~\%)  \\
        10 & RandomSurvivalForest & 0 & 0,877  \\
        \hline
        20 & RandomSurvivalForest & 3 & \textbf{0,960} (+15,0~\%)  \\
        20 & CoxPHFitter & 0 & 0,835  \\
        \hline
        40 & RandomSurvivalForest & 10 & \textbf{0,944} (+15,9~\%)  \\
        40 & RandomSurvivalForest & 0 & 0,815  \\
        \hline
        63 & RandomSurvivalForest & 10 & \textbf{0,947} (+19,8~\%)  \\
        63 & RandomSurvivalForest & 0 & 0,790  \\
        \hline
        \end{tabular}
    \caption{Vergleich der insgesamt besten Survival-Modelle nach der \gls{c_index_ipcw}}
    \label{tab:best_c_index_ipcw_surv}
\end{table}
\begin{table}[H]
    \centering
        \begin{tabular}{| c | c  l |}
        \hline
        \textit{n\_