### **`generate_table03_summary_stats.ipynb`**

TABLE III: Cumulative reward (mean ± 99 % confidence-interval bounds) over an evaluation set for each controller in the LLEC-HeatPumpHouse environment. T01–C04 mark the configurations explored in the ablation study.

In [1]:
import os
import pandas as pd
import numpy as np
import re
from textwrap import dedent
from pathlib import Path

# Evaluation dataset (LLEC measurements); the 'temperature' folder is read first, then 'combined'.
data_dirs = [
    "../results/temperature/outdoor_data",
    "../results/combined/outdoor_data",
]  # eval
results = {}      # model name -> evaluation DataFrame as read from its CSV
avg_rewards = {}  # model name -> mean over episodes of the summed reward

# File-name endings that mark a CSV as a controller result file.
valid_suffixes = ("_best.csv", "PID_Control.csv", "PI_Control.csv", "Fuzzy_Control.csv")

# Step 1: Read CSV files per directory ----------------------------
for results_dir in data_dirs:
    print(f"\n===== Bearbeite Verzeichnis: {results_dir} =====")

    # A missing folder is reported and skipped; the remaining folders are still read.
    if not os.path.isdir(results_dir):
        print(f"  [WARN] Directory '{results_dir}' does not exist.")
        continue

    # Containers scoped to this directory, so the overview below only lists its files.
    dir_results = {}
    dir_avg_rewards = {}

    for filename in sorted(os.listdir(results_dir)):
        # MPC result files carry a trailing horizon number, so they are matched by substring.
        is_mpc_csv = "MPC_Control" in filename and filename.endswith(".csv")
        if not (filename.endswith(valid_suffixes) or is_mpc_csv):
            continue

        # "eval_T01_a2c_best.csv" -> "T01 a2c best"
        model_name = filename.replace("eval_", "").replace(".csv", "").replace("_", " ").strip()
        file_path = os.path.join(results_dir, filename)

        try:
            df = pd.read_csv(file_path, engine="python", on_bad_lines="error")
            df.columns = df.columns.str.strip()
            avg_reward = df.groupby("episode")["reward"].sum().mean()
        except Exception as e:
            # Best effort: an unreadable file is reported and left out of the table.
            print(f"  Fehler beim Einlesen von '{filename}': {e}")
        else:
            dir_results[model_name] = df
            dir_avg_rewards[model_name] = avg_reward

    # Overview for this directory
    unique_algorithms = list(dir_results)
    print(f"  Anzahl verschiedener Algorithmen: {len(unique_algorithms)}")
    print("  Liste der Algorithmen:")
    for algorithm_name in unique_algorithms:
        print(f"  - {algorithm_name}")

    # Merge into the notebook-wide containers.
    results.update(dir_results)
    avg_rewards.update(dir_avg_rewards)

# Step 2: Build summary DataFrame ---------------------------------
# Column layout of the per-episode summary. Passing it explicitly keeps the
# columns present even when no result file could be loaded, so later column
# lookups do not fail with a KeyError on an empty frame.
_SUMMARY_COLUMNS = ["Algorithm", "Base Algorithm", "Episode", "Seed", "Final Cumulative Reward"]

# (substring, label) pairs checked in order; "PID" precedes "PI" and
# "Perfect MPC" precedes "MPC" so the more specific name wins.
_BASE_ALGO_TOKENS = (
    ("a2c", "A2C"),
    ("ppo", "PPO"),
    ("sac", "SAC"),
    ("ddpg", "DDPG"),
    ("Fuzzy", "FUZZY"),
    ("PID", "PID"),
    ("PI", "PI"),
    ("Perfect MPC", "PERFECT MPC"),
    ("MPC", "MPC"),
)


def _base_algorithm(algo_name):
    """Return the controller-family label for a model name (upper-cased name as fallback)."""
    for token, label in _BASE_ALGO_TOKENS:
        if token in algo_name:
            return label
    return algo_name.upper()  # Fallback


summary_list = []

for algo_name, df in results.items():
    base_algo = _base_algorithm(algo_name)

    # Extract the last row of every episode. The maximum time step is taken
    # per episode (not over the whole file), so an episode that ends earlier
    # than the longest one still contributes its final cumulative reward.
    # dropna=False keeps rows whose episode id is missing in their own group.
    final_step = df.groupby("episode", dropna=False)["time_step"].transform("max")
    last_steps = df[df["time_step"] == final_step]

    summary_list.extend(
        {
            "Algorithm": algo_name,
            "Base Algorithm": base_algo,
            "Episode": episode,
            "Seed": seed,
            "Final Cumulative Reward": final_reward,
        }
        for episode, seed, final_reward in zip(
            last_steps["episode"], last_steps["seed"], last_steps["cumulative_reward"]
        )
    )

summary_df = pd.DataFrame(summary_list, columns=_SUMMARY_COLUMNS)

# Step 3: Readable labels (optional, unchanged) ------------------
def clean_label(name):
    """Shorten a temperature-variant RL label, e.g. "T01 a2c best" -> "T01 (A2C)".

    Only names that start with "T", contain "best" and consist of at least
    three whitespace-separated tokens are rewritten; every other name is
    returned unchanged.
    """
    if not name.startswith("T") or "best" not in name:
        return name
    tokens = name.split()
    if len(tokens) < 3:
        return name
    variant, algorithm = tokens[0], tokens[1]
    return f"{variant} ({algorithm.upper()})"

# Attach the shortened label next to the full algorithm name.
summary_df["Algorithm Clean"] = summary_df["Algorithm"].map(clean_label)

# Overview across all directories: number of algorithms, then one line per name.
unique_algorithms = summary_df["Algorithm"].unique()
print(f"Anzahl verschiedener Algorithmen: {len(unique_algorithms)}")
print("Liste der Algorithmen:")
for algorithm_name in unique_algorithms:
    print(f"- {algorithm_name}")


===== Bearbeite Verzeichnis: ../results/temperature/outdoor_data =====
  Anzahl verschiedener Algorithmen: 21
  Liste der Algorithmen:
  - T01 Fuzzy Control
  - T01 MPC Control 12
  - T01 PID Control
  - T01 PI Control
  - T01 Perfect MPC Control 12
  - T01 a2c best
  - T01 ddpg best
  - T01 ppo best
  - T01 sac best
  - T02 a2c best
  - T02 ddpg best
  - T02 ppo best
  - T02 sac best
  - T03 a2c best
  - T03 ddpg best
  - T03 ppo best
  - T03 sac best
  - T04 a2c best
  - T04 ddpg best
  - T04 ppo best
  - T04 sac best

===== Bearbeite Verzeichnis: ../results/combined/outdoor_data =====
  Anzahl verschiedener Algorithmen: 21
  Liste der Algorithmen:
  - C01 Fuzzy Control
  - C01 MPC Control 12
  - C01 PID Control
  - C01 PI Control
  - C01 Perfect MPC Control 12
  - C01 a2c best
  - C01 ddpg best
  - C01 ppo best
  - C01 sac best
  - C02 a2c best
  - C02 ddpg best
  - C02 ppo best
  - C02 sac best
  - C03 a2c best
  - C03 ddpg best
  - C03 ppo best
  - C03 sac best
  - C04 a2c best
 

In [2]:
# Step 4: Calculate basic statistics ------------------------------
# One row per algorithm: mean, sample standard deviation and number of
# final cumulative rewards.
grouped = summary_df.groupby("Algorithm")["Final Cumulative Reward"]
summary = grouped.agg(mean="mean", std="std", count="count").reset_index()

# Step 5: 99 % confidence interval (z-value for 99 % = 2.576) -----
# NOTE(review): this is the normal-approximation quantile; with few episodes
# per algorithm a Student-t quantile would give wider bounds - confirm counts.
z = 2.576
ci_half_width = z * (summary["std"] / np.sqrt(summary["count"]))
summary["ci99_lower"] = summary["mean"] - ci_half_width
summary["ci99_upper"] = summary["mean"] + ci_half_width

# From here on the three metrics are two-decimal strings (ready for LaTeX),
# no longer numbers.
two_decimals = "{:.2f}".format
for col in ("mean", "ci99_lower", "ci99_upper"):
    summary[col] = summary[col].map(two_decimals)

# Step 6: Variant and controller columns split --------------------
summary["Variant"] = summary["Algorithm"].str[:3]      # e.g. T01, C02
summary["Controller"] = summary["Algorithm"].str[4:]   # everything after the separating space

rl_controllers = {"A2C", "DDPG", "PPO", "SAC"}  # separates RL from classical controllers


def clean_controller(name: str) -> str:
    """Normalize a raw controller name to the label used in the table.

    "PERFECT" becomes "BEST" and the word "CONTROL" is dropped (both
    case-insensitively). A name that then starts with one of
    ``rl_controllers`` is reduced to that abbreviation (e.g. "a2c best" ->
    "A2C"); any other name has its digits removed and is upper-cased
    (e.g. "Perfect MPC Control 12" -> "BEST MPC").
    """
    label = re.sub(r"PERFECT", "BEST", name.strip(), flags=re.IGNORECASE)
    label = re.sub(r"\bCONTROL\b", "", label, flags=re.IGNORECASE).strip()

    # RL names collapse to the bare abbreviation, whatever follows it.
    upper_label = label.upper()
    matched_rl = next((rl for rl in rl_controllers if upper_label.startswith(rl)), None)
    if matched_rl is not None:
        return matched_rl

    # Classical controllers: strip numbers (e.g. the MPC horizon) and upper-case.
    return re.sub(r"\d+", "", label).strip().upper()

# Replace the raw controller text by its normalized label, then tag each row
# as RL or classical.
summary["Controller"] = summary["Controller"].map(clean_controller)
summary["Group"] = [
    "RL" if controller in rl_controllers else "Classical"
    for controller in summary["Controller"]
]

# Step 7: Build LaTeX table ---------------------------------------

# Helper function to replace missing values with “--”
def get_metric(df, variant: str, ctrl: str, metric: str) -> str:
    """Look up one table cell.

    Returns the value of column ``metric`` in the first row of ``df`` whose
    "Variant" equals ``variant`` and whose "Controller" equals ``ctrl``, or
    the placeholder "--" when no row matches.
    """
    is_match = (df["Variant"] == variant) & (df["Controller"] == ctrl)
    hits = df.loc[is_match, metric]
    if hits.empty:
        return "--"
    return hits.iloc[0]

# Row blocks follow the temperature variants (T01 ...); the matching combined
# variant is derived later by swapping the leading T for a C.
temp_variants = sorted({variant for variant in summary["Variant"] if variant.startswith("T")})

# Row order inside a block: classical controllers first, then RL controllers,
# each in order of first appearance in `summary`.
controllers_ordered = [
    controller
    for group in ("Classical", "RL")
    for controller in summary.loc[summary["Group"] == group, "Controller"].unique()
]

# Table header with two column groups (temperature | combined), three metrics each.
table_header = dedent(r"""
\begin{tabular}{@{}lcccccc@{}}
\toprule
\multicolumn{1}{@{}l}{\textbf{Variants:}} &
\multicolumn{3}{c}{\textbf{Temperature (T01--T04)}} &
\multicolumn{3}{c@{}}{\textbf{Combined (C01--C04)}} \\
\cmidrule(lr){2-4} \cmidrule(lr){5-7}
Controller & Mean & Min\textsuperscript{*} & Max\textsuperscript{*} &
Mean & Min\textsuperscript{*} & Max\textsuperscript{*} \\
\midrule
""").lstrip()

metric_columns = ("mean", "ci99_lower", "ci99_upper")

lines = []
for block_index, t_var in enumerate(temp_variants):
    # A rule separates consecutive Txx/Cxx blocks; none before the first or after the last.
    if block_index > 0:
        lines.append(r"\midrule")

    c_var = "C" + t_var[1:]           # e.g. T01 -> C01 (may not exist)
    lines.append(rf"\multicolumn{{7}}{{@{{}}l}}{{\textbf{{{t_var}/{c_var}}}}}\\")

    # Controllers come in the precomputed order: classical first, then RL.
    for ctrl in controllers_ordered:
        # Three temperature cells followed by three combined cells.
        cells = [
            get_metric(summary, variant, ctrl, metric)
            for variant in (t_var, c_var)
            for metric in metric_columns
        ]
        # A controller without any value in this block gets no row.
        if all(cell == "--" for cell in cells):
            continue
        lines.append(f"{ctrl:<8} & " + " & ".join(f"{cell}" for cell in cells) + " \\\\")

tabular = table_header + "\n".join(lines)

# Footnote & End: close the tabular body with the bottom rule and the CI footnote row.
table_footer = dedent(r"""
\bottomrule
\multicolumn{7}{l}{\footnotesize *\,99\% \gls{CI} bounds.} \\
\end{tabular}""")
tabular = tabular + table_footer

# Embed complete table: opening lines, the tabular body, closing lines.
table_opening = r"""
%------ RL ABLATION STUDY TABLE 3 START ------
\begin{table}[!t]
\centering
\caption{Ablation study results showing cumulative reward (mean $\pm$ 99\,\% confidence interval) for each controller (T01--C04) in the \blinded{\texttt{LLEC-HeatPumpHouse-1R1C}} environment.}
\label{tab:final_cumulative_reward}
"""
table_closing = r"""
\end{table}
%------ RL ABLATION STUDY TABLE 3 END ------
"""
latex_table = dedent(table_opening + tabular + table_closing).lstrip()

# ------------------------------------------------------------
# Step 8: Output LaTeX ---------------------------------------
# ------------------------------------------------------------

print(latex_table)

%------ RL ABLATION STUDY TABLE 3 START ------
\begin{table}[!t]
\centering
\caption{Ablation study results showing cumulative reward (mean $\pm$ 99\,\% confidence interval) for each controller (T01--C04) in the \blinded{\texttt{LLEC-HeatPumpHouse-1R1C}} environment.}
\label{tab:final_cumulative_reward}
\begin{tabular}{@{}lcccccc@{}}
\toprule
\multicolumn{1}{@{}l}{\textbf{Variants:}} &
\multicolumn{3}{c}{\textbf{Temperature (T01--T04)}} &
\multicolumn{3}{c@{}}{\textbf{Combined (C01--C04)}} \\
\cmidrule(lr){2-4} \cmidrule(lr){5-7}
Controller & Mean & Min\textsuperscript{*} & Max\textsuperscript{*} &
Mean & Min\textsuperscript{*} & Max\textsuperscript{*} \\
\midrule
\multicolumn{7}{@{}l}{\textbf{T01/C01}}\\
FUZZY    & 225.69 & 217.27 & 234.11 & 205.51 & 194.64 & 216.38 \\
MPC      & 266.90 & 266.01 & 267.78 & 246.68 & 243.58 & 249.79 \\
PI       & 251.98 & 250.46 & 253.51 & 230.86 & 226.17 & 235.55 \\
PID      & 251.98 & 250.45 & 253.51 & 230.86 & 226.17 & 235.55 \\
BEST MPC & 280.51 & 2

In [3]:
# Final cell: confirmation that the notebook ran through to the end.
completion_message = "Latex table generated successfully in 'generate_table03_summary_stats.ipynb' successfully."
print(completion_message)

Latex table generated successfully in 'generate_table03_summary_stats.ipynb' successfully.
