In [1]:
import json
import pandas as pd
from pathlib import Path

# >>> Enter your individual input file paths here (as many as you like).
# These are raw strings, so every path separator is written as ONE backslash;
# doubling it inside r"..." would put two literal backslashes into the path.
FILE_PATHS = [
    r"C:\Users\Marius\Desktop\Dev\CodePilot\app\evaluation\results\thirdIteration\notion_output_word_stats_3.json",
    r"C:\Users\Marius\Desktop\Dev\CodePilot\app\evaluation\results\PersonaFeedback\Persona1\notion_output_word_stats_Persona1.json",
    r"C:\Users\Marius\Desktop\Dev\CodePilot\app\evaluation\results\PersonaFeedback\Persona2\notion_output_word_stats_Persona2.json",
    r"C:\Users\Marius\Desktop\Dev\CodePilot\app\evaluation\results\PersonaFeedback\Persona3\notion_output_word_stats_Persona3.json",
]

def load_stats_from_file(file_path: str):
    """Read one stats JSON file and return ``(means, medians)``.

    The file is parsed as UTF-8 JSON and iterated entry by entry; every
    entry is queried with ``.get`` for the keys ``"row"``, ``"mean"`` and
    ``"median"``.

    Args:
        file_path: Location of the JSON file; handed straight to ``open``.

    Returns:
        A tuple of two dicts, ``{row: mean}`` and ``{row: median}``.
        Entries whose ``"row"`` is missing or ``None`` are left out; a
        missing ``"mean"`` / ``"median"`` is stored as ``None``. If a row
        value occurs more than once, the last entry wins.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    # Only entries that actually name a row can be placed in the tables.
    usable = [rec for rec in records if rec.get("row") is not None]

    mean_by_row = {rec.get("row"): rec.get("mean") for rec in usable}
    median_by_row = {rec.get("row"): rec.get("median") for rec in usable}
    return mean_by_row, median_by_row

def build_tables(paths):
    """Combine several stats files into one mean table and one median table.

    Every path contributes one column to each table. Column values come
    from ``load_stats_from_file``; the table index is built from the row
    keys found in the files.

    Args:
        paths: Iterable of file paths (anything ``pathlib.Path`` accepts).

    Returns:
        A tuple ``(mean_df, median_df)`` of pandas DataFrames. The index is
        sorted, relabelled as ``"row<N>"`` and named ``"Row"``. Rows that a
        file does not contain are left as missing values in that column.

    Raises:
        ValueError / TypeError: if a row key cannot be converted with
            ``int`` when the index labels are built.
    """
    mean_cols = {}
    median_cols = {}

    for idx, p in enumerate(paths, start=1):
        p = Path(p)
        # Column name: file name without extension ("DateiX" as a fallback).
        base_name = p.stem if p.stem else f"Datei{idx}"

        # Files in different folders may share the same stem. Without a
        # distinct name the later file would silently replace the earlier
        # column, so append a running number until the name is unused.
        col_name = base_name
        suffix = 2
        while col_name in mean_cols:
            col_name = f"{base_name}_{suffix}"
            suffix += 1

        means, medians = load_stats_from_file(p)
        mean_cols[col_name] = means
        median_cols[col_name] = medians

    # Build the DataFrames (outer keys -> columns, inner keys -> index)
    # and sort the rows by their key.
    mean_df = pd.DataFrame(mean_cols).sort_index()
    median_df = pd.DataFrame(median_cols).sort_index()

    # Friendlier row labels ("row1", "row2", ...). Remove these two lines
    # if the bare numbers are preferred.
    mean_df.index = [f"row{int(i)}" for i in mean_df.index]
    median_df.index = [f"row{int(i)}" for i in median_df.index]
    mean_df.index.name = "Row"
    median_df.index.name = "Row"

    return mean_df, median_df

if __name__ == "__main__":
    mean_df, median_df = build_tables(FILE_PATHS)

    # Print both tables in full, each under its own heading.
    for heading, table in (
        ("=== MEANS ===", mean_df),
        ("\n=== MEDIANS ===", median_df),
    ):
        print(heading)
        print(table.to_string())

    # Save each table as a CSV file in the current working directory.
    for table, target in ((mean_df, "means.csv"), (median_df, "medians.csv")):
        table.to_csv(target, encoding="utf-8")

    # Optionally also write an Excel workbook:
    # with pd.ExcelWriter("stat_tables.xlsx") as xls:
    #     mean_df.to_excel(xls, sheet_name="means")
    #     median_df.to_excel(xls, sheet_name="medians")

    print("\nDateien gespeichert: means.csv, medians.csv")


=== MEANS ===
      notion_output_word_stats_3  notion_output_word_stats_Persona1  notion_output_word_stats_Persona2  notion_output_word_stats_Persona3
Row                                                                                                                                      
row1                        60.5                               69.9                               59.5                               60.0
row2                        25.3                               25.4                               29.3                               25.5
row3                        42.4                               47.8                               43.9                               56.9
row4                         5.0                                5.0                                5.0                                5.0

=== MEDIANS ===
      notion_output_word_stats_3  notion_output_word_stats_Persona1  notion_output_word_stats_Persona2  notion_output_word_stats_Persona3
Row