# Inputs & outputs
- **Inputs:** WildChat evaluation pickles in `final_data/wild_data` and new-model results in `final_data/newmodels_data`.
- **Outputs:** summary stats and optional plots used for Supplementary Figures S4–S5.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import pickle
from pathlib import Path

In [1]:


# Matplotlib defaults for every figure drawn below, collected group by group
# and then applied globally through rcParams.
custom_style = {}

# Font sizes
custom_style.update({
    "axes.labelsize": 25,
    "axes.titlesize": 20,
    "xtick.labelsize": 20,
    "ytick.labelsize": 20,
})

# Line and marker styles
custom_style.update({
    "lines.linewidth": 3,
    "lines.markersize": 8,
    "lines.color": "black",
    "errorbar.capsize": 5,
})

# Axes & spines
custom_style.update({
    "axes.edgecolor": "black",
    "axes.linewidth": 2,
})

# Tick styling
custom_style.update({
    "xtick.color": "black",
    "ytick.color": "black",
    "xtick.major.width": 1.2,
    "ytick.major.width": 1.2,
})

# Grid
custom_style.update({
    "axes.grid": True,
    "grid.color": "gray",
    "grid.linewidth": 0.7,
    "grid.linestyle": "--",
    "grid.alpha": 0.6,
})

# Figure settings
custom_style.update({
    "figure.figsize": (12, 8),
    "figure.dpi": 300,
    "figure.facecolor": "white",
})

plt.rcParams.update(custom_style)

# Root folder; the data sub-directories used later are joined onto it.
path_to_data = "./final_data"

# 1) WildChat

In [None]:

# Re-point path_to_data at its "wild_data" sub-folder.
# The basename guard keeps this cell idempotent: without it, running the cell
# a second time would append "wild_data" again ("…/wild_data/wild_data").
if os.path.basename(os.path.normpath(path_to_data)) != "wild_data":
    path_to_data = os.path.join(path_to_data, "wild_data")

# os.listdir returns entries in arbitrary order; sort for a stable listing.
files = sorted(os.listdir(path_to_data))
print(files)

In [None]:


# =============================================================================
# Path configuration
# =============================================================================
path = Path(path_to_data)

# Accepted file names: "<model>_<kind>.pkl", where <model> is "gpt-" followed
# by digits/dots plus an optional "-suffix", and <kind> is "assist" or "synth".
# Compiled once here instead of being re-specified on every iteration.
# NOTE(review): anything else is skipped silently — e.g. "gpt-4o_assist.pkl"
# does not match because of the letter right after the digits. Confirm that
# this is intended.
result_name = re.compile(r"(gpt-[\d\.]+(?:-[\w]+)?)_(assist|synth)\.pkl")


# =============================================================================
# Iterate over result files
# =============================================================================
# glob() yields entries in arbitrary, filesystem-dependent order; sorting makes
# the printed summary reproducible from run to run.
for file in sorted(path.glob("*.pkl")):

    # fullmatch: the whole file name must fit the pattern, not just a prefix.
    m = result_name.fullmatch(file.name)
    if not m:
        continue

    model, kind = m.groups()


    # -------------------------------------------------------------------------
    # Load data
    # -------------------------------------------------------------------------
    # NOTE: unpickling can execute arbitrary code — only load trusted files.
    with open(file, "rb") as f:
        data = pickle.load(f)

    s = pd.Series(data)
    print(f"{model} {kind} mean: {s.mean()}" )


    # -------------------------------------------------------------------------
    # Optional visualization (disabled)
    # -------------------------------------------------------------------------
    # s.hist(bins=30)
    # plt.title(f"{model} — {kind}")
    # plt.xlabel("Value")
    # plt.ylabel("Frequency")
    # plt.show()

# 2) New models

In [None]:

# Start again from the data root, then list what the new-model folder holds.
path_to_data = "./final_data"

newmodels_dir = os.path.join(path_to_data, "newmodels_data")
files = os.listdir(newmodels_dir)
print(files)

In [None]:


# =============================================================================
# Path configuration
# =============================================================================
path = Path(os.path.join(path_to_data, "newmodels_data"))

# The model name is everything before the first underscore of the file name.
# Compiled once here instead of being re-specified on every iteration.
model_prefix = re.compile(r"([^_]+)_.+\.pkl")


# =============================================================================
# Iterate over result files
# =============================================================================
# glob() yields entries in arbitrary, filesystem-dependent order; sorting makes
# the printed summary reproducible from run to run.
for file in sorted(path.glob("*.pkl")):

    # -------------------------------------------------------------------------
    # Extract model name (everything before first underscore)
    # -------------------------------------------------------------------------
    # Files without an underscore in their name are skipped.
    m = model_prefix.fullmatch(file.name)
    if not m:
        continue

    model = m.group(1)
    print(model)


    # -------------------------------------------------------------------------
    # Load data
    # -------------------------------------------------------------------------
    # NOTE: unpickling can execute arbitrary code — only load trusted files.
    with open(file, "rb") as f:
        data = pickle.load(f)

    s = pd.Series(data)
    # Mean of the loaded values followed by the number of entries.
    print(f"{model} mean:", s.mean(), len(data))
    print(file)


    # -------------------------------------------------------------------------
    # Optional visualization (disabled)
    # -------------------------------------------------------------------------
    # s.hist(bins=30)
    # plt.title(model)
    # plt.xlabel("Value")
    # plt.ylabel("Frequency")
    # plt.show()