In [None]:
import sys
import pickle
import time
import torch
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Import Project Utils
from project.utils.dataset import derive_features
from project.utils.modeling import predict_model


# ---- Setup Paths ----
def add_src_to_path():
    """Locate the project's ``src`` directory and make it importable.

    Walks from the current working directory up through its parents and uses
    the first directory that contains a ``src`` sub-directory. That path is
    placed at the front of ``sys.path`` unless it is already listed, so
    re-running the cell does not accumulate duplicate entries.

    Returns:
        Path: the ``data`` directory that sits next to the discovered ``src``
        (its existence is not checked).

    Raises:
        RuntimeError: if neither the working directory nor any of its parents
        contains a ``src`` directory.
    """
    start = Path.cwd().resolve()
    for candidate in (start, *start.parents):
        src_dir = candidate / "src"
        # is_dir() rather than exists(): a stray *file* named "src" is not a package root.
        if not src_dir.is_dir():
            continue
        src_entry = str(src_dir)
        if src_entry not in sys.path:
            sys.path.insert(0, src_entry)
        # Printed on every call so the cell output is the same across re-runs.
        print(f"‚úÖ Added to sys.path: {src_dir}")
        return candidate / "data"
    raise RuntimeError("Could not locate 'src' directory.")


# Sports and noise scales evaluated throughout the notebook.
SPORTS = ["biking", "running", "walking"]
SCALES = [0, 2, 5, 10]

# Puts the discovered 'src' directory on sys.path and yields the sibling 'data' directory.
DATA_ROOT = add_src_to_path()

print(
    f"üìÇ Data Root: {DATA_ROOT}",
    f"üèÉ Sports to process: {', '.join(SPORTS)}",
    sep="\n",
)

‚úÖ Added to sys.path: /Users/jonasgundlach/Academic/P3/Data_Preparation/data-preparation-2026-group-project/src
üìÇ Data Root: /Users/jonasgundlach/Academic/P3/Data_Preparation/data-preparation-2026-group-project/data
üèÉ Sports to process: biking, running, walking


In [5]:
# %%
def _read_parquet_as_pylists(parquet_path):
    """Read a parquet file into a DataFrame built from per-column Python lists."""
    # Function-scope import: pyarrow is only needed by this helper.
    import pyarrow.parquet as pq

    table = pq.read_table(parquet_path)
    return pd.DataFrame({c: table[c].to_pylist() for c in table.column_names})


def load_and_recalc(sport, scale, variant):
    """
    Load one dataset, run ``derive_features`` on it and cache the result.

    The source parquet is looked up under ``DATA_ROOT / sport``:

    * ``variant == "Cleaned"``: ``cleaned_scale_{scale}_{sport}.parquet``,
      falling back to the ``cleaned/`` sub-directory when it is absent.
    * any other variant: ``{sport}_test_raw.parquet`` for scale 0, otherwise
      ``erroneous_scale_{scale}_{sport}_data.parquet``.

    The processed frame is stored next to the source with a ``_recalculated``
    suffix. When that file already exists it is loaded and returned without
    calling ``derive_features`` again.

    Args:
        sport: sport name, used as directory and file-name component.
        scale: noise scale used in the file name; 0 selects the raw test file
            for non-cleaned variants.
        variant: ``"Cleaned"`` or ``"Uncleaned"``.

    Returns:
        The processed DataFrame, or ``None`` when neither the cached nor the
        source file exists (a notice is printed in that case).
    """
    sport_dir = DATA_ROOT / sport

    # 1. Determine Original File Path
    if variant == "Cleaned":
        filename = f"cleaned_scale_{scale}_{sport}.parquet"
        path = sport_dir / filename
        if not path.exists():
            path = sport_dir / "cleaned" / filename
    else:
        filename = (
            f"{sport}_test_raw.parquet"
            if scale == 0
            else f"erroneous_scale_{scale}_{sport}_data.parquet"
        )
        path = sport_dir / filename

    # 2. Determine Recalculated File Path
    recalc_path = path.parent / (path.stem + "_recalculated.parquet")

    # 3. IF RECALCULATED FILE EXISTS: Load it and skip math
    if recalc_path.exists():
        df = _read_parquet_as_pylists(recalc_path)
        print(f"  üöÄ {sport.capitalize()} [{variant} Scale {scale}]: Loaded from cache")
        return df

    # 4. IF NOT: Load original and do the heavy lifting
    if not path.exists():
        # Say so explicitly; otherwise the combination silently vanishes downstream.
        print(
            f"  WARNING: {sport.capitalize()} [{variant} Scale {scale}]: "
            f"source file not found, skipped ({path})"
        )
        return None

    df = _read_parquet_as_pylists(path)

    # Swap for physics (if uncleaned and scale > 0): the noisy coordinates
    # replace the regular ones before features are derived.
    if variant == "Uncleaned" and scale > 0:
        if "erroneous_latitude" in df.columns:
            df["latitude"] = df["erroneous_latitude"]
            df["longitude"] = df["erroneous_longitude"]

    # Run derive_features (The slow part)
    df = derive_features(df)

    # SAVE for next time. Write to a temporary name and rename afterwards so an
    # interrupted write can never leave a truncated file under the cache name.
    tmp_path = recalc_path.with_name(recalc_path.name + ".tmp")
    df.to_parquet(tmp_path, index=False)
    tmp_path.replace(recalc_path)

    print(f"  ‚úÖ {sport.capitalize()} [{variant} Scale {scale}]: Recalculated & Saved")
    return df


# Lookup table filled below, addressed as datasets[sport][scale][variant].
datasets = {}

print("üîÑ Processing Physics (Loading Cached or Recalculating)...")

for sport in SPORTS:
    per_scale = datasets.setdefault(sport, {})
    for scale in SCALES:
        per_variant = per_scale.setdefault(scale, {})
        for variant in ["Uncleaned", "Cleaned"]:
            df = load_and_recalc(sport, scale, variant)
            # Combinations without a file are left out of the table.
            if df is None:
                continue
            per_variant[variant] = df

print(
    "\n‚ú® Success: All sport datasets are ready (using _recalculated files where available)."
)

üîÑ Processing Physics (Loading Cached or Recalculating)...
  üöÄ Biking [Uncleaned Scale 0]: Loaded from cache
  üöÄ Biking [Cleaned Scale 0]: Loaded from cache
  üöÄ Biking [Uncleaned Scale 2]: Loaded from cache
  üöÄ Biking [Cleaned Scale 2]: Loaded from cache
  üöÄ Biking [Uncleaned Scale 5]: Loaded from cache
  üöÄ Biking [Cleaned Scale 5]: Loaded from cache
  üöÄ Biking [Uncleaned Scale 10]: Loaded from cache
  üöÄ Biking [Cleaned Scale 10]: Loaded from cache
  üöÄ Running [Uncleaned Scale 0]: Loaded from cache
  üöÄ Running [Cleaned Scale 0]: Loaded from cache
  üöÄ Running [Uncleaned Scale 2]: Loaded from cache
  üöÄ Running [Cleaned Scale 2]: Loaded from cache
  üöÄ Running [Uncleaned Scale 5]: Loaded from cache
  üöÄ Running [Cleaned Scale 5]: Loaded from cache
  üöÄ Running [Uncleaned Scale 10]: Loaded from cache
  üöÄ Running [Cleaned Scale 10]: Loaded from cache
  üöÄ Walking [Uncleaned Scale 0]: Loaded from cache
  üöÄ Walking [Cleaned Scale 0]: Loaded 

In [None]:
# %%
# Column configuration passed to predict_model through its ``dataset_args`` argument.
DATASET_ARGS = dict(
    numerical_columns=[
        "time_elapsed_standardized",
        "altitude_standardized",
        "derived_speed_standardized",
        "derived_distance_standardized",
    ],
    categorical_columns=["userId_idx", "sport_idx", "gender_idx"],
    heartrate_input_column="heart_rate_standardized",
    heartrate_output_column="heart_rate",
    workout_id_column="id",
    use_heartrate_input=True,
)

def apply_scaling_optimized(df, scaler, id_col="userId"):
    """Return a copy of ``df`` with per-user standardized sequence columns added.

    For each of ``time_elapsed``, ``altitude``, ``derived_speed``,
    ``derived_distance`` and ``heart_rate`` a ``<col>_standardized`` column is
    written holding ``(seq - mean) / std`` for every row's sequence.

    Statistics are taken from ``scaler.user_stats``, which may be keyed either
    as ``[col][user]`` or ``[user][col]``; an entry may be a
    ``{"mean": ..., "std": ...}`` dict or a ``(mean, std)`` pair. Users without
    an entry fall back to ``(0.0, 1.0)``, and a std of at most ``1e-12`` is
    replaced by ``1.0``.

    Args:
        df: input frame; it is not modified.
        scaler: object exposing a ``user_stats`` mapping.
        id_col: column holding the user identifier.

    Returns:
        The augmented copy of ``df``.
    """
    scaled = df.copy()
    stats_by_key = scaler.user_stats
    fallback_stats = (0.0, 1.0)

    for col in ("time_elapsed", "altitude", "derived_speed", "derived_distance", "heart_rate"):
        per_user = stats_by_key.get(col, {})

        def lookup(uid):
            # The user-major lookup is evaluated up front, exactly like a dict.get default.
            nested = stats_by_key.get(uid, {}).get(col, fallback_stats)
            entry = per_user.get(uid, nested)
            if isinstance(entry, dict):
                return (entry.get("mean", 0.0), entry.get("std", 1.0))
            return entry

        row_stats = scaled[id_col].map(lookup)

        standardized = []
        for seq, (mu, sig) in zip(scaled[col], row_stats):
            divisor = sig if sig > 1e-12 else 1.0
            standardized.append(((np.asarray(seq, dtype=float) - mu) / divisor).tolist())
        scaled[f"{col}_standardized"] = standardized

    return scaled

# One metrics record per (sport, scale, variant) combination is appended here.
results_log = []
print("\nüöÄ Starting Memory-Safe Inference on M4 Pro...")
start_time_global = time.time()

for sport in SPORTS:
    t0 = time.time()
    sport_dir = DATA_ROOT / sport
    model_path = sport_dir / f"{sport}_fitrec_model.pt"
    scaler_path = sport_dir / f"{sport}_user_standard_scaler.pkl"
    encoder_path = sport_dir / f"{sport}_static_ordinal_encoder.pkl"

    # Load the model and preprocessing artifacts for this sport; on any
    # failure report it and move on to the next sport.
    try:
        from project.utils.modeling.model import FitRecModel

        # Allow-list the model class so the weights_only load can unpickle it.
        torch.serialization.add_safe_globals([FitRecModel])
        model = torch.load(model_path, map_location="cpu", weights_only=True)
        model.eval()

        # NOTE(review): pickle.load executes arbitrary code; only use trusted artifact files.
        with scaler_path.open("rb") as scaler_file:
            scaler = pickle.load(scaler_file)
        with encoder_path.open("rb") as encoder_file:
            encoder = pickle.load(encoder_file)
    except Exception as e:
        print(f"  ‚ùå Error loading {sport}: {e}")
        continue

    for scale in SCALES:
        for variant in ["Uncleaned", "Cleaned"]:
            # Combinations that were never loaded are skipped.
            available = datasets.get(sport, {}).get(scale, {})
            if variant not in available:
                continue

            df_curr = available[variant].copy()
            df_curr = apply_scaling_optimized(df_curr, scaler)
            df_curr = encoder.transform(df_curr)

            preds = predict_model(model, df_curr, dataset_args=DATASET_ARGS, n_workers=6)
            df_curr["predicted_heart_rate"] = list(preds)

            # Flatten the per-workout sequences into one long vector each.
            y_true = np.concatenate(
                [np.array(seq, dtype=float) for seq in df_curr["heart_rate"]]
            )
            y_pred = np.concatenate(
                [np.array(seq, dtype=float) for seq in df_curr["predicted_heart_rate"]]
            )

            mae = np.mean(np.abs(y_true - y_pred))
            # The small epsilon keeps the relative error finite for zero targets.
            mape = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-9)))
            accuracy_pct = max(0, (1 - mape) * 100)

            record = {
                "Sport": sport,
                "Scale": scale,
                "Variant": variant,
                "MAE": mae,
                "Accuracy": accuracy_pct,
                "R2": r2_score(y_true, y_pred),
                "DataFrame": df_curr[["id", "heart_rate", "predicted_heart_rate"]].copy(),
            }
            results_log.append(record)

    print(f"  ‚è±Ô∏è {sport.upper()} total time: {time.time() - t0:.1f}s")

print(f"\n‚ú® Global Inference Finished in {time.time() - start_time_global:.1f}s")


üöÄ Starting Memory-Safe Inference on M4 Pro...

üß† BIKING: Loading artifacts...
  üì¶ Processing Biking Uncleaned Scale 0 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:46<00:00,  6.45it/s]


    ‚úÖ MAE: 1.98 | Acc: 98.4% | R¬≤: 0.979
  üì¶ Processing Biking Cleaned Scale 0 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:44<00:00,  6.74it/s]


    ‚úÖ MAE: 1.40 | Acc: 98.9% | R¬≤: 0.988
  üì¶ Processing Biking Uncleaned Scale 2 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:44<00:00,  6.81it/s]


    ‚úÖ MAE: 4.55 | Acc: 96.3% | R¬≤: 0.847
  üì¶ Processing Biking Cleaned Scale 2 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:43<00:00,  6.83it/s]


    ‚úÖ MAE: 3.98 | Acc: 96.8% | R¬≤: 0.857
  üì¶ Processing Biking Uncleaned Scale 5 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:45<00:00,  6.59it/s]


    ‚úÖ MAE: 4.99 | Acc: 95.9% | R¬≤: 0.807
  üì¶ Processing Biking Cleaned Scale 5 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:43<00:00,  6.84it/s]


    ‚úÖ MAE: 4.42 | Acc: 96.4% | R¬≤: 0.816
  üì¶ Processing Biking Uncleaned Scale 10 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:44<00:00,  6.81it/s]


    ‚úÖ MAE: 5.19 | Acc: 95.8% | R¬≤: 0.788
  üì¶ Processing Biking Cleaned Scale 10 (10294 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:43<00:00,  6.83it/s]


    ‚úÖ MAE: 4.61 | Acc: 96.3% | R¬≤: 0.797
  ‚è±Ô∏è BIKING total time: 392.2s

üß† RUNNING: Loading artifacts...
  üì¶ Processing Running Uncleaned Scale 0 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:41<00:00,  7.19it/s]


    ‚úÖ MAE: 1.71 | Acc: 98.7% | R¬≤: 0.972
  üì¶ Processing Running Cleaned Scale 0 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:42<00:00,  7.12it/s]


    ‚úÖ MAE: 0.96 | Acc: 99.3% | R¬≤: 0.985
  üì¶ Processing Running Uncleaned Scale 2 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:42<00:00,  7.10it/s]


    ‚úÖ MAE: 11.39 | Acc: 92.2% | R¬≤: -0.861
  üì¶ Processing Running Cleaned Scale 2 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:42<00:00,  7.10it/s]


    ‚úÖ MAE: 10.79 | Acc: 92.7% | R¬≤: -0.924
  üì¶ Processing Running Uncleaned Scale 5 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:43<00:00,  6.90it/s]


    ‚úÖ MAE: 12.39 | Acc: 91.6% | R¬≤: -1.142
  üì¶ Processing Running Cleaned Scale 5 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:43<00:00,  6.91it/s]


    ‚úÖ MAE: 11.80 | Acc: 92.0% | R¬≤: -1.218
  üì¶ Processing Running Uncleaned Scale 10 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:44<00:00,  6.81it/s]


    ‚úÖ MAE: 12.66 | Acc: 91.4% | R¬≤: -1.223
  üì¶ Processing Running Cleaned Scale 10 (8433 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:45<00:00,  6.57it/s]


    ‚úÖ MAE: 12.08 | Acc: 91.8% | R¬≤: -1.304
  ‚è±Ô∏è RUNNING total time: 378.8s

üß† WALKING: Loading artifacts...
  üì¶ Processing Walking Uncleaned Scale 0 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.64it/s] 


    ‚úÖ MAE: 3.21 | Acc: 97.3% | R¬≤: 0.961
  üì¶ Processing Walking Cleaned Scale 0 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.73it/s] 


    ‚úÖ MAE: 2.18 | Acc: 98.2% | R¬≤: 0.972
  üì¶ Processing Walking Uncleaned Scale 2 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.73it/s] 


    ‚úÖ MAE: 25.53 | Acc: 79.3% | R¬≤: -1.965
  üì¶ Processing Walking Cleaned Scale 2 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.74it/s] 


    ‚úÖ MAE: 24.75 | Acc: 80.0% | R¬≤: -1.997
  üì¶ Processing Walking Uncleaned Scale 5 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.71it/s] 


    ‚úÖ MAE: 25.93 | Acc: 79.1% | R¬≤: -2.055
  üì¶ Processing Walking Cleaned Scale 5 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.75it/s] 


    ‚úÖ MAE: 25.16 | Acc: 79.7% | R¬≤: -2.090
  üì¶ Processing Walking Uncleaned Scale 10 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.74it/s] 


    ‚úÖ MAE: 25.96 | Acc: 79.1% | R¬≤: -2.060
  üì¶ Processing Walking Cleaned Scale 10 (299 rows)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 300/300 [00:34<00:00,  8.73it/s] 

    ‚úÖ MAE: 25.19 | Acc: 79.7% | R¬≤: -2.096
  ‚è±Ô∏è WALKING total time: 275.9s

‚ú® Global Inference Finished in 1046.9s





In [7]:
import pickle

# The results cache is written directly under the data root.
save_path = DATA_ROOT / "inference_results_cache.pkl"

with save_path.open("wb") as cache_file:
    pickle.dump(results_log, cache_file)

print(f"‚úÖ Results log saved to {save_path}")

‚úÖ Results log saved to /Users/jonasgundlach/Academic/P3/Data_Preparation/data-preparation-2026-group-project/data/inference_results_cache.pkl


In [None]:
# %%
# Tabulate the inference records: each dict in results_log becomes one row.
df_res = pd.DataFrame(results_log)

# One stacked panel per sport. The row count follows SPORTS so it always
# matches the number of subplot titles.
fig_mae = make_subplots(
    rows=len(SPORTS),
    cols=1,
    subplot_titles=[f"<b>{s.upper()}</b>" for s in SPORTS],
    vertical_spacing=0.1,
    shared_xaxes=True,
)

def add_traces_and_formulas(df_variant, name, color, row, show_legend):
    """Plot MAE against noise scale for one variant on ``fig_mae``.

    Adds the measured points as markers and, when at least two points exist,
    a least-squares fit of the form ``y = a*ln(x+1) + b`` together with an
    annotation showing the fitted formula.

    Args:
        df_variant: rows for a single sport and variant; needs ``Scale`` and
            ``MAE`` columns. Nothing is drawn when it is empty.
        name: legend label and legend group; ``"Noisy"`` also selects the
            upper annotation slot.
        color: colour used for markers, fit line and annotation text.
        row: 1-based subplot row of ``fig_mae`` to draw into.
        show_legend: whether the marker trace gets a legend entry.
    """
    if df_variant.empty:
        return

    df_v = df_variant.sort_values("Scale")
    x, y = df_v["Scale"].values, df_v["MAE"].values

    fig_mae.add_trace(go.Scatter(
        x=x, y=y, mode='markers', name=name,
        marker=dict(color=color, size=8, line=dict(width=1, color='white')),
        showlegend=show_legend, legendgroup=name
    ), row=row, col=1)

    if len(x) >= 2:
        # log1p keeps scale 0 finite: ln(0 + 1) = 0.
        x_log = np.log1p(x).reshape(-1, 1)
        model = LinearRegression().fit(x_log, y)
        a, b = model.coef_[0], model.intercept_

        # Draw the fit only across the observed scales instead of a fixed 0..10 span.
        x_range = np.linspace(x.min(), x.max(), 100)
        y_range = model.predict(np.log1p(x_range).reshape(-1, 1))
        fig_mae.add_trace(go.Scatter(
            x=x_range, y=y_range, mode='lines', name=f"{name} Fit",
            line=dict(color=color, width=2), opacity=0.8,
            showlegend=False, legendgroup=name, hoverinfo="skip"
        ), row=row, col=1)

        # Stack the two formulas inside the panel so they do not overlap.
        y_offset = 0.95 if name == "Noisy" else 0.82
        # Plotly names the first subplot's axes "x"/"y" and later ones "x2"/"y2", ...
        x_ref = "x domain" if row == 1 else f"x{row} domain"
        y_ref = "y domain" if row == 1 else f"y{row} domain"

        fig_mae.add_annotation(
            x=0.02, y=y_offset, xref=x_ref, yref=y_ref,
            # "+.1f" prints the intercept with its own sign, avoiding "+-1.2".
            text=f"<b>{name}:</b> y={a:.2f}ln(x+1){b:+.1f}",
            showarrow=False, font=dict(size=11, color=color),
            bgcolor="rgba(255,255,255,0.7)"
        )

for i, sport in enumerate(SPORTS):
    row = i + 1
    sport_data = df_res[df_res["Sport"] == sport]

    # Traces go in first: add_hline skips subplots that hold no data yet
    # (exclude_empty_subplots defaults to True), so a baseline added before
    # the traces would silently be dropped.
    add_traces_and_formulas(sport_data[sport_data["Variant"] == "Uncleaned"], "Noisy", "#d62728", row, i == 0)
    add_traces_and_formulas(sport_data[sport_data["Variant"] == "Cleaned"], "Cleaned", "#2ca02c", row, i == 0)

    # Baseline: MAE of the scale-0 "Uncleaned" run, drawn only when that run exists.
    baseline_rows = sport_data[(sport_data["Scale"] == 0) & (sport_data["Variant"] == "Uncleaned")]
    if not baseline_rows.empty:
        fig_mae.add_hline(
            y=baseline_rows["MAE"].iloc[0],
            line_dash="dash",
            line_color="royalblue",
            row=row,
            col=1,
        )

# Axis titles so the figure can be read on its own.
fig_mae.update_yaxes(title_text="MAE")
fig_mae.update_xaxes(title_text="Noise Scale (Std Dev)", row=len(SPORTS), col=1)

fig_mae.show()

In [12]:
# %%
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression

# Tabulate the inference records: each dict in results_log becomes one row.
df_res = pd.DataFrame(results_log)

# 1. Create Subplots
# One stacked panel per sport. The row count follows SPORTS so it always
# matches the number of subplot titles.
fig_acc = make_subplots(
    rows=len(SPORTS),
    cols=1,
    subplot_titles=[f"<b>{s.upper()}</b>" for s in SPORTS],
    vertical_spacing=0.1,
    shared_xaxes=True,
)


def add_acc_traces(df_variant, name, color, row, show_legend):
    """Plot accuracy against noise scale for one variant on ``fig_acc``.

    Adds the measured points as markers and, when at least two points exist,
    a least-squares fit of the form ``y = a*ln(x+1) + b`` (drawn capped at
    100) plus an annotation with the fitted formula and its R² on the points.

    Args:
        df_variant: rows for a single sport and variant; needs ``Scale`` and
            ``Accuracy`` columns. Nothing is drawn when it is empty.
        name: legend label and legend group; ``"Cleaned"`` selects the upper
            annotation slot.
        color: colour used for markers, fit line and annotation text.
        row: 1-based subplot row of ``fig_acc`` to draw into.
        show_legend: whether the marker trace gets a legend entry.
    """
    if df_variant.empty:
        return

    df_v = df_variant.sort_values("Scale")
    x = df_v["Scale"].values
    y = df_v["Accuracy"].values

    # Scatter Points
    fig_acc.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="markers",
            name=name,
            marker=dict(color=color, size=8, line=dict(width=1, color="white")),
            showlegend=show_legend,
            legendgroup=name,
        ),
        row=row,
        col=1,
    )

    # Fit Line (Logarithmic)
    if len(x) >= 2:
        # log1p keeps scale 0 finite: ln(0 + 1) = 0.
        x_log = np.log1p(x).reshape(-1, 1)
        model = LinearRegression().fit(x_log, y)
        a, b = model.coef_[0], model.intercept_
        r2 = model.score(x_log, y)

        # Draw the fit only across the observed scales instead of a fixed 0..10 span.
        x_range = np.linspace(x.min(), x.max(), 100)
        y_range = model.predict(np.log1p(x_range).reshape(-1, 1))

        # Clip the visual line at 100%: the accuracy metric cannot exceed it.
        y_range = np.minimum(y_range, 100.0)

        fig_acc.add_trace(
            go.Scatter(
                x=x_range,
                y=y_range,
                mode="lines",
                name=f"{name} Fit",
                line=dict(color=color, width=2),
                opacity=0.8,
                showlegend=False,
                legendgroup=name,
                hoverinfo="skip",
            ),
            row=row,
            col=1,
        )

        # Formula Annotation
        # Cleaned (Green) is usually higher (better), Noisy (Red) is lower,
        # so the Cleaned formula takes the upper slot.
        y_offset_domain = 1.1 if name == "Cleaned" else 1

        # Plotly names the first subplot's axes "x"/"y" and later ones "x2"/"y2", ...
        x_ref = "x domain" if row == 1 else f"x{row} domain"
        y_ref = "y domain" if row == 1 else f"y{row} domain"

        # "+.1f" prints the intercept with its own sign, avoiding "+-1.2".
        formula = f"<b>{name}:</b> y={a:.2f}ln(x+1){b:+.1f} (R¬≤={r2:.2f})"

        fig_acc.add_annotation(
            x=0.02,
            y=y_offset_domain,
            xref=x_ref,
            yref=y_ref,
            text=formula,
            showarrow=False,
            font=dict(size=11, color=color, family="Arial"),
            align="left",
            bgcolor="rgba(255,255,255,0.7)",
        )


# 2. Populate Plot
for i, sport in enumerate(SPORTS):
    row = i + 1
    sport_data = df_res[df_res["Sport"] == sport]
    show_legend = i == 0

    # Traces go in first: add_hline skips subplots that hold no data yet
    # (exclude_empty_subplots defaults to True), so a baseline added before
    # the traces would silently be dropped.
    # Cleaned is added before Noisy, so Noisy is drawn on top where they overlap.
    add_acc_traces(
        sport_data[sport_data["Variant"] == "Cleaned"],
        "Cleaned",
        "#2ca02c",
        row,
        show_legend,
    )
    add_acc_traces(
        sport_data[sport_data["Variant"] == "Uncleaned"],
        "Noisy",
        "#d62728",
        row,
        show_legend,
    )

    # Baseline Line: accuracy of the scale-0 "Uncleaned" run, drawn only when
    # that run exists (explicit check instead of a bare except that would
    # hide unrelated errors).
    baseline_rows = sport_data[
        (sport_data["Scale"] == 0) & (sport_data["Variant"] == "Uncleaned")
    ]
    if not baseline_rows.empty:
        fig_acc.add_hline(
            y=baseline_rows["Accuracy"].iloc[0],
            line_dash="dash",
            line_color="royalblue",
            annotation_text="Base",
            annotation_position="bottom right",
            row=row,
            col=1,
        )

# 3. Formatting
fig_acc.update_layout(
    title=dict(text="<b>Accuracy Retention Analysis:</b> Resilience to Noise", x=0.5),
    template="plotly_white",
    height=1000,
    width=800,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    margin=dict(t=100, b=60, l=60, r=40),
)

# Zoomed y-axis per panel. 95-100.5 is the default window, but it is widened
# downwards whenever a sport has accuracies below 95 so no point is cut off.
for panel_row, panel_sport in enumerate(SPORTS, start=1):
    panel_acc = df_res.loc[df_res["Sport"] == panel_sport, "Accuracy"]
    y_lower = 95.0
    if not panel_acc.empty:
        # One unit of headroom below the lowest observed value.
        y_lower = min(y_lower, float(np.floor(panel_acc.min())) - 1.0)
    fig_acc.update_yaxes(
        range=[y_lower, 100.5],
        title_text="Accuracy (%)",
        gridcolor="#f0f0f0",
        row=panel_row,
        col=1,
    )

# Only the bottom panel carries the x-axis title (the x-axes are shared).
fig_acc.update_xaxes(
    title_text="Noise Scale (Std Dev)", row=len(SPORTS), col=1, gridcolor="#f0f0f0"
)
# Applies to every annotation on the figure, not only the formulas.
fig_acc.update_annotations(patch=dict(yshift=10))

fig_acc.show()