In [1]:
import numpy as np
import time
import copy
from environment import IntegratorSwitchingEnv
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from reward_model import LagrangeReward1
import cantera as ct
from tqdm import tqdm


In [12]:
def simulate_one_superstep_from_state(env, action, start_state, dt, super_steps, next_ref_state):
    """
    Build a fresh solver and integrate EXACTLY one super-step horizon starting from 'start_state'.
    Return (cpu_time, step_error) comparing the end state to 'next_ref_state'.
    This does NOT touch the live env state.
    """
    cfg = env.solver_configs[action]
    # fresh solver & a temporary gas bound to it
    solver, gas_tmp = env._build_fresh_solver(action, start_state.copy())

    # integrate 'super_steps' micro-steps of size dt
    t0 = time.time()
    try:
        current_state = start_state.copy()
        for _ in range(super_steps):
            if cfg['type'] == 'cvode':
                solver.set_state(current_state, 0.0)
                current_state = solver.solve_to(dt)
            elif cfg['type'] == 'qss':
                solver.setState(current_state.tolist(), 0.0)
                rc = solver.integrateToTime(dt)
                if rc != 0:
                    raise RuntimeError(f"QSS integrateToTime failed rc={rc}")
                current_state = np.array(solver.y)
            else:
                raise ValueError(f"Unknown solver type {cfg['type']}")
            # keep gas_tmp coherent (not strictly required for error)
            gas_tmp.TPY = current_state[0], env.current_pressure, current_state[1:]
        cpu_time = time.time() - t0

        # compute step error vs next reference state
        step_err = env._calculate_error(current_state, next_ref_state)
        return cpu_time, step_err

    except Exception:
        return np.inf, np.inf


def reference_anchored_oracle(env, k_step):
    """
    k_step indexes the decision point in the episode (0-based).
    We:
      - take start_ref = ref_states[k_index]
      - take next_ref  = ref_states[k_index + 1]
      where k_index = k_step * env.super_steps
    Evaluate both actions from start_ref and pick the fastest that satisfies epsilon.
    Fallback to BDF (assume action 0) otherwise.

    Returns: (best_action, (cpu_qss, err_qss), (cpu_bdf, err_bdf), obs_from_start_ref)
    """
    # map indices in the reference arrays
    start_idx = k_step * env.super_steps
    next_idx  = (k_step + 1) * env.super_steps
    if next_idx >= len(env.ref_states):
        # out of reference horizon, default to BDF
        start_idx = max(0, len(env.ref_states) - env.super_steps - 1)
        next_idx  = min(len(env.ref_states) - 1, start_idx + env.super_steps)

    start_ref = env.ref_states[start_idx]
    next_ref  = env.ref_states[next_idx]

    # Evaluate both actions from the same (reference) start state
    # Ensure action indices match your solver_configs: assume 0=BDF, 1=QSS
    cpu_bdf, err_bdf = simulate_one_superstep_from_state(env, 0, start_ref, env.dt, env.super_steps, next_ref)
    cpu_qss, err_qss = simulate_one_superstep_from_state(env, 1, start_ref, env.dt, env.super_steps, next_ref)

    eps = env.reward_function.epsilon
    # choose fastest feasible
    feasible = []
    if err_bdf <= eps: feasible.append((0, cpu_bdf))
    if err_qss <= eps: feasible.append((1, cpu_qss))

    if feasible:
        feasible.sort(key=lambda x: x[1])
        best_action = feasible[0][0]
    else:
        best_action = 0  # fallback to BDF when neither meets epsilon

    # Build features from the SAME reference state (prevents drift leakage)
    obs_ref = env._obs_from_arbitrary_state(start_ref, set_last_obs=False)
    return best_action, (cpu_qss, err_qss), (cpu_bdf, err_bdf), obs_ref


def build_ref_anchored_dataset(env_maker,
                               n_episodes=150,
                               max_steps_per_ep=200,
                               save_path="oracle_dataset_ref.npz",
                               verbose_every=10):
    """
    For each episode:
      - env.reset() to generate a fresh reference trajectory
      - at decision k, compute label by comparing solvers FROM ref[k] TO ref[k+1]
      - store obs computed from ref[k]
      - Step the LIVE env using the oracle action (so we also log realistic rollouts if desired),
        but labels/inputs are always ref-anchored and thus drift-free.
    """
    X, y = [], []

    for ep in range(n_episodes):
        env = env_maker()
        # pick conditions that likely include ignition (curriculum helps)
        obs, info = env.reset(
            temperature=np.random.uniform(*env.temp_range),
            pressure=np.random.choice(np.arange(1, 6)) * ct.one_atm,
            phi=np.random.uniform(*env.phi_range),
            total_time=np.random.uniform(*env.time_range),
            dt=np.random.uniform(*env.dt_range),
            etol=env.etol
        )

        T_ref_init = env.ref_states[0][0]
        T_ref_final = env.ref_states[-1][0]
        pressure = env.current_pressure
        phi = env.current_phi
        total_time = env.total_time
        dt = env.dt
        print(f"[episode {ep}] - T_ref_init: {T_ref_init}, T_ref_final: {T_ref_final}, pressure: {pressure}, phi: {phi}, total_time: {total_time}, dt: {dt}")
        # number of decision points = env.n_episodes
        K = env.n_episodes
        pbar = tqdm(range(min(K, max_steps_per_ep)), desc="Building reference-anchored dataset")
        for k in pbar:
            a_star, (cpu_qss, err_qss), (cpu_bdf, err_bdf), obs_ref = reference_anchored_oracle(env, k)
            X.append(obs_ref.astype(np.float32))
            y.append(a_star)

            # Advance the REAL env with the oracle action (optional but keeps trajectories sensible)
            obs, reward, terminated, truncated, info = env.step(a_star)
            if terminated or truncated:
                break
            pbar.set_postfix({
                "episode": ep,
                "step": k,
                "err_qss": err_qss,
                "err_bdf": err_bdf,
                "cpu_qss": cpu_qss,
                "cpu_bdf": cpu_bdf,
                "Ti": T_ref_init,
                "Tf": T_ref_final,
            })
            pbar.update(1)
            

            
        if (ep + 1) % verbose_every == 0:
            print(f"[ref-dataset] ep {ep+1}/{n_episodes}, total samples={len(X)}")

    X = np.stack(X, axis=0).astype(np.float32)
    y = np.array(y, dtype=np.int64)
    np.savez_compressed(save_path, X=X, y=y)
    print(f"[ref-dataset] saved {save_path} with X={X.shape}, y={y.shape}")


In [13]:



# ---- Configure solver order: 0=BDF, 1=QSS (must match oracle_refanchored.py assumptions) ----
SOLVER_CONFIGS = [
    # 0 = BDF (CVODE)
    {'type': 'cvode', 'rtol': 1e-6, 'atol': 1e-12, 'mxsteps': 100000, 'name': 'CVODE_BDF'},
    # 1 = QSS
    {'type': 'qss', 'dtmin': 1e-16, 'dtmax': 1e-6, 'stabilityCheck': False, 'itermax': 2,
     'epsmin': 0.002, 'epsmax': 100.0, 'abstol': 1e-8, 'mxsteps': 1000, 'name': 'QSS'},
]


def env_maker():
    """
    Returns a fresh IntegratorSwitchingEnv instance each call.
    The builder will call env.reset(...) with random ICs drawn from these ranges.
    Bias ranges so many episodes include ignition within the horizon.
    """
    reward_cfg = dict(
        epsilon=1e-4,
        lambda_init=1.0,
        lambda_lr=0.05,
        target_violation=0.0,
        cpu_log_delta=1e-3,
        reward_clip=5.0,
    )

    env = IntegratorSwitchingEnv(
        mechanism_file="large_mechanism/n-dodecane.yaml",  # or "gri30.yaml"
        fuel="nc12h26",                                     # or "CH4:1.0" for GRI-30
        oxidizer="O2:0.21, N2:0.79",                        # or "N2:3.76, O2:1.0"
        # Ranges the builder will sample from at reset():
        temp_range=(600.0, 1400.0),     # wide enough to see pre- & post-ignition
        phi_range=(0.7, 1.6),
        pressure_range=(1, 6),          # (ignored internally; builder passes explicit P in atm)
        time_range=(1e-3, 1e-2),        # ensure horizon can include ignition
        dt_range=(1e-6, 1e-6),          # fixed dt for comparability
        etol=1e-4,
        verbose=False,
        termination_count_threshold=100,
        reward_function=LagrangeReward1(**reward_cfg),
    )

    # Ensure solver indices: 0=BDF, 1=QSS (oracle assumes this order)
    env.solver_configs = SOLVER_CONFIGS
    return env





In [14]:
build_ref_anchored_dataset(
    env_maker=env_maker,
    n_episodes=200,            # total episodes to sample for the dataset
    max_steps_per_ep=200,      # cap decisions per episode
    save_path="oracle_dataset_ref.npz",
    verbose_every=10
)

print("Saved dataset to oracle_dataset_ref.npz")


[episode 0] - T_ref_init: 718.0930049591914, T_ref_final: 957.6584620278323, pressure: 405300.0, phi: 0.9080777256685133, total_time: 0.007782272165221903, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:56<00:00,  1.78it/s, episode=0, step=99, err_qss=5.78e-6, err_bdf=1.64e-6, cpu_qss=0.00779, cpu_bdf=0.183, Ti=718, Tf=958] 


[episode 1] - T_ref_init: 1065.1312190553303, T_ref_final: 1046.6313151902064, pressure: 303975.0, phi: 1.5548787318305848, total_time: 0.002183875543322598, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:33<00:00,  3.00it/s, episode=1, step=101, err_qss=8.65e-6, err_bdf=6.55e-6, cpu_qss=0.00217, cpu_bdf=0.0511, Ti=1.07e+3, Tf=1.05e+3]


[episode 2] - T_ref_init: 776.1164037230996, T_ref_final: 956.5259271747864, pressure: 506625.0, phi: 1.0231915022017108, total_time: 0.003583343999156343, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:39<00:00,  2.53it/s, episode=2, step=100, err_qss=3.07e-6, err_bdf=9.74e-7, cpu_qss=0.00363, cpu_bdf=0.084, Ti=776, Tf=957]


[episode 3] - T_ref_init: 1224.582948110214, T_ref_final: 2757.1611525962685, pressure: 303975.0, phi: 1.3108792148572777, total_time: 0.004820865266028653, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:48<00:00,  2.06it/s, episode=3, step=98, err_qss=0.00212, err_bdf=7.1e-15, cpu_qss=0.00755, cpu_bdf=0.104, Ti=1.22e+3, Tf=2.76e+3]  


[episode 4] - T_ref_init: 734.3197419339242, T_ref_final: 734.3298369508469, pressure: 506625.0, phi: 1.5549577215006611, total_time: 0.0011553239804231884, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 104/105 [00:29<00:00,  3.49it/s, episode=4, step=103, err_qss=2.13e-5, err_bdf=1.73e-6, cpu_qss=0.00107, cpu_bdf=0.0302, Ti=734, Tf=734]


[episode 5] - T_ref_init: 803.6664204585502, T_ref_final: 861.2532798395832, pressure: 202650.0, phi: 1.2546263489292655, total_time: 0.006550425691099945, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:50<00:00,  1.94it/s, episode=5, step=98, err_qss=8.62e-8, err_bdf=1.06e-8, cpu_qss=0.00609, cpu_bdf=0.157, Ti=804, Tf=861] 


[episode 6] - T_ref_init: 842.9792703045197, T_ref_final: 842.9790886294238, pressure: 202650.0, phi: 0.7996082784476755, total_time: 0.004853769177509051, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:39<00:00,  2.53it/s, episode=6, step=99, err_qss=1.57e-5, err_bdf=4.97e-7, cpu_qss=0.00487, cpu_bdf=0.111, Ti=843, Tf=843]


[episode 7] - T_ref_init: 1183.1389401003057, T_ref_final: 2549.7260964985585, pressure: 405300.0, phi: 0.7132575721773508, total_time: 0.003986501730026885, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:39<00:00,  2.57it/s, episode=7, step=100, err_qss=2.36e-10, err_bdf=1.31e-15, cpu_qss=0.00396, cpu_bdf=0.0828, Ti=1.18e+3, Tf=2.55e+3]


[episode 8] - T_ref_init: 1311.4064790824157, T_ref_final: 2739.4498132033905, pressure: 101325.0, phi: 1.2204681081063924, total_time: 0.005362092581877933, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:41<00:00,  2.38it/s, episode=8, step=99, err_qss=3.4e-7, err_bdf=3.5e-12, cpu_qss=0.0053, cpu_bdf=0.11, Ti=1.31e+3, Tf=2.74e+3]    


[episode 9] - T_ref_init: 689.2542459145589, T_ref_final: 878.9956299326586, pressure: 101325.0, phi: 1.3739728919938725, total_time: 0.008992105416984184, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:54<00:00,  1.82it/s, episode=9, step=99, err_qss=3.3e-6, err_bdf=9.2e-7, cpu_qss=0.00795, cpu_bdf=0.238, Ti=689, Tf=879]   


[ref-dataset] ep 10/200, total samples=1016
[episode 10] - T_ref_init: 698.3336760208348, T_ref_final: 869.1352168368129, pressure: 101325.0, phi: 1.1304864536487091, total_time: 0.008572341447109586, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:53<00:00,  1.85it/s, episode=10, step=98, err_qss=6.11e-7, err_bdf=2.61e-7, cpu_qss=0.00885, cpu_bdf=0.205, Ti=698, Tf=869] 


[episode 11] - T_ref_init: 1232.5571159081917, T_ref_final: 2634.522578666585, pressure: 202650.0, phi: 1.5803186198449979, total_time: 0.008845946000859788, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:04<00:00,  1.53it/s, episode=11, step=98, err_qss=0.00671, err_bdf=3.64e-12, cpu_qss=0.0136, cpu_bdf=0.183, Ti=1.23e+3, Tf=2.63e+3] 


[episode 12] - T_ref_init: 637.856508276836, T_ref_final: 637.8565085728482, pressure: 506625.0, phi: 1.090496525977578, total_time: 0.004477956085286065, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:36<00:00,  2.75it/s, episode=12, step=99, err_qss=8.54e-7, err_bdf=1.07e-6, cpu_qss=0.00443, cpu_bdf=0.0973, Ti=638, Tf=638] 


[episode 13] - T_ref_init: 981.1343956093522, T_ref_final: 979.5885844961609, pressure: 202650.0, phi: 1.3650612856769726, total_time: 0.004660123823601324, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:39<00:00,  2.53it/s, episode=13, step=99, err_qss=4.91e-6, err_bdf=4.6e-6, cpu_qss=0.00476, cpu_bdf=0.109, Ti=981, Tf=980] 


[episode 14] - T_ref_init: 1366.732524383758, T_ref_final: 2801.5982477108137, pressure: 202650.0, phi: 1.106226054803328, total_time: 0.004193962245713644, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:43<00:00,  2.31it/s, episode=14, step=100, err_qss=7.15e-5, err_bdf=3.04e-12, cpu_qss=0.00562, cpu_bdf=0.086, Ti=1.37e+3, Tf=2.8e+3] 


[episode 15] - T_ref_init: 1156.9268874314646, T_ref_final: 2546.8957930561155, pressure: 202650.0, phi: 0.7517365673495392, total_time: 0.009556947906727167, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:54<00:00,  1.81it/s, episode=15, step=98, err_qss=5.46e-10, err_bdf=4.49e-14, cpu_qss=0.00946, cpu_bdf=0.201, Ti=1.16e+3, Tf=2.55e+3] 


[episode 16] - T_ref_init: 796.6206326547205, T_ref_final: 856.9030459706038, pressure: 202650.0, phi: 0.731387821893978, total_time: 0.005338157253034465, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:47<00:00,  2.09it/s, episode=16, step=98, err_qss=1.55e-7, err_bdf=3.65e-8, cpu_qss=0.00494, cpu_bdf=0.129, Ti=797, Tf=857] 


[episode 17] - T_ref_init: 776.1421556569662, T_ref_final: 899.9345189598951, pressure: 303975.0, phi: 0.7841743733675022, total_time: 0.00823069058270354, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:53<00:00,  1.86it/s, episode=17, step=98, err_qss=1.38e-7, err_bdf=5.84e-8, cpu_qss=0.00829, cpu_bdf=0.195, Ti=776, Tf=900] 


[episode 18] - T_ref_init: 1231.3320199985558, T_ref_final: 2698.6983645294054, pressure: 405300.0, phi: 1.494534049706885, total_time: 0.008101092648294275, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:02<00:00,  1.59it/s, episode=18, step=98, err_qss=0.0046, err_bdf=1.05e-14, cpu_qss=0.0192, cpu_bdf=0.175, Ti=1.23e+3, Tf=2.7e+3]  


[episode 19] - T_ref_init: 1321.366826899246, T_ref_final: 2776.319061752692, pressure: 202650.0, phi: 1.0633532729975654, total_time: 0.005485423588119422, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:48<00:00,  2.04it/s, episode=19, step=99, err_qss=0.000452, err_bdf=1.77e-14, cpu_qss=0.00714, cpu_bdf=0.115, Ti=1.32e+3, Tf=2.78e+3]


[ref-dataset] ep 20/200, total samples=2021
[episode 20] - T_ref_init: 920.5281673635476, T_ref_final: 920.4470237039731, pressure: 303975.0, phi: 0.7955840747487899, total_time: 0.006050252903808585, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:48<00:00,  2.02it/s, episode=20, step=98, err_qss=1.22e-5, err_bdf=5.16e-6, cpu_qss=0.00615, cpu_bdf=0.14, Ti=921, Tf=920]  


[episode 21] - T_ref_init: 927.3816601605487, T_ref_final: 927.210666223015, pressure: 202650.0, phi: 1.096199611425137, total_time: 0.0071774140052558765, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:47<00:00,  2.11it/s, episode=21, step=99, err_qss=5.91e-6, err_bdf=3.44e-6, cpu_qss=0.0072, cpu_bdf=0.167, Ti=927, Tf=927] 


[episode 22] - T_ref_init: 1035.9015164566615, T_ref_final: 1028.2738306856775, pressure: 202650.0, phi: 1.3121572502818093, total_time: 0.003254239746049975, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:36<00:00,  2.71it/s, episode=22, step=99, err_qss=4.58e-6, err_bdf=5.32e-6, cpu_qss=0.00331, cpu_bdf=0.0767, Ti=1.04e+3, Tf=1.03e+3]


[episode 23] - T_ref_init: 1364.5363731876398, T_ref_final: 2701.3979056877097, pressure: 101325.0, phi: 1.5453888311457098, total_time: 0.006505270455715283, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:54<00:00,  1.81it/s, episode=23, step=98, err_qss=0.000413, err_bdf=1.08e-11, cpu_qss=0.00662, cpu_bdf=0.135, Ti=1.36e+3, Tf=2.7e+3] 


[episode 24] - T_ref_init: 1304.9478780684537, T_ref_final: 2761.35977778838, pressure: 303975.0, phi: 0.9474074950348161, total_time: 0.00784802979471966, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:57<00:00,  1.72it/s, episode=24, step=98, err_qss=1.54e-5, err_bdf=4.31e-13, cpu_qss=0.0125, cpu_bdf=0.172, Ti=1.3e+3, Tf=2.76e+3] 


[episode 25] - T_ref_init: 1027.9096410995492, T_ref_final: 2577.810916353525, pressure: 506625.0, phi: 1.4573037037880465, total_time: 0.00973006627018297, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:14<00:00,  1.33it/s, episode=25, step=98, err_qss=0.00821, err_bdf=2.17e-14, cpu_qss=0.0214, cpu_bdf=0.207, Ti=1.03e+3, Tf=2.58e+3] 


[episode 26] - T_ref_init: 1254.1820942086947, T_ref_final: 2718.6229463451673, pressure: 303975.0, phi: 1.4635548790404334, total_time: 0.003289716006446431, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:41<00:00,  2.42it/s, episode=26, step=100, err_qss=0.00126, err_bdf=9.21e-13, cpu_qss=0.00469, cpu_bdf=0.0679, Ti=1.25e+3, Tf=2.72e+3]


[episode 27] - T_ref_init: 1204.9859268185992, T_ref_final: 2740.5019855354644, pressure: 405300.0, phi: 1.3672190839797698, total_time: 0.009631608540997118, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:09<00:00,  1.42it/s, episode=27, step=98, err_qss=0.00409, err_bdf=4.14e-15, cpu_qss=0.0202, cpu_bdf=0.208, Ti=1.2e+3, Tf=2.74e+3] 


[episode 28] - T_ref_init: 741.7584959173932, T_ref_final: 892.1430381104686, pressure: 202650.0, phi: 1.085802194163801, total_time: 0.003567144837147787, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:42<00:00,  2.35it/s, episode=28, step=99, err_qss=6.79e-7, err_bdf=2.26e-7, cpu_qss=0.00364, cpu_bdf=0.0944, Ti=742, Tf=892] 


[episode 29] - T_ref_init: 950.2870871278864, T_ref_final: 947.9609073333489, pressure: 303975.0, phi: 1.15134465572284, total_time: 0.009559755917044629, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:56<00:00,  1.76it/s, episode=29, step=98, err_qss=1.21e-5, err_bdf=3.4e-6, cpu_qss=0.00947, cpu_bdf=0.225, Ti=950, Tf=948]  


[ref-dataset] ep 30/200, total samples=3026
[episode 30] - T_ref_init: 881.9839621554911, T_ref_final: 881.9812955001959, pressure: 101325.0, phi: 0.8298910854146554, total_time: 0.00601922658529695, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:41<00:00,  2.41it/s, episode=30, step=98, err_qss=1.29e-6, err_bdf=2.14e-7, cpu_qss=0.00622, cpu_bdf=0.139, Ti=882, Tf=882] 


[episode 31] - T_ref_init: 852.9769057327574, T_ref_final: 852.976667263703, pressure: 202650.0, phi: 0.8142282481530998, total_time: 0.0032559854527230456, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:34<00:00,  2.89it/s, episode=31, step=99, err_qss=7.98e-6, err_bdf=3.86e-7, cpu_qss=0.00313, cpu_bdf=0.0741, Ti=853, Tf=853]


[episode 32] - T_ref_init: 1156.1030979048573, T_ref_final: 2770.4444236990935, pressure: 506625.0, phi: 1.0885647379703536, total_time: 0.003100356260582239, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:40<00:00,  2.42it/s, episode=32, step=98, err_qss=0.000444, err_bdf=7.36e-13, cpu_qss=0.00521, cpu_bdf=0.068, Ti=1.16e+3, Tf=2.77e+3] 


[episode 33] - T_ref_init: 755.8346936325879, T_ref_final: 842.9670938311754, pressure: 101325.0, phi: 1.0190824956901805, total_time: 0.007849876122472286, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:10<00:00,  1.40it/s, episode=33, step=98, err_qss=6.93e-8, err_bdf=2.78e-8, cpu_qss=0.00758, cpu_bdf=0.186, Ti=756, Tf=843]  


[episode 34] - T_ref_init: 671.4412376307591, T_ref_final: 671.4412407825488, pressure: 506625.0, phi: 1.5406028348969798, total_time: 0.002200467005437711, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:30<00:00,  3.24it/s, episode=34, step=98, err_qss=3.94e-7, err_bdf=3.23e-6, cpu_qss=0.00233, cpu_bdf=0.0487, Ti=671, Tf=671]  


[episode 35] - T_ref_init: 1056.7612967951677, T_ref_final: 2696.1781854963365, pressure: 303975.0, phi: 1.1914303082093733, total_time: 0.0074939122124521, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:55<00:00,  1.81it/s, episode=35, step=99, err_qss=0.000845, err_bdf=2.22e-11, cpu_qss=0.0111, cpu_bdf=0.152, Ti=1.06e+3, Tf=2.7e+3]


[episode 36] - T_ref_init: 667.0099900724606, T_ref_final: 667.0100400097491, pressure: 303975.0, phi: 1.1046771691975281, total_time: 0.0044752455083543865, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:36<00:00,  2.75it/s, episode=36, step=99, err_qss=8.18e-7, err_bdf=5.04e-6, cpu_qss=0.00426, cpu_bdf=0.0984, Ti=667, Tf=667] 


[episode 37] - T_ref_init: 1167.44767507362, T_ref_final: 2749.6739936198255, pressure: 303975.0, phi: 1.1849269399507207, total_time: 0.007031658805048963, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:56<00:00,  1.74it/s, episode=37, step=98, err_qss=0.000767, err_bdf=4.38e-15, cpu_qss=0.0126, cpu_bdf=0.153, Ti=1.17e+3, Tf=2.75e+3] 


[episode 38] - T_ref_init: 752.3305490751762, T_ref_final: 898.3213088076069, pressure: 202650.0, phi: 1.1696731700661267, total_time: 0.007750660501777125, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:53<00:00,  1.85it/s, episode=38, step=98, err_qss=6.37e-8, err_bdf=3.77e-8, cpu_qss=0.00701, cpu_bdf=0.184, Ti=752, Tf=898] 


[episode 39] - T_ref_init: 1114.4789743160013, T_ref_final: 2711.0738495353035, pressure: 405300.0, phi: 0.9860505744151912, total_time: 0.009248720329109582, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:04<00:00,  1.54it/s, episode=39, step=98, err_qss=9.35e-5, err_bdf=4.87e-13, cpu_qss=0.0162, cpu_bdf=0.204, Ti=1.11e+3, Tf=2.71e+3]  


[ref-dataset] ep 40/200, total samples=4029
[episode 40] - T_ref_init: 1215.554758812185, T_ref_final: 2784.9406872234513, pressure: 405300.0, phi: 1.227482373682045, total_time: 0.006117881553543724, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:53<00:00,  1.86it/s, episode=40, step=98, err_qss=0.00278, err_bdf=9.56e-15, cpu_qss=0.0106, cpu_bdf=0.132, Ti=1.22e+3, Tf=2.78e+3] 


[episode 41] - T_ref_init: 1064.832917877645, T_ref_final: 1058.9191743428219, pressure: 202650.0, phi: 1.5151666771235799, total_time: 0.0012113159361231045, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:29<00:00,  3.32it/s, episode=41, step=98, err_qss=5.24e-6, err_bdf=7.67e-6, cpu_qss=0.00138, cpu_bdf=0.032, Ti=1.06e+3, Tf=1.06e+3]  


[episode 42] - T_ref_init: 662.5485490992592, T_ref_final: 662.5485556987611, pressure: 506625.0, phi: 0.9524142438573747, total_time: 0.0036032683179195936, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:34<00:00,  2.91it/s, episode=42, step=98, err_qss=3.62e-7, err_bdf=3.45e-6, cpu_qss=0.00358, cpu_bdf=0.0804, Ti=663, Tf=663]  


[episode 43] - T_ref_init: 1057.650280084379, T_ref_final: 1048.7311410073607, pressure: 405300.0, phi: 1.5346796797744708, total_time: 0.0012759148123821498, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 105/106 [00:31<00:00,  3.32it/s, episode=43, step=104, err_qss=1.04e-5, err_bdf=8.22e-6, cpu_qss=0.00137, cpu_bdf=0.0325, Ti=1.06e+3, Tf=1.05e+3]


[episode 44] - T_ref_init: 905.0036475437619, T_ref_final: 905.0006257377304, pressure: 405300.0, phi: 0.7129225008341035, total_time: 0.0017960900945108288, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 104/105 [00:31<00:00,  3.26it/s, episode=44, step=103, err_qss=9.82e-6, err_bdf=6.26e-7, cpu_qss=0.00178, cpu_bdf=0.0395, Ti=905, Tf=905]


[episode 45] - T_ref_init: 1162.1160286843656, T_ref_final: 2714.625183918198, pressure: 405300.0, phi: 1.366035161830289, total_time: 0.00229959636414622, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 103/104 [00:38<00:00,  2.68it/s, episode=45, step=102, err_qss=0.00106, err_bdf=8.17e-12, cpu_qss=0.00296, cpu_bdf=0.0458, Ti=1.16e+3, Tf=2.71e+3]


[episode 46] - T_ref_init: 1085.2725188844138, T_ref_final: 2620.140628928446, pressure: 202650.0, phi: 1.413560876005473, total_time: 0.006657645941003909, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:51<00:00,  1.94it/s, episode=46, step=98, err_qss=0.00164, err_bdf=9.57e-10, cpu_qss=0.00844, cpu_bdf=0.137, Ti=1.09e+3, Tf=2.62e+3]


[episode 47] - T_ref_init: 1064.6381017831309, T_ref_final: 2715.0577117928765, pressure: 405300.0, phi: 1.0819172613966228, total_time: 0.007804064422896672, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:04<00:00,  1.55it/s, episode=47, step=98, err_qss=0.0012, err_bdf=4.34e-13, cpu_qss=0.0151, cpu_bdf=0.165, Ti=1.06e+3, Tf=2.72e+3]   


[episode 48] - T_ref_init: 614.9414776350449, T_ref_final: 614.9414776586566, pressure: 405300.0, phi: 0.9912502584694953, total_time: 0.005659795511792387, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:38<00:00,  2.57it/s, episode=48, step=99, err_qss=8.65e-6, err_bdf=3.77e-7, cpu_qss=0.0056, cpu_bdf=0.126, Ti=615, Tf=615]   


[episode 49] - T_ref_init: 990.1446221315887, T_ref_final: 990.0156800380308, pressure: 101325.0, phi: 0.9713923638461545, total_time: 0.0014321783168574154, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:30<00:00,  3.35it/s, episode=49, step=100, err_qss=7.04e-7, err_bdf=7.76e-7, cpu_qss=0.00134, cpu_bdf=0.0347, Ti=990, Tf=990]


[ref-dataset] ep 50/200, total samples=5047
[episode 50] - T_ref_init: 1176.916584173342, T_ref_final: 2770.957221967553, pressure: 405300.0, phi: 1.1789821527606066, total_time: 0.002405326910892073, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:36<00:00,  2.68it/s, episode=50, step=98, err_qss=0.000447, err_bdf=7.31e-13, cpu_qss=0.00295, cpu_bdf=0.0511, Ti=1.18e+3, Tf=2.77e+3] 


[episode 51] - T_ref_init: 953.0389679463852, T_ref_final: 951.3603867626514, pressure: 405300.0, phi: 1.087122216820056, total_time: 0.006347350813304021, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:47<00:00,  2.07it/s, episode=51, step=98, err_qss=1.79e-5, err_bdf=5.4e-6, cpu_qss=0.00648, cpu_bdf=0.147, Ti=953, Tf=951]  


[episode 52] - T_ref_init: 1068.720602301118, T_ref_final: 2593.4315982709536, pressure: 405300.0, phi: 0.828710293919064, total_time: 0.00749737610434026, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:57<00:00,  1.73it/s, episode=52, step=99, err_qss=2.3e-5, err_bdf=1.31e-15, cpu_qss=0.00746, cpu_bdf=0.156, Ti=1.07e+3, Tf=2.59e+3] 


[episode 53] - T_ref_init: 908.1788809878225, T_ref_final: 908.1717919630711, pressure: 202650.0, phi: 0.9313468835766574, total_time: 0.002885245806838599, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:34<00:00,  2.96it/s, episode=53, step=101, err_qss=3.45e-6, err_bdf=3.41e-7, cpu_qss=0.00289, cpu_bdf=0.0653, Ti=908, Tf=908]


[episode 54] - T_ref_init: 1358.1182678376135, T_ref_final: 2708.5346136532985, pressure: 202650.0, phi: 0.8427818014199809, total_time: 0.0026014506480069753, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:34<00:00,  2.86it/s, episode=54, step=98, err_qss=1.57e-10, err_bdf=6.93e-15, cpu_qss=0.0028, cpu_bdf=0.0562, Ti=1.36e+3, Tf=2.71e+3] 


[episode 55] - T_ref_init: 1263.1371615244436, T_ref_final: 2724.2425910930288, pressure: 202650.0, phi: 0.9631091518545555, total_time: 0.009697588869794486, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:52<00:00,  1.91it/s, episode=55, step=99, err_qss=0.000113, err_bdf=4.47e-13, cpu_qss=0.0111, cpu_bdf=0.205, Ti=1.26e+3, Tf=2.72e+3]


[episode 56] - T_ref_init: 708.1243319864479, T_ref_final: 708.7003147444216, pressure: 405300.0, phi: 0.8527443663600773, total_time: 0.0030460008561899055, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:33<00:00,  2.98it/s, episode=56, step=99, err_qss=5.44e-5, err_bdf=6.91e-6, cpu_qss=0.00306, cpu_bdf=0.0849, Ti=708, Tf=709]


[episode 57] - T_ref_init: 804.344398029336, T_ref_final: 891.1830798419262, pressure: 303975.0, phi: 1.1462535211775822, total_time: 0.004246183029630191, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:42<00:00,  2.33it/s, episode=57, step=99, err_qss=3.72e-8, err_bdf=3.45e-8, cpu_qss=0.00426, cpu_bdf=0.103, Ti=804, Tf=891]


[episode 58] - T_ref_init: 684.1122025403658, T_ref_final: 684.3128026360363, pressure: 101325.0, phi: 0.7990637676638184, total_time: 0.006675239632150674, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:42<00:00,  2.35it/s, episode=58, step=99, err_qss=5.18e-6, err_bdf=3e-6, cpu_qss=0.0059, cpu_bdf=0.178, Ti=684, Tf=684]    


[episode 59] - T_ref_init: 1071.9471956054906, T_ref_final: 2643.6123787732176, pressure: 405300.0, phi: 1.3906482644720624, total_time: 0.006205926564410314, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:57<00:00,  1.73it/s, episode=59, step=98, err_qss=0.00548, err_bdf=8.47e-13, cpu_qss=0.0127, cpu_bdf=0.131, Ti=1.07e+3, Tf=2.64e+3] 


[ref-dataset] ep 60/200, total samples=6055
[episode 60] - T_ref_init: 1386.9870793893053, T_ref_final: 2748.799625615881, pressure: 101325.0, phi: 1.0517466065451062, total_time: 0.005988689696881272, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:41<00:00,  2.38it/s, episode=60, step=99, err_qss=1.01e-9, err_bdf=3.34e-12, cpu_qss=0.0059, cpu_bdf=0.12, Ti=1.39e+3, Tf=2.75e+3]   


[episode 61] - T_ref_init: 1191.8972262249865, T_ref_final: 2780.142295594178, pressure: 506625.0, phi: 1.2522430946831036, total_time: 0.0026767676550790774, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:39<00:00,  2.59it/s, episode=61, step=100, err_qss=0.00172, err_bdf=6.73e-13, cpu_qss=0.00415, cpu_bdf=0.056, Ti=1.19e+3, Tf=2.78e+3] 


[episode 62] - T_ref_init: 882.8433477371224, T_ref_final: 888.8942863766681, pressure: 506625.0, phi: 1.513094345499943, total_time: 0.007290044163975967, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:56<00:00,  1.77it/s, episode=62, step=99, err_qss=7.19e-6, err_bdf=1.57e-6, cpu_qss=0.00744, cpu_bdf=0.174, Ti=883, Tf=889]


[episode 63] - T_ref_init: 749.4855584805076, T_ref_final: 2513.5540164832873, pressure: 506625.0, phi: 1.238667130976469, total_time: 0.007095151728806915, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:56<00:00,  1.76it/s, episode=63, step=99, err_qss=8.35e-5, err_bdf=2.4e-12, cpu_qss=0.0106, cpu_bdf=0.149, Ti=749, Tf=2.51e+3]  


[episode 64] - T_ref_init: 1015.835830324006, T_ref_final: 2634.4515942858943, pressure: 506625.0, phi: 1.3470804019008842, total_time: 0.009472733775871787, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:09<00:00,  1.42it/s, episode=64, step=98, err_qss=0.00045, err_bdf=2.22e-14, cpu_qss=0.0203, cpu_bdf=0.202, Ti=1.02e+3, Tf=2.63e+3] 


[episode 65] - T_ref_init: 1196.6496309166284, T_ref_final: 2703.9533278114523, pressure: 202650.0, phi: 0.9821813049604776, total_time: 0.006071265366405059, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:44<00:00,  2.24it/s, episode=65, step=99, err_qss=1.61e-5, err_bdf=3.82e-12, cpu_qss=0.00639, cpu_bdf=0.125, Ti=1.2e+3, Tf=2.7e+3]


[episode 66] - T_ref_init: 659.3666159841736, T_ref_final: 659.3666187417277, pressure: 101325.0, phi: 0.9597161726742108, total_time: 0.004993072475388261, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:37<00:00,  2.64it/s, episode=66, step=99, err_qss=3.14e-7, err_bdf=2.93e-6, cpu_qss=0.00499, cpu_bdf=0.111, Ti=659, Tf=659] 


[episode 67] - T_ref_init: 890.5776888213546, T_ref_final: 890.5670472253422, pressure: 101325.0, phi: 1.2515190337825208, total_time: 0.007169352950668615, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:44<00:00,  2.24it/s, episode=67, step=98, err_qss=1.55e-6, err_bdf=2.61e-7, cpu_qss=0.00712, cpu_bdf=0.166, Ti=891, Tf=891]


[episode 68] - T_ref_init: 1021.7174877024745, T_ref_final: 1498.253171417158, pressure: 405300.0, phi: 0.7457215298233472, total_time: 0.009749297724042053, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:01<00:00,  1.60it/s, episode=68, step=98, err_qss=0.0101, err_bdf=0.00118, cpu_qss=0.0267, cpu_bdf=0.263, Ti=1.02e+3, Tf=1.5e+3]    


[episode 69] - T_ref_init: 1290.625505617693, T_ref_final: 2730.4356814348275, pressure: 101325.0, phi: 1.2520813440226088, total_time: 0.007348143170835957, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:46<00:00,  2.14it/s, episode=69, step=98, err_qss=1.11e-5, err_bdf=6.1e-13, cpu_qss=0.0075, cpu_bdf=0.153, Ti=1.29e+3, Tf=2.73e+3]   


[ref-dataset] ep 70/200, total samples=7062
[episode 70] - T_ref_init: 1261.1688406211324, T_ref_final: 2724.6237472964553, pressure: 202650.0, phi: 1.4152176805586985, total_time: 0.0011054711745866416, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:32<00:00,  3.08it/s, episode=70, step=98, err_qss=4.01e-6, err_bdf=8.91e-10, cpu_qss=0.00137, cpu_bdf=0.0226, Ti=1.26e+3, Tf=2.72e+3]


[episode 71] - T_ref_init: 1084.9132551295052, T_ref_final: 2696.2692469249264, pressure: 303975.0, phi: 1.0294267363012066, total_time: 0.005915814514064, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:55<00:00,  1.78it/s, episode=71, step=98, err_qss=0.000128, err_bdf=4.21e-13, cpu_qss=0.014, cpu_bdf=0.186, Ti=1.08e+3, Tf=2.7e+3]  


[episode 72] - T_ref_init: 801.6664375111956, T_ref_final: 801.666467064958, pressure: 101325.0, phi: 1.2319449378496923, total_time: 0.004279063583567903, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:43<00:00,  2.28it/s, episode=72, step=99, err_qss=8.77e-6, err_bdf=4.65e-7, cpu_qss=0.00414, cpu_bdf=0.0967, Ti=802, Tf=802]


[episode 73] - T_ref_init: 1077.2420851229688, T_ref_final: 1048.6391375510898, pressure: 202650.0, phi: 1.566921020356277, total_time: 0.0032987450886958905, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:36<00:00,  2.76it/s, episode=73, step=101, err_qss=6.47e-6, err_bdf=5.39e-6, cpu_qss=0.00328, cpu_bdf=0.0777, Ti=1.08e+3, Tf=1.05e+3]


[episode 74] - T_ref_init: 1287.1238944962897, T_ref_final: 2730.0344139295585, pressure: 101325.0, phi: 1.2023038424657575, total_time: 0.00978296996122717, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:52<00:00,  1.90it/s, episode=74, step=98, err_qss=2.34e-5, err_bdf=4.5e-14, cpu_qss=0.0101, cpu_bdf=0.201, Ti=1.29e+3, Tf=2.73e+3]  


[episode 75] - T_ref_init: 801.5978501346381, T_ref_final: 966.0407641160627, pressure: 405300.0, phi: 1.5489382815006603, total_time: 0.005661010160526491, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:45<00:00,  2.22it/s, episode=75, step=99, err_qss=8.19e-6, err_bdf=3.32e-6, cpu_qss=0.00534, cpu_bdf=0.135, Ti=802, Tf=966]


[episode 76] - T_ref_init: 1103.7359554418051, T_ref_final: 2549.357509576469, pressure: 405300.0, phi: 1.5920390697985431, total_time: 0.007913625539284094, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:02<00:00,  1.58it/s, episode=76, step=98, err_qss=0.000459, err_bdf=7.66e-15, cpu_qss=0.0141, cpu_bdf=0.168, Ti=1.1e+3, Tf=2.55e+3]


[episode 77] - T_ref_init: 881.9778624202373, T_ref_final: 882.0018507540822, pressure: 405300.0, phi: 1.2913373784036635, total_time: 0.008377847359518565, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:02<00:00,  1.57it/s, episode=77, step=98, err_qss=2.24e-5, err_bdf=3.79e-6, cpu_qss=0.00857, cpu_bdf=0.202, Ti=882, Tf=882] 


[episode 78] - T_ref_init: 1399.4310518946172, T_ref_final: 2823.499366311793, pressure: 506625.0, phi: 1.480022845968478, total_time: 0.006851713683927483, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:57<00:00,  1.71it/s, episode=78, step=98, err_qss=0.00695, err_bdf=1.81e-14, cpu_qss=0.014, cpu_bdf=0.149, Ti=1.4e+3, Tf=2.82e+3]   


[episode 79] - T_ref_init: 823.36890951677, T_ref_final: 874.926622629658, pressure: 303975.0, phi: 0.7972327211368772, total_time: 0.009777827713516332, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:58<00:00,  1.68it/s, episode=79, step=98, err_qss=2.27e-8, err_bdf=3.94e-8, cpu_qss=0.00968, cpu_bdf=0.232, Ti=823, Tf=875] 


[ref-dataset] ep 80/200, total samples=8067
[episode 80] - T_ref_init: 932.9402702788175, T_ref_final: 932.8962856850993, pressure: 202650.0, phi: 1.1256285760624198, total_time: 0.003092913076880962, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:35<00:00,  2.91it/s, episode=80, step=101, err_qss=2.96e-6, err_bdf=5.59e-7, cpu_qss=0.00278, cpu_bdf=0.071, Ti=933, Tf=933]


[episode 81] - T_ref_init: 667.7752038607673, T_ref_final: 667.7752070477253, pressure: 506625.0, phi: 1.2316743775340133, total_time: 0.002580899213470292, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:32<00:00,  3.15it/s, episode=81, step=101, err_qss=3.22e-7, err_bdf=3.36e-6, cpu_qss=0.00257, cpu_bdf=0.0557, Ti=668, Tf=668]


[episode 82] - T_ref_init: 765.9935400318532, T_ref_final: 920.9633552343006, pressure: 506625.0, phi: 0.9174743826273337, total_time: 0.0013522464177796113, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 103/104 [00:34<00:00,  2.96it/s, episode=82, step=102, err_qss=5.95e-5, err_bdf=1.89e-7, cpu_qss=0.00144, cpu_bdf=0.0428, Ti=766, Tf=921]


[episode 83] - T_ref_init: 940.943136635753, T_ref_final: 940.8564167488396, pressure: 405300.0, phi: 1.076425665103183, total_time: 0.002370145084162584, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:34<00:00,  2.99it/s, episode=83, step=101, err_qss=1.51e-5, err_bdf=1.04e-5, cpu_qss=0.00246, cpu_bdf=0.0556, Ti=941, Tf=941]


[episode 84] - T_ref_init: 1110.584931272023, T_ref_final: 1131.326042126788, pressure: 202650.0, phi: 1.3025326222646063, total_time: 0.0032939502810303495, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:36<00:00,  2.76it/s, episode=84, step=100, err_qss=0.000113, err_bdf=2.49e-5, cpu_qss=0.00476, cpu_bdf=0.0943, Ti=1.11e+3, Tf=1.13e+3]


[episode 85] - T_ref_init: 620.2966534969188, T_ref_final: 620.2966534979766, pressure: 506625.0, phi: 0.8789842689160532, total_time: 0.0011658535060229233, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 104/105 [00:28<00:00,  3.60it/s, episode=85, step=103, err_qss=1.99e-15, err_bdf=2.86e-16, cpu_qss=0.00114, cpu_bdf=0.0234, Ti=620, Tf=620]


[episode 86] - T_ref_init: 1147.195935385513, T_ref_final: 2672.262658526833, pressure: 202650.0, phi: 1.3811501892161997, total_time: 0.004829646711844935, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:47<00:00,  2.09it/s, episode=86, step=98, err_qss=0.00262, err_bdf=1.07e-11, cpu_qss=0.00601, cpu_bdf=0.0984, Ti=1.15e+3, Tf=2.67e+3]  


[episode 87] - T_ref_init: 878.0172737973002, T_ref_final: 878.0031647105837, pressure: 202650.0, phi: 1.0394541386659102, total_time: 0.008944530489936101, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:50<00:00,  1.97it/s, episode=87, step=98, err_qss=8.9e-6, err_bdf=6.01e-7, cpu_qss=0.0089, cpu_bdf=0.208, Ti=878, Tf=878]   


[episode 88] - T_ref_init: 662.9634647306344, T_ref_final: 662.9634662315785, pressure: 202650.0, phi: 1.5863935990015459, total_time: 0.003155866232776509, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:32<00:00,  3.07it/s, episode=88, step=99, err_qss=2.53e-7, err_bdf=2.67e-6, cpu_qss=0.00311, cpu_bdf=0.068, Ti=663, Tf=663]  


[episode 89] - T_ref_init: 1365.318579614178, T_ref_final: 2759.8353359110747, pressure: 101325.0, phi: 1.2444442878814064, total_time: 0.009686354372096313, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:52<00:00,  1.90it/s, episode=89, step=98, err_qss=5.44e-5, err_bdf=7.58e-15, cpu_qss=0.0104, cpu_bdf=0.205, Ti=1.37e+3, Tf=2.76e+3]  


[ref-dataset] ep 90/200, total samples=9088
[episode 90] - T_ref_init: 1249.1594107392796, T_ref_final: 2558.156975535214, pressure: 202650.0, phi: 0.708270511172581, total_time: 0.005966959243553759, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:41<00:00,  2.38it/s, episode=90, step=99, err_qss=3.15e-10, err_bdf=3.51e-12, cpu_qss=0.00586, cpu_bdf=0.122, Ti=1.25e+3, Tf=2.56e+3]


[episode 91] - T_ref_init: 1109.0633760110336, T_ref_final: 1094.2878583883567, pressure: 101325.0, phi: 1.317758403989526, total_time: 0.001130226164581732, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:29<00:00,  3.42it/s, episode=91, step=100, err_qss=2.22e-6, err_bdf=4.59e-6, cpu_qss=0.00124, cpu_bdf=0.034, Ti=1.11e+3, Tf=1.09e+3]


[episode 92] - T_ref_init: 974.264904980781, T_ref_final: 969.2026342680559, pressure: 202650.0, phi: 1.5888261488866309, total_time: 0.009126988233957216, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:52<00:00,  1.88it/s, episode=92, step=98, err_qss=6.05e-6, err_bdf=2.73e-6, cpu_qss=0.00907, cpu_bdf=0.217, Ti=974, Tf=969] 


[episode 93] - T_ref_init: 1099.561050098434, T_ref_final: 2728.7070880097144, pressure: 405300.0, phi: 1.069175812559415, total_time: 0.00294101408636829, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:39<00:00,  2.55it/s, episode=93, step=99, err_qss=0.000554, err_bdf=1.67e-9, cpu_qss=0.00572, cpu_bdf=0.0587, Ti=1.1e+3, Tf=2.73e+3]


[episode 94] - T_ref_init: 1227.0390104465403, T_ref_final: 2794.5822476854773, pressure: 405300.0, phi: 1.1679180311316237, total_time: 0.007570133875501763, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:57<00:00,  1.72it/s, episode=94, step=98, err_qss=0.00167, err_bdf=2.35e-15, cpu_qss=0.0156, cpu_bdf=0.162, Ti=1.23e+3, Tf=2.79e+3]  


[episode 95] - T_ref_init: 1183.2919722272018, T_ref_final: 2707.4069024932255, pressure: 202650.0, phi: 1.3380449867697262, total_time: 0.009640873410436667, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:07<00:00,  1.46it/s, episode=95, step=98, err_qss=0.00181, err_bdf=1.7e-13, cpu_qss=0.013, cpu_bdf=0.199, Ti=1.18e+3, Tf=2.71e+3]    


[episode 96] - T_ref_init: 1250.0289191197826, T_ref_final: 2761.1233285001153, pressure: 202650.0, phi: 1.178507458614436, total_time: 0.0013225198319680744, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:32<00:00,  3.05it/s, episode=96, step=99, err_qss=2.97e-8, err_bdf=7.43e-11, cpu_qss=0.00154, cpu_bdf=0.0269, Ti=1.25e+3, Tf=2.76e+3]


[episode 97] - T_ref_init: 605.6847035217384, T_ref_final: 605.6847035670914, pressure: 506625.0, phi: 1.0264950189054076, total_time: 0.009627342232905381, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:47<00:00,  2.09it/s, episode=97, step=98, err_qss=1.12e-5, err_bdf=2.87e-7, cpu_qss=0.0094, cpu_bdf=0.207, Ti=606, Tf=606]   


[episode 98] - T_ref_init: 1139.3458311932327, T_ref_final: 2639.6030700157266, pressure: 506625.0, phi: 0.8271126850512714, total_time: 0.008002354657177566, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:50<00:00,  1.97it/s, episode=98, step=98, err_qss=6.67e-5, err_bdf=4.31e-15, cpu_qss=0.00984, cpu_bdf=0.171, Ti=1.14e+3, Tf=2.64e+3] 


[episode 99] - T_ref_init: 1273.3716964464395, T_ref_final: 2737.264557964706, pressure: 405300.0, phi: 0.8899285882220327, total_time: 0.0033658569511687895, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [01:21<00:00,  1.22it/s, episode=99, step=99, err_qss=7.47e-5, err_bdf=5.47e-15, cpu_qss=0.00379, cpu_bdf=0.0713, Ti=1.27e+3, Tf=2.74e+3]


[ref-dataset] ep 100/200, total samples=10094
[episode 100] - T_ref_init: 639.6740552002829, T_ref_final: 639.6740553206171, pressure: 303975.0, phi: 1.379990488095337, total_time: 0.0036073857473346315, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:34<00:00,  2.91it/s, episode=100, step=98, err_qss=9.78e-7, err_bdf=9.74e-7, cpu_qss=0.00304, cpu_bdf=0.0794, Ti=640, Tf=640]  


[episode 101] - T_ref_init: 619.3397546476178, T_ref_final: 619.3397546806003, pressure: 303975.0, phi: 1.4243250354792023, total_time: 0.005310057189326027, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:41<00:00,  2.38it/s, episode=101, step=98, err_qss=4.69e-6, err_bdf=4.54e-7, cpu_qss=0.00456, cpu_bdf=0.116, Ti=619, Tf=619]  


[episode 102] - T_ref_init: 799.905460699685, T_ref_final: 799.905616788281, pressure: 101325.0, phi: 0.799405508323257, total_time: 0.007634280042377484, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:50<00:00,  1.96it/s, episode=102, step=98, err_qss=1.62e-5, err_bdf=7.05e-7, cpu_qss=0.00674, cpu_bdf=0.174, Ti=800, Tf=800] 


[episode 103] - T_ref_init: 926.43156215638, T_ref_final: 926.3839038343588, pressure: 202650.0, phi: 1.3100948956629002, total_time: 0.003450993235175312, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:45<00:00,  2.19it/s, episode=103, step=99, err_qss=3.65e-6, err_bdf=5.9e-7, cpu_qss=0.00343, cpu_bdf=0.0791, Ti=926, Tf=926] 


[episode 104] - T_ref_init: 1051.5999540243765, T_ref_final: 2723.413551254614, pressure: 506625.0, phi: 1.1019871244482773, total_time: 0.009033166197571257, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:12<00:00,  1.37it/s, episode=104, step=98, err_qss=0.000664, err_bdf=8.24e-13, cpu_qss=0.0188, cpu_bdf=0.191, Ti=1.05e+3, Tf=2.72e+3] 


[episode 105] - T_ref_init: 1369.095666330698, T_ref_final: 2747.793168577003, pressure: 101325.0, phi: 1.0792534460031926, total_time: 0.001705409811932983, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:32<00:00,  3.09it/s, episode=105, step=98, err_qss=6.22e-10, err_bdf=1.72e-11, cpu_qss=0.00173, cpu_bdf=0.034, Ti=1.37e+3, Tf=2.75e+3] 


[episode 106] - T_ref_init: 1350.8578923418413, T_ref_final: 2696.91421241163, pressure: 202650.0, phi: 0.8278658068818169, total_time: 0.006538678401215623, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:41<00:00,  2.36it/s, episode=106, step=98, err_qss=3.06e-8, err_bdf=4.48e-13, cpu_qss=0.00628, cpu_bdf=0.137, Ti=1.35e+3, Tf=2.7e+3] 


[episode 107] - T_ref_init: 738.2402764338563, T_ref_final: 877.2224215773183, pressure: 405300.0, phi: 0.856837842266405, total_time: 0.0020286703123790443, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:33<00:00,  3.01it/s, episode=107, step=99, err_qss=0.00556, err_bdf=3.41e-7, cpu_qss=0.0243, cpu_bdf=0.0684, Ti=738, Tf=877]  


[episode 108] - T_ref_init: 700.9060353789575, T_ref_final: 922.6713810722949, pressure: 405300.0, phi: 0.8475727928051877, total_time: 0.007046868574055519, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:48<00:00,  2.02it/s, episode=108, step=98, err_qss=9.2e-7, err_bdf=2.29e-7, cpu_qss=0.00688, cpu_bdf=0.165, Ti=701, Tf=923]  


[episode 109] - T_ref_init: 1386.6585032550672, T_ref_final: 2755.8368084154413, pressure: 202650.0, phi: 0.9161995646985741, total_time: 0.004860267539632365, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:38<00:00,  2.61it/s, episode=109, step=99, err_qss=3.36e-5, err_bdf=2.19e-13, cpu_qss=0.005, cpu_bdf=0.101, Ti=1.39e+3, Tf=2.76e+3]  


[ref-dataset] ep 110/200, total samples=11097
[episode 110] - T_ref_init: 907.9269964681326, T_ref_final: 907.908262706227, pressure: 405300.0, phi: 0.8016056204904649, total_time: 0.003515780619275388, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:48<00:00,  2.06it/s, episode=110, step=98, err_qss=1.72e-5, err_bdf=6.81e-7, cpu_qss=0.00353, cpu_bdf=0.136, Ti=908, Tf=908]  


[episode 111] - T_ref_init: 1224.0510762960275, T_ref_final: 2803.7168786188304, pressure: 506625.0, phi: 1.1083592914605525, total_time: 0.0071512068058287494, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [01:13<00:00,  1.35it/s, episode=111, step=98, err_qss=0.000642, err_bdf=2.58e-14, cpu_qss=0.0148, cpu_bdf=0.153, Ti=1.22e+3, Tf=2.8e+3] 


[episode 112] - T_ref_init: 1324.0707855145035, T_ref_final: 2773.4558535503993, pressure: 202650.0, phi: 1.3607304426947024, total_time: 0.004502072017811218, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:44<00:00,  2.21it/s, episode=112, step=98, err_qss=0.000348, err_bdf=2.18e-15, cpu_qss=0.00609, cpu_bdf=0.0955, Ti=1.32e+3, Tf=2.77e+3]


[episode 113] - T_ref_init: 866.7772844956803, T_ref_final: 866.7745335730656, pressure: 101325.0, phi: 0.9910647550009986, total_time: 0.009467300719360052, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:49<00:00,  2.00it/s, episode=113, step=98, err_qss=1.67e-6, err_bdf=2.47e-7, cpu_qss=0.00931, cpu_bdf=0.216, Ti=867, Tf=867] 


[episode 114] - T_ref_init: 1373.2197551630043, T_ref_final: 2790.425155235394, pressure: 303975.0, phi: 0.9483826694113, total_time: 0.0025896007535675795, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 102/103 [00:33<00:00,  3.02it/s, episode=114, step=101, err_qss=5.68e-7, err_bdf=6.84e-12, cpu_qss=0.00252, cpu_bdf=0.0514, Ti=1.37e+3, Tf=2.79e+3]


[episode 115] - T_ref_init: 989.6973580645368, T_ref_final: 987.9265182500384, pressure: 506625.0, phi: 0.8322202079603856, total_time: 0.003169314662860368, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:36<00:00,  2.74it/s, episode=115, step=100, err_qss=1.69e-5, err_bdf=1.02e-5, cpu_qss=0.00319, cpu_bdf=0.0727, Ti=990, Tf=988]


[episode 116] - T_ref_init: 1119.5353999352035, T_ref_final: 2506.517131669388, pressure: 202650.0, phi: 0.7309188382145396, total_time: 0.004507714934565079, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:42<00:00,  2.34it/s, episode=116, step=98, err_qss=5.87e-9, err_bdf=3.42e-10, cpu_qss=0.004, cpu_bdf=0.0906, Ti=1.12e+3, Tf=2.51e+3]  


[episode 117] - T_ref_init: 1094.0350553624864, T_ref_final: 1073.304866866049, pressure: 303975.0, phi: 1.2550519663696713, total_time: 0.0015283819828363734, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:30<00:00,  3.27it/s, episode=117, step=99, err_qss=9.94e-6, err_bdf=6.12e-6, cpu_qss=0.00149, cpu_bdf=0.0379, Ti=1.09e+3, Tf=1.07e+3]


[episode 118] - T_ref_init: 933.6626705484365, T_ref_final: 933.5706848024031, pressure: 101325.0, phi: 1.1545029994455032, total_time: 0.006267819641684112, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:42<00:00,  2.37it/s, episode=118, step=99, err_qss=8.65e-7, err_bdf=4.14e-7, cpu_qss=0.00613, cpu_bdf=0.146, Ti=934, Tf=934]


[episode 119] - T_ref_init: 819.0283429549786, T_ref_final: 819.0287543806917, pressure: 202650.0, phi: 0.925311465096174, total_time: 0.0018415436867777258, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:31<00:00,  3.18it/s, episode=119, step=100, err_qss=4.65e-5, err_bdf=8.56e-7, cpu_qss=0.00195, cpu_bdf=0.045, Ti=819, Tf=819]


[ref-dataset] ep 120/200, total samples=12106
[episode 120] - T_ref_init: 1332.3221936541884, T_ref_final: 2680.5407903673763, pressure: 405300.0, phi: 0.762993799961921, total_time: 0.00703856706718964, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:44<00:00,  2.21it/s, episode=120, step=98, err_qss=1.83e-5, err_bdf=1.88e-15, cpu_qss=0.00723, cpu_bdf=0.155, Ti=1.33e+3, Tf=2.68e+3] 


[episode 121] - T_ref_init: 1053.0029616170314, T_ref_final: 1030.5972352068063, pressure: 405300.0, phi: 1.54373267752637, total_time: 0.002852710174522391, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:36<00:00,  2.75it/s, episode=121, step=99, err_qss=1.87e-5, err_bdf=8.11e-6, cpu_qss=0.00267, cpu_bdf=0.0766, Ti=1.05e+3, Tf=1.03e+3]


[episode 122] - T_ref_init: 610.7103597638644, T_ref_final: 610.7103597734507, pressure: 202650.0, phi: 1.282394061495365, total_time: 0.005951737965480704, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:40<00:00,  2.44it/s, episode=122, step=98, err_qss=8.47e-15, err_bdf=1.58e-15, cpu_qss=0.00604, cpu_bdf=0.171, Ti=611, Tf=611] 


[episode 123] - T_ref_init: 1393.9293729412027, T_ref_final: 2594.5126695064014, pressure: 101325.0, phi: 0.700166330339594, total_time: 0.005227240583872075, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:40<00:00,  2.47it/s, episode=123, step=98, err_qss=2.6e-10, err_bdf=3.27e-13, cpu_qss=0.00522, cpu_bdf=0.109, Ti=1.39e+3, Tf=2.59e+3]  


[episode 124] - T_ref_init: 942.4921962566939, T_ref_final: 942.4651360074845, pressure: 101325.0, phi: 1.1085144107593934, total_time: 0.0022620311557311093, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:33<00:00,  3.05it/s, episode=124, step=100, err_qss=9.97e-7, err_bdf=4.89e-7, cpu_qss=0.00241, cpu_bdf=0.0523, Ti=942, Tf=942]


[episode 125] - T_ref_init: 651.199945897331, T_ref_final: 651.1999459172787, pressure: 506625.0, phi: 0.8487034890325496, total_time: 0.0011981276905314118, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 107/108 [00:30<00:00,  3.48it/s, episode=125, step=106, err_qss=1.49e-6, err_bdf=1.15e-6, cpu_qss=0.00112, cpu_bdf=0.0234, Ti=651, Tf=651] 


[episode 126] - T_ref_init: 811.9802559536231, T_ref_final: 923.921274430569, pressure: 506625.0, phi: 0.7482638375671734, total_time: 0.005615315419244725, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 99/100 [00:45<00:00,  2.19it/s, episode=126, step=98, err_qss=7.29e-7, err_bdf=3.77e-7, cpu_qss=0.00567, cpu_bdf=0.136, Ti=812, Tf=924] 


[episode 127] - T_ref_init: 959.4937814090282, T_ref_final: 959.025071822065, pressure: 303975.0, phi: 0.7042404788752589, total_time: 0.0056605736189572495, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [00:42<00:00,  2.34it/s, episode=127, step=99, err_qss=8.1e-6, err_bdf=4.98e-6, cpu_qss=0.00561, cpu_bdf=0.132, Ti=959, Tf=959] 


[episode 128] - T_ref_init: 1041.1180387598147, T_ref_final: 1035.5316231443828, pressure: 303975.0, phi: 1.1494893571563152, total_time: 0.002054247220990313, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 101/102 [00:54<00:00,  1.86it/s, episode=128, step=100, err_qss=7.73e-6, err_bdf=7.91e-6, cpu_qss=0.002, cpu_bdf=0.0474, Ti=1.04e+3, Tf=1.04e+3] 


[episode 129] - T_ref_init: 837.935201350857, T_ref_final: 865.2893553295759, pressure: 303975.0, phi: 1.215082358569828, total_time: 0.005675146259033287, dt: 1e-06


Building reference-anchored dataset:  99%|█████████▉| 100/101 [01:47<00:01,  1.07s/it, episode=129, step=99, err_qss=5.66e-6, err_bdf=4.49e-7, cpu_qss=0.00561, cpu_bdf=0.135, Ti=838, Tf=865]


[ref-dataset] ep 130/200, total samples=13121
[episode 130] - T_ref_init: 829.8376873177758, T_ref_final: 888.93690182347, pressure: 405300.0, phi: 0.8677747866762152, total_time: 0.0037153415498548067, dt: 1e-06


Building reference-anchored dataset:   0%|          | 0/100 [00:00<?, ?it/s]

: 

In [None]:

class SwitcherMLP(nn.Module):
    def __init__(self, in_dim, hidden=128, dropout=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 2)  # 0=BDF, 1=QSS
        )

    def forward(self, x):
        return self.net(x)

def train_classifier(dataset_path="oracle_dataset.npz",
                     epochs=15,
                     batch_size=512,
                     lr=3e-4,
                     out_path="switcher_classifier.pt"):
    data = np.load(dataset_path)
    X = data["X"].astype(np.float32)
    y = data["y"].astype(np.int64)

    Xtr, Xval, ytr, yval = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SwitcherMLP(in_dim=X.shape[1]).to(device)

    train_ds = TensorDataset(torch.from_numpy(Xtr), torch.from_numpy(ytr))
    val_ds   = TensorDataset(torch.from_numpy(Xval), torch.from_numpy(yval))
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=False)
    val_loader   = DataLoader(val_ds, batch_size=2048, shuffle=False, drop_last=False)

    opt = torch.optim.AdamW(model.parameters(), lr=lr)
    crit = nn.CrossEntropyLoss()

    best_acc = 0.0
    for ep in range(1, epochs+1):
        model.train()
        tr_loss, tr_n = 0.0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            logits = model(xb)
            loss = crit(logits, yb)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            tr_loss += loss.item() * xb.size(0)
            tr_n += xb.size(0)
        tr_loss /= tr_n

        # val
        model.eval()
        correct, tot = 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                logits = model(xb)
                pred = logits.argmax(dim=1)
                correct += (pred == yb).sum().item()
                tot += xb.size(0)
        acc = correct / tot
        print(f"[clf] ep {ep:02d} | train_loss {tr_loss:.4f} | val_acc {acc:.4f}")

        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), out_path)
            print(f"[clf] saved best to {out_path} (acc={best_acc:.4f})")

    print(f"[clf] done. best val acc={best_acc:.4f}")

if __name__ == "__main__":
    train_classifier()
