# File Naming / Saving / Loading Integration Test

This notebook programmatically tests the qspectro2d data pipeline:

1. Construct a minimal dummy simulation context (no expensive dynamics).
2. Save multiple 1D dummy signals with different `inhom_index` values -> verify unique, deterministic filenames.
3. Save an *averaged* inhomogeneous result (`inhom_averaged=True`) and check the `inhom_avg` filename prefix.
4. Save a dummy 2D dataset and check its `2d_` filename prefix.
5. Load every saved file using `load_simulation_data` and validate metadata + array shapes.
6. Summarize results and (optionally) clean up.

> The notebook avoids running the real solver; it only exercises file naming and I/O paths.

In [1]:
# Imports & setup
import numpy as np
from pathlib import Path
from datetime import datetime

from qspectro2d.core.simulation.sim_config import SimulationConfig
from qspectro2d.utils.data_io import save_simulation_data, load_simulation_data
from qspectro2d.utils.file_naming import generate_unique_data_filename
from thesis_paths import DATA_DIR, ensure_project_directories

# Run the thesis_paths directory helper, then show which data root is in use.
ensure_project_directories()
print(f'Data root: {DATA_DIR}')

Data root: C:\Users\leopo\.vscode\thesis_python\data


In [2]:
# Dummy object stubs (minimal attributes + to_dict)
class DummySystem:
    """Minimal stand-in for a system object.

    Holds a handful of plain attributes and offers ``to_dict`` so it can be
    passed through the save path without a real model behind it.
    """

    def __init__(self, n_atoms=1):
        self.n_atoms = n_atoms
        self.n_chains = 1
        self.n_rings = None
        self.coupling_cm = 0.0
        self.frequencies_cm = [16000.0]

    def to_dict(self):
        """Return the exported attributes as a plain dict.

        ``frequencies_cm`` is stored on the instance but is not exported.
        """
        exported_keys = ('n_atoms', 'n_chains', 'n_rings', 'coupling_cm')
        return {key: getattr(self, key) for key in exported_keys}

class DummyLaser:
    """Minimal stand-in for a laser object with three fixed attributes."""

    def __init__(self):
        self.base_amplitude = 0.01
        self.carrier_freq_cm = 16000.0
        self.pulse_fwhm_fs = 15.0

    def to_dict(self):
        """Return all three laser attributes as a plain dict."""
        return dict(
            base_amplitude=self.base_amplitude,
            carrier_freq_cm=self.carrier_freq_cm,
            pulse_fwhm_fs=self.pulse_fwhm_fs,
        )

class DummyBath:
    """Bare placeholder bath that only carries a ``tag`` attribute."""

    # Deliberately no to_dict: pickle is fine with simple objects, so none is
    # required for saving.
    def __init__(self):
        self.tag = 'ohmic'

class DummySimModule:
    """Bundle of dummy system, laser and bath plus a given simulation config.

    Exposes the four attributes ``system``, ``laser``, ``bath`` and
    ``simulation_config``; the first three are always fresh dummy instances.
    """

    def __init__(self, sim_config):
        # Same construction and assignment order as before: system, laser, bath.
        self.system, self.laser, self.bath = DummySystem(), DummyLaser(), DummyBath()
        self.simulation_config = sim_config

# Helper for pretty printing base names
def stem(path):
    """Return the final component of ``path`` with its last suffix removed."""
    as_path = Path(path)
    return as_path.stem

In [3]:
# Every run writes into its own timestamped folder below DATA_DIR/io_naming_tests.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
test_dir = Path(DATA_DIR).joinpath('io_naming_tests', timestamp)
test_dir.mkdir(parents=True, exist_ok=True)
print(f'Test directory: {test_dir}')

Test directory: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432


In [4]:
# Baseline SimulationConfig. Later cells copy it via to_dict() and override
# individual fields (inhom_index, averaged flags, sim_type).
base_cfg_kwargs = dict(
    ode_solver='ME',
    dt=0.1,
    t_coh=50.0,
    t_wait=0.0,
    t_det_max=100.0,
    n_phases=4,
    n_inhomogen=4,
    sim_type='1d',
    signal_types=['rephasing'],
)
base_cfg = SimulationConfig(**base_cfg_kwargs)
print(base_cfg.summary())

SimulationConfig Summary:
-------------------------------
1d ELECTRONIC SPECTROSCOPY SIMULATION
Signal Type        : ['rephasing']
Time Parameters:
Coherence Time     : 50.0 fs
Wait Time          : 0.0 fs
Max Det. Time      : 100.0 fs

Time Step (dt)     : 0.1 fs
-------------------------------
Solver Type        : ME
Use rwa_sl         : True

-------------------------------
Phase Cycles       : 4
Inhom. Points      : 4
Inhom Enabled      : False
Inhom Averaged     : False
Inhom Index        : 0
Max Workers        : 1
-------------------------------



In [5]:
# 1. Save raw inhomogeneous configurations (inhom_index = 0..n_inhomogen-1)
# The detection axis is the same for every configuration, so it is built once
# here rather than on each loop iteration.
t_det = np.linspace(0, 100, 256)

raw_paths = []
for idx in range(base_cfg.n_inhomogen):
    # Copy the base config and override only the inhomogeneity fields.
    cfg = SimulationConfig(**{**base_cfg.to_dict(), 'inhom_index': idx, 'inhom_enabled': True})
    sim = DummySimModule(cfg)
    # Dummy 1D signal: damped sinusoid, phase-shifted by the index so every file differs.
    data = np.sin(0.1 * t_det + idx) * np.exp(-t_det / 120)
    meta = {
        'signal_types': cfg.signal_types,
        't_coh_value': cfg.t_coh,
        'inhom_enabled': True,
        'inhom_averaged': False,
    }
    out = save_simulation_data(sim, meta, [data], t_det, data_root=test_dir)
    raw_paths.append(out)

raw_stems = [stem(p) for p in raw_paths]
print('Raw saved stems:')
for raw_stem in raw_stems:
    print('  ', raw_stem)

# Guard against a vacuous pass: with zero saved files the uniqueness check
# below would succeed without having tested anything.
assert raw_paths, 'No raw inhom configurations were saved (n_inhomogen is 0?).'
assert len(set(raw_stems)) == len(raw_stems), 'Filename collision detected for raw inhom configs.'

Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_info.pkl
Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_001_info.pkl
Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_002_info.pkl
Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_003_info.pkl
Raw saved stems:
   1d_t_coh_50_inhom_000_data
   1d_t_coh_50_inhom_001_data
   1d_t_coh_50_inhom_002_data
   1d_t_coh_50_inhom_003_data


In [6]:
# 2. Save an averaged inhomogeneous result (prefix style now)
avg_overrides = {'inhom_index': 0, 'inhom_enabled': True, 'inhom_averaged': True}
avg_cfg = SimulationConfig(**{**base_cfg.to_dict(), **avg_overrides})
sim_avg = DummySimModule(avg_cfg)
t_det = np.linspace(0, 100, 256)

# Reload every raw signal from disk and average them element-wise.
raw_signals = [load_simulation_data(p)['rephasing'] for p in raw_paths]
avg_data = np.mean(raw_signals, axis=0)

meta_avg = dict(
    signal_types=avg_cfg.signal_types,
    t_coh_value=avg_cfg.t_coh,
    inhom_enabled=True,
    inhom_averaged=True,
)
avg_path = save_simulation_data(sim_avg, meta_avg, [avg_data], t_det, data_root=test_dir)
print('Averaged stem:', stem(avg_path))

# The averaged marker must come right after the sim_type prefix: '1d_inhom_avg_...'.
assert stem(avg_path).startswith('1d_inhom_avg_'), 'Averaged filename missing inhom_avg prefix.'

Loading data bundle: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_data.npz
Loading data: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_data.npz
Loaded data: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_data.npz
Loading info: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_info.pkl
Loaded info: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_info.pkl
Loading data bundle: C:\Users\leopo\.vscode\thesis_python\data\io_naming_te

In [7]:
# 3. Save a dummy 2D stacked dataset (now expects no tcoh_avg tag unless explicitly set)
overrides_2d = {'sim_type': '2d', 'inhom_index': 0}
cfg_2d = SimulationConfig(**{**base_cfg.to_dict(), **overrides_2d})
sim_2d = DummySimModule(cfg_2d)

# NOTE: t_coh_averaged=True is no longer set here; it is reserved for the case of
# stacking/averaging over t_coh values. This keeps naming deterministic with
# the current config.

# Axes: detection time runs along columns, coherence time along rows.
t_det = np.linspace(0, 100, 128)
t_coh = np.linspace(0, 60, 11)

# Separable dummy surface: a decay along each axis, multiplied via broadcasting.
det_part = np.exp(-t_det / 80)
coh_part = np.exp(-t_coh / 50)
data2d = coh_part[:, np.newaxis] * det_part[np.newaxis, :]

meta_2d = dict(
    signal_types=cfg_2d.signal_types,
    inhom_enabled=False,
    inhom_averaged=False,
)
path_2d = save_simulation_data(sim_2d, meta_2d, [data2d], t_det, t_coh=t_coh, data_root=test_dir)
print('2D stem:', stem(path_2d))
assert stem(path_2d).startswith('2d_'), '2D filename should start with sim_type prefix 2d_.'

Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\2d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\2d_t_coh_50_inhom_000_info.pkl
2D stem: 2d_t_coh_50_inhom_000_data


In [8]:
# 4. Load & validate all saved files (metadata + array shapes)
all_paths = raw_paths + [avg_path, path_2d]

# Metadata each file is expected to carry, in the same order as all_paths:
# (sim_type, inhom_index, inhom_averaged). Raw files were saved with
# inhom_index equal to their position in raw_paths.
expected_meta = (
    [('1d', idx, False) for idx in range(len(raw_paths))]
    + [('1d', 0, True), ('2d', 0, False)]
)

report = []
for path, expected in zip(all_paths, expected_meta):
    bundle = load_simulation_data(path)
    sim_cfg = bundle['sim_config']
    arr = bundle['rephasing']

    # Metadata round trip: the stored config must match what was saved.
    found = (sim_cfg.sim_type, sim_cfg.inhom_index, sim_cfg.inhom_averaged)
    assert found == expected, (
        f"Metadata mismatch for {stem(path)}: expected {expected}, got {found}"
    )

    # Shape check: 2D data is (t_coh, t_det); 1D data is (t_det,).
    if sim_cfg.sim_type == '2d':
        expected_shape = (len(bundle['t_coh']), len(bundle['t_det']))
    else:
        expected_shape = (len(bundle['t_det']),)
    assert arr.shape == expected_shape, (
        f"Shape mismatch for {stem(path)}: expected {expected_shape}, got {arr.shape}"
    )

    report.append({
        'stem': stem(path),
        'sim_type': sim_cfg.sim_type,
        'inhom_index': sim_cfg.inhom_index,
        'inhom_averaged': sim_cfg.inhom_averaged,
        'shape': arr.shape,
    })

report

Loading data bundle: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_data.npz
Loading data: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_data.npz
Loaded data: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_data.npz
Loading info: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_info.pkl
Loaded info: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\1d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\1d_t_coh_50_inhom_000_info.pkl
Loading data bundle: C:\Users\leopo\.vscode\thesis_python\data\io_naming_te

[{'stem': '1d_t_coh_50_inhom_000_data',
  'sim_type': '1d',
  'inhom_index': 0,
  'inhom_averaged': False,
  'shape': (256,)},
 {'stem': '1d_t_coh_50_inhom_001_data',
  'sim_type': '1d',
  'inhom_index': 1,
  'inhom_averaged': False,
  'shape': (256,)},
 {'stem': '1d_t_coh_50_inhom_002_data',
  'sim_type': '1d',
  'inhom_index': 2,
  'inhom_averaged': False,
  'shape': (256,)},
 {'stem': '1d_t_coh_50_inhom_003_data',
  'sim_type': '1d',
  'inhom_index': 3,
  'inhom_averaged': False,
  'shape': (256,)},
 {'stem': '1d_inhom_avg_t_coh_50_inhom_000_data',
  'sim_type': '1d',
  'inhom_index': 0,
  'inhom_averaged': True,
  'shape': (256,)},
 {'stem': '2d_t_coh_50_inhom_000_data',
  'sim_type': '2d',
  'inhom_index': 0,
  'inhom_averaged': False,
  'shape': (11, 128)}]

In [9]:
# 5. (Optional) Clean up test artifacts.
# Set to True to delete created files after inspection.
# NOTE: later cells in this notebook save into test_dir again.
CLEANUP = False
if CLEANUP:
    import shutil
    # Best-effort removal: errors are ignored, so verify the outcome before
    # reporting success instead of printing "Removed" unconditionally.
    shutil.rmtree(test_dir, ignore_errors=True)
    if test_dir.exists():
        print('Could not fully remove test directory:', test_dir)
    else:
        print('Removed test directory.')
else:
    print('Kept test data at:', test_dir)

Kept test data at: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432


In [10]:
# 6. Collision fallback test (enumeration now ALWAYS on collision; no collision_mode argument)
# Note: Files are saved in nested subfolders under collision_dir, so we search recursively.

collision_dir = test_dir / 'collision_case'
# parents=True: test_dir itself may be missing if the optional cleanup cell
# above was run with CLEANUP=True.
collision_dir.mkdir(parents=True, exist_ok=True)

col_cfg = SimulationConfig(
    ode_solver='ME', dt=0.1, t_coh=33.3, t_wait=0.0, t_det_max=80.0,
    n_phases=4, n_inhomogen=1, sim_type='1d', signal_types=['rephasing'], inhom_index=0
)
col_sim = DummySimModule(col_cfg)

col_t_det = np.linspace(0, 80, 64)
col_data  = np.cos(0.05 * col_t_det)
meta_col = {
    'signal_types': col_cfg.signal_types,
    't_coh_value': col_cfg.t_coh,
    'inhom_enabled': False,
    'inhom_averaged': False,
}

# First save (base _data variant)
first_path  = save_simulation_data(col_sim, meta_col, [col_data], col_t_det, data_root=collision_dir)
# Second save with identical inputs (should auto-enumerate on _data stem -> _data_1)
second_path = save_simulation_data(col_sim, meta_col, [col_data], col_t_det, data_root=collision_dir)
assert str(first_path) != str(second_path), (
    f"Second save returned the same path as the first: {first_path}"
)

npz_paths = sorted(collision_dir.rglob('*_data*.npz'))
print('Found data files (relative to collision_dir):')
for p in npz_paths:
    # rglob only yields paths below collision_dir, so relative_to cannot fail here.
    print('  ', p.relative_to(collision_dir))

stems = [p.name for p in npz_paths]  # file names, extension included for clarity
# Expect an enumerated variant <base>_data_<N>.npz for any non-negative integer N
# (not just 1..3): take the text after the last '_data_' and require digits only.
base_enumerated_ok = any(
    Path(name).stem.rpartition('_data_')[2].isdigit() for name in stems
)
assert base_enumerated_ok, 'Missing enumerated base_data_<N>.npz variant among: ' + ', '.join(stems)

stems

Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\collision_case\1d_spectroscopy\1_atoms\ME\RWA\t_dm80.0_t_wait_0.0_dt_0.1\1d_t_coh_33.3_inhom_000_info.pkl
Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\collision_case\1d_spectroscopy\1_atoms\ME\RWA\t_dm80.0_t_wait_0.0_dt_0.1\1d_t_coh_33.3_inhom_000_info_1.pkl
Found data files (relative to collision_dir):
   1d_spectroscopy\1_atoms\ME\RWA\t_dm80.0_t_wait_0.0_dt_0.1\1d_t_coh_33.3_inhom_000_data.npz
   1d_spectroscopy\1_atoms\ME\RWA\t_dm80.0_t_wait_0.0_dt_0.1\1d_t_coh_33.3_inhom_000_data_1.npz


['1d_t_coh_33.3_inhom_000_data.npz', '1d_t_coh_33.3_inhom_000_data_1.npz']

In [11]:
# 7. Test prefix ordering for averaged outputs (updated expectations)
avg_stem   = stem(avg_path)
stack_stem = stem(path_2d)

print('Averaged 1D stem: ', avg_stem)
print('Stacked  2D stem: ', stack_stem)

# Expect the new prefix style for averaged variants
assert avg_stem.startswith('1d_inhom_avg_'), (
    f"Expected inhom_avg_ prefix after flip, got: {avg_stem}"
)
assert 't_coh_' in avg_stem, 'Core t_coh segment missing in averaged 1D stem.'

# 2D dataset (non-averaged over t_coh) should just start with '2d_'
assert stack_stem.startswith('2d_'), (
    f"Expected 2d_ prefix for 2D dataset, got: {stack_stem}"
)
assert 't_coh_' in stack_stem, 'Core t_coh segment missing in 2D stacked stem.'

# Ensure no legacy suffix style remains (best-effort check).
# Saved stems end in '_data' (or '_data_<N>' after a collision), so testing the
# bare stem alone can never match. Also test the part before the last '_data'.
# NOTE(review): assumes a legacy tag would sit directly before '_data' — confirm.
avg_core = avg_stem.rpartition('_data')[0] or avg_stem
stack_core = stack_stem.rpartition('_data')[0] or stack_stem
legacy_suffix = (
    any(s.endswith('_inhom_avg') for s in (avg_stem, avg_core))
    or any(s.endswith('_tcoh_avg') for s in (stack_stem, stack_core))
)
assert not legacy_suffix, 'Detected legacy suffix style in stems (should be prefix style).'

print('✅ Prefix ordering for averaged filenames verified.')

Averaged 1D stem:  1d_inhom_avg_t_coh_50_inhom_000_data
Stacked  2D stem:  2d_t_coh_50_inhom_000_data
✅ Prefix ordering for averaged filenames verified.


In [12]:
# 8. Both inhomogeneous and t_coh averaged (combined prefix) – UPDATED CONFIG
# Create a 2D dataset averaged over inhom configs AND representing stacked coherence delays.

cfg_both = SimulationConfig(**{
    **base_cfg.to_dict(),
    'sim_type': '2d',
    'inhom_index': 0,          # canonical index for averaged
    'inhom_enabled': True,
    'inhom_averaged': True,
    # t_coh_averaged flag has been deprecated from naming; combined averaging reflected only by inhom_avg prefix
})

sim_both = DummySimModule(cfg_both)

# Axes
both_t_det = np.linspace(0, 120, 90)
both_t_coh = np.linspace(0, 70, 9)

# Dummy separable 2D surface again (slightly different params to distinguish)
both_surface = np.exp(-both_t_coh[:, None]/60) * np.cos(0.07 * both_t_det)[None, :]

meta_both = {
    'signal_types': cfg_both.signal_types,
    'inhom_enabled': True,
    'inhom_averaged': True,
}

both_path = save_simulation_data(
    sim_both,
    meta_both,
    [both_surface],
    both_t_det,
    t_coh=both_t_coh,
    data_root=test_dir,
)

both_stem = stem(both_path)
print('Combined averaged stem:', both_stem)

# Assertions for prefix ordering (the message names the exact prefix being tested)
assert both_stem.startswith('2d_inhom_avg_'), (
    f"Expected combined averaged prefix '2d_inhom_avg_' but got: {both_stem}"
)
assert 't_coh_' in both_stem, 'Missing core t_coh segment in combined averaged stem.'

# Ensure no legacy trailing suffix style.
# Saved stems end in '_data' (or '_data_<N>' after a collision), so testing the
# bare stem alone can never match. Also test the part before the last '_data'.
# NOTE(review): assumes a legacy tag would sit directly before '_data' — confirm.
both_core = both_stem.rpartition('_data')[0] or both_stem
assert not any(s.endswith('_inhom_avg') for s in (both_stem, both_core)), (
    'Legacy suffix style detected in combined averaged stem.'
)

# Basic shape validation from re-load: rows follow t_coh, columns follow t_det.
bundle_both = load_simulation_data(both_path)
arr_both = bundle_both['rephasing']
assert arr_both.shape == (len(both_t_coh), len(both_t_det)), 'Unexpected array shape for combined averaged 2D data.'

print('✅ Combined averaged (inhom + coherence stack) filename and data shape validated.')

Info saved: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\2d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\2d_inhom_avg_t_coh_50_inhom_000_info.pkl
Combined averaged stem: 2d_inhom_avg_t_coh_50_inhom_000_data
Loading data bundle: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\2d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\2d_inhom_avg_t_coh_50_inhom_000_data.npz
Loading data: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\2d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\2d_inhom_avg_t_coh_50_inhom_000_data.npz
Loaded data: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\2d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\2d_inhom_avg_t_coh_50_inhom_000_data.npz
Loading info: C:\Users\leopo\.vscode\thesis_python\data\io_naming_tests\20250927_094432\2d_spectroscopy\1_atoms\ME\RWA\inhom\t_dm100.0_t_wait_0.0_dt_0.1\2d_inh