In [None]:
import glob
import os
import subprocess
from collections import deque
from datetime import datetime
from pathlib import Path

# Configuration
# Root of the per-simulation products; later cells look under
# OUTPUT_DIR / 'L205n<SIM_RES>TNG' for matches/, snapNNN/projected/ and profile files.
OUTPUT_DIR = Path('/mnt/home/mlee1/ceph/hydro_replace_fields')
# Scanned one level deep: each subdirectory is checked for snap*.bin files and a config.dat.
LENSPLANE_DIR = Path('/mnt/home/mlee1/ceph/hydro_replace_lensplanes')
# Scanned one level deep: each subdirectory is checked for convergence_*.bin files.
LUX_OUT_DIR = Path('/mnt/home/mlee1/ceph/lux_out')
# Directory searched for '*.e*' log files by the log-error cell.
LOG_DIR = Path('/mnt/home/mlee1/hydro_replace2/logs')
# Interpolated into the simulation directory name f'L205n{SIM_RES}TNG'.
SIM_RES = 2500

# Snapshot numbers every progress check iterates over / compares against.
SNAPSHOTS = [29, 31, 33, 35, 38, 41, 43, 46, 49, 52, 56, 59, 63, 67, 71, 76, 80, 85, 90, 96, 99]
# NOTE(review): not referenced by the original cells; presumably these are the
# 4 "replace" variants and (with BCM_MODELS) the 12 "bcm" variants counted in
# the 2D-maps cell - confirm against the pipeline that writes the files.
MASS_THRESHOLDS = ['Mgt12.5', 'Mgt13.0', 'Mgt13.5', 'Mgt14.0']
BCM_MODELS = ['arico20', 'schneider19', 'schneider25']

# Banner with the time this monitoring run started.
print(f"Pipeline Monitor - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)

In [None]:
# Check SLURM queue
print("\nüìã SLURM Job Status:")
print("-" * 60)

# Fall back to 'mlee1' when $USER is not set in the kernel's environment.
slurm_user = os.environ.get('USER', 'mlee1')
squeue_cmd = ['squeue', '-u', slurm_user,
              '--format=%.10i %.9P %.20j %.8u %.2t %.10M %.6D %R']

# `result` stays None when squeue cannot be launched at all, so the rest of
# the notebook (Run All) keeps going on machines without SLURM.
result = None
try:
    result = subprocess.run(squeue_cmd, capture_output=True, text=True)
except OSError as exc:
    print(f"Could not run squeue: {exc}")
else:
    if result.returncode != 0:
        # Surface the reason instead of printing an empty table.
        print(f"squeue failed (exit code {result.returncode}):")
        print(result.stderr.strip() or result.stdout)
    else:
        print(result.stdout)

In [None]:
# Check matches progress
print("\nüìÅ Matches Progress:")
print("-" * 60)

matches_dir = OUTPUT_DIR / f'L205n{SIM_RES}TNG' / 'matches'

# Always (re)bind existing_snaps: the summary cell reads it, and re-running
# this cell after the directory disappeared must not leave a stale set behind.
existing_snaps = set()

if matches_dir.exists():
    match_files = sorted(matches_dir.glob('matches_snap*.npz'))
    for match_file in match_files:
        try:
            existing_snaps.add(int(match_file.stem.split('snap')[1]))
        except (IndexError, ValueError):
            # The glob also matches names with a non-numeric suffix after
            # "snap"; skip those instead of aborting the whole cell.
            continue

    expected_snaps = set(SNAPSHOTS)
    # Count only snapshots the pipeline needs, so files for other snapshots
    # cannot push the progress above 100% while some are still missing.
    n_have = len(existing_snaps & expected_snaps)
    n_need = len(expected_snaps)
    missing = expected_snaps - existing_snaps
    percent_done = 100 * n_have / n_need if n_need else 0.0

    print(f"Progress: {n_have}/{n_need} ({percent_done:.1f}%)")
    if missing:
        print(f"Missing: {sorted(missing)}")
    else:
        print("‚úì All snapshots have matches!")
else:
    print("Matches directory not found!")

In [None]:
# Check 2D maps progress
print("\nüìÅ 2D Maps Progress:")
print("-" * 60)

# Expected files per snapshot, derived from the configuration instead of
# hard-coded: dmo (1) + hydro (1) are generated once, not per mass threshold;
# the rest scale with the configured thresholds / models.
# NOTE(review): the previous literals 4 and 12 equal len(MASS_THRESHOLDS) and
# len(BCM_MODELS) * len(MASS_THRESHOLDS); confirm that is the intended rule.
n_replace_expected = len(MASS_THRESHOLDS)
n_bcm_expected = len(BCM_MODELS) * len(MASS_THRESHOLDS)
n_expected_per_snap = 2 + n_replace_expected + n_bcm_expected

map_status = []
for snap in SNAPSHOTS:
    snap_dir = OUTPUT_DIR / f'L205n{SIM_RES}TNG' / f'snap{snap:03d}' / 'projected'

    # A snapshot whose directory does not exist yet simply has no files.
    if snap_dir.exists():
        file_names = [f.name for f in snap_dir.glob('*.npz')]
    else:
        file_names = []

    status = {
        'snap': snap,
        'dmo': 'dmo.npz' in file_names,
        'hydro': 'hydro.npz' in file_names,
        'replace': sum(1 for name in file_names if name.startswith('replace_')),
        'bcm': sum(1 for name in file_names if name.startswith('bcm_')),
    }
    # Computed once here and reused by both the table and the overall total.
    status['total'] = (int(status['dmo']) + int(status['hydro'])
                       + status['replace'] + status['bcm'])
    map_status.append(status)

# Summary table
print(f"{'Snap':>5} {'DMO':>5} {'Hydro':>6} {'Replace':>8} {'BCM':>5} {'Total':>6}")
print("-" * 42)
for s in map_status:
    dmo = '‚úì' if s['dmo'] else '‚úó'
    hydro = '‚úì' if s['hydro'] else '‚úó'
    print(f"{s['snap']:>5} {dmo:>5} {hydro:>6} "
          f"{s['replace']:>5}/{n_replace_expected}  "
          f"{s['bcm']:>3}/{n_bcm_expected}  "
          f"{s['total']:>3}/{n_expected_per_snap}")

# Overall progress (names kept: the summary cell reads total_files / expected_files)
total_files = sum(s['total'] for s in map_status)
expected_files = len(SNAPSHOTS) * n_expected_per_snap
percent_done = 100 * total_files / expected_files if expected_files else 0.0
print(f"\nTotal: {total_files}/{expected_files} files ({percent_done:.1f}%)")

In [None]:
# Check profiles progress (both new and old format)
print("\nüìÅ Profiles Progress:")
print("-" * 60)


def _snaps_from_files(paths):
    """Return the set of snapshot numbers encoded in the given file names.

    The number is taken as the text following the first "snap" in each
    file's stem. Files whose stem does not yield an integer there are
    skipped; only parse failures are ignored, so real errors (and
    KeyboardInterrupt) still propagate.
    """
    snaps = set()
    for path in paths:
        try:
            snaps.add(int(path.stem.split('snap')[1]))
        except (IndexError, ValueError):
            continue
    return snaps


sim_dir = OUTPUT_DIR / f'L205n{SIM_RES}TNG'

# New format: profiles_spherical_snap*.h5, profiles_bcm_snap*.h5
spherical_files = sorted(sim_dir.glob('profiles_spherical_snap*.h5'))
bcm_profile_files = sorted(sim_dir.glob('profiles_bcm_snap*.h5'))
spherical_snaps = _snaps_from_files(spherical_files)
bcm_snaps = _snaps_from_files(bcm_profile_files)

# Old format: snap*/profiles.h5 (only the configured snapshots are probed)
profile_snaps_old = [
    snap for snap in SNAPSHOTS
    if (sim_dir / f'snap{snap:03d}' / 'profiles.h5').exists()
]

print(f"New format (spherical DMO+Hydro): {len(spherical_snaps)}/{len(SNAPSHOTS)} snapshots")
if spherical_snaps:
    print(f"  Available: {sorted(spherical_snaps)}")

print(f"New format (BCM profiles): {len(bcm_snaps)}/{len(SNAPSHOTS)} snapshots")
if bcm_snaps:
    print(f"  Available: {sorted(bcm_snaps)}")

print(f"Old format (snap*/profiles.h5): {len(profile_snaps_old)}/{len(SNAPSHOTS)} snapshots")
if profile_snaps_old:
    print(f"  Available: {profile_snaps_old}")

# For overall tracking: a snapshot counts once, whichever format provides it
profile_snaps = spherical_snaps | bcm_snaps | set(profile_snaps_old)
print(f"\nTotal unique snapshots with profiles: {len(profile_snaps)}/{len(SNAPSHOTS)}")

In [None]:
# Check lens planes progress
print("\nüìÅ Lens Planes Progress:")
print("-" * 60)

# One output line per subdirectory of LENSPLANE_DIR, in sorted order:
# how many snap*.bin files it holds and whether its config.dat exists.
if not LENSPLANE_DIR.exists():
    print("Lens plane directory not found")
else:
    for model_dir in sorted(LENSPLANE_DIR.iterdir()):
        if not model_dir.is_dir():
            continue
        plane_count = sum(1 for _ in model_dir.glob('snap*.bin'))
        if (model_dir / 'config.dat').exists():
            config_mark = '‚úì'
        else:
            config_mark = '‚úó'
        print(f"{model_dir.name:<45} {plane_count:>3} files, config: {config_mark}")

In [None]:
# Check ray-tracing progress
print("\nüìÅ Ray-Tracing Progress:")
print("-" * 60)

# One output line per subdirectory of LUX_OUT_DIR, in sorted order, with the
# number of convergence_*.bin files found directly inside it.
if not LUX_OUT_DIR.exists():
    print("Lux output directory not found")
else:
    run_dirs = (entry for entry in sorted(LUX_OUT_DIR.iterdir()) if entry.is_dir())
    for run_dir in run_dirs:
        map_count = sum(1 for _ in run_dir.glob('convergence_*.bin'))
        print(f"{run_dir.name:<45} {map_count:>3} convergence maps")

In [None]:
# Check recent log files for errors
print("\nüìã Recent Log Errors:")
print("-" * 60)

N_RECENT_LOGS = 5   # how many of the most recently modified logs to inspect
N_TAIL_LINES = 5    # how many trailing lines to show per log

# Find recent .e (error) files, newest first
error_files = sorted(LOG_DIR.glob('*.e*'),
                     key=lambda x: x.stat().st_mtime,
                     reverse=True)[:N_RECENT_LOGS]

if not error_files:
    # Say so explicitly; otherwise the cell prints only its header.
    print(f"No '*.e*' log files found in {LOG_DIR}")

for ef in error_files:
    size = ef.stat().st_size
    if size > 0:
        print(f"\n{ef.name} ({size} bytes):")
        # errors='replace': a stray undecodable byte in a job log must not
        # abort the monitor with UnicodeDecodeError.
        with open(ef, 'r', errors='replace') as f:
            # A bounded deque streams the file and keeps only the tail, so a
            # very large log is never held in memory in full.
            tail_lines = deque(f, maxlen=N_TAIL_LINES)
        for line in tail_lines:
            print(f"  {line.rstrip()}")
    else:
        print(f"{ef.name}: empty (no errors)")

In [None]:
# Disk usage
print("\nüíæ Disk Usage:")
print("-" * 60)

for name, path in [
    ('2D Maps', OUTPUT_DIR / f'L205n{SIM_RES}TNG'),
    ('Lens Planes', LENSPLANE_DIR),
    ('Lux Output', LUX_OUT_DIR)
]:
    if not path.exists():
        print(f"{name:<20} (not found)")
        continue

    try:
        result = subprocess.run(['du', '-sh', str(path)], capture_output=True, text=True)
    except OSError as exc:
        # du itself could not be launched; report and move on to the next path.
        print(f"{name:<20} (du unavailable: {exc})")
        continue

    # The size is the first whitespace-separated token of du's output. Use it
    # whenever present (even on a non-zero exit) and fall back to the error
    # text instead of raising IndexError on empty output.
    fields = result.stdout.split()
    if fields:
        size = fields[0]
    else:
        stderr_lines = result.stderr.strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else f"exit code {result.returncode}"
        size = f"(du failed: {reason})"
    print(f"{name:<20} {size}")

In [None]:
# Summary
print("\n" + "=" * 60)
print("PIPELINE SUMMARY")
print("=" * 60)


def _percent(done, total):
    """Return done/total as a percentage, or 0.0 when total is zero."""
    return 100 * done / total if total else 0.0


def _count_dirs_with(root, pattern):
    """Count immediate subdirectories of root holding at least one file
    matching pattern; returns 0 when root does not exist."""
    if not root.exists():
        return 0
    return sum(1 for d in root.iterdir() if d.is_dir() and any(d.glob(pattern)))


# Results of the earlier progress cells are looked up by name so that this
# cell still runs when some of them were not executed in this kernel session.
notebook_vars = globals()
n_snapshots = len(SNAPSHOTS)

# Matches (only snapshots listed in SNAPSHOTS count towards the total)
matches_complete = len(set(notebook_vars.get('existing_snaps', ())) & set(SNAPSHOTS))
print(f"Matches:        {matches_complete}/{n_snapshots} ({_percent(matches_complete, n_snapshots):.0f}%)")

# Maps
if 'total_files' in notebook_vars and 'expected_files' in notebook_vars:
    print(f"2D Maps:        {total_files}/{expected_files} ({_percent(total_files, expected_files):.0f}%)")
else:
    print("2D Maps:        not checked (run the 2D maps cell first)")

# Profiles (new format)
n_profile_snaps = len(notebook_vars.get('profile_snaps', ()))
print(f"Profiles:       {n_profile_snaps}/{n_snapshots} ({_percent(n_profile_snaps, n_snapshots):.0f}%)")
print(f"  - Spherical:  {len(notebook_vars.get('spherical_snaps', ()))}")
print(f"  - BCM:        {len(notebook_vars.get('bcm_snaps', ()))}")

# Lens planes (count directories with files)
lp_dirs_complete = _count_dirs_with(LENSPLANE_DIR, 'snap*.bin')
print(f"Lens Planes:    {lp_dirs_complete} model directories")

# Ray-tracing (rt_dirs_complete is now bound even when the directory is missing)
rt_dirs_complete = _count_dirs_with(LUX_OUT_DIR, 'convergence_*.bin')
print(f"Ray-Tracing:    {rt_dirs_complete} model directories")

print(f"\nLast checked: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")