In [None]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
from pathlib import Path

# Configuration
# Resolution tag; interpolated into the 'L205n{SIM_RES}TNG' directory name below.
SIM_RES = 2500
# Root directory that holds one sub-directory per simulation / snapshot.
OUTPUT_DIR = Path('/mnt/home/mlee1/ceph/hydro_replace_fields')
# Snapshot numbers to inspect, in ascending order.
SNAPSHOTS = [
    29, 31, 33, 35, 38, 41, 43,
    46, 49, 52, 56, 59, 63, 67,
    71, 76, 80, 85, 90, 96, 99,
]

In [None]:
# Inventory: for every configured snapshot, report whether its profiles.h5 exists
print("Profile File Inventory:")
print("=" * 60)

# All snapshots live under the same simulation directory, so build it once.
sim_dir = OUTPUT_DIR / f'L205n{SIM_RES}TNG'

profile_files = []  # snapshot numbers (not paths) for which a profile file was found
for snap in SNAPSHOTS:
    profile_path = sim_dir / f'snap{snap:03d}' / 'profiles.h5'

    if not profile_path.exists():
        print(f"Snap {snap:3d}: ✗")
        continue

    # Open the file only long enough to read the metadata shown in the summary line.
    with h5py.File(profile_path, 'r') as f:
        n_halos = f.attrs.get('n_halos', 0)
        keys = list(f.keys())

    # Show at most the first five top-level keys to keep the line short.
    first_keys = ', '.join(keys[:5])
    print(f"Snap {snap:3d}: ✓ ({n_halos} halos, keys: {first_keys}...)")
    profile_files.append(snap)

print(f"\nTotal: {len(profile_files)}/{len(SNAPSHOTS)} snapshots have profiles")

In [None]:
# Load profile data for a specific snapshot
SNAP = 99  # Change as needed

profile_path = OUTPUT_DIR / f'L205n{SIM_RES}TNG' / f'snap{SNAP:03d}' / 'profiles.h5'

if profile_path.exists():
    with h5py.File(profile_path, 'r') as f:
        print(f"Profile file: {profile_path}")

        # File-level attributes, one per line
        print(f"\nAttributes:")
        for attr_name, attr_val in f.attrs.items():
            print(f"  {attr_name}: {attr_val}")

        # Top-level entries with their shape and dtype
        print(f"\nDatasets:")
        for ds_name, ds in f.items():
            print(f"  {ds_name}: {ds.shape} {ds.dtype}")
else:
    print(f"No profile file for snapshot {SNAP}!")

In [None]:
# Load all profile data
# Label used in `profiles` -> dataset name in the file, in the order they are
# inserted (simulation variants first, then the BCM models).
profile_datasets = {
    'dmo': 'dmo_profiles',
    'hydro': 'hydro_profiles',
    'replace': 'replace_profiles',
    'bcm_arico20': 'bcm_arico20_profiles',
    'bcm_schneider19': 'bcm_schneider19_profiles',
    'bcm_schneider25': 'bcm_schneider25_profiles',
}

with h5py.File(profile_path, 'r') as f:
    # Radial bins: take the stored ones when present, otherwise the default bins
    r_bins = f['radial_bins'][:] if 'radial_bins' in f else np.logspace(-2, 1, 31)

    # Halo info
    halo_masses = f['halo_masses'][:]
    halo_positions = f['halo_positions'][:]
    halo_radii = f['halo_radii'][:]

    # Profile arrays; datasets that are absent from the file are simply skipped
    profiles = {
        label: f[dataset][:]
        for label, dataset in profile_datasets.items()
        if dataset in f
    }

print(f"Loaded {len(halo_masses)} halos")
print(f"Radial bins: {len(r_bins)-1} bins from {r_bins[0]:.3f} to {r_bins[-1]:.1f} r/R200")
print(f"\nProfile arrays:")
for name, p in profiles.items():
    # A halo "has data" when at least one of its radial bins is positive
    has_data = np.any(p > 0, axis=1)
    n_nonzero = np.sum(has_data)
    print(f"  {name}: shape {p.shape}, {n_nonzero}/{len(p)} halos with data")

In [None]:
# Profile completeness analysis
# Geometric mean of adjacent bin edges, used as the x position of each radial bin
r_centers = np.sqrt(r_bins[:-1] * r_bins[1:])

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Completeness by radial bin: percentage of halos with a positive value in that bin
ax = axes[0]
for name, p in profiles.items():
    completeness = np.sum(p > 0, axis=0) / len(p) * 100
    ax.semilogx(r_centers, completeness, '-o', label=name, markersize=4, alpha=0.7)

ax.set_xlabel('r / R200')
ax.set_ylabel('Completeness [%]')
ax.set_title('Profile Completeness by Radial Bin')
ax.legend()
# Unlabelled 100% guide line (intentionally not part of the legend)
ax.axhline(100, color='gray', linestyle='--', alpha=0.5)
ax.set_ylim(0, 105)
ax.grid(True, alpha=0.3)

# Distribution of non-zero bins per halo
ax = axes[1]
for name, p in profiles.items():
    n_nonzero_bins = np.sum(p > 0, axis=1)
    # Integer-centred histogram bins covering 0 .. number of radial bins
    ax.hist(n_nonzero_bins, bins=np.arange(0, len(r_centers)+2)-0.5,
            alpha=0.5, label=name, density=True)

# Draw the labelled reference line BEFORE building the legend: legend() only
# collects artists that already exist, so adding the line afterwards left the
# 'All bins' entry out of the legend.
ax.axvline(len(r_centers), color='red', linestyle='--', label='All bins')

ax.set_xlabel('Number of non-zero radial bins')
ax.set_ylabel('Density')
ax.set_title('Profile Completeness per Halo')
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Stack profiles by mass bin
# Bin edges in halo mass (same units as `halo_masses`); consecutive edges form
# one bin each, and `mass_labels` names those bins in log10(M).
# The last edge is infinite so the final bin really is open-ended, matching its
# '>14.0' label; a finite 10**15 edge silently dropped the most massive halos.
mass_bins = [10**12.5, 10**13.0, 10**13.5, 10**14.0, np.inf]
mass_labels = ['12.5-13.0', '13.0-13.5', '13.5-14.0', '>14.0']

def stack_profiles(profile_array, masses, mass_lo, mass_hi):
    """Median-stack the profiles of halos with mass_lo <= mass < mass_hi.

    Parameters
    ----------
    profile_array : 2-D array indexed as [halo, radial bin].
    masses : array of halo masses, one entry per row of ``profile_array``.
    mass_lo, mass_hi : lower (inclusive) and upper (exclusive) mass limits.

    Returns
    -------
    (stacked, count)
        * ``(None, 0)`` when no halo falls inside the mass range.
        * When at least one selected halo has every bin > 0: the per-bin
          median over those complete halos, and the number of complete halos.
        * Otherwise: for each bin, the median of the positive values only
          (0 where a bin has none), and the number of halos in the mass range.
    """
    in_range = (masses >= mass_lo) & (masses < mass_hi)
    n_in_range = np.sum(in_range)
    if n_in_range == 0:
        return None, 0

    candidates = profile_array[in_range]

    # Preferred path: restrict the stack to halos whose profile has no empty bin.
    is_complete = np.all(candidates > 0, axis=1)
    n_complete = np.sum(is_complete)
    if n_complete > 0:
        return np.median(candidates[is_complete], axis=0), n_complete

    # Fallback: no complete halo, so take each bin's median over positive entries.
    def _positive_median(column):
        positive = column[column > 0]
        return np.median(positive) if positive.size > 0 else 0.0

    per_bin = [_positive_median(candidates[:, j]) for j in range(candidates.shape[1])]
    return np.array(per_bin, dtype=float), n_in_range

# Stack profiles: one panel per mass bin, one line per profile type
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

bin_specs = zip(mass_bins[:-1], mass_bins[1:], mass_labels)
for i, (mass_lo, mass_hi, label) in enumerate(bin_specs):
    ax = axes[i]

    # Total number of halos in this mass bin (shown in the panel title)
    in_bin = (halo_masses >= mass_lo) & (halo_masses < mass_hi)
    n_halos = np.sum(in_bin)

    for name, p in profiles.items():
        stacked, n = stack_profiles(p, halo_masses, mass_lo, mass_hi)
        # Skip profile types with no halos or an all-zero stack in this bin
        if stacked is None or not np.any(stacked > 0):
            continue
        # `n` is the halo count reported by stack_profiles for this stack
        ax.loglog(r_centers, stacked, '-', label=f'{name} ({n})', alpha=0.8)

    ax.set_xlabel('r / R200')
    ax.set_ylabel('Density [Msun/Mpc^3]')
    ax.set_title(f'log10(M) = {label} ({n_halos} halos)')
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3)

plt.suptitle(f'Stacked Density Profiles - Snap {SNAP}', fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
# Ratio profiles (Hydro/DMO, Replace/DMO, BCM/DMO)
# One panel per mass bin; each line is a stacked profile divided by the stacked
# DMO profile of the same mass bin.
if 'dmo' in profiles and len(profiles) > 1:
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()

    for i, (mass_lo, mass_hi, label) in enumerate(zip(mass_bins[:-1], mass_bins[1:], mass_labels)):
        ax = axes[i]

        dmo_stacked, n_dmo = stack_profiles(profiles['dmo'], halo_masses, mass_lo, mass_hi)

        if dmo_stacked is not None and np.any(dmo_stacked > 0):
            # Radial bins where the DMO stack is positive, i.e. where a ratio is defined
            has_dmo = dmo_stacked > 0

            for name, p in profiles.items():
                if name == 'dmo':
                    continue

                stacked, n = stack_profiles(p, halo_masses, mass_lo, mass_hi)
                if stacked is not None and np.any(stacked > 0):
                    # Bins without DMO data are left as NaN so the line shows a
                    # gap there. Filling them with 1.0 would be indistinguishable
                    # from genuine agreement with DMO.
                    ratio = np.full(stacked.shape, np.nan)
                    ratio[has_dmo] = stacked[has_dmo] / dmo_stacked[has_dmo]
                    ax.semilogx(r_centers, ratio, '-', label=name, alpha=0.8)

        # Reference line at ratio = 1 (identical to DMO)
        ax.axhline(1.0, color='gray', linestyle='--', alpha=0.5)
        ax.set_xlabel('r / R200')
        ax.set_ylabel('Ratio to DMO')
        ax.set_title(f'log10(M) = {label}')
        ax.legend(fontsize=8)
        ax.set_ylim(0.5, 2.0)
        ax.grid(True, alpha=0.3)

    plt.suptitle(f'Profile Ratios to DMO - Snap {SNAP}', fontsize=14)
    plt.tight_layout()
    plt.show()
else:
    print("Need DMO profiles to compute ratios")

In [None]:
# Individual halo examples
# Find halos with complete profiles
if 'dmo' in profiles:
    # "Complete" = every radial bin of the DMO profile is positive
    dmo_complete = np.all(profiles['dmo'] > 0, axis=1)
    complete_idx = np.flatnonzero(dmo_complete)

    print(f"Halos with complete DMO profiles: {len(complete_idx)}")

    if len(complete_idx) < 4:
        print("Not enough halos with complete profiles")
    else:
        # Pick 4 random halos with complete profiles (fixed seed for repeatability)
        np.random.seed(42)
        sample_idx = np.random.choice(complete_idx, 4, replace=False)

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        axes = axes.flatten()

        for i, idx in enumerate(sample_idx):
            ax = axes[i]

            for name, p in profiles.items():
                prof = p[idx]
                # Skip profile types that are entirely empty for this halo
                if not np.any(prof > 0):
                    continue
                ax.loglog(r_centers, prof, '-', label=name, alpha=0.8)

            log_mass = np.log10(halo_masses[idx])
            ax.set_xlabel('r / R200')
            ax.set_ylabel('Density [Msun/Mpc^3]')
            ax.set_title(f'Halo {idx}: log10(M) = {log_mass:.2f}')
            ax.legend(fontsize=8)
            ax.grid(True, alpha=0.3)

        plt.suptitle(f'Individual Halo Profiles - Snap {SNAP}', fontsize=14)
        plt.tight_layout()
        plt.show()

In [None]:
# Summary statistics
rule = "=" * 70
print(rule)
print(f"PROFILE SUMMARY - Snapshot {SNAP}")
print(rule)

print(f"\nTotal halos: {len(halo_masses)}")
log_mass_min = np.log10(halo_masses.min())
log_mass_max = np.log10(halo_masses.max())
print(f"Mass range: {log_mass_min:.2f} - {log_mass_max:.2f}")
print(f"Radial bins: {len(r_centers)} from {r_centers[0]:.3f} to {r_centers[-1]:.2f} r/R200")

# Per profile type: halos with all bins positive / some bins positive / no bin positive
print("\nProfile completeness:")
for name, p in profiles.items():
    positive = p > 0
    has_any = np.any(positive, axis=1)
    n_complete = np.sum(np.all(positive, axis=1))
    n_partial = np.sum(has_any) - n_complete
    n_empty = np.sum(~has_any)
    print(f"  {name}: {n_complete} complete, {n_partial} partial, {n_empty} empty")

# Halo counts in each mass bin (lower edge inclusive, upper edge exclusive)
print("\nHalos by mass bin:")
for mass_lo, mass_hi, label in zip(mass_bins[:-1], mass_bins[1:], mass_labels):
    in_bin = (halo_masses >= mass_lo) & (halo_masses < mass_hi)
    n = np.sum(in_bin)
    print(f"  log10(M) = {label}: {n}")