In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import sys
sys.path.insert(0, '/mnt/home/mlee1/hydro_replace2/scripts')

from illustris_python import groupcat

# Configuration
# Resolution tag substituted into the simulation directory names below.
SIM_RES = 2500

# Root directory for this project's derived products (match files live under it).
OUTPUT_DIR = Path('/mnt/home/mlee1/ceph/hydro_replace_fields')

# Output directories of the dark-matter-only ("_DM") run and its hydro counterpart.
_TNG_ROOT = '/mnt/sdceph/users/sgenel/IllustrisTNG'
_RUN_NAME = f'L205n{SIM_RES}TNG'
DMO_PATH = f'{_TNG_ROOT}/{_RUN_NAME}_DM/output'
HYDRO_PATH = f'{_TNG_ROOT}/{_RUN_NAME}/output'

# Mass unit: 10^10 Msun/h -> Msun/h (multiply catalog masses by this factor)
MASS_UNIT = 1e10

In [None]:
# Check which snapshots have matches
# Match files are stored per simulation as <OUTPUT_DIR>/L205n<res>TNG/matches/matches_snapNNN.npz
matches_dir = OUTPUT_DIR / f'L205n{SIM_RES}TNG' / 'matches'
match_files = sorted(matches_dir.glob('matches_snap*.npz'))

print(f"Found {len(match_files)} match files:")
for f in match_files:
    # The snapshot number is whatever follows 'snap' in the file stem.
    snap = int(f.stem.split('snap')[1])
    # np.load returns an open archive for .npz files; the context manager
    # closes it as soon as the pair count has been read, so file handles do
    # not pile up while looping over many snapshots.
    with np.load(f) as data:
        n_pairs = len(data['dmo_indices'])
    print(f"  Snap {snap:3d}: {n_pairs:6d} matched pairs")

In [None]:
# Analyze a specific snapshot
SNAP = 99  # Change to analyze different snapshot

# Load matches: the file name carries the zero-padded, three-digit snapshot number.
match_path = matches_dir / f'matches_snap{SNAP:03d}.npz'
matches = np.load(match_path)

# Index arrays of the matched pairs; they are used below to index the DMO
# and hydro halo catalogs respectively.
dmo_idx, hydro_idx = matches['dmo_indices'], matches['hydro_indices']

print(f"Snapshot {SNAP}:")
print(f"  Matched pairs: {len(dmo_idx)}")

In [None]:
# Load halo catalogs
# The same three fields are requested from both runs so that matched entries
# can be compared field by field.
halo_fields = ['Group_M_Crit200', 'Group_R_Crit200', 'GroupPos']
halo_dmo = groupcat.loadHalos(DMO_PATH, SNAP, fields=halo_fields)
halo_hydro = groupcat.loadHalos(HYDRO_PATH, SNAP, fields=halo_fields)

print(f"DMO halos: {halo_dmo['count']}")
print(f"Hydro halos: {halo_hydro['count']}")

# The matched fraction is quoted relative to the smaller of the two catalogs.
# Guard against an empty catalog so the cell reports 0% instead of raising
# ZeroDivisionError (same convention as the threshold summary further down).
n_smaller = min(halo_dmo['count'], halo_hydro['count'])
matched_pct = 100 * len(dmo_idx) / n_smaller if n_smaller > 0 else 0
print(f"Matched: {len(dmo_idx)} ({matched_pct:.1f}%)")

In [None]:
# Get masses and positions of matched halos
# Select the matched entries from each catalog and convert masses to Msun/h.
dmo_masses = halo_dmo['Group_M_Crit200'][dmo_idx] * MASS_UNIT
hydro_masses = halo_hydro['Group_M_Crit200'][hydro_idx] * MASS_UNIT

# Positions of the matched entries, kpc/h -> Mpc/h
dmo_pos = halo_dmo['GroupPos'][dmo_idx] / 1e3
hydro_pos = halo_hydro['GroupPos'][hydro_idx] / 1e3

# Mass ratio (hydro over DMO) for every matched pair
mass_ratio = hydro_masses / dmo_masses

# Position offset (with periodic boundary handling)
BOX_SIZE = 205.0  # Mpc/h
half_box = BOX_SIZE / 2
raw_diff = hydro_pos - dmo_pos
# Any component differing by more than half a box is shifted by one box
# length so the separation is measured across the periodic boundary.
wrapped_down = np.where(raw_diff > half_box, raw_diff - BOX_SIZE, raw_diff)
pos_diff = np.where(wrapped_down < -half_box, wrapped_down + BOX_SIZE, wrapped_down)
# Euclidean length of each per-pair displacement vector (Mpc/h)
pos_offset = np.linalg.norm(pos_diff, axis=1)

In [None]:
# Plot mass ratio distribution
# Three side-by-side diagnostics: mass-ratio histogram, position-offset
# histogram, and mass ratio as a function of DMO mass.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
ax_ratio, ax_offset, ax_trend = axes

# Medians are computed once and reused for both the marker line and its label.
median_ratio = np.median(mass_ratio)
median_offset_kpc = np.median(pos_offset) * 1000  # Mpc/h -> kpc/h

# Mass ratio histogram
ax_ratio.hist(mass_ratio, bins=50, range=(0.5, 1.5), edgecolor='black', alpha=0.7)
ax_ratio.axvline(1.0, color='red', linestyle='--', label='Perfect match')
ax_ratio.axvline(median_ratio, color='green', linestyle='-',
                 label=f'Median: {median_ratio:.3f}')
ax_ratio.set(xlabel='M_hydro / M_dmo', ylabel='Count', title='Mass Ratio Distribution')
ax_ratio.legend()

# Position offset histogram (offsets converted to kpc/h for display)
ax_offset.hist(pos_offset * 1000, bins=50, range=(0, 500), edgecolor='black', alpha=0.7)
ax_offset.axvline(median_offset_kpc, color='green', linestyle='-',
                  label=f'Median: {median_offset_kpc:.1f} kpc/h')
ax_offset.set(xlabel='Position offset [kpc/h]', ylabel='Count',
              title='Position Offset Distribution')
ax_offset.legend()

# Mass ratio vs DMO mass
ax_trend.scatter(np.log10(dmo_masses), mass_ratio, alpha=0.3, s=1)
ax_trend.axhline(1.0, color='red', linestyle='--')
ax_trend.set(xlabel='log10(M_dmo) [Msun/h]', ylabel='M_hydro / M_dmo',
             title='Mass Ratio vs DMO Mass', ylim=(0.5, 1.5))

plt.tight_layout()
plt.show()

In [None]:
# Matching completeness by mass bin
# Completeness = matched DMO halos / all DMO halos, per logarithmic mass bin.
all_dmo_masses = halo_dmo['Group_M_Crit200'] * MASS_UNIT
mass_bins = np.logspace(10, 15, 20)  # 20 bin edges from 1e10 to 1e15 Msun/h

total_counts, _ = np.histogram(all_dmo_masses, bins=mass_bins)
matched_counts, _ = np.histogram(dmo_masses, bins=mass_bins)

# np.where evaluates both branches eagerly, so dividing first and masking
# afterwards still triggers divide-by-zero warnings for empty bins. A masked
# np.divide only divides where the bin is populated and leaves 0 elsewhere.
completeness = np.divide(
    matched_counts,
    total_counts,
    out=np.zeros(len(total_counts), dtype=float),
    where=total_counts > 0,
)
# Geometric mean of the edges = bin centre in log space
bin_centers = np.sqrt(mass_bins[:-1] * mass_bins[1:])

fig, ax = plt.subplots(figsize=(10, 5))
ax.semilogx(bin_centers, completeness * 100, 'o-', markersize=8)
ax.axhline(100, color='gray', linestyle='--', alpha=0.5)
ax.set_xlabel('Halo Mass [Msun/h]')
ax.set_ylabel('Matching Completeness [%]')
ax.set_title(f'Halo Matching Completeness (Snap {SNAP})')
ax.set_ylim(0, 105)  # headroom above 100% for the threshold labels
ax.grid(True, alpha=0.3)

# Add vertical lines for mass thresholds
for log_m, label in [(12.5, '12.5'), (13.0, '13.0'), (13.5, '13.5'), (14.0, '14.0')]:
    ax.axvline(10**log_m, color='red', linestyle=':', alpha=0.5)
    ax.text(10**log_m, 102, f'$10^{{{label}}}$', ha='center', fontsize=9)

plt.tight_layout()
plt.show()

In [None]:
# Summary statistics
# Plain-text recap of the quantities computed in the cells above.
banner = "=" * 60
print(banner)
print(f"MATCHING SUMMARY - Snapshot {SNAP}")
print(banner)
print(f"Total DMO halos:       {halo_dmo['count']:,}")
print(f"Total Hydro halos:     {halo_hydro['count']:,}")
print(f"Matched pairs:         {len(dmo_idx):,}")
print()
print("Mass Ratio (Hydro/DMO):")
print(f"  Mean:   {np.mean(mass_ratio):.4f}")
print(f"  Median: {np.median(mass_ratio):.4f}")
print(f"  Std:    {np.std(mass_ratio):.4f}")
print()
# pos_offset is in Mpc/h; the factor 1000 converts to kpc/h for display.
print("Position Offset [kpc/h]:")
print(f"  Mean:   {np.mean(pos_offset)*1000:.2f}")
print(f"  Median: {np.median(pos_offset)*1000:.2f}")
print(f"  Max:    {np.max(pos_offset)*1000:.2f}")
print()
print("Halos above mass thresholds:")
for log_m in [12.5, 13.0, 13.5, 14.0]:
    threshold = 10**log_m
    # Counts are inclusive of the threshold, so the label says ">=".
    n_total = np.sum(all_dmo_masses >= threshold)
    n_matched = np.sum(dmo_masses >= threshold)
    # Report 0% rather than dividing by zero when no halo reaches the threshold.
    pct = 100 * n_matched / n_total if n_total > 0 else 0
    print(f"  M >= 10^{log_m}: {n_matched:5d} / {n_total:5d} ({pct:.1f}%)")