## 1. Import Libraries

In [6]:
# Add package to path
import sys
import numpy as np
import matplotlib.pyplot as plt
from galform_analysis.config import get_base_dir, get_snapshot_redshift
from galform_analysis.analysis import aggregate_snapshot
from pathlib import Path
project_root = Path.cwd().parent
if str(project_root / 'src') not in sys.path:
    sys.path.insert(0, str(project_root / 'src'))



plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

## 2. Define Helper Function

Create a function to compute efficiency statistics in mass bins.

In [7]:
def compute_efficiency_vs_mass(agg_data, mass_bins):
    """Compute median efficiency in halo mass bins."""
    mstar = agg_data['mstar']
    mhalo = agg_data['mhalo']
    
    # Filter for valid galaxies
    sel = (mstar > 0) & (mhalo > 0) & np.isfinite(mstar) & np.isfinite(mhalo)
    mstar, mhalo = mstar[sel], mhalo[sel]
    
    if len(mstar) == 0:
        return None
    
    # Compute efficiency
    eta = mstar / mhalo
    logMh = np.log10(mhalo)
    
    # Bin and compute statistics
    centers = 0.5 * (mass_bins[1:] + mass_bins[:-1])
    eta_med = np.full_like(centers, np.nan)
    eta_p16 = np.full_like(centers, np.nan)
    eta_p84 = np.full_like(centers, np.nan)
    
    for i in range(len(centers)):
        mask = (logMh >= mass_bins[i]) & (logMh < mass_bins[i+1])
        if np.any(mask):
            vals = eta[mask]
            eta_med[i] = np.median(vals)
            eta_p16[i] = np.percentile(vals, 16)
            eta_p84[i] = np.percentile(vals, 84)
    
    return {
        'centers': centers,
        'eta_med': eta_med,
        'eta_p16': eta_p16,
        'eta_p84': eta_p84,
        'z': agg_data.get('z')
    }

print("âœ“ Helper function defined")

âœ“ Helper function defined


## 3. Configure Analysis Parameters

In [8]:
# The BASE_DIR is already configured in config.py
base_dir = get_base_dir()

# Snapshots to analyze - using available redshifts
snapshots = ['iz82', 'iz100', 'iz120', 'iz142', 'iz176']

# Halo mass bins (log10 M_sun)
mass_bins = np.arange(10.0, 15.5, 0.2)

print(f"Base directory: {base_dir}")
print(f"Snapshots: {snapshots}")
print(f"Mass bins: {len(mass_bins)-1} bins from {mass_bins[0]} to {mass_bins[-1]}")


Base directory: /cosma5/data/durham/dc-hick2/Galform_Out/L800/gp14
Snapshots: ['iz82', 'iz100', 'iz120', 'iz142', 'iz176']
Mass bins: 27 bins from 10.0 to 15.39999999999998


## 4. Process All Snapshots

Loop through snapshots and compute efficiency for each.

In [None]:
results = []

for snapshot in snapshots:
    iz_path = base_dir / snapshot
    
    if not iz_path.exists():
        print(f"âš  Skipping {snapshot} - path not found")
        continue
    
    z = get_snapshot_redshift(snapshot)
    label = f"z={z:.2f}" if z is not None else snapshot
    print(f"\nðŸ“Š Processing {snapshot} ({label})...")
    
    # Aggregate data from all subvolumes
    agg = aggregate_snapshot(str(iz_path))
    if agg is None:
        print("  âœ— No data found")
        continue
    
    print(f"  âœ“ Loaded {len(agg['mstar'])} galaxies at zâ‰ˆ{agg.get('z', '?'):.2f}")
    
    # Compute efficiency
    eff = compute_efficiency_vs_mass(agg, mass_bins)
    if eff is None:
        print("  âœ— Could not compute efficiency")
        continue
    
    results.append(eff)
    print(f"  âœ“ Efficiency computed for {np.sum(np.isfinite(eff['eta_med']))} mass bins")

print(f"\nâœ“ Successfully processed {len(results)} snapshots")


ðŸ“Š Processing iz82 (z=6.01)...
  âœ“ Loaded 34976233 galaxies at zâ‰ˆ6.01
  âœ“ Loaded 34976233 galaxies at zâ‰ˆ6.01
  âœ“ Efficiency computed for 13 mass bins

ðŸ“Š Processing iz100 (z=4.30)...
  âœ“ Efficiency computed for 13 mass bins

ðŸ“Š Processing iz100 (z=4.30)...
  âœ“ Loaded 49275829 galaxies at zâ‰ˆ4.30
  âœ“ Loaded 49275829 galaxies at zâ‰ˆ4.30
  âœ“ Efficiency computed for 16 mass bins

ðŸ“Š Processing iz120 (z=3.05)...
  âœ“ Efficiency computed for 16 mass bins

ðŸ“Š Processing iz120 (z=3.05)...
  âœ“ Loaded 52612390 galaxies at zâ‰ˆ3.05
  âœ“ Loaded 52612390 galaxies at zâ‰ˆ3.05
  âœ“ Efficiency computed for 18 mass bins

ðŸ“Š Processing iz142 (z=2.00)...
  âœ“ Efficiency computed for 18 mass bins

ðŸ“Š Processing iz142 (z=2.00)...
  âœ“ Loaded 104683617 galaxies at zâ‰ˆ2.00
  âœ“ Loaded 104683617 galaxies at zâ‰ˆ2.00
  âœ“ Efficiency computed for 21 mass bins

ðŸ“Š Processing iz176 (z=1.01)...
  âœ“ Efficiency computed for 21 mass bins

ðŸ“Š Processing iz176 (z=1.01)

## 5. Plot Galaxy Formation Efficiency

Create a beautiful plot showing how efficiency varies with halo mass across redshifts.

In [1]:
import os
import pandas as pd

# Save efficiency plot and data to correct subfolders
plot_dir = 'plots/efficiency'
os.makedirs(plot_dir, exist_ok=True)
plt.savefig(os.path.join(plot_dir, 'galaxy_efficiency.png'), dpi=150, bbox_inches='tight')

# Save efficiency data as CSV for each snapshot
data_dir = 'plots/_plots_data/efficiency'
os.makedirs(data_dir, exist_ok=True)
for i, (snap, result) in enumerate(zip(snapshots, results)):
    if result is not None:
        df_out = pd.DataFrame({
            'log_Mhalo': result['centers'],
            'eta_median': result['eta_med'],
            'eta_p16': result['eta_p16'],
            'eta_p84': result['eta_p84']
        })
        df_out.to_csv(os.path.join(data_dir, f'galaxy_efficiency_{snap}.csv'), index=False)
print(f"\nSaved efficiency data for {len(results)} snapshots to {data_dir}")

NameError: name 'plt' is not defined

## 6. Find Peak Efficiency

Let's identify where star formation is most efficient for each redshift.