## Import Libraries

In [1]:
# Add package to path and import modules (with reload for updated analysis code)

# --- Standard library ---
import glob
import importlib
import math
import os
import re
import sys
from pathlib import Path

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Put <project_root>/src on sys.path BEFORE importing galform_analysis, so the
# imports below also resolve on a fresh kernel (Restart & Run All) rather than
# only when the path is already set from an earlier run.
# The project root is taken to be the parent of the current working directory.
project_root = Path.cwd().parent
src_dir = str(project_root / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# --- Project modules ---
# Reload hmf first, then convergence to pick up new signatures
from galform_analysis.analysis import hmf as hmf_mod
importlib.reload(hmf_mod)
from galform_analysis.analysis import plot_massfunction_convergence as convergence_mod
importlib.reload(convergence_mod)
# Import the plotting entry points only after the reload so the names bound
# here come from the freshly reloaded module.
from galform_analysis.analysis.plot_massfunction_convergence import plot_hmf_convergence_by_subvolumes, plot_hmf_convergence_by_redshift
from galform_analysis.config import get_base_dir, get_snapshot_redshift

# Notebook-wide matplotlib defaults
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['font.size'] = 11

# Base directory handed to every analysis call in the cells below
base_dir = get_base_dir()


## Filter to successfully completed subvolume runs

In [2]:
# Scan for completed galaxy files
# (the aggregation module is reloaded first so edits to it are picked up)
from galform_analysis.analysis import aggregation as agg_mod
importlib.reload(agg_mod)
from galform_analysis.analysis.aggregation import completed_galaxies

df_completed = completed_galaxies(str(base_dir))

# Overall counts; `completed` is used as a boolean flag column here
completed_flags = df_completed['completed']
print(f"\nTotal files found: {len(df_completed)}")
print(f"Completed files: {completed_flags.sum()}")
print(f"Incomplete files: {(~completed_flags).sum()}")

# Per-snapshot breakdown: completed count vs. total rows for each `iz`
print("\nCompleted files by snapshot:")
summary = (
    df_completed
    .groupby('iz')['completed']
    .agg(['sum', 'count'])
    .rename(columns={'sum': 'completed', 'count': 'total'})
)
print(summary)

Completed iz iz100 checked 839 subvolumes. Completed: 337/500
Completed iz iz105 checked 839 subvolumes. Completed: 354/500
Completed iz iz105 checked 839 subvolumes. Completed: 354/500
Completed iz iz120 checked 900 subvolumes. Completed: 271/500
Completed iz iz120 checked 900 subvolumes. Completed: 271/500
Completed iz iz142 checked 900 subvolumes. Completed: 498/566
Completed iz iz142 checked 900 subvolumes. Completed: 498/566
Completed iz iz155 checked 1000 subvolumes. Completed: 672/800
Completed iz iz155 checked 1000 subvolumes. Completed: 672/800
Completed iz iz176 checked 1000 subvolumes. Completed: 791/791
Completed iz iz176 checked 1000 subvolumes. Completed: 791/791
Completed iz iz207 checked 1000 subvolumes. Completed: 800/800
Completed iz iz207 checked 1000 subvolumes. Completed: 800/800
Completed iz iz271 checked 1000 subvolumes. Completed: 800/800
Completed iz iz271 checked 1000 subvolumes. Completed: 800/800
Completed iz iz82 checked 839 subvolumes. Completed: 448/500



## HMF convergence as we average over more subvolumes

In [7]:
# Sample sizes (number of subvolumes) to test, and iterations per sample size
n_subvolumes = [1, 2, 3, 4, 5]  # larger sizes currently disabled: 8, 10, 20, 50
n_iterations = 1

# Re-print the per-snapshot completion table for reference while choosing snapshots
print(summary)

# Snapshot ids to plot, passed as strings.
# NOTE(review): an earlier comment here asked for integers, but the values are
# strings — confirm which form plot_hmf_convergence_by_subvolumes expects.
iz_snapshots = ["100", "82", "105"]  # currently disabled: "176", "207", "271", "120", "155"

# Binning, axis limits and output settings forwarded as keyword arguments
subvolume_plot_options = dict(
    bins=np.arange(9.0, 15.0, 0.2),
    outdir='results',
    do_save=True,
    xlim=(9.0,14.5),
    ylim=(1e-5,2.0),
    panel_size=(14,10),
)

# Panel mode using explicit subvolume sets - now using only completed files
hmf_ivol_panels = plot_hmf_convergence_by_subvolumes(
    base_dir,
    df_completed,
    iz_snapshots,
    n_subvolumes,
    n_iterations,
    **subvolume_plot_options,
)


       completed  total
iz                     
iz100        337    500
iz105        354    500
iz120        271    500
iz142        498    566
iz155        672    800
iz176        791    791
iz207        800    800
iz271        800    800
iz82         448    500
Testing convergence with 5 sample sizes: [1, 2, 3, 4, 5]
Averaging over 1 iteration(s) per sample size

=== Computing with n=1 subvolume(s) ===
  iz100: done (1 ivols × 1 iterations)
  iz82: done (1 ivols × 1 iterations)
  iz82: done (1 ivols × 1 iterations)
  iz105: done (1 ivols × 1 iterations)
  iz105: done (1 ivols × 1 iterations)

=== Computing with n=2 subvolume(s) ===
  iz100: done (1 ivols × 1 iterations)

=== Computing with n=2 subvolume(s) ===
  iz100: 

KeyboardInterrupt: 

## HMF convergence per redshift as we add more subvolumes

In [None]:
# Per-redshift HMF convergence - test different subvolume counts per redshift
banner_rule = "=" * 60
print(banner_rule)
print("PER-REDSHIFT HMF CONVERGENCE")
print(banner_rule)

# NOTE(review): snapshot ids are integers here, while the by-subvolumes cell
# passes them as strings — confirm plot_hmf_convergence_by_redshift accepts ints.
iz_snapshots_for_redshift = [82, 100, 105, 176, 207, 271]
n_subvolumes_per_z = [1, 2, 5, 10, 20]

# Sampling, binning, axis limits and output settings forwarded as keyword arguments
redshift_plot_options = dict(
    n_subvolumes=n_subvolumes_per_z,
    n_iterations=1,
    bins=np.arange(9.0, 15.0, 0.2),
    outdir='results',
    do_save=True,
    xlim=(9.0, 14.5),
    ylim=(1e-5, 2.0),
    panel_size=(10, 7),
)

hmf_by_z = plot_hmf_convergence_by_redshift(
    base_dir,
    df_completed,
    iz_snapshots_for_redshift,
    **redshift_plot_options,
)
