In [1]:
import h5py
import numpy as np
from synthesizer.conversions import lnu_to_absolute_mag
import pandas as pd
import unyt
from unyt import erg, Hz, s
import cmasher as cmr
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import sys
sys.path.append("/home/jovyan/camels/proj1/")
from setup_params import get_photometry, get_luminosity_function, get_colour_distribution, get_safe_name, get_colour_dir_name, get_magnitude_mask
from variables_config import get_config

In [5]:
def process_data(input_dir, redshift_values, uvlf_limits, n_bins_lf, lf_data_dir, 
                colour_limits, n_bins_colour, colour_data_dir, category, bands, 
                colour_pairs=None, mag_limits=None, simulation=None, dataset="CV"):
    """Process data for any combination of bands and color pairs"""
    # List photometry files in the input directory
    photo_files = [f for f in os.listdir(input_dir) if f.endswith('_photometry.hdf5')]
    
    for filename in photo_files:
        # Remove the model prefix if it exists and '_photometry.hdf5'
        sim_name = filename.replace(f'{simulation}_', '').replace('_photometry.hdf5', '')
        print(f"\nProcessing {sim_name}")
        
        for snap, redshift_info in redshift_values.items():
            print(f"  Processing z={redshift_info['label']}")
            
            try:
                # Process bands (UVLF)
                if bands is not None:
                    filters_to_process = [bands] if isinstance(bands, str) else bands
                    
                    photo = get_photometry(
                        sim_name=sim_name,  # Now just 'CV_8' instead of 'IllustrisTNG_CV_8'
                        spec_type="intrinsic" if category == "intrinsic" else "attenuated",
                        snap=snap,
                        sps="BC03",
                        model=simulation,  # This will add the model name in get_photometry
                        filters=filters_to_process,
                        photo_dir=input_dir
                    )
                    
                    for band in filters_to_process:
                        phi, phi_sigma, hist, bin_lims = get_luminosity_function(
                            photo,
                            band,
                            *uvlf_limits,
                            n_bins=n_bins_lf
                        )
                        
                        # Save UVLF
                        bin_centers = 0.5 * (bin_lims[1:] + bin_lims[:-1])
                        uvlf_df = pd.DataFrame({
                            'magnitude': bin_centers,
                            'phi': phi,
                            'phi_sigma': phi_sigma,
                            'hist': hist
                        })
                        
                        filter_system = get_safe_name(band, filter_system_only=True)
                        spec_type = "intrinsic" if category == "intrinsic" else "attenuated"
                        
                        output_dir = os.path.join(lf_data_dir[category][filter_system], 
                                                get_safe_name(redshift_info['label']))
                        os.makedirs(output_dir, exist_ok=True)
                        
                        uvlf_filename = f"UVLF_{sim_name}_{get_safe_name(band)}_{get_safe_name(redshift_info['label'])}_{spec_type}.txt"
                        uvlf_df.to_csv(os.path.join(output_dir, uvlf_filename), 
                                     index=False, sep='\t')
                
                # Process colours
                if colour_pairs:
                    all_bands = []
                    for band1, band2 in colour_pairs:
                        all_bands.extend([band1, band2])
                    
                    photo = get_photometry(
                        sim_name=sim_name,
                        spec_type="intrinsic" if category == "intrinsic" else "attenuated",
                        snap=snap,
                        sps="BC03",
                        model=simulation,
                        filters=all_bands,
                        photo_dir=input_dir
                    )
                    
                    for band1, band2 in colour_pairs:
                        if band1 in photo and band2 in photo:
                            mask = get_magnitude_mask(photo, [band1, band2], mag_limits)
                            
                            colour_dist, bin_lims = get_colour_distribution(
                                photo,
                                band1,
                                band2,
                                *colour_limits,
                                n_bins=n_bins_colour,
                                mask=mask
                            )
                            
                            bin_centers = 0.5 * (bin_lims[1:] + bin_lims[:-1])
                            colour_df = pd.DataFrame({
                                'colour': bin_centers,
                                'distribution': colour_dist
                            })
                            
                            filter_system = get_colour_dir_name(band1, band2)
                            spec_type = "intrinsic" if category == "intrinsic" else "attenuated"
                            
                            output_dir = os.path.join(colour_data_dir[category][filter_system], 
                                                    get_safe_name(redshift_info['label']))
                            os.makedirs(output_dir, exist_ok=True)
                            
                            colour_filename = f"Colour_{sim_name}_{filter_system}_{get_safe_name(redshift_info['label'])}_{spec_type}.txt"
                            colour_df.to_csv(os.path.join(output_dir, colour_filename),
                                           index=False, sep='\t')
            
            except Exception as e:
                print(f"Error processing {sim_name} at z={redshift_info['label']}: {e}")
                continue
            
            print(f"    Completed processing for z={redshift_info['label']}")



In [6]:
def process_all_data(input_dir, redshift_values, uvlf_limits, n_bins_lf, lf_data_dir, 
                    colour_limits, n_bins_colour, colour_data_dir, mag_limits, 
                    simulation=None, dataset="CV"):
    """Process all combinations of data types and bands"""
    
    config = get_config(dataset=dataset, simulation=simulation)
    band_combinations = config["filters"]
    colour_combinations = config["colour_pairs"]

    # Process each category
    for category in ["attenuated", "intrinsic"]:
        # Process each band combination
        print(f"\nProcessing {category} bands: {band_combinations[category]}")
        process_data(
            input_dir=input_dir,
            redshift_values=redshift_values,
            uvlf_limits=uvlf_limits,
            n_bins_lf=n_bins_lf,
            lf_data_dir=lf_data_dir,
            colour_limits=colour_limits,
            n_bins_colour=n_bins_colour,
            colour_data_dir=colour_data_dir,
            category=category,
            bands=band_combinations[category],
            colour_pairs=None,
            mag_limits=mag_limits,
            simulation=simulation,
            dataset=dataset
        )

        # Process colour pairs
        if colour_combinations:
            print(f"\nProcessing {category} colours: {colour_combinations}")
            process_data(
                input_dir=input_dir,
                redshift_values=redshift_values,
                uvlf_limits=uvlf_limits,
                n_bins_lf=n_bins_lf,
                lf_data_dir=lf_data_dir,
                colour_limits=colour_limits,

                n_bins_colour=n_bins_colour,
                colour_data_dir=colour_data_dir,
                category=category,
                bands=None,
                colour_pairs=colour_combinations,
                mag_limits=mag_limits,
                simulation=simulation,
                dataset=dataset
            )


In [7]:

if __name__ == "__main__":
    simulations_to_run = ["IllustrisTNG"]#, "SIMBA", "Astrid"]
    datasets = ["CV"]#, "1P"]
    
    for simulation in simulations_to_run:
        for dataset in datasets:
            print(f"\nProcessing {simulation} - {dataset} dataset")
            config = get_config(dataset=dataset, simulation=simulation)
            
            process_all_data(
                input_dir=config["input_dir"],
                redshift_values=config["redshift_values"],
                uvlf_limits=config["uvlf_limits"],
                n_bins_lf=config["n_bins_lf"],
                lf_data_dir=config["lf_data_dir"],
                colour_limits=config["colour_limits"],
                n_bins_colour=config["n_bins_colour"],
                colour_data_dir=config["colour_data_dir"],
                mag_limits=config["mag_limits"],
                simulation=simulation,
                dataset=dataset
            )


Processing IllustrisTNG - CV dataset

Processing attenuated bands: ['GALEX FUV', 'GALEX NUV']

Processing CV_26
  Processing z=z2.0
    Completed processing for z=z2.0
  Processing z=z1.5
    Completed processing for z=z1.5
  Processing z=z1.0
    Completed processing for z=z1.0
  Processing z=z0.1
    Completed processing for z=z0.1

Processing CV_4
  Processing z=z2.0
    Completed processing for z=z2.0
  Processing z=z1.5
    Completed processing for z=z1.5
  Processing z=z1.0
    Completed processing for z=z1.0
  Processing z=z0.1
    Completed processing for z=z0.1

Processing CV_16
  Processing z=z2.0
    Completed processing for z=z2.0
  Processing z=z1.5
    Completed processing for z=z1.5
  Processing z=z1.0
    Completed processing for z=z1.0
  Processing z=z0.1
    Completed processing for z=z0.1

Processing CV_20
  Processing z=z2.0
    Completed processing for z=z2.0
  Processing z=z1.5
    Completed processing for z=z1.5
  Processing z=z1.0
    Completed processing for z