In [1]:
import numpy as np
import glob
import emcee as mc
import h5py
from gaussianLikelihoods import logp_powerLawPeak
from tqdm.auto import tqdm

In [2]:
#AI-generated new version
def run_mcmc_estimation(sample_dict_path, injection_dict_path, output_path,
                       n_walkers=32, n_steps=20000, n_threads=16):
    """
    Run, or resume, an emcee ensemble MCMC over ``logp_powerLawPeak``.

    Checkpoints are written to ``{output_path}_rNN.npy`` where ``NN`` is a
    two-digit run index. If files matching ``{output_path}_r??.npy`` already
    exist, the walkers restart from the last non-zero step of the
    highest-sorted file and a new file with the next index is written.
    Otherwise fresh walkers are drawn with ``initialize_new_walkers``.

    Parameters:
    -----------
    sample_dict_path : str
        Path to the sample dictionary pickle file
    injection_dict_path : str
        Path to the injection dictionary pickle file
    output_path : str
        Path prefix for the saved chain; ``_rNN.npy`` is appended to it
    n_walkers : int, optional
        Number of walkers for MCMC (default: 32)
    n_steps : int, optional
        Number of MCMC steps (default: 20000)
    n_threads : int, optional
        Number of threads to use for parallel processing (default: 16)

    Returns:
    --------
    str
        Path to the saved MCMC chain file

    Raises:
    -------
    ValueError
        If the checkpoint being resumed does not have shape
        ``(n_walkers, n_saved_steps, 12)`` or contains no completed steps.
    """
    # Parameter space dimension; must equal the number of columns returned by
    # initialize_new_walkers.
    dim = 12

    # Prior bounds dictionary (passed through unchanged to logp_powerLawPeak)
    prior_dict = {
        'lmbda': (-5, 4),
        'mMax': (60, 100),
        'm0': (20, 100),
        'sigM': (1, 10),
        'fPeak': (0, 1),
        'bq': (-2, 10),
        'sig_kappa': 6.,
        'mu_eff': (-1, 1),
        'sigma_eff': (0.05, 1.),
        'mu_p': (0.05, 1),
        'sigma_p': (0.05, 1.),
        'rho': (-0.75, 0.75),
        'mMin': 5.
    }

    # Load input data.
    # NOTE(security): allow_pickle=True unpickles the file, which can execute
    # arbitrary code. Only point these paths at files you trust.
    sample_dict = np.load(sample_dict_path, allow_pickle=True)
    if 'S190814bv' in sample_dict:
        sample_dict.pop('S190814bv')

    injection_dict = np.load(injection_dict_path, allow_pickle=True)

    # Initialize chain or continue from the highest-sorted existing checkpoint
    old_chains = sorted(glob.glob(f"{output_path}_r??.npy"))

    if not old_chains:
        # Start new chain
        run_version = 0
        initial_walkers = initialize_new_walkers(n_walkers)
    else:
        # Continue existing chain
        latest_chain_path = old_chains[-1]
        old_chain = np.load(latest_chain_path)
        # The two characters before ".npy" are the previous run index.
        run_version = int(latest_chain_path[-6:-4]) + 1

        # Fail early with a clear message instead of an opaque indexing or
        # sampler error further down.
        if (old_chain.ndim != 3
                or old_chain.shape[0] != n_walkers
                or old_chain.shape[2] != dim):
            raise ValueError(
                f"Cannot resume from {latest_chain_path}: expected a chain of "
                f"shape ({n_walkers}, n_saved_steps, {dim}), "
                f"got {old_chain.shape}"
            )

        # Steps whose first parameter of walker 0 is exactly zero are treated
        # as never reached (trailing zeros from incomplete runs).
        completed_steps = np.flatnonzero(old_chain[0, :, 0] != 0.0)
        if completed_steps.size == 0:
            raise ValueError(
                f"Cannot resume from {latest_chain_path}: "
                f"the checkpoint contains no completed steps"
            )

        # Use final completed positions as new initial positions
        initial_walkers = old_chain[:, completed_steps[-1], :]

    # Initialize sampler.
    # NOTE(review): `threads` is the emcee 2.x keyword; newer emcee releases
    # parallelise through `pool` instead. Confirm against the installed version.
    sampler = mc.EnsembleSampler(
        n_walkers,
        dim,
        logp_powerLawPeak,
        args=[sample_dict, injection_dict, prior_dict],
        threads=n_threads
    )

    # The checkpoint file is the same for the whole run, so build its name once.
    save_path = f"{output_path}_r{run_version:02d}.npy"

    # Run MCMC with progress bar and periodic saving
    print(f"\nRunning MCMC with {n_walkers} walkers for {n_steps} steps...")
    save_interval = 10  # Save every 10 steps

    with tqdm(total=n_steps, desc="MCMC Progress") as pbar:
        # The yielded sampler state is not needed; the chain is read from the
        # sampler object itself.
        for i, _ in enumerate(sampler.sample(initial_walkers, iterations=n_steps)):
            if i % save_interval == 0:
                np.save(save_path, sampler.chain)

            # Update progress bar
            pbar.update(1)

            # Add acceptance fraction to progress bar description every 100 steps
            if i % 100 == 0:
                acc_frac = np.mean(sampler.acceptance_fraction)
                pbar.set_description(f"MCMC Progress (acceptance: {acc_frac:.3f})")

    # Final save, so steps after the last periodic checkpoint are kept
    np.save(save_path, sampler.chain)

    return save_path

def initialize_new_walkers(n_walkers):
    """
    Draw random starting positions for a fresh MCMC chain.

    Each of the 12 parameters gets its own independent set of ``n_walkers``
    draws from NumPy's global random state. Eleven are uniform over a fixed
    interval; the seventh is normal with mean 2 and unit standard deviation.

    Parameters:
    -----------
    n_walkers : int
        Number of walkers to initialize

    Returns:
    --------
    numpy.ndarray
        Array of shape (n_walkers, 12); one row per walker, one column per
        parameter, in the order the draws are listed below
    """
    def unit_draws():
        # One uniform sample in [0, 1) per walker.
        return np.random.random(n_walkers)

    # The draws are made strictly in column order so that a fixed global seed
    # always yields the same starting positions.
    columns = {
        'lmbda': unit_draws() * (-2.),                                    # (-2, 0]
        'mMax': unit_draws() * 20. + 80.,                                 # [80, 100)
        'm0': unit_draws() * 10. + 30,                                    # [30, 40)
        'sigM': unit_draws() * 4 + 1.,                                    # [1, 5)
        'f': unit_draws(),                                                # [0, 1)
        'bq': unit_draws() * 2.,                                          # [0, 2)
        'k': np.random.normal(size=n_walkers, loc=0, scale=1) + 2.,       # N(2, 1)
        'mu_eff': unit_draws() * 0.05,                                    # [0, 0.05)
        'sigma_eff': unit_draws() * 0.2 + 0.05,                           # [0.05, 0.25)
        'mu_p': unit_draws() * 0.5 + 0.05,                                # [0.05, 0.55)
        'sigma_p': unit_draws() * 0.2 + 0.05,                             # [0.05, 0.25)
        'rho': unit_draws() * 0.4 - 0.2,                                  # [-0.2, 0.2)
    }

    # Stack as (12, n_walkers), then flip to one row per walker.
    return np.vstack(list(columns.values())).T

In [None]:
# Sample the posterior for the "_K" sample/injection pickles. Checkpoints are
# written to ../results/KDE_result_rNN.npy, where NN is the run index chosen
# inside run_mcmc_estimation (it resumes if such files already exist).
# NOTE(review): the sample dict is read from ../input/ but the injection dict
# from the notebook's own directory; confirm both relative paths are intended.
run_mcmc_estimation(sample_dict_path="../input/sampleDict_FAR_1_in_1_yr_rerun_K.pickle",
                    injection_dict_path="./injectionDict_rerun_directMixture_FAR_1_in_1_K.pickle",
                    output_path="../results/KDE_result",
                    n_walkers=32,
                    n_steps=20000,
                    n_threads=16)


Running MCMC with 32 walkers for 20000 steps...


MCMC Progress:   0%|          | 0/20000 [00:00<?, ?it/s]

GW170729 8.906575868447371
GW170729 9.343714263776526
S190521g 1.0000000001834606
GW170729 6.3178708760067
GW170729 9.32154326968652


In [None]:
# Sample the posterior for the "_A" sample/injection pickles. Checkpoints are
# written to ../results/Analytical_result_rNN.npy, where NN is the run index
# chosen inside run_mcmc_estimation (it resumes if such files already exist).
# NOTE(review): the sample dict is read from ../input/ but the injection dict
# from the notebook's own directory; confirm both relative paths are intended.
run_mcmc_estimation(sample_dict_path="../input/sampleDict_FAR_1_in_1_yr_rerun_A.pickle",
                    injection_dict_path="./injectionDict_rerun_directMixture_FAR_1_in_1_A.pickle",
                    output_path="../results/Analytical_result",
                    n_walkers=32,
                    n_steps=20000,
                    n_threads=16)