In [1]:
import os
import numpy as np
import pandas as pd

from glob import glob
from scipy.stats import lognorm
from scipy.interpolate import interp1d
from frb.dm.igm import average_DM
from frb.dm import igm
from frb.dm import cosmic
from frb.dm import mcmc
from frb import defs
from astropy.cosmology import Planck18_arXiv_v2 as cosmo

from mockFRBhosts import draw_galaxies, observed_bands, draw_Delta
from mockFRBhosts.mcmc_simulations import do_mcmc



In [2]:
# Directory in which the posterior samples are saved.
outdir = '../Posteriors/'

# exist_ok=True creates the directory only if it is missing. This avoids the
# separate isdir() check, which can race with another process creating the
# directory between the check and the makedirs() call.
os.makedirs(outdir, exist_ok=True)

In [3]:
# Collect the pickle files holding the simulated FRBs, in sorted order.
pickles = sorted(glob('../Simulated_FRBs/*.pickle'))

# File names look like '<survey model>_<redshift model>.pickle'. Strip the
# directory and the extension, then split at the first underscore only, so
# that redshift models containing an underscore (e.g. 'vol_co') stay intact.
file_stems = [os.path.splitext(os.path.basename(path))[0] for path in pickles]
name_parts = [stem.split('_', 1) for stem in file_stems]
survey_models = [parts[0] for parts in name_parts]
z_models = [parts[1] for parts in name_parts]

# Check files
pickles

['../Simulated_FRBs/askap-craco_sfr.pickle',
 '../Simulated_FRBs/askap-craco_smd.pickle',
 '../Simulated_FRBs/askap-craco_vol_co.pickle',
 '../Simulated_FRBs/askap-incoh_sfr.pickle',
 '../Simulated_FRBs/askap-incoh_smd.pickle',
 '../Simulated_FRBs/askap-incoh_vol_co.pickle',
 '../Simulated_FRBs/chime-frb_sfr.pickle',
 '../Simulated_FRBs/chime-frb_smd.pickle',
 '../Simulated_FRBs/chime-frb_vol_co.pickle',
 '../Simulated_FRBs/ska1-mid_sfr.pickle',
 '../Simulated_FRBs/ska1-mid_smd.pickle',
 '../Simulated_FRBs/ska1-mid_vol_co.pickle']

In [5]:
# Number of FRBs that should be used throughout.
n_frbs = 1000

# Pick an FRB survey and redshift distribution. One index selects the file and
# the model names parsed from it, so the survey label used in the output file
# names can never get out of sync with the data that was loaded.
file_idx = 0
chosen = pickles[file_idx]
radio_survey = survey_models[file_idx]
print(chosen)

# Weight the galaxy choice depending on the redshift model in the file name:
# 'mstardot' when the model name ends in 'sfr', 'mstars_total' otherwise.
if z_models[file_idx].endswith('sfr'):
    weights = 'mstardot'
else:
    weights = 'mstars_total'

# NOTE: np.load(..., allow_pickle=True) unpickles the file, which can execute
# arbitrary code. Only load pickle files that come from a trusted source.
frbs = np.load(chosen, allow_pickle=True)
print(frbs.shape[0], "FRBs in file, using only first", n_frbs)
# Work on a copy of the first n_frbs rows so that adding columns below does
# not write into a slice of the loaded frame.
frbs = frbs.iloc[:n_frbs].copy()

galaxies, snapnum = draw_galaxies(frbs['z'], weights=weights, seed=42)

# Order FRBs such that they correspond to galaxies at the same positions.
# Reassigning instead of sorting in place keeps this cell safe to re-run.
frbs.loc[:, 'snapnum'] = snapnum
frbs = frbs.sort_values('snapnum', ascending=True)

# One result per optical survey; presumably the number of bands in which each
# host would be observed -- confirm against mockFRBhosts.observed_bands.
n_bands_obs_SDSS, n_bands_obs_LSST, n_bands_obs_Euclid, n_bands_obs_DES = observed_bands(frbs, galaxies)

# .to_numpy() drops the index, so the values are assigned by position.
frbs['n_bands_SDSS'] = n_bands_obs_SDSS.to_numpy()
frbs['n_bands_LSST'] = n_bands_obs_LSST.to_numpy()
frbs['n_bands_Euclid'] = n_bands_obs_Euclid.to_numpy()
frbs['n_bands_DES'] = n_bands_obs_DES.to_numpy()

../Simulated_FRBs/askap-craco_sfr.pickle
../Simulated_FRBs/askap-craco_sfr.pickle
1028 FRBs in file, using only first 1000


In [6]:
# Parameter values from which the DMs are simulated. Obh70 is not used at the
# moment; it would have to be passed on to average_DM.
Obh70 = cosmo.Ob0*cosmo.H0.value/70
F = .2
mu = 100
lognorm_s = 1

# average_DM is slow (for every evaluation point an integral is done in
# frb.dm.igm.avg_rhoISM when cosmo.age(z) is called), so compute the cumulative
# average DM once up to the highest redshift and interpolate from then on.
z_max = frbs['z'].max()
DM_cum, zeval = average_DM(z_max, cosmo=defs.frb_cosmo, cumul=True)
avrg_DM = interp1d(zeval, DM_cum, assume_sorted=True)

# Draw one Delta per FRB from its PDF, then multiply by <DM_cosmic> at the
# FRB's redshift to get a cosmic DM. The draws happen in row order on one
# generator, before the host draw below.
rng = np.random.default_rng(seed=42)
delta = []
for z_frb in frbs['z']:
    delta.append(float(draw_Delta(z_frb, f=F, n_samples=1, rng=rng)))
dm_cosmic = np.array(delta) * avrg_DM(frbs['z'])

# Draw a DM_host from the parameters that Macquart2020 gives.
dm_host = lognorm.rvs(lognorm_s, scale=mu, size=len(frbs), random_state=rng)

# The host contribution is divided by (1 + z) before the cosmic part is added.
frbs['DM'] = dm_host/(1+frbs['z']) + dm_cosmic

In [7]:
# Optical survey whose band counts decide which FRBs are kept.
survey = 'SDSS'
n_bands_obs = frbs[f'n_bands_{survey}']
n_bands = n_bands_obs.max()

# Keep only the FRBs whose host reaches the highest band count found in the
# sample (taken to mean "all bands"), then shuffle them with a fixed seed and
# renumber the rows from zero.
in_all_bands = n_bands_obs.to_numpy() == n_bands
rng = np.random.default_rng(seed=42)
frbs_w_host = frbs.loc[in_all_bands].sample(frac=1, ignore_index=True, random_state=rng)

In [8]:
# Settings handed to do_mcmc as `draws` and `cores`; they also appear in the
# names of the posterior files.
draws, cores = 150, 20

# Use every FRB with a host; its size replaces the earlier value of n_frbs.
frb_set = frbs_w_host
n_frbs = frb_set.shape[0]
print(n_frbs)

274


In [None]:
# The posterior file name encodes the radio survey, the optical survey, the
# number of FRBs and the sampler settings.
post_name = f"{radio_survey}_{survey}_{n_frbs}_zs_{cores}x{draws}_draws.nc"
post_path = os.path.join(outdir, post_name)

if os.path.isfile(post_path):
    # A posterior for this configuration was saved before; do not redo it.
    print("Already existing, skip.")
else:
    # Fit the first n_frbs FRBs with a host and store the result as netCDF.
    frb_set = frbs_w_host.iloc[:n_frbs]

    idata = do_mcmc(frb_set['z'], frb_set['DM'], draws=draws, cores=cores)
    idata.to_netcdf(post_path)

Only 150 samples in chain.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (20 chains in 20 jobs)
CompoundStep
>Slice: [lognorm_s]
>Slice: [mu]
>Slice: [F]
>Slice: [Obh70]
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.lo

In [29]:
# Create random samples from the whole FRB population to compare with. Each
# sample has n_frbs rows, i.e. the same size as the sample used for the
# host-selected run.
rndm_sample1 = frbs.sample(n=n_frbs, ignore_index=True, random_state=rng)
rndm_sample2 = frbs.sample(n=n_frbs, ignore_index=True, random_state=rng)
rndm_sample3 = frbs.sample(n=n_frbs, ignore_index=True, random_state=rng)

for frb_set in [rndm_sample1, rndm_sample2, rndm_sample3]:
    # Don't overwrite existing files: use the first run number not yet taken.
    i = 0
    while True:
        post_path = os.path.join(outdir, f"{radio_survey}_{survey}_random_sample_of_{len(frb_set)}_zs_run_{i}_{cores}x{draws}_draws.nc")
        if not os.path.isfile(post_path):
            break
        i += 1

    print(f"Will save to {post_path}")

    # Fit the randomly drawn sample itself. frb_set must not be replaced by
    # the host-selected FRBs here, otherwise every "random sample" posterior
    # would just repeat the host-selected run.
    idata = do_mcmc(frb_set['z'], frb_set['DM'], draws=draws, cores=cores)
    idata.to_netcdf(post_path)


Will save to Posteriors/askap-craco_random_sample_of_524_zs_run_0_20x1500_draws.nc


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (20 chains in 20 jobs)
CompoundStep
>Slice: [lognorm_s]
>Slice: [mu]
>Slice: [F]
>Slice: [Obh70]
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
Sampling 20 chains for 300 tune and 1_500 draw iterations (6_000 + 30_000 draws total) took 3673 seconds.
The number of effective samples is smaller than 10% for some parameters.


Will save to Posteriors/askap-craco_random_sample_of_524_zs_run_1_20x1500_draws.nc


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (20 chains in 20 jobs)
CompoundStep
>Slice: [lognorm_s]
>Slice: [mu]
>Slice: [F]
>Slice: [Obh70]
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalization

Will save to Posteriors/askap-craco_random_sample_of_524_zs_run_2_20x1500_draws.nc


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (20 chains in 20 jobs)
CompoundStep
>Slice: [lognorm_s]
>Slice: [mu]
>Slice: [F]
>Slice: [Obh70]
Sampling 20 chains for 300 tune and 1_500 draw iterations (6_000 + 30_000 draws total) took 3743 seconds.
The number of effective samples is smaller than 10% for some parameters.


In [34]:
# Preview the subset sizes: 30 log-spaced values between 10 and the number of
# FRBs with a host, cast to integers, with duplicates removed.
subset_sizes = np.logspace(1, np.log10(len(frbs_w_host)), 30, dtype=int)
print(sorted(set(subset_sizes)))

[10,
 11,
 13,
 15,
 17,
 19,
 22,
 26,
 29,
 34,
 39,
 44,
 51,
 58,
 67,
 77,
 88,
 101,
 116,
 133,
 153,
 175,
 201,
 230,
 264,
 303,
 347,
 398,
 457,
 523]

In [37]:
# Sampler settings handed to do_mcmc and encoded in the output file names.
draws, cores = 150, 20

# Log-spaced subset sizes between 10 and the number of FRBs with a host, cast
# to integers, de-duplicated and sorted in ascending order.
subset_sizes = sorted(set(np.logspace(1, np.log10(len(frbs_w_host)), 30, dtype=int)))

for n_frbs in subset_sizes:
    print(n_frbs)

    post_path = os.path.join(outdir, f"{radio_survey}_{survey}_{n_frbs}_zs_{cores}x{draws}_draws.nc")
    # Only run the sampler when no posterior exists yet for this size.
    if not os.path.isfile(post_path):
        frb_set = frbs_w_host.iloc[:n_frbs]

        idata = do_mcmc(frb_set['z'], frb_set['DM'], draws=draws, cores=cores)
        idata.to_netcdf(post_path)

5


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (20 chains in 20 jobs)
CompoundStep
>Slice: [lognorm_s]
>Slice: [mu]
>Slice: [F]
>Slice: [Obh70]
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalization

7


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (20 chains in 20 jobs)
CompoundStep
>Slice: [lognorm_s]
>Slice: [mu]
>Slice: [F]
>Slice: [Obh70]
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalizations/avgDM))
  log_like = np.sum(np.log(likelihoods*normalization