# Estimating covariance on SkySim5000 and using it on Gaussian likelihood

(derived from <a href='SkySim500_firstcheck.ipynb'>SkySim500_firstcheck</a> notebook)

Author : Michel Aguena

In [None]:
import numpy as np
import pyccl as ccl
import matplotlib.pyplot as plt
import scipy.integrate
import astropy.units as u
import GCRCatalogs

%matplotlib inline

In [None]:
skysim_cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_small')
%%time
# get list of halos in a given redshift and mass range 
mmin_extract = 1.e13 # Msun (M_fof)
zmin_extract = 0.
zmax_extract = 1.0

dm_halos = skysim_cat.get_quantities(['halo_mass','hostHaloMass','redshift','ra', 'dec', 'halo_id'],
                                            filters=[f'halo_mass > {mmin_extract}','is_central==True',
                                            f'redshift>{zmin_extract}', f'redshift<{zmax_extract}'])

dm_halos['baseDC2/sod_halo_mass'] = dm_halos['halo_mass']

In [None]:
# Load the SkySim5000 "small" catalog; this rebinds `skysim_cat`, so every cell
# below works on SkySim5000.
skysim_cat = GCRCatalogs.load_catalog('skysim5000_v1.1.1_small')

In [None]:
# Cosmology object attached to the catalog; used below for h, Om0, Ob0, sigma8, n_s
# and for the differential comoving volume cross-check.
cosmo_ss  = skysim_cat.cosmology

In [None]:
# Display the catalog cosmology (bare expression -> rendered as the cell output).
cosmo_ss

## Extract DM haloes from the catalog in a given mass and redshift range. 

In [None]:
# Selection thresholds for the halo query
mmin_extract = 1.e13  # Msun (M_fof)
zmin_extract = 0.
zmax_extract = 1.0

# Columns to read and the cuts applied by the catalog reader
halo_columns = ['halo_mass', 'hostHaloMass', 'redshift', 'ra', 'dec', 'halo_id',
                'baseDC2/sod_halo_mass', 'baseDC2/sod_halo_radius']
halo_filters = [f'halo_mass > {mmin_extract}', 'is_central==True',
                f'redshift>{zmin_extract}', f'redshift<{zmax_extract}']

# One entry per central object passing the mass and redshift cuts
dm_halos = skysim_cat.get_quantities(halo_columns, filters=halo_filters)

In [None]:
# Number of halos returned by the query (every column has the same length,
# so any of them can be used to count).
N_cl = len(dm_halos['halo_mass'])
print(f'There are {N_cl:,} halos in this mass (Mfof) and redshift range')

In [None]:
# Sky footprint of the selected halos
fig, ax = plt.subplots()
ax.scatter(dm_halos['ra'], dm_halos['dec'], marker='.', s=0.05)
ax.set_xlabel('ra [deg]')
ax.set_ylabel('dec [deg]')

## Define a redshift and mass range for the comparison of data and prediction, and filter the data accordingly
NB: SkySim5000 M200c masses are in units of Msun/h

In [None]:
# sod_halo_mass = M200,c in Msun/h: divide by h to work in Msun
m200c_msun = dm_halos['baseDC2/sod_halo_mass']/cosmo_ss.h

# Redshift window
zmin = 0.
zmax = 1.

# Mass window [Msun]. The lower edge is the smallest strictly positive mass,
# because the table contains some negative masses that must be excluded.
mmin = np.min(m200c_msun[m200c_msun > 0])
mmax = 1.e15

# Boolean masks, combined with a logical AND
in_redshift_range = (dm_halos['redshift'] >= zmin) & (dm_halos['redshift'] <= zmax)
in_mass_range = (m200c_msun >= mmin) & (m200c_msun < mmax)
filt = in_redshift_range & in_mass_range

data = m200c_msun[filt] # M200,c [Msun]

In [None]:
# Histogram the selected halos in log10(M200c): equal-width bins spanning the data range
nbins = 15
log10_m200c = np.log10(data)
nc_meas, logmass_bins = np.histogram(log10_m200c, bins=nbins)

# Bin centres in log10 mass (mean of the lower and upper edge of each bin)
mid_logmass = (logmass_bins[:-1] + logmass_bins[1:])/2

Plot number counts

In [None]:
# Measured number counts per mass bin, plotted at the bin centres
plt.figure(figsize=(10,6))
plt.plot(10**mid_logmass, nc_meas, marker='*', ls='')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('M200,c [Msun]', size=12)
plt.ylabel('Number of halos', size=12)
# The summary used to print mmin/1e14 next to an "x 1e15 Msun" label, i.e. the lower
# edge was off by a factor of 10. Both edges are now printed directly in Msun.
print(f"Total number of halos in z=[{zmin},{zmax}] and M=[{mmin:.2e}, {mmax:.2e}] Msun: {len(data):,}")

# Measure covariance of clusters

Assign each halo to a HEALPix pixel and inspect the per-pixel counts (looking for outliers) to decide which pixels will be used for the covariance computation

In [None]:
import healpy as hp

# HEALPix resolution parameter used to split the footprint into sub-regions.
nside = 64
# Pixel index of every halo; lonlat=True means the inputs are (ra, dec) in degrees.
dm_halos['pixel'] = hp.ang2pix(nside, dm_halos['ra'], dm_halos['dec'], lonlat=True)


f, axes = plt.subplots(1, 2, figsize=(20, 6))
# Halo count per pixel: one histogram bin per pixel index over the full sphere.
map_ = np.histogram(dm_halos['pixel'], np.arange(hp.nside2npix(nside)+1))[0]
# Switch to float so that empty pixels can be flagged with NaN.
map_ = np.array(map_, dtype=float)
map_[map_==0] = np.nan

#plot histogram of counts
axes[0].hist(map_)
axes[0].set_xlabel('# of clusters in pixels')

# plot map
# NOTE(review): hold=True is expected to make healpy draw into the current figure
# instead of opening a new one -- confirm against the healpy documentation.
hp.cartview(map_, latra=[-50, -30], lonra=[55, 75], 
            hold=True, cbar=False)

# Last axes of the figure; presumably the one healpy just drew into -- verify.
hp_ax = f.axes[-1]
hp_ax.axis('on')

#ax.patch.set_alpha(0.)
hp_ax.set_xlabel('RA')
hp_ax.set_ylabel('DEC')
hp_ax.grid(color='.7')
hp_ax.set_xticklabels(-hp_ax.get_xticks()) # RA values in plot are inverted


plt.show()

Define pixels to be used in covariance computation

In [None]:
# Indices of the pixels holding more than 200 halos; empty pixels are NaN in `map_`
# and compare False, so they are dropped as well.
pixels_for_cov = np.flatnonzero(map_ > 200)

Bin the data in mass and pixels

In [None]:
# Loop-invariant inputs, computed once instead of once per pixel:
# log10 mass and pixel index of the halos that passed the mass/redshift selection.
log10_mass_selected = np.log10(data)
pixel_selected = dm_halos['pixel'][filt]

# Mass histogram of each retained pixel, using the same bin edges as the full sample.
# After the transpose, rows are mass bins and columns are pixels.
pixeled_nc = np.transpose([
    np.histogram(log10_mass_selected[pixel_selected == p], bins=logmass_bins)[0]
    for p in pixels_for_cov])

Compute covariance and correlation matrices
\begin{equation}
Corr_{ij} = \frac{Cov_{ij}}{\sqrt{Cov_{ii}Cov_{jj}}}
\end{equation}

In [None]:
# np.cov treats each row of `pixeled_nc` (a mass bin) as a variable and each column
# (a pixel) as an observation, giving the per-pixel covariance between mass bins.
# It is then multiplied by (catalog sky area / pixel area) to scale it to the full footprint
# (assumes skysim_cat.sky_area is in deg2, like the pixel area requested here -- TODO confirm).
cov = np.cov(pixeled_nc)*(skysim_cat.sky_area/hp.nside2pixarea(nside, degrees=True))

In [None]:
# Correlation matrix between mass bins (rows of `pixeled_nc` are the variables).
corr = np.corrcoef(pixeled_nc)

See how strongly correlated the different mass bins are:

In [None]:
# Correlation matrix between mass bins, shown as a colour map
fig, ax = plt.subplots(figsize=(6,6))
corr_mesh = ax.pcolor(corr)
fig.colorbar(corr_mesh, ax=ax)
ax.set_xlabel('mass bin')
ax.set_ylabel('mass bin')

Plot number counts with variance as errorbars

In [None]:
# Measured number counts with 1-sigma error bars taken from the covariance diagonal
plt.figure(figsize=(10,6))
plt.errorbar(10**mid_logmass, nc_meas, np.sqrt(np.diag(cov)), ls='', capsize=3)
plt.xscale('log')
plt.yscale('log')
plt.xlabel('M200,c [Msun]', size=12)
plt.ylabel('Number of halos', size=12)
# The summary used to print mmin/1e14 next to an "x 1e15 Msun" label, i.e. the lower
# edge was off by a factor of 10. Both edges are now printed directly in Msun.
print(f"Total number of halos in z=[{zmin},{zmax}] and M=[{mmin:.2e}, {mmax:.2e}] Msun: {len(data):,}")

## Prediction using CCL and the Tinker08 and Bocquet16 mass functions

In [None]:
# Build the CCL cosmology from the SkySim cosmology parameters.
# CCL takes the cold dark matter density separately: Omega_c = Omega_m - Omega_b.
omega_b = cosmo_ss.Ob0
omega_c = cosmo_ss.Om0 - omega_b
cosmo = ccl.Cosmology(Omega_c=omega_c, Omega_b=omega_b, h=cosmo_ss.h,
                      sigma8=cosmo_ss.sigma8, n_s=cosmo_ss.n_s, Neff=3.04)

print(cosmo)


### Differential comoving volume

In [None]:
def dV_over_dOmega_dz(z):
    """Differential comoving volume per unit solid angle and unit redshift at `z`.

    Evaluates (1+z)^2 * d_A(z)^2 * (c/H0) / E(z) with the module-level CCL
    cosmology `cosmo`, where d_A is the angular diameter distance and
    E(z) = H(z)/H0. c/H0 is obtained as CLIGHT_HMPC / h.
    """
    scale_factor = 1./(1. + z)
    ang_diam_dist = ccl.background.angular_diameter_distance(cosmo, scale_factor)
    e_of_z = ccl.background.h_over_h0(cosmo, scale_factor)
    # (1+z)^2 d_A^2 is the squared comoving transverse distance
    transverse_sq = ((1.+z)**2)*(ang_diam_dist**2)
    return transverse_sq*ccl.physical_constants.CLIGHT_HMPC/cosmo['h']/e_of_z

In [None]:
### Sanity check - comparison to the volume computed with Astropy
# Displays (CCL-based value, catalog-cosmology value) at z=0.3; the two numbers should
# agree closely if both cosmologies match (NOTE(review): assumes both are in Mpc^3/sr).
dV_over_dOmega_dz(0.3), cosmo_ss.differential_comoving_volume(0.3).value 

### CCL mass functions

In [None]:
# Mass definition shared by both mass functions: overdensity 200 w.r.t. the critical density.
# `hmd_200c` used to be assigned twice in this cell (MassDef(200, 'critical') and then
# MassDef200c()); only the last assignment was ever in effect because the functions look
# the name up at call time, so that one is kept.
hmd_200c = ccl.halos.MassDef200c()

# Build the mass-function objects once. They used to be re-created on every call, i.e.
# on every integrand evaluation inside scipy.integrate.dblquad.
hmf_tinker08_200c = ccl.halos.MassFuncTinker08(cosmo, mass_def=hmd_200c)
hmf_bocquet16_200c = ccl.halos.MassFuncBocquet16(cosmo, mass_def=hmd_200c)


def tinker08(logm, z):
    """Tinker08 halo mass function dn/dlog10M for the module-level cosmology `cosmo`.

    Parameters
    ----------
    logm : float or array-like
        log10 of the halo mass (M200c).
    z : float
        Redshift; converted to the scale factor a = 1/(1+z) for CCL.

    Returns
    -------
    Value returned by CCL's `get_mass_function` (dn/dlog10M).
    """
    mass = 10**(logm)
    return hmf_tinker08_200c.get_mass_function(cosmo, mass, 1./(1+z))


def bocquet16(logm, z):
    """Bocquet16 halo mass function dn/dlog10M for the module-level cosmology `cosmo`.

    Parameters
    ----------
    logm : float or array-like
        log10 of the halo mass (M200c).
    z : float
        Redshift; converted to the scale factor a = 1/(1+z) for CCL.

    Returns
    -------
    Value returned by CCL's `get_mass_function` (dn/dlog10M).
    """
    mass = 10**(logm)
    return hmf_bocquet16_200c.get_mass_function(cosmo, mass, 1./(1+z))

In [None]:
def integrand_tinker08(logm,z):
    """Tinker08 dn/dlog10M times the differential comoving volume at (logm, z).

    The argument order (inner variable first) is the one scipy.integrate.dblquad expects.
    """
    dn_dlogm = tinker08(logm, z)
    dvol = dV_over_dOmega_dz(z)
    return dn_dlogm*dvol

def integrand_bocquet16(logm,z):
    """Bocquet16 dn/dlog10M times the differential comoving volume at (logm, z).

    The argument order (inner variable first) is the one scipy.integrate.dblquad expects.
    """
    dn_dlogm = bocquet16(logm, z)
    dvol = dV_over_dOmega_dz(z)
    return dn_dlogm*dvol

### Solid angle of the `small` catalog = 50 deg2

Need to check if it is exactly 50 deg2 or if this has been rounded

In [None]:
# Survey solid angle in steradians: area * (pi/180)^2
# (assumes skysim_cat.sky_area is given in deg2 -- TODO confirm).
DeltaOmega = skysim_cat.sky_area * np.pi**2/180**2

In [None]:
# Tinker08 prediction per unit solid angle in each mass bin: integrate the integrand
# over redshift (outer variable, zmin..zmax) and log10 mass (inner variable, bin edges).
# The bin edges are the ones returned by np.histogram for the measured counts.
tinker08_integrals = []
for logm_lo, logm_hi in zip(logmass_bins[:-1], logmass_bins[1:]):
    # dblquad returns (integral, absolute error estimate); only the integral is kept
    integral, _abserr = scipy.integrate.dblquad(
        integrand_tinker08, zmin, zmax,
        lambda x: logm_lo, lambda x: logm_hi,
        epsabs=1.e-4, epsrel=1.e-4)
    tinker08_integrals.append(integral)
N_predicted_T08 = np.array(tinker08_integrals)

In [None]:
# Bocquet16 prediction per unit solid angle in each mass bin: integrate the integrand
# over redshift (outer variable, zmin..zmax) and log10 mass (inner variable, bin edges).
# The bin edges are the ones returned by np.histogram for the measured counts.
bocquet16_integrals = []
for logm_lo, logm_hi in zip(logmass_bins[:-1], logmass_bins[1:]):
    # dblquad returns (integral, absolute error estimate); only the integral is kept
    integral, _abserr = scipy.integrate.dblquad(
        integrand_bocquet16, zmin, zmax,
        lambda x: logm_lo, lambda x: logm_hi,
        epsabs=1.e-4, epsrel=1.e-4)
    bocquet16_integrals.append(integral)
N_predicted_B16 = np.array(bocquet16_integrals)

## Plot measured versus predicted number of haloes

In [None]:
# Measured counts (error bars from the covariance diagonal) against the CCL predictions.
# The predictions are per unit solid angle, hence the multiplication by DeltaOmega.
plt.figure(figsize=(10,6))
plt.errorbar(10**mid_logmass, nc_meas, np.sqrt(np.diag(cov)), ls='', capsize=3, 
            label=f'SkySim5000, {skysim_cat.sky_area:.2f} deg2 field, z=[{zmin},{zmax}]')
plt.plot(10**mid_logmass, N_predicted_T08*DeltaOmega, label='T08, CCL')
plt.plot(10**mid_logmass, N_predicted_B16*DeltaOmega, label='B16, CCL')
plt.legend(fontsize=12)
plt.xscale('log')
plt.yscale('log')
# Raw string: '\o' in a normal string literal is an invalid escape sequence
# (a warning today, an error in future Python versions). The rendered label is unchanged.
plt.xlabel(r'$M_{200c}$ [M$_\odot$]', size=14)
plt.ylabel('Number of haloes', size=14)

# Gaussian likelihood

\begin{equation}
\mathcal{L} = \frac{1}{\sqrt{det(2\pi Cov)}}\exp{\left[-\frac{1}{2}(Obs-Theo)Cov^{-1}(Obs-Theo)^T\right]}
\end{equation}

In [None]:
def lnlike(obs, theo, cov):
    """Gaussian log-likelihood of `obs` given the prediction `theo` and covariance `cov`.

    Returns ``-chi2/2 - ln(det(cov))/2`` with ``chi2 = (obs-theo) cov^-1 (obs-theo)^T``.
    The constant term ``-k/2 * ln(2*pi)`` (k = number of bins) is not included, so the
    value is the log-likelihood up to an additive constant that cancels when comparing models.

    Parameters
    ----------
    obs, theo : array-like, shape (k,)
        Observed and predicted values.
    cov : array-like, shape (k, k)
        Covariance matrix of the observations.

    Raises
    ------
    numpy.linalg.LinAlgError
        If `cov` is singular or its determinant is not positive.
    """
    diff = np.asarray(obs, dtype=float) - np.asarray(theo, dtype=float)
    cov = np.asarray(cov, dtype=float)
    # slogdet works in log space: det(cov) itself overflows or underflows easily
    # for large or badly scaled matrices, which made log(det(cov)) +-inf.
    sign, logdet = np.linalg.slogdet(cov)
    if sign <= 0:
        raise np.linalg.LinAlgError(
            "covariance matrix must have a positive determinant")
    # Solve cov x = diff instead of forming the explicit inverse (more stable, same chi2).
    chi2 = np.dot(diff, np.linalg.solve(cov, diff))
    return -0.5*chi2 - 0.5*logdet

In [None]:
# Log-likelihood of the measured counts under each mass-function prediction.
# The predictions are per unit solid angle, so they are scaled by DeltaOmega first.
lnlike_T08 = lnlike(nc_meas, N_predicted_T08*DeltaOmega, cov)
lnlike_B16 = lnlike(nc_meas, N_predicted_B16*DeltaOmega, cov)
lnlike_T08, lnlike_B16