# Rascunho

In [1]:
import math

import sys

import os
import numpy as np
import pandas as pd

from astropy.io import fits
from astropy.table import Table

import matplotlib
from matplotlib import pyplot as plt
# %matplotlib inline

sys.path.insert(0, "/global/homes/c/cinlima/gcrcatalogs-new/gcr-catalogs")
sys.path.insert(0, "global/homes/c/cinlima/clevar/clevar")

import GCRCatalogs
#Put nerc if using nersc
GCRCatalogs.set_root_dir_by_site("nersc")

from numcosmo_py import Ncm, Nc
from numcosmo_py.external.pyssc import pyssc as PySSC
Ncm.cfg_init()

# from clevar import ClCatalog, MemCatalog
# from clevar.match import MembershipMatch, get_matched_pairs

# from clevar.match import output_matched_catalog
# import clevar
# clevar.__version__


# matplotlib.rcParams.update({'font.size': 22})
# matplotlib.rcParams.update({'figure.figsize': (10,8)})

sys.path.insert(0, 'NumCosmo/notebooks/richness_proxy/CatalogsMatching/')


In [2]:
# gc = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_redmapper_v0.8.1')


In [3]:
# mdata = Table.read('CatalogsMatching/test.fits')
# data = mdata[len(mdata['mass']) > 0]
# mdata

test_matchdata = pd.read_csv('/global/homes/c/cinlima/MatchingCatalogs/match_catalog.csv',  sep = ';')

rich_data = test_matchdata["richness"]
z_data = test_matchdata["redshift_y"]
mass_data = test_matchdata["halo_mass"]

data = Table([mass_data, rich_data, z_data], names=('mass', 'richness', 'redshift'))
rich_data

0        190.706650
1        179.128130
2        124.027150
3        167.336030
4        111.914700
            ...    
39595      5.251925
39596     17.501669
39597      5.251925
39598     10.819990
39599      5.282434
Name: richness, Length: 39600, dtype: float64

In [4]:
#Data threshold
logMmin = 13.0
logMmax = 15.0
zmin    = 0.0
zmax    = 1.2

area = 439.78987 

cosmo = Nc.HICosmoDEXcdm()
cosmo.omega_x2omega_k()
cosmo.param_set_by_name("H0", 67.74)
cosmo.param_set_by_name("Omegab", 0.0486)
cosmo.param_set_by_name("Omegac",0.3089 - 0.0486) #0.2603
cosmo.param_set_by_name("Omegak", 0.00)
cosmo.param_set_by_name("w", -1.0) #-1.0

prim = Nc.HIPrimPowerLaw.new()
prim.param_set_by_name("ln10e10ASA",3.0116707481421923)
prim.param_set_by_name("n_SA", 0.9667)

cosmo.add_submodel(prim)

dist = Nc.Distance.new(2.0)

tf = Nc.TransferFuncEH()

psml = Nc.PowspecMLTransfer.new(tf)
psml.require_kmin(1.0e-6)
psml.require_kmax(1.0e3)

psf = Ncm.PowspecFilter.new(psml, Ncm.PowspecFilterType.TOPHAT)
psf.set_best_lnr0()

# cosmo.set_property("Omegac_fit", True)
# cosmo.set_property("w_fit", True)
# prim.set_property("ln10e10ASA_fit", False)

#mass proxy
# cluster_m = Nc.ClusterMassNodist(lnM_min=np.log(10) * (logMmin - np.log10(0.6774)), lnM_max=np.log(10.0) * (logMmax - np.log10(0.6774)))
cluster_m = Nc.ClusterMassLnrichExt(use_ln1pz = True)
# cluster_m = Nc.ClusterMassAscaso()


cmparam = [("mu", 4.301849), ("muM1", 1.463218), ("muZ1", 0.702439), 
           ("muM2", 0.226122), ("muZ2", -1.029253), ("muMZ", 0.282406), 
           ("sigma0", 0.255932), ("sigmaM1", -0.466271), ("sigmaZ1", -0.298706),
           ("sigmaM2", -0.058500), ("sigmaZ2", 0.1567059), ("sigmaMZ", -0.0998257 )]

for p, v in cmparam:
    cluster_m.param_set_by_name(p, v)


#photo-z
cluster_z = Nc.ClusterRedshiftNodist (z_min = zmin, z_max = zmax)

mulf = Nc.MultiplicityFuncTinker.new()
mulf.set_mdef(Nc.MultiplicityFuncMassDef.CRITICAL)

hmf = Nc.HaloMassFunction.new(dist, psf, mulf)    
hmf.prepare(cosmo)
hmf.set_area(area * (np.pi / 180) ** 2)

cad = Nc.ClusterAbundance.new(hmf, None)
cad.prepare(cosmo , cluster_z , cluster_m)

mset = Ncm.MSet.new_array([cosmo, cluster_m, cluster_z])
ncdata = Nc.DataClusterNCount.new(cad, "NcClusterRedshiftNodist", "NcClusterMassLnrichExt")
ncdata.set_init(True)

lnM = np.log(np.array(mass_data))
redshift = np.array(z_data)

ncdata.set_lnM_obs(Ncm.Matrix.new_array(lnM, 1))
ncdata.set_z_obs(Ncm.Matrix.new_array(redshift, 1))
ncdata.set_lnM_true(Ncm.Vector.new_array(lnM))
ncdata.set_z_true(Ncm.Vector.new_array(redshift))
               
dset = Ncm.Dataset.new_array([ncdata])
lh = Ncm.Likelihood.new(dset)



In [5]:
mset["NcHICosmo"].param_set_desc("Omegac", {"fit": True})
mset["NcHICosmo"].param_set_desc("w", {"fit": True})
# mset.param_set_all_ftype(Ncm.ParamType.FREE)
mset.prepare_fparam_map()
mset.pretty_log()

#----------------------------------------------------------------------------------
# Model[03000]:
#   - NcHICosmo : XCDM - Constant EOS
#----------------------------------------------------------------------------------
# Model parameters
#   -         H0[00]:  67.74               [FIXED]
#   -     Omegac[01]:  0.2603              [FREE]
#   -     Omegak[02]:  0                   [FIXED]
#   -    Tgamma0[03]:  2.7245              [FIXED]
#   -         Yp[04]:  0.24                [FIXED]
#   -       ENnu[05]:  3.046               [FIXED]
#   -     Omegab[06]:  0.0486              [FIXED]
#   -          w[07]: -1                   [FREE]
#----------------------------------------------------------------------------------
# Model[05000]:
#   - NcHIPrim : Power Law model for primordial spectra
#----------------------------------------------------------------------------------
# Model parameters
#   - ln10e10ASA[00]:  3.01167074814219    [FIXED]
#   - T_SA_ratio[01]:  0.2                 

In [6]:
fit = Ncm.Fit.factory( Ncm.FitType.NLOPT, "ln-neldermead", lh, mset, Ncm.FitGradType.NUMDIFF_CENTRAL )


In [7]:
fit.run_restart(Ncm.FitRunMsgs.SIMPLE, 1.0e-3, 0.0, None, None)


#----------------------------------------------------------------------------------
# Model fitting. Interating using:
#  - solver:            NLOpt:ln-neldermead
#  - differentiation:   Numerical differentiantion (central)
...................
#  Minimum found with precision: |df|/f =  1.00000e-08 and |dx| =  1.00000e-05
#  Elapsed time: 00 days, 00:00:44.7103010
#  iteration            [000107]
#  function evaluations [000109]
#  gradient evaluations [000000]
#  degrees of freedom   [039598]
#  m2lnL     =      24411540.852412 (      24411541 )
#  Fit parameters:
#     0.0624839816873096    -4.05059969893824      
#----------------------------------------------------------------------------------
# Restarting:              yes
#  - absolute improvement: 28037.8845808879      
#  - relative improvement: 0.00114723272780673   
#  - m2lnL_0  :            24439578.7369929      
#  - m2lnL_1  :            24411540.852412       
#-------------------------------------------------------------

True

#  Minimum found with precision: |df|/f =  1.00000e-08 and |dx| =  1.00000e-05
#  Elapsed time: 00 days, 00:00:09.2120710
#  iteration            [000021]
#  function evaluations [000023]
#  gradient evaluations [000000]
#  degrees of freedom   [039598]
#  m2lnL     =      24411540.852412 (      24411541 )
#  Fit parameters:
#     0.0624839816873096    -4.05059969893824      
#----------------------------------------------------------------------------------
# Restarting:              no
#  - absolute improvement: 0                     
#  - relative improvement: 0                     
#  - m2lnL_1  :            24411540.852412       
#  - m2lnL_2  :            24411540.852412       


In [8]:
fit.log_info()

#----------------------------------------------------------------------------------
# Data used:
#   - Cluster abundance unbinned
#----------------------------------------------------------------------------------
# Model[03000]:
#   - NcHICosmo : XCDM - Constant EOS
#----------------------------------------------------------------------------------
# Model parameters
#   -         H0[00]:  67.74               [FIXED]
#   -     Omegac[01]:  0.0624839816873096  [FREE]
#   -     Omegak[02]:  0                   [FIXED]
#   -    Tgamma0[03]:  2.7245              [FIXED]
#   -         Yp[04]:  0.24                [FIXED]
#   -       ENnu[05]:  3.046               [FIXED]
#   -     Omegab[06]:  0.0486              [FIXED]
#   -          w[07]: -4.05059969893824    [FREE]
#----------------------------------------------------------------------------------
# Model[05000]:
#   - NcHIPrim : Power Law model for primordial spectra
#------------------------------------------------------------------

In [None]:
N_WALKERS = 1200
N_RUN = 500
  
Ncm.func_eval_set_max_threads(2)
Ncm.func_eval_log_pool_stats()

init_sampler = Ncm.MSetTransKernGauss.new(0)
init_sampler.set_mset(mset)
init_sampler.set_prior_from_mset()
init_sampler.set_cov_from_rescale(1.0)

apes = Ncm.FitESMCMCWalkerAPES.new(N_WALKERS, mset.fparams_len())

esmcmc = Ncm.FitESMCMC.new(fit, N_WALKERS, init_sampler, apes, Ncm.FitRunMsgs.FULL)
esmcmc.set_nthreads(2)
esmcmc.set_data_file('test1_mcmc')

esmcmc.start_run()
esmcmc.run(N_RUN)  
esmcmc.end_run()

esmcmc.mean_covar()

# NcmThreadPool:Unused:      0
# NcmThreadPool:Max Unused:  2
# NcmThreadPool:Running:     0
# NcmThreadPool:Unprocessed: 0
# NcmThreadPool:Unused:      2
#----------------------------------------------------------------------------------
# NcmFitESMCMC: Starting Ensemble Sampler Markov Chain Monte Carlo.
#   Number of walkers: 1200.
#   Number of threads: 0002.
#   Using MPI:         no - use MPI enabled but no slaves available.
#----------------------------------------------------------------------------------
# Data used:
#   - Cluster abundance unbinned
#----------------------------------------------------------------------------------
# NcmFitESMCMC: Model set:
#----------------------------------------------------------------------------------
# Model[03000]:
#   - NcHICosmo : XCDM - Constant EOS
#----------------------------------------------------------------------------------
# Model parameters
#   -         H0[00]:  67.74               [FIXED]
#   -     Omegac[01]:  0.06248398

In [None]:
# experiment = Ncm.ObjDictStr()
# experiment.set("likelihood", likelihood)
# experiment.set("model-set", mset)

In [None]:
# ser = Ncm.Serialize.new(Ncm.SerializeOpt.CLEAN_DUP)
# ser.dict_str_to_yaml_file(experiment, "test2.yaml")
# ser.array_to_yaml_file(mfunc_oa, "test2.functions.yaml")

In [None]:
## Halo catalog m200c --------------------------------------------------------------------------------------------
# DC2_halos_m200c = fits.open(
#     "/global/cfs/projectdirs/lsst/groups/CL/cosmoDC2_v1.1.4/extragal/full/halos/halos_m200c_13.0.fits"
# )
# dt_halos = Table(DC2_halos_m200c[1].data)
# # dt_halos['redshift_true', 'mass_fof', 'm200c', 'richness']



## Clevar catalog ------------------------------------------------------------------------------------------------

# min_richness = 5
# min_halo_mass = 1e13 #Msun

# #Halos/Clusters
# #SkySim + RedMaPPer: 'skysim5000_v1.1.1_redmapper_v0.8.5'
# RM_cat = ClCatalog.read('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/SSData/RM_cat_richness_min_'+str(min_richness)+'.fits', name='RM_cat', full=True, tags={'mass':'richness'})

# #SkySim: 'skysim5000_v1.1.1_image'
# halo_cat = ClCatalog.read('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/SSData/halo_cat_mass_min_'+f"{min_halo_mass:1.2e}"+'.fits', name='halo_cat', full=True, tags={'mass':'mass_fof'})

# #Members
# RM_cat.read_members('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/SSData/RM_cat_richness_min_'+str(min_richness)+'_members.fits',  full=True)
# halo_cat.read_members('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/SSData/halo_cat_mass_min_'+f"{min_halo_mass:1.2e}"+'_members.fits', full=True)

# #Match catalogs
# mt = MembershipMatch()

# mt.load_matches(RM_cat, halo_cat, out_dir='/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/SSData/SS_RM_match_cross_membership')

# RM_cat_bij, halo_cat_bij = get_matched_pairs(RM_cat, halo_cat, 'cross')

# clusters_dt = Table([halo_cat_bij['mass_fof'], RM_cat_bij['richness'], RM_cat_bij['z']], names=('m200c', 'richness', 'redshift_true'))

# dt_halos = clusters_dt

In [None]:
# nwalkers = 1000
# burnin = 40

# ntests = 100

# mcat = Ncm.MSetCatalog.new_from_file_ro("richness_cut/asc_rmin_"+str(rich_cut)+".fits", nwalkers * burnin)

# mcat.log_current_chain_stats()
# mcat.calc_max_ess_time(ntests, Ncm.FitRunMsgs.SIMPLE)
# mcat.calc_heidel_diag(ntests, 0.0, Ncm.FitRunMsgs.SIMPLE)

# # mset.pretty_log()
# # mcat.log_full_covar()
# mcat.log_current_stats()

# be2, post_lnnorm_sd = mcat.get_post_lnnorm()
# lnevol, glnvol = mcat.get_post_lnvol(0.6827)

# Ncm.cfg_msg_sepa()
# print(
#     "# Bayesian evidence:                                 % 22.15g +/- % 22.15g"
#     % (be2, post_lnnorm_sd)
# )
# print("# 1 sigma posterior volume:                          % 22.15g" % lnevol)
# print("# 1 sigma posterior volume (Gaussian approximation): % 22.15g" % glnvol)

In [None]:
# min_richness = 5
# min_halo_mass = 1e13 #Msun

# #Halos/Clusters
# #CosmoDC2 + RedMaPPer: 'cosmoDC2_v1.1.4_redmapper_v0.8.1'
# RM_cat = ClCatalog.read('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/CDC2Data/RM_cat_richness_min_'+str(min_richness)+'.fits', name='RM_cat', full=True, tags={'mass':'richness'})

# #CosmoDC2: 'cosmoDC2_v1.1.4_image'
# halo_cat = ClCatalog.read('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/CDC2Data/halo_cat_mass_min_'+f"{min_halo_mass:1.2e}"+'.fits', name='halo_cat', full=True, tags={'mass':'mass_fof'})

# #Members
# RM_cat.read_members('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/CDC2Data/RM_cat_richness_min_'+str(min_richness)+'_members.fits',  full=True)
# halo_cat.read_members('/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/CDC2Data/halo_cat_mass_min_'+f"{min_halo_mass:1.2e}"+'_members.fits', full=True)

# #Match catalogs
# mt = MembershipMatch()

# mt.load_matches(RM_cat, halo_cat, out_dir='/global/homes/c/cinlima/NumCosmo/notebooks/richness_proxy/CDC2Data/cosmoDC2_RM_match_cross_membership')

# #Matched Data
# RM_cat_bij, halo_cat_bij = get_matched_pairs(RM_cat, halo_cat, 'cross')

# cdc2_dt = Table([halo_cat_bij['mass_fof'], RM_cat_bij['richness'], RM_cat_bij['z']], names=('m200c', 'richness', 'redshift_true'))
# dt_halos = cdc2_dt


# fit = catalog_fit(dt_halos, ascaso=False)


In [None]:
# nwalkers = 1500

# Ncm.func_eval_set_max_threads(2)
# Ncm.func_eval_log_pool_stats()

# init_sampler = Ncm.MSetTransKernGauss.new(0)
# init_sampler.set_mset(mset)
# init_sampler.set_prior_from_mset()
# init_sampler.set_cov_from_rescale(1.0)

# apes = Ncm.FitESMCMCWalkerAPES.new(nwalkers, mset.fparams_len())

# esmcmc = Ncm.FitESMCMC.new(fit, nwalkers, init_sampler, apes, Ncm.FitRunMsgs.FULL)
# esmcmc.set_nthreads(2)
# esmcmc.set_data_file("mcmc2.fits")

# esmcmc.start_run()
# esmcmc.run(1000)  
# esmcmc.end_run()

# esmcmc.mean_covar()


In [None]:
# fit.log_covar()

In [None]:
# data_fit_full = pd.DataFrame(fits.open("mcmc2.fits")[1].data).iloc[:, 1:14].T
# data_fit_void = np.array(data_fit_full)
# data_fit = []
# for item in data_fit_void:
#     arr= np.array(item)
#     data_fit.append(np.asarray(arr.tolist()))

# names = [
#     '1',
#     '2',
#     '3',
#     '4',
#     '5',
#     '6',
#     '7',
#     '8',
#     '9',
#     '10',
#     '11',
#     '12',
#     '13'
# ]
# labels=["\mu_0", "\mu_{M1}", "\mu_{M2}", "\mu_{Z1}"," \mu_{Z2}", "\mu_{MZ}", "\sigma_0", "\sigma_{M1}", "\sigma_{M2}", "\sigma_{Z1}", "\sigma_{Z2}", "\sigma_{MZ}","A_0" ]
# settings = {
#     "mult_bias_correction_order": 0,
#     "smooth_scale_2D": 3,
#     "smooth_scale_1D": 3,
#     "boundary_correction_order": 0,
# }
# samples3 = MCSamples(samples=data_fit, names=names, labels=labels, settings=settings)
# samples3.removeBurn(0.3)

In [None]:
# # Triangle plot
# g2 = plots.get_subplot_plotter()

# g2.settings.lab_fontsize = 40
# g2.settings.legend_fontsize = 40
# g2.settings.axes_fontsize = 30

# g2.triangle_plot(
#     [samples3],
#     filled=True,
#     contour_ls="-",
#     contour_lws=1,
#     fine_bins=1,
#     colors=['#b186f1'],
#     fontsize=70,
#     line_args=[{'lw':1.2,'ls':'-', 'color':'#b186f1'}],
#     legend_labels=["Estendido", ],
# )
# plt.savefig('mcmc2.png')
# plt.show()


In [None]:
# for i in range(1, 14):
#     display(Math(samples3.getInlineLatex(str(i),limit=1)))


In [None]:
# Mset Catalog functions

# N_WALKERS = 1200
# N_RUN = 100
# BURNIN = 50

# MODEL = 'ext'
# FILE_NAME1 = "mass_richness_cut/"+MODEL+"_rmin_"+str(20)+"_mmin_"+str(1e14)+".fits"    
# mcat1 = Ncm.MSetCatalog.new_from_file_ro(FILE_NAME1, N_WALKERS * BURNIN)
# mset1 = mcat1.get_mset()
# muM2 = mset1.fparam_get(2)

# bla = mcat1.peek_pstats()
# print(bla.get_sd(3))
# bla.peek_mean().log_vals('blabla', '%.2f', )

# mcat1.log_full_covar()  
# pi = mcat1.fparam_get_pi_by_name("muM2")
# print(pi)
# mum2 = mset.param_get(pi.mid, pi.pid)

# print(mcat1.peek_current_e_var().dup_array())

# print(mcat1.peek_pstats().get_sd(4))

# mcmc_sd = [mcat1.peek_pstats().get_sd(3) for in range (13) 
# mcat1_bestfit = mcat1.get_bestfit_row().dup_array()
# print(mcat1_bestfit)
# print(mcat1.get_mean().dup_array())
# print(mcat1_bestfit[4], mset1.fparam_get(3))
# mcat1.calc_ci_direct(0.9545)



In [None]:
# rich_cut_list = [5,20,50]

# bef = pd.DataFrame(data = {'Min_Richness': [], 'BEQ': [], 'BEQ Err': [], 'BEL': [], 'BEL Err': [], 'BF': []})

# for rcut in rich_cut_list:
    
#     N_WALKERS = 1200
#     N_RUN = 100
#     BURNIN = 50
    
#     RICH_CUT = rcut
    
#     MODEL = 'ext'
#     FILE_NAME1 = "richness_cut/"+MODEL+"_rmin_"+str(RICH_CUT)+".fits"

#     MODEL2 = 'asc'
#     FILE_NAME2 = "richness_cut/"+MODEL2+"_rmin_"+str(RICH_CUT)+".fits"
    
#     mcat1 = Ncm.MSetCatalog.new_from_file_ro(FILE_NAME1, N_WALKERS * BURNIN)
#     be1, post_lnnorm_sd1 = mcat1.get_post_lnnorm()
#     # lnevol1, glnvol1 = mcat1.get_post_lnvol(0.6827)

#     mcat2 = Ncm.MSetCatalog.new_from_file_ro(FILE_NAME2, N_WALKERS * BURNIN)
#     be2, post_lnnorm_sd2 = mcat2.get_post_lnnorm()
#     # lnevol2, glnvol2 = mcat2.get_post_lnvol(0.6827)

#     bf = np.exp(be1 - be2)
#     bef = pd.concat([bef, pd.DataFrame([{'Min_Richness': rcut, 'BEQ': be1, 'BEQ Err': post_lnnorm_sd1, 'BEL': be2, 'BEL Err':post_lnnorm_sd2, 'BF': bf}])], ignore_index=True)


### Ext (z) Fit Params


Here we do the fitting of quadratic model parameters using training data in **data_train**. Dependence on $z$ is considered:

\begin{equation}\label{M_R_3}
\begin{split}
    \left<\ln R | M, z \right> = \mu_0 + 
    \mu_{M1} \ln\left( \frac{M}{M_0} \right) +
    \mu_{M2} (\ln\left(\frac{M}{M_0} \right))^2 +\\
    + \mu_{z1} \left(\frac{ z}{z_0}\right) +
    \mu_{z2} \left(\frac{z}{z_0}\right)^2 +
    \mu_{Mz} \left(\frac{z}{z_0}\right)\ln\left(\frac{M}{M_0} \right);
\end{split}
\end{equation}

$$\textbf{p}_{2} = \{\mu_0, \mu_{M1}, \mu_{M2}, \mu_{Z1}, \mu_{Z2}, \mu_{MZ}, \sigma_0, \sigma_{M1}, \sigma_{M2}, \sigma_{Z1}, \sigma_{Z2}, \sigma_{MZ} \}.$$

In [None]:
# lnR_mean_ext_z, lnR_std_ext_z, ext_z, data_train, data_test = fm.model_fit('ext_z', training = True)