## FKP Weight Adjustment for Redshift Catastrophics

This notebook focuses on adjusting FKP weights in DESI mock data for different tracer, addressing redshift catastrophic types (`Z_failures`).
- Calculation of FKP weight adjustments by `FKPupdate` for redshift discrepancies in catastrophic cases
- Optimized implementation with `FKPupdate_opt` for optimization version
- Comparison of FKP weight outputs across standard and optimized approaches

In [1]:
import os
import glob
import time
import fitsio
import argparse
import numpy as np
from astropy.io import fits
from astropy.table import Table
from matplotlib import pyplot as plt
from scipy.spatial import cKDTree

plt.rcParams['axes.labelsize'] = 14
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['legend.fontsize'] = 14
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
%load_ext line_profiler

In [2]:
# from mpytools import Catalog

In [3]:
c = 299792 # speed of light in km/s
P0_values = {'QSO': 6000, 'LRG': 10000, 'ELG_LOPnotqso': 4000, 'BGS': 7000}
NRAN_values = {'ELG_LOPnotqso':10, 'LRG':8, 'QSO':4}

In [4]:
def NXnorm(catalog_fn, nz_fn):
    #Calculate the NXnorm for the catalog, norm = NX/n(z)
    catalog = Table(fitsio.read(catalog_fn)) 
    nz      = np.loadtxt(nz_fn)
    tree    = cKDTree(nz[:, 0].reshape(-1, 1))
    _, ind_rawNZ = tree.query(catalog['Z'].reshape(-1, 1))
    norm         = catalog['NX']/nz[ind_rawNZ,3]
    return norm

def FKPupdate_opt(tracer, catalog_fn, nz_fn, catas_type, norm):
    P0 = P0_values.get(tracer, None)
    NRAN = NRAN_values.get(tracer, None)
    T0 = time.time()
    catalog=Table(fitsio.read(catalog_fn))
    # Check for Z_{catas_type} existence
    if f'Z_{catas_type}' not in catalog.colnames:
        raise ValueError(f"Invalid Zcatas type: '{catas_type}'.")
    catalog[f'FKP_{catas_type}'] = catalog['WEIGHT_FKP'].copy()
    # Load the nz_catas and create the cKDTree
    nz = np.loadtxt(nz_fn)
    tree = cKDTree(nz[:, 0].reshape(-1, 1)) 
    # caluclate the completeness rescaling of nz for FKP weight
    dv = (catalog[f'Z_{catas_type}'] - catalog['Z']) / (1 + catalog['Z']) * c
    dz = catalog[f'Z_{catas_type}'] - catalog['Z']
    tmp      = np.argsort(catalog['RA'], kind='mergesort')
    # tmp      = np.argsort(catalog,order=['RA', 'DEC'])
    catalog  = catalog[tmp]
    norm     = norm[tmp]
    dv       = dv[tmp]
    NX       = catalog['NX'].copy()
    norm[norm==0] = np.nan
    print('there are {} samples to find new FKP'.format(np.sum((dv!=0)&(np.isnan(norm)))))
    for ID in np.where((dv!=0)&(np.isnan(norm)))[0]:
        if (2<ID)&(ID<len(catalog)-2):
            norm[ID] = np.nanmedian(norm[[ID-2,ID-1,ID+1,ID+2]])
        elif ID<2:
            norm[ID] = np.nanmedian(norm[[ID+1,ID+2]])
        elif ID>len(catalog)-2:
            norm[ID] = np.nanmedian(norm[[ID-2,ID-1]])
        # update NX for norm ==0
        # _, ind_ID = tree.query(catalog[f'Z_{catas_type}'][ID].reshape(-1, 1))
        ind_ID = np.argmin(abs(catalog[f'Z_{catas_type}'][ID]-nz[:,0]))
        NX[ID] = norm[ID]*nz[ind_ID,3]
    # update NX and WEIGHT_FKP columns for all catastrophics
    sel = dv != 0
    _, ind_newNZ = tree.query(catalog[f'Z_{catas_type}'][sel].reshape(-1, 1))
    NX[sel] = norm[sel] * nz[ind_newNZ, 3]
    catalog[f'FKP_{catas_type}'][sel] = 1 / (NX[sel] * P0 + 1)
    catalog[f'FKP_{catas_type}'][np.isnan(catalog[f'FKP_{catas_type}'])] = 1
    print('implement {} catastrophophics took time: {:.2f}s'.format(catas_type, time.time()-T0))
    # catalog.write(catalog_fn, overwrite=True)
    print(f'{catas_type} catastrophics FKP corrected')
    return(catalog[f'FKP_{catas_type}'])


In [5]:
mock_fn   = '/pscratch/sd/s/shengyu/mocks/Y1/Abacus_v4_2/altmtl0/iron/mock0/LSScats/'

catas_type = 'failures'

catalog_fn = './example_files/ELG_LOPnotqso_NGC_0_clustering.ran.fits'
nz_fn = './example_files/ELG_LOPnotqso_NGC_nz.txt'
nz_catas_fn = f'./example_files/ELG_LOPnotqso_NGC_nz_{catas_type}.txt'

# catalog_fn = mock_fn+'./ELG_LOPnotqso_NGC_3_clustering.ran.fits'
# nz_fn = mock_fn+'./ELG_LOPnotqso_NGC_nz.txt'
# nz_catas_fn = mock_fn+f'./ELG_LOPnotqso_NGC_nz_{catas_type}.txt'

norm = NXnorm(catalog_fn, nz_fn)
# a=FKPupdate_opt('ELG_LOPnotqso', catalog_fn, nz_catas_fn, catas_type, norm)

In [6]:
# test the cKDTree and the nz
catalog = Table(fitsio.read(catalog_fn)) 
nz_catas= np.loadtxt(nz_catas_fn)
tree    = cKDTree(nz_catas[:, 0].reshape(-1, 1))
_, ind_rawNZ = tree.query(catalog[f'Z_{catas_type}'].reshape(-1, 1))

index = np.random.randint(0, len(ind_rawNZ), size=1)
print('Z_write:',catalog[f'Z_{catas_type}'][index][0])
print('Z_in_nz:',nz_catas[ind_rawNZ,0][index][0])
print(f'nz_{catas_type}:',nz_catas[ind_rawNZ,3][index][0])
# plt.scatter(catalog[f'Z'], norm, lw=0.001)

Z_write: 1.5484557
Z_in_nz: 1.5449999570846558
nz_failures: 0.0002415715837234574


In [8]:
# find the FKP difference galaxy index
catalog = Table(fitsio.read(catalog_fn))
FKP_diff_indix = np.nonzero(catalog['WEIGHT_FKP']-catalog[f'FKP_{catas_type}'])
print(FKP_diff_indix[0])

[     27     130     420 ... 9825351 9825378 9825602]


In [9]:
#test if the FKP weight is set correctly
index = 591
print('FKP_init:', catalog['WEIGHT_FKP'][index])
print(f'FKP_catas:',catalog[f'FKP_{catas_type}'][index])

P0 = P0_values.get('ELG_LOPnotqso', None)
nz_catas= np.loadtxt(nz_catas_fn)
tree    = cKDTree(nz_catas[:, 0].reshape(-1, 1))
_, ind_rawNZ = tree.query(catalog[f'Z_{catas_type}'][index].reshape(-1, 1))
# print(nz_catas[ind_rawNZ,3])

FKP = 1/(1+P0*nz_catas[ind_rawNZ,3]*norm[index])
print('calculate:', FKP[0])

FKP_init: 0.5736912128468565
FKP_catas: 0.5736912128468565
calculate: 0.5749241528103981


In [10]:
%lprun -f FKPupdate_opt FKPupdate_opt('ELG_LOPnotqso', catalog_fn, nz_catas_fn, catas_type, NXnorm(catalog_fn, nz_catas_fn)) #31262895.0 6841092.0

there are 4240 samples to find new FKP
implement failures catastrophophics took time: 5.84s
failures catastrophics FKP corrected


Timer unit: 1e-09 s

Total time: 5.82878 s
File: /tmp/ipykernel_2300783/2626825290.py
Function: FKPupdate_opt at line 10

Line #      Hits         Time  Per Hit   % Time  Line Contents
    10                                           def FKPupdate_opt(tracer, catalog_fn, nz_fn, catas_type, norm):
    11         1       5491.0   5491.0      0.0      P0 = P0_values.get(tracer, None)
    12         1       1473.0   1473.0      0.0      NRAN = NRAN_values.get(tracer, None)
    13         1       4558.0   4558.0      0.0      T0 = time.time()
    14         1 3626056086.0    4e+09     62.2      catalog=Table(fitsio.read(catalog_fn))
    15                                               # Check for Z_{catas_type} existence
    16         1      21040.0  21040.0      0.0      if f'Z_{catas_type}' not in catalog.colnames:
    17                                                   raise ValueError(f"Invalid Zcatas type: '{catas_type}'.")
    18         1   82557822.0    8e+07      1.4      catalog