In [None]:
import GCRCatalogs
import pandas as pd

In [None]:
import os
# Set the GCR_CONFIG_SOURCE environment variable before querying the catalog
# registry (presumably selects file-based catalog configs -- TODO confirm
# against the GCRCatalogs documentation).
os.environ.update(GCR_CONFIG_SOURCE="files")

# Show the registered catalog names that contain "dc2".
GCRCatalogs.get_available_catalog_names(name_contains="dc2")

In [None]:
# Load the catalog registered under the name 'cosmoDC2_v1.1.4_small'.
dc2sim = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_small')

In [None]:
# Display every native quantity name available in the loaded catalog.
dc2sim.list_all_native_quantities()

In [None]:
# Columns to extract: the observed LSST magnitude in each of the six bands,
# plus the redshift.
lsst_mag_cols = [f"LSST_filters/magnitude:LSST_{b}:observed" for b in "ugrizy"]
dc2sim_relevantcols = lsst_mag_cols + ['baseDC2/redshift']

# Restrict the read to a single healpix pixel via a native filter.
dc2sim_quantities = dc2sim.get_quantities(
    dc2sim_relevantcols,
    native_filters=['healpix_pixel == 9816'],
)

# One DataFrame column per requested quantity.
# (Append `.sample(n=200000)` here to work on a random subset instead.)
dc2sim_df = pd.DataFrame(dc2sim_quantities)

In [None]:
# Display the extracted table (ugrizy magnitudes plus redshift).
dc2sim_df

In [None]:
from rail.core.data import PqHandle
from rail.core.stage import RailStage

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from jax import numpy as jnp

In [None]:
# Shared RAIL data store used by every stage in this notebook.
DS = RailStage.data_store
# Set the class-level `allow_overwrite` flag so that re-running cells can
# replace existing entries (NOTE(review): exact semantics are defined by the
# RAIL data store -- confirm).
setattr(DS.__class__, "allow_overwrite", True)

In [None]:
# Short RAIL-style magnitude column name for each LSST band.
band_dict = {b: f"mag_{b}_lsst" for b in "ugrizy"}

# Map the native catalog column names onto the short names above, and the
# native redshift column onto "redshift".
rename_dict = {
    f"LSST_filters/magnitude:LSST_{b}:observed": mag_col
    for b, mag_col in band_dict.items()
}
rename_dict["baseDC2/redshift"] = "redshift"

# `rename` returns a new frame; `dc2sim_df` itself is left untouched.
data_df = dc2sim_df.rename(columns=rename_dict)
data_df

In [None]:
# Wrap the renamed DataFrame in a PqHandle tagged 'input' so that the degrader
# stages below can take it as their input.
data_truth = PqHandle('input')
data_truth.set_data(data_df)

## Degrader 1 : LSST photometric Error 

In [None]:
from rail.creation.degraders.photometric_errors import LSSTErrorModel

In [None]:
# Configuration of the photometric-error stage: `renameDict` maps each band
# letter to its magnitude column, and `ndFlag` is set to NaN (presumably the
# value written for non-detections -- TODO confirm against the degrader docs).
error_model_config = dict(
    name="error_model",
    renameDict=band_dict,
    ndFlag=np.nan,
)
errorModel_lsst = LSSTErrorModel.make_stage(**error_model_config)

# Calling the stage on the truth handle returns a handle to the degraded sample.
samples_w_errs = errorModel_lsst(data_truth)

In [None]:
# Display the output of the error-model stage.
samples_w_errs()

## Degrader 2 : Quantity Cut (magnitude)

Recall how the sample above has galaxies as dim as magnitude 30. This is well beyond the LSST 5-sigma limiting magnitudes, so it will be useful to apply cuts to the data to filter out these super-dim samples. We can apply these cuts using the `QuantityCut` degrader. This degrader will cut out any samples that do not pass all of the specified cuts.

Let's make and run degraders that first add photometric errors, then cut at i < 25.3, which is the LSST gold sample.

If you look at the i column, you will see there are no longer any samples with i > 25.3. The number of galaxies returned has been nearly cut in half from the input sample and, unlike the LSSTErrorModel degrader, is not equal to the number of input objects.  Users should note that with degraders that remove galaxies from the sample the size of the output sample will not equal that of the input sample.

One more note: it is easy to use the QuantityCut degrader as an SNR cut on the magnitudes. The magnitude equation is $m = -2.5 \log_{10}(f)$. Taking the derivative (and dropping the sign, since only the size of the error matters), we have
$$
dm = \frac{2.5}{\ln(10)} \frac{df}{f} = \frac{2.5}{\ln(10)} \frac{1}{\mathrm{SNR}}.
$$
So if you want to keep only galaxies above a certain SNR, you can make a cut
$$
dm < \frac{2.5}{\ln(10)} \frac{1}{\mathrm{SNR}}.
$$
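For example, an SNR cut on the i band would look like this: `QuantityCut.make_stage(cuts={"mag_i_lsst_err": 2.5/np.log(10) * 1/SNR})` (assuming the i-band error column is named `mag_i_lsst_err`, as the column renaming later in this notebook suggests).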

In [None]:
from rail.creation.degraders.quantityCut import QuantityCut

In [None]:
# Cut value applied to the i-band magnitude column (25.3, the gold-sample
# limit discussed in the text above).
gold_cuts = {"mag_i_lsst": 25.3}
mag_cut = QuantityCut.make_stage(name="cuts", cuts=gold_cuts)

# Apply the cut to the sample that already carries photometric errors.
samples_mag_w_errs = mag_cut(samples_w_errs)

In [None]:
# Display the sample that remains after the magnitude cut.
samples_mag_w_errs()

## Degrader 3 : Inv redshift incompleteness

Next, we will demonstrate the `InvRedshiftIncompleteness` degrader. It applies a selection function, which keeps galaxies with probability $p_{\text{keep}}(z) = \min(1, \frac{z_p}{z})$, where $z_p$ is the "pivot" redshift. We'll use $z_p = 0.8$.

In [None]:
from rail.creation.degraders.spectroscopic_degraders import InvRedshiftIncompleteness

In [None]:
# Selection-function stage with the pivot redshift z_p = 0.8 quoted in the
# text above.
incompleteness_config = {"name": "incompleteness", "pivot_redshift": 0.8}
inv_incomplete = InvRedshiftIncompleteness.make_stage(**incompleteness_config)

# Input is the magnitude-cut sample, so the selections stack.
samples_incomplete_mag_w_errs = inv_incomplete(samples_mag_w_errs)

In [None]:
# Display the sample that remains after the redshift-incompleteness selection.
samples_incomplete_mag_w_errs()

## Degrader 4: LineConfusion

`LineConfusion` is a degrader that simulates spectroscopic errors resulting from the confusion of different emission lines.

For this example, let's use the degrader to simulate a scenario in which 2% of [OII] lines are mistaken for [OIII] lines, and 1% of [OIII] lines are mistaken for [OII] lines. (Note: I do not know how realistic this scenario is!)

In [None]:
from rail.creation.degraders.spectroscopic_degraders import LineConfusion

In [None]:
# Wavelengths of the two emission lines being confused
# (presumably in Angstrom -- TODO confirm the unit LineConfusion expects).
OII, OIII = 3727, 5007

# Stage 1: 2% of [OII] lines are misread as [OIII].
lc_2p_0II_0III = LineConfusion.make_stage(
    name="lc_2p_0II_0III",
    true_wavelen=OII,
    wrong_wavelen=OIII,
    frac_wrong=0.02,
)
# Stage 2: 1% of [OIII] lines are misread as [OII].
lc_1p_0III_0II = LineConfusion.make_stage(
    name="lc_1p_0III_0II",
    true_wavelen=OIII,
    wrong_wavelen=OII,
    frac_wrong=0.01,
)

# Run the two stages back to back: [OII]->[OIII] first, then [OIII]->[OII].
samples_oii_confused = lc_2p_0II_0III(samples_incomplete_mag_w_errs)
samples_conf_inc_mag_w_errs = lc_1p_0III_0II(samples_oii_confused)

In [None]:
# Display the sample after both line-confusion stages.
samples_conf_inc_mag_w_errs()

In [None]:
# Compare the redshift distribution of the sample after each degradation step.
fig, ax = plt.subplots(figsize=(5, 4), dpi=100)

zmin, zmax = 0, 3.1

# Shared histogram options: normalised step histograms on a common binning,
# so that samples of different sizes can be compared directly.
hist_settings = dict(
    bins=50,
    range=(zmin, zmax),
    density=True,
    histtype="step",
)

# (data handle, legend label) pairs, in pipeline order.
labelled_samples = [
    (data_truth, "CosmoDC2 sample"),
    (samples_mag_w_errs, "Mag. cut"),
    (samples_incomplete_mag_w_errs, "Incomplete Mag. Cut"),
    (samples_conf_inc_mag_w_errs, "Confused Incomplete Mag. Cut"),
]
for sample_handle, sample_label in labelled_samples:
    ax.hist(sample_handle()["redshift"], label=sample_label, **hist_settings)

ax.legend(title="Sample")
ax.set(xlim=(zmin, zmax), xlabel="Redshift", ylabel="Galaxy density")
plt.show()

## Try and run BPZ on the catalog

In [None]:
from rail.tools.table_tools import ColumnMapper, TableConverter
import tables_io

# Build the rename map for every column whose name contains "err": the last
# underscore-separated token is moved to second position, e.g.
# "mag_u_lsst_err" -> "mag_err_u_lsst" (the error-column naming used for BPZ
# further down).
rename_dict_bpz = {
    _key: '_'.join([_parts[0], _parts[-1], *_parts[1:-1]])
    for _key, _parts in (
        (_col, _col.split('_'))
        for _col in samples_conf_inc_mag_w_errs.data.keys()
        if "err" in _col
    )
}
rename_dict_bpz

In [None]:
# Stage that renames the error columns according to `rename_dict_bpz`.
remapper_config = dict(name="col_remapper", columns=rename_dict_bpz)
col_remapper = ColumnMapper.make_stage(**remapper_config)

# Stage that converts the table to the "numpyDict" format.
converter_config = dict(name="table_conv", output_format="numpyDict")
table_conv = TableConverter.make_stage(**converter_config)

# Rename first, then convert.
data_colmap = col_remapper(samples_conf_inc_mag_w_errs)
data_bpz = table_conv(data_colmap)

In [None]:
# Convert the BPZ-ready table back to a pandas DataFrame for inspection and
# for drawing the training subset below.
data_bpz_df = tables_io.convertObj(data_bpz.data, tables_io.types.PD_DATAFRAME)
data_bpz_df

In [None]:
# Draw the training subset used to inform BPZ.
# - A fixed seed makes the draw reproducible, so Restart & Run All trains on
#   the same rows every time.
# - The requested size is capped at the number of available rows, because
#   `DataFrame.sample` raises when asked for more rows than exist (without
#   replacement) and the degraders above can shrink the catalog.
TRAIN_SAMPLE_SIZE = 5000
TRAIN_SAMPLE_SEED = 42

train_data_df = data_bpz_df.sample(
    n=min(TRAIN_SAMPLE_SIZE, len(data_bpz_df)),
    random_state=TRAIN_SAMPLE_SEED,
)
train_data_df

In [None]:
# The six LSST bands, in wavelength order.
bands = list("ugrizy")

# For each band build, in one pass, the magnitude column name, the
# magnitude-error column name and the filter name, then split the triples into
# three parallel lists.
lsst_bands, lsst_errs, lsst_filts = (
    list(_names)
    for _names in zip(
        *[
            (f"mag_{band}_lsst", f"mag_err_{band}_lsst", f"DC2LSST_{band}")
            for band in bands
        ]
    )
)
print(lsst_bands)
print(lsst_filts)

In [None]:
from rail.core.data import TableHandle, ModelHandle
from rail.estimation.algos.bpz_lite import BPZliteInformer, BPZliteEstimator

# Root of the local RAIL source checkout.
# NOTE(review): this is a user-specific absolute path; anyone else running the
# notebook has to point it at their own installation.
RAILDIR = "/global/u2/j/jcheval/rail_base/src"

# Register the training DataFrame in the data store as a TableHandle.
train_data = DS.add_data("train_data", train_data_df, TableHandle)

# Convert the training table to the "numpyDict" format, the same format the
# full catalog was converted to.
train_conv_config = dict(name="table_conv_train", output_format="numpyDict")
table_conv_train = TableConverter.make_stage(**train_conv_config)

train_data_conv = table_conv_train(train_data)

In [None]:
from rail.estimation.algos.bpz_lite import BPZliteInformer, BPZliteEstimator

# Directory holding the RAIL example estimation data. It is derived once from
# RAILDIR so the prior file and both stages' `data_path` always agree, instead
# of repeating the absolute path as a separate literal for each stage.
bpz_data_path = os.path.join(RAILDIR, "rail/examples_data/estimation_data/data")

# Pre-computed prior model shipped with the example data, loaded into the data
# store as a ModelHandle.
cosmospriorfile = os.path.join(bpz_data_path, "COSMOS31_HDFN_prior.pkl")
cosmosprior = DS.read_file("cosmos_prior", ModelHandle, cosmospriorfile)

# SED list name, given without a directory (the full path would be
# <bpz_data_path>/SED/COSMOS_seds.list).
sedfile = "COSMOS_seds.list"

# Settings for the estimator stage. `no_prior=True` is set, matching the
# "BPZ, no prior" label used in the results plot.
cosmos_dict = dict(
    hdf5_groupname="photometry",
    output="bpz_results_DC2_COSMOS_SEDs.hdf5",
    spectra_file=sedfile,
    bands=lsst_bands,
    err_bands=lsst_errs,
    filter_list=lsst_filts,
    prior_band="mag_i_lsst",
    no_prior=True,
)

# Inform step on the training subset; its model is written to "cosmos_bpz.pkl".
inform_bpz = BPZliteInformer.make_stage(
    name="inform_bpz",
    nondetect_val=np.nan,
    spectra_file=sedfile,
    model="cosmos_bpz.pkl",
    hdf5_groupname="",
    data_path=bpz_data_path,
)

inform_bpz.inform(train_data_conv)

# NOTE(review): the estimator is given `cosmosprior` as its model, not the
# model produced by the inform step above -- confirm this is intended.
estimate_bpz = BPZliteEstimator.make_stage(
    name="estimate_bpz_cosmoDC2",
    model=cosmosprior,
    data_path=bpz_data_path,
    **cosmos_dict,
)

In [None]:
# Run the BPZ estimator on the converted (numpyDict) catalog.
bpz_estimated = estimate_bpz.estimate(data_bpz)

In [None]:
# Redshift grid on which the mode of each estimated distribution is looked up:
# 301 points from 0.01 to 3 + 4*0.15 = 3.6, both ends included.
z_grid = np.linspace(0.01, 3.+4.*0.15, 301, endpoint=True)

# Point estimate per object: the mode of its distribution on that grid.
z_phot = bpz_estimated.data.mode(grid=z_grid)
z_phot.shape

In [None]:
# Reference redshifts for the comparison plot. `data_bpz` is the column-remapped,
# converted version of the line-confused sample, so this is the "redshift"
# column as it stands after all degraders were applied.
z_true = data_bpz()['redshift']

In [None]:
import matplotlib.pyplot as plt
# Photometric vs. reference redshift, with the one-to-one line and a
# +/- 0.15 * (1 + z) envelope drawn around it.
f, a = plt.subplots(1,1, figsize=(6,6))
zs = np.linspace(0.01, 3., 100)
envelope = (1+zs)*0.15

a.scatter(
    z_true,
    z_phot,
    alpha=0.1,
    s=2,
    label='BPZ, no prior, LSST filters\nPolletta et al 2007 and BC03 SED template set',
)
a.plot(zs, zs, 'k:')          # one-to-one line
a.plot(zs, zs+envelope, 'k-')  # upper edge of the envelope
a.plot(zs, zs-envelope, 'k-')  # lower edge of the envelope

a.set_xlabel('z_spec')
a.set_ylabel('z_phot')
a.set_xlim(-0.01, 3.1)
a.set_ylim(-0.01, 3.1)
# Equal aspect so that the one-to-one line is drawn at 45 degrees.
a.set_aspect('equal', 'box')
a.grid()
a.legend()

## Save as appropriate input for process_fors2.photoZ

In [None]:
# Rename map for the process_fors2 input: only columns whose name contains both
# "lsst" and "err" are renamed, with the trailing token moved to second
# position, e.g. "mag_u_lsst_err" -> "mag_err_u_lsst".
# (An earlier variant also mapped "redshift" -> "z_spec" and reordered the
# plain magnitude columns; it is intentionally not applied here.)
rerename_dict = {
    _key: '_'.join([_parts[0], _parts[-1], _parts[-3], _parts[-2]])
    for _key, _parts in (
        (_col, _col.split('_'))
        for _col in samples_conf_inc_mag_w_errs.data.keys()
        if "lsst" in _col and "err" in _col
    )
}
rerename_dict

In [None]:
from rail.tools.table_tools import ColumnMapper

# Column-renaming stage for the process_fors2 export, driven by `rerename_dict`.
remapper_proF2_config = {"name": "col_remapper_proF2", "columns": rerename_dict}
col_remapper_proF2 = ColumnMapper.make_stage(**remapper_proF2_config)

# Apply it to the fully degraded sample and display the result.
cat_for_processf2 = col_remapper_proF2(samples_conf_inc_mag_w_errs)
cat_for_processf2()

In [None]:
# Convert the renamed catalog to a pandas DataFrame so that it can be written
# with `DataFrame.to_hdf` below.
lsst_err_df = tables_io.convertObj(cat_for_processf2.data, tables_io.types.PD_DATAFRAME)
lsst_err_df

In [None]:
# Write the final catalog to HDF5 under the 'photometry' key.
# Set the flag to False to re-run the notebook without rewriting the file.
write_catalog = True
if write_catalog:
    lsst_err_df.to_hdf('magszgalaxies_cosmoDC2_gold_hp9816.h5', key='photometry')