In [1]:
# usual imports
import os
import numpy as np
import matplotlib.pyplot as plt
from rail.core.utils import RAILDIR
#from rail.estimation.algos.sompz_version.utils import RAIL_SOMPZ_DIR
from rail.pipelines.estimation.estimate_all import EstimatePipeline
from rail.core import common_params
from rail.pipelines.utils.name_factory import NameFactory, DataType, CatalogType, ModelType, PdfType
import qp
import ceci


Imported rail.hub
Imported rail.astro_tools
Imported rail.core
Imported rail.stages
Imported rail.bpz
Imported rail.cmnn
Imported rail.delight
Failed to import rail.dsps because: No module named 'rail.creation.engines.galaxy_population_components'
Imported rail.flexzboost
Failed to import rail.gpz because: No module named 'rail.estimation.algos.gpz_v1'
Imported rail.pipelines
Failed to import rail.pzflow because: No module named 'rail.estimation.algos.pzflow'
Imported rail.sklearn
Imported rail.som
Attached 10 base classes and 50 fully formed stages to rail.stages


In [2]:
# change to your rail location
# Set the RAIL_SOMPZ_DIR environment variable to point at your own checkout
# instead of editing this cell; the fallback is the original author's path and
# will not exist on other machines.
RAIL_SOMPZ_DIR = os.environ.get(
    "RAIL_SOMPZ_DIR", "/Users/jtm/repositories/LSSTDESC/rail_sompz/src"
)

### Define the bands, magnitude limits, and put them into a dictionary

In [3]:
# Band letters and their magnitude limits, listed in matching order.
bands = 'grizy'
maglims = [27.66, 27.25, 26.6, 26.24, 25.35]

# Key each magnitude limit by the name of its magnitude column.
maglim_dict = {
    f"HSC{letter}_cmodel_dered": depth
    for letter, depth in zip(bands, maglims)
}

### Set the parameters common to all stages of the estimation



In [4]:
# Column names built from the band letters.
mag_columns = [f'HSC{band}_cmodel_dered' for band in bands]  # specify HSC bands
# NOTE(review): the error columns carry no 'HSC' prefix, unlike the magnitude
# columns -- confirm this matches the column names in the input catalogue.
magerr_columns = [f'{band}_cmodel_magerr' for band in bands]  # specify HSC uncertainty bands

# Register the defaults through rail.core.common_params.
common_params.set_param_defaults(
    bands=mag_columns,
    err_bands=magerr_columns,
    nondetect_val=np.nan,
    ref_band='HSCi_cmodel_dered',
    redshift_col='specz_redshift',
    mag_limits=maglim_dict,
    zmax=6.0,  # set zmax to 6 for HSC
)

### Set up the estimate pipeline

In [5]:
# Build the estimation pipeline (EstimatePipeline is imported from
# rail.pipelines.estimation.estimate_all in the first cell).
pipe = EstimatePipeline()

In [6]:
# NameFactory instance; its get_data_dir() is used further down to locate the
# directory holding the estimator model file.
namer = NameFactory()

### Additional config update to specify the place to find the spectroscopic columns for the summarizer

In [7]:
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: Pipeline does not have stage estimate_sompz", i.e. the
# EstimatePipeline instance has no stage of that name. Confirm the correct
# pipeline class / stage name before relying on this cell.
sompz_stage = pipe.estimate_sompz
sompz_stage.config.update(spec_groupname="", redshift_colname='specz_redshift')

AttributeError: Pipeline does not have stage estimate_sompz

### Specify the input model for each stage, and the spec_input file (same as the input_file)

In [None]:
input_file = 'dered_223501_sz_match_pdr3_dud_NONDET.hdf5'

# Directory the name factory reports for estimator models.
model_dir = namer.get_data_dir(DataType.model, ModelType.estimator)

# Pipeline-level inputs: the same catalogue file is passed as both `input`
# and `spec_input`.
input_dict = {
    "model_sompz": os.path.join(model_dir, "model_sompz.pkl"),
    "input": input_file,
    "spec_input": input_file,
}

In [None]:
# Run settings: outputs and logs go to the current directory, resume enabled.
run_config = {"output_dir": ".", "log_dir": ".", "resume": True}

# Third positional argument is passed as None, as in the original call.
pipe_info = pipe.initialize(input_dict, run_config, None)

### Save the pipeline to a yaml file

In [None]:
# File the pipeline definition is written to.
pipeline_yaml = 'tmp_estimate_sompz.yml'
pipe.save(pipeline_yaml)

[For NERSC users!]

This won't work on jupyter.nersc.gov. For a test run, you need to:
1. Add "name: local" to the "site" section of 'tmp_estimate_sompz.yml' (the file saved above)
2. ssh into perlmutter.nersc.gov, set up the rail environment, and run "ceci tmp_estimate_sompz.yml"

In [None]:
import ceci

# Read back the pipeline definition written by pipe.save() earlier in this
# notebook. That file is 'tmp_estimate_sompz.yml'; the previous name
# 'tmp_estimate_all.yml' is not produced anywhere in this notebook.
pr = ceci.Pipeline.read('tmp_estimate_sompz.yml')

In [None]:
# Execute the pipeline that was read back from the YAML file.
pr.run()

# display $p(z)$ of the point estimators

### starting with simple NN

In [None]:
# Path of the simple-NN output file to load.
infile = './pdf/pz/output_simplenn.hdf5'

# Load the file with qp; the resulting object's `ancil['zmode']` entry is
# plotted in the following cells.
qp_ensemble_simplenn = qp.read(infile)


In [None]:
# Histogram of the 'zmode' values stored in the ensemble's ancillary data.
fig, ax = plt.subplots()
ax.hist(qp_ensemble_simplenn.ancil['zmode'], bins=40)
ax.set_xlabel('z')
plt.show()

### Now read the spectroscopic data

In [2]:
import tables_io

In [3]:
# Read the catalogue that holds the spectroscopic redshifts (same file name as
# `input_file` used for the pipeline inputs). The file must be present in the
# working directory; the recorded run failed with FileNotFoundError.
sz_data = tables_io.read("dered_223501_sz_match_pdr3_dud_NONDET.hdf5")

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'dered_223501_sz_match_pdr3_dud_NONDET.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
# Spectroscopic redshift column (the same column name that is set as
# `redshift_col` in the common parameters).
specz = sz_data['specz_redshift']

Compare the mode of the photo-z PDF with the spectroscopic redshift

In [None]:
# Photo-z mode against spectroscopic redshift, with the one-to-one line.
plt.figure(figsize=(6, 6))
plt.scatter(specz, qp_ensemble_simplenn.ancil['zmode'], s=0.01)
plt.xlabel('Spec z')
# The plotted modes come from the simple-NN ensemble (output_simplenn.hdf5),
# so the axis is labelled accordingly rather than as SOMPZ.
plt.ylabel('Simple NN Mode')
plt.xlim(0, 3)
plt.ylim(0, 3)
# Dashed diagonal marking photo-z == spec-z.
plt.plot([-1, 10], [-1, 10], '--', color='black')
# Render the figure explicitly, as the histogram cell does, instead of leaving
# the Line2D repr as the cell output.
plt.show()

## display $n(z)$

In [None]:
# directly reading the hdf5 files with qp

# Load the single-n(z) estimate file into a qp object.
qp_single_nz_sompz = qp.read('./single_NZ_estimate_simplesom.hdf5')

In [None]:
# Uniform redshift grid: 600 points from z = 0 to z = 6.
z_grid = np.linspace(0, 6, 600)

# Evaluate the loaded n(z) on the grid. The object read with qp is named
# `qp_single_nz_sompz`; the previous reference to `qp_single_nz_som` pointed at
# a name that is never defined and raised NameError.
nz_sompz_grid = qp_single_nz_sompz.pdf(z_grid)

In [None]:
# Some of the spectroscopic measurements failed and are stored as z=-99; they
# are excluded by keeping only entries with strictly positive redshift.
has_valid_specz = specz > 0.0
specz_good = specz[has_valid_specz]

### Make a plot to compare the SOMPZ summarizer with the true n(z)

In [None]:
# Compare the SOMPZ n(z) with the normalized spec-z histogram.
plt.figure()
# The gridded n(z) array is named `nz_sompz_grid`; the previous reference to
# `nz_som_grid` pointed at a name that is never defined and raised NameError.
# Index 0 selects the first row -- presumably the only one for a single-n(z)
# estimate; TODO confirm.
plt.plot(z_grid, nz_sompz_grid[0], label='SOMPZ')
plt.hist(specz_good, density=True, bins=600, histtype='step', label='True (Spec-z)')
plt.xlim(-0.1, 3)
plt.ylim(0, 1.2)
plt.xlabel('z')
plt.ylabel('n(z)')
plt.legend()
# Render the figure explicitly instead of leaving the Legend repr as output.
plt.show()