In [None]:
import tables_io

from rail.evaluation.dist_to_dist_evaluator import DistToDistEvaluator
from rail.evaluation.dist_to_point_evaluator import DistToPointEvaluator
from rail.evaluation.point_to_point_evaluator import PointToPointEvaluator
from rail.core.stage import RailStage
from rail.core.data import QPHandle, TableHandle

# Shared data store exposed through RailStage; every handle created below is
# registered in it.
DS = RailStage.data_store
# The flag is set on the store's class rather than on the instance
# (presumably so re-running a cell can re-register an existing key -- confirm).
type(DS).allow_overwrite = True

# Load example Data

In [None]:
import os
import subprocess
from rail.core.utils import find_rail_file

# Reuse the example p(z) file if an earlier run already placed it next to the
# notebook; otherwise download it from the NERSC portal into that location.
possible_local_file = './examples_data/evaluation_data/data/output_fzboost.hdf5'
if os.path.exists(possible_local_file):
    pdfs_file = os.path.abspath(possible_local_file)
else:
    pdfs_file = 'examples_data/evaluation_data/data/output_fzboost.hdf5'
    # exist_ok=True replaces the manual try/except FileExistsError.
    os.makedirs(os.path.dirname(pdfs_file), exist_ok=True)
    download_cmd = [
        'curl',
        '--fail',  # treat HTTP errors (e.g. 404) as failures instead of saving the error page
        '-o', pdfs_file,
        'https://portal.nersc.gov/cfs/lsst/PZ/output_fzboost.hdf5',
    ]
    try:
        # Argument list (no shell) plus check=True: a failed download stops the
        # notebook here instead of surfacing later as an unreadable HDF5 file.
        subprocess.run(download_cmd, check=True)
    except subprocess.CalledProcessError:
        # Remove any partial file so the os.path.exists() test above does not
        # mistake it for a valid copy on the next run.
        if os.path.exists(pdfs_file):
            os.remove(pdfs_file)
        raise

# Path to the file holding the true redshifts, looked up via find_rail_file
# (presumably resolved relative to the installed RAIL package -- confirm).
ztrue_file = find_rail_file('examples_data/testdata/test_dc2_validation_9816.hdf5')

In [None]:
# Register both input files in the data store. The keys ('pdfs_data',
# 'ztrue_data') are the tags the handles are stored under.
ensemble = DS.read_file(
    key='pdfs_data',
    handle_class=QPHandle,
    path=pdfs_file,
)
ztrue_data = DS.read_file(
    key='ztrue_data',
    handle_class=TableHandle,
    path=ztrue_file,
)

# Dist to Dist Evaluation

In [None]:
# Approximate run times noted for individual metrics:
#   'cvm' ~3.5 min, 'ad' ~4 min, 'ks' ~2.75 min; 'kld' has not been timed.
# 'ad' is listed for reference only -- it is not among the metrics requested
# below, and no timing is recorded for 'omega'.

# Configuration for the dist-to-dist evaluator stage.
stage_dict = {
    'metrics': ['cvm', 'ks', 'omega', 'kld'],
    '_random_state': None,
}

# Build the evaluator stage named 'dist_to_dist'; all remaining options are
# taken from stage_dict.
dtd_stage = DistToDistEvaluator.make_stage(name='dist_to_dist', **stage_dict)

In [None]:
# The same ensemble handle is passed as both arguments, i.e. the distributions
# are compared against themselves.
dtd_results = dtd_stage.evaluate(ensemble, ensemble)

In [None]:
# Convert the dist-to-dist 'output' handle's data to a pandas DataFrame and
# display it as the cell result.
results_df = tables_io.convertObj(
    dtd_results['output'](),
    tables_io.types.PD_DATAFRAME,
)
results_df

# Dist to Point Evaluation

In [None]:
# Configuration for the dist-to-point evaluator stage: a single metric.
stage_dict = {
    'metrics': ['cdeloss'],
    '_random_state': None,
}
# Build the evaluator stage named 'dist_to_point'; all remaining options are
# taken from stage_dict.
dtp_stage = DistToPointEvaluator.make_stage(name='dist_to_point', **stage_dict)

In [None]:
# Evaluate the p(z) ensemble against the table handle loaded from ztrue_file.
dtp_results = dtp_stage.evaluate(ensemble, ztrue_data)

In [None]:
# Convert the dist-to-point 'output' handle's data to a pandas DataFrame and
# display it as the cell result.
results_df = tables_io.convertObj(
    dtp_results['output'](),
    tables_io.types.PD_DATAFRAME,
)
results_df

# Point to Point Evaluation

In [None]:
# Configuration for the point-to-point evaluator stage.
# 'photometry' and 'zmode' are the same names used further down when the truth
# redshifts and point estimates are pulled out by hand.
stage_dict = {
    'metrics': ['point_stats_ez', 'point_stats_iqr'],
    '_random_state': None,
    'hdf5_groupname': 'photometry',
    'point_estimate_key': 'zmode',
    'chunk_size': 10_000,
}
# Build the evaluator stage named 'point_to_point'; all remaining options are
# taken from stage_dict.
ptp_stage = PointToPointEvaluator.make_stage(name='point_to_point', **stage_dict)

In [None]:
# Evaluate the ensemble against the table handle loaded from ztrue_file; the
# result is indexed by 'output' and 'summary' in the following cells.
ptp_results = ptp_stage.evaluate(ensemble, ztrue_data)

In [None]:
# Convert the point-to-point 'output' handle's data to a pandas DataFrame.
results_df = tables_io.convertObj(
    ptp_results['output'](),
    tables_io.types.PD_DATAFRAME,
)

In [None]:
# Display the DataFrame built from ptp_results['output'].
results_df

In [None]:
# Convert the point-to-point 'summary' handle's data to a pandas DataFrame.
results_summary = tables_io.convertObj(
    ptp_results['summary'](),
    tables_io.types.PD_DATAFRAME,
)

In [None]:
# Display the DataFrame built from ptp_results['summary'].
results_summary

In [None]:
import numpy as np

# Pull the raw values out of the handles for a by-hand comparison.
# np.squeeze drops any length-1 axes from the stored 'zmode' array.
estimates = np.squeeze(ensemble().ancil['zmode'])
# True redshifts: the 'redshift' column of the 'photometry' group.
truth = ztrue_data()['photometry']['redshift']

In [None]:
import qp

In [None]:
# Evaluate qp's PointSigmaIQR directly on the extracted arrays (presumably as a
# cross-check of the 'point_stats_iqr' metric requested from the stage).
check_iqr = (
    qp.metrics.point_estimate_metric_classes
    .PointSigmaIQR()
    .evaluate(estimates, truth)
)

In [None]:
# Display the value returned by PointSigmaIQR().evaluate(estimates, truth).
check_iqr

In [None]:
# Residual between estimate and truth, scaled by (1 + truth).
# NOTE(review): vv is not used by any later cell shown here.
vv = (estimates - truth) / (1.0 + truth)

In [None]:
# Overall pipeline inputs, keyed by the same tags ('pdfs_data', 'ztrue_data')
# under which the files were registered in the data store.
# Reuse the paths resolved when the data was loaded instead of retyping them:
# the previously hard-coded truth path lacked the 'testdata/' component and so
# did not match the file actually read via find_rail_file.
# NOTE(review): ztrue_file may be an absolute path, which would then also be
# written into the saved pipeline YAML -- confirm that is acceptable.
inputs = {
    'pdfs_data': pdfs_file,
    'ztrue_data': ztrue_file,
}
# File names for the evaluator's two result tags.
# NOTE(review): this dict is not passed to any call in the cells shown here.
outputs = {
    'output': 'output.hdf5',
    'summary': 'summary.hdf5',
}

In [None]:
from rail.core import RailPipeline

In [None]:
# Create a pipeline object; the evaluator stage is attached to it next.
pipe = RailPipeline()

In [None]:
# Attach the point-to-point evaluator stage built earlier in the notebook.
pipe.add_stage(ptp_stage)

In [None]:
# Initialise the pipeline with the input-file mapping. Output and log
# directories are both the current directory, resume is off, and no per-stage
# configuration is supplied.
pipe.initialize(
    overall_inputs=inputs,
    run_config={'output_dir': '.', 'log_dir': '.', 'resume': False},
    stages_config=None,
)

In [None]:
# Save the pipeline to 'eval_pipe.yaml' (a relative path, so it lands in the
# current working directory).
pipe.save('eval_pipe.yaml')