In [1]:
import tables_io

from rail.evaluation.dist_to_dist_evaluator import DistToDistEvaluator
from rail.evaluation.dist_to_point_evaluator import DistToPointEvaluator
from rail.evaluation.point_to_point_evaluator import PointToPointEvaluator
from rail.core.stage import RailStage
from rail.core.data import QPHandle, TableHandle

# Shorthand for the data store exposed as a class-level attribute of RailStage.
DS = RailStage.data_store
# The flag is set on the store's class, so it applies to every instance of that class.
# NOTE(review): presumably this lets re-run cells replace existing keys — confirm.
DS.__class__.allow_overwrite = True

# Load Example Data

In [2]:
import os
from rail.core.utils import find_rail_file

# Locate the example PDF ensemble file, downloading it with curl when it is
# not already present under ./examples_data.
possible_local_file = './examples_data/evaluation_data/data/output_fzboost.hdf5'
if os.path.exists(possible_local_file):
    pdfs_file = os.path.abspath(possible_local_file)
else:
    pdfs_file = 'examples_data/evaluation_data/data/output_fzboost.hdf5'
    # exist_ok avoids the try/except dance when the directory is already there.
    os.makedirs(os.path.dirname(pdfs_file), exist_ok=True)
    # --fail makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as if it were the HDF5 file.
    curl_com = f"curl --fail -o {pdfs_file} https://portal.nersc.gov/cfs/lsst/PZ/output_fzboost.hdf5"
    download_status = os.system(curl_com)
    if download_status != 0:
        # Remove any partial file so the next run does not mistake it for a
        # complete download in the os.path.exists() check above.
        if os.path.exists(pdfs_file):
            os.remove(pdfs_file)
        raise RuntimeError(
            f"Download of {pdfs_file} failed (os.system returned {download_status})"
        )

# Path to the validation table used as the reference ("truth") data below.
ztrue_file = find_rail_file('examples_data/testdata/test_dc2_validation_9816.hdf5')

In [3]:
# Register both input files with the data store; each call returns a handle
# that is passed to the evaluator stages below.
ensemble = DS.read_file(
    key='pdfs_data',
    handle_class=QPHandle,
    path=pdfs_file,
)
ztrue_data = DS.read_file(
    key='ztrue_data',
    handle_class=TableHandle,
    path=ztrue_file,
)

# Dist to Dist Evaluation

In [4]:
# Approximate runtimes noted by the author:
#   'cvm' - about 3.5 minutes
#   'ad'  - about 4 minutes (not requested below)
#   'ks'  - about 2.75 minutes
#   'kld' - not yet measured
# NOTE(review): 'omega' is requested, but the recorded results table only shows
# cvm/kld/ks columns — confirm that it is a supported metric name.

stage_dict = {
    'metrics': ['cvm', 'ks', 'omega', 'kld'],
    '_random_state': None,
}

dtd_stage = DistToDistEvaluator.make_stage(name='dist_to_dist', **stage_dict)

In [5]:
# Run the dist-to-dist stage, passing the same ensemble as both inputs.
dtd_results = dtd_stage.evaluate(ensemble, ensemble)

Requested metrics: ['cvm', 'ks', 'omega', 'kld']
Processing 0 running evaluator on chunk 0 - 1000.
Inserting handle into data store.  output_dist_to_dist: inprogress_output_dist_to_dist.hdf5, dist_to_dist
Processing 0 running evaluator on chunk 1000 - 2000.
Processing 0 running evaluator on chunk 2000 - 3000.
Processing 0 running evaluator on chunk 3000 - 4000.
Processing 0 running evaluator on chunk 4000 - 5000.
Processing 0 running evaluator on chunk 5000 - 6000.
Processing 0 running evaluator on chunk 6000 - 7000.
Processing 0 running evaluator on chunk 7000 - 8000.
Processing 0 running evaluator on chunk 8000 - 9000.
Processing 0 running evaluator on chunk 9000 - 10000.
Processing 0 running evaluator on chunk 10000 - 11000.
Processing 0 running evaluator on chunk 11000 - 12000.
Processing 0 running evaluator on chunk 12000 - 13000.
Processing 0 running evaluator on chunk 13000 - 14000.
Processing 0 running evaluator on chunk 14000 - 15000.
Processing 0 running evaluator on chunk 15

In [6]:
# Pull the data out of the stage's 'output' handle and convert it to a pandas
# DataFrame; the bare name on the last line displays it.
results_df = tables_io.convertObj(dtd_results['output'](), tables_io.types.PD_DATAFRAME)
results_df

Unnamed: 0,cvm,kld,ks
0,0.180647,0.0,0.100664
1,0.224506,0.0,0.086585
2,0.067295,0.0,0.060202
3,0.087015,0.0,0.097484
4,0.067235,0.0,0.084706
...,...,...,...
20444,0.164662,0.0,0.097897
20445,0.041221,0.0,0.070152
20446,0.462456,0.0,0.068942
20447,0.055140,0.0,0.066168


# Dist to Point Evaluation

In [7]:
# Configuration for the dist-to-point stage: only the 'cdeloss' metric is requested.
stage_dict = {
    'metrics': ['cdeloss'],
    '_random_state': None,
}
dtp_stage = DistToPointEvaluator.make_stage(name='dist_to_point', **stage_dict)

In [8]:
# Run the dist-to-point stage on the PDF ensemble and the validation table handle.
dtp_results = dtp_stage.evaluate(ensemble, ztrue_data)

Requested metrics: ['cdeloss']
Processing 0 running evaluator on chunk 0 - 1000.
cdeloss with output type MetricOutputType.single_value does not support parallel processing yet
Inserting handle into data store.  output_dist_to_point: inprogress_output_dist_to_point.hdf5, dist_to_point
Processing 0 running evaluator on chunk 1000 - 2000.
cdeloss with output type MetricOutputType.single_value does not support parallel processing yet
Processing 0 running evaluator on chunk 2000 - 3000.
cdeloss with output type MetricOutputType.single_value does not support parallel processing yet
Processing 0 running evaluator on chunk 3000 - 4000.
cdeloss with output type MetricOutputType.single_value does not support parallel processing yet
Processing 0 running evaluator on chunk 4000 - 5000.
cdeloss with output type MetricOutputType.single_value does not support parallel processing yet
Processing 0 running evaluator on chunk 5000 - 6000.
cdeloss with output type MetricOutputType.single_value does not s

In [9]:
# Convert the dist-to-point 'output' data to a pandas DataFrame and display it.
# Note: this rebinds results_df, replacing the dist-to-dist table.
results_df = tables_io.convertObj(dtp_results['output'](), tables_io.types.PD_DATAFRAME)
results_df

# Point to Point Evaluation

In [10]:
# Configuration for the point-to-point stage. The point estimate is read from
# the 'zmode' key and the table data from the 'photometry' group; chunks of
# 10000 rows are requested.
stage_dict = {
    'metrics': ['point_stats_ez', 'point_stats_iqr'],
    '_random_state': None,
    'hdf5_groupname': 'photometry',
    'point_estimate_key': 'zmode',
    'chunk_size': 10000,
}
ptp_stage = PointToPointEvaluator.make_stage(name='point_to_point', **stage_dict)

In [11]:
# Run the point-to-point stage; the result is indexed by 'output' and 'summary' below.
ptp_results = ptp_stage.evaluate(ensemble, ztrue_data)

Requested metrics: ['point_stats_ez', 'point_stats_iqr']
Processing 0 running evaluator on chunk 0 - 10000.
Inserting handle into data store.  output_point_to_point: inprogress_output_point_to_point.hdf5, point_to_point
Processing 0 running evaluator on chunk 10000 - 20000.
Processing 0 running evaluator on chunk 20000 - 20449.
Inserting handle into data store.  summary_point_to_point: inprogress_summary_point_to_point.hdf5, point_to_point


In [12]:
# Convert the point-to-point 'output' data to a pandas DataFrame.
# Note: this rebinds results_df, replacing the dist-to-point table.
results_df = tables_io.convertObj(ptp_results['output'](), tables_io.types.PD_DATAFRAME)

In [13]:
# Display the table currently bound to results_df.
results_df

Unnamed: 0,point_stats_ez
0,-0.022527
1,-0.021408
2,-0.042323
3,0.061233
4,0.039656
...,...
20444,-0.713773
20445,-0.145784
20446,-0.005226
20447,-0.055523


In [14]:
# Convert the point-to-point 'summary' data to a pandas DataFrame.
results_summary = tables_io.convertObj(ptp_results['summary'](), tables_io.types.PD_DATAFRAME)

In [15]:
# Display the summary table.
results_summary

Unnamed: 0,point_stats_iqr
0,0.020919


In [16]:
import numpy as np
# Reference redshifts: the 'redshift' column of the 'photometry' group.
truth = ztrue_data()['photometry']['redshift']
# Point estimates: the 'zmode' ancillary column of the ensemble, with any
# length-1 axes removed by np.squeeze.
estimates = np.squeeze(ensemble().ancil['zmode'])

In [17]:
import qp

In [18]:
# Recompute the IQR-based sigma directly with qp, on the full arrays at once,
# as a cross-check of the stage's 'point_stats_iqr' summary value.
check_iqr = (
    qp.metrics.point_estimate_metric_classes
    .PointSigmaIQR()
    .evaluate(estimates, truth)
)

In [19]:
# Display the directly computed value; compare it with point_stats_iqr in results_summary.
check_iqr

0.02084700447796729

In [20]:
# Residual of each point estimate, scaled by (1 + truth).
# NOTE(review): presumably the same quantity as 'point_stats_ez' — confirm.
vv = (estimates - truth) / (1.0 + truth)