# NZDir as a Pipeline

**Author:** Sam Schmidt

**Last successfully run:** April 26, 2023

In [None]:
import os
import rail
import qp
import tables_io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from rail.estimation.algos.nz_dir import NZDirInformer, NZDirSummarizer
from rail.core.data import TableHandle, QPHandle
from rail.core.stage import RailStage

For interactive pipeline creation to work, your data must be supplied as DataHandles, so that the appropriate tags are passed along to the stages.

In [None]:
# Locate the example healpix 9816 files shipped with RAIL and wrap each one in a
# TableHandle so it can be handed to the stages as a DataHandle.
from rail.utils.path_utils import find_rail_file

# Training sample
trainFile = find_rail_file('examples_data/testdata/test_dc2_training_9816.hdf5')
training_data = TableHandle("training_data", path=trainFile)

# Test (validation) sample
testFile = find_rail_file('examples_data/testdata/test_dc2_validation_9816.hdf5')
test_data = TableHandle("test_data", path=testFile)

In [None]:
# Share one model file name between both stages. The original cell used
# "nzdir_model.pkl" for the informer and "NZDir_model.pkl" for the summarizer;
# these differ only in case, so on a case-sensitive filesystem they are two
# different files.
model_file = "nzdir_model.pkl"

# Build the informer stage and run it on the training data.
inf_nz = NZDirInformer.make_stage(
    n_neigh=8,
    hdf5_groupname="photometry",
    model=model_file,
)
inf_nz.inform(training_data)

# Build the summarizer stage, pointing it at the same model file and naming the
# two output files that are read back at the end of the notebook.
nzd = NZDirSummarizer.make_stage(
    leafsize=20,
    zmin=0.0,
    zmax=3.0,
    nzbins=31,
    model=model_file,
    hdf5_groupname="photometry",
    output="NZDir_samples.hdf5",
    single_NZ="NZDir_NZ.hdf5",
)

In [None]:
# Run the summarizer once on the test data before the stages are added to a pipeline.
# NOTE(review): the original author found this call necessary before saving
# everything to a pipeline but did not know why -- confirm whether it can be dropped.
nzd_res = nzd.estimate(test_data)

Creating a pipeline in which the summarizer stage runs with 2 processes

In [None]:
import ceci

# Start from an empty interactive pipeline and register the stages in order.
pipe = ceci.Pipeline.interactive()
stages = [inf_nz, nzd]
for stage in stages:
    pipe.add_stage(stage)

# Set the process count for the summarizer stage to two.
summarizer_exec_config = pipe.stage_execution_config['NZDirSummarizer']
summarizer_exec_config.nprocess = 2

In [None]:
# Input files keyed by the tag names used for the DataHandles.
overall_inputs = {"training_data": trainFile, "test_data": testFile}
# Run settings: write outputs and logs to the current directory, no resume.
run_config = {"output_dir": '.', "log_dir": '.', "resume": False}
pipe.initialize(overall_inputs, run_config, None)

In [None]:
# Save the pipeline under the file name 'nzdir.yml'.
pipe.save('nzdir.yml')

Once the pipeline is saved, we read it back from `nzdir.yml` and execute it

In [None]:
# Read the pipeline back from the file 'nzdir.yml'.
pr = ceci.Pipeline.read('nzdir.yml')

In [None]:
# Execute the pipeline that was read from file.
pr.run()

Reading the output

In [None]:
# Load the two files named in the summarizer configuration:
# the `output` file and the `single_NZ` file.
nzd_ens = qp.read("NZDir_samples.hdf5")
nzdir_nz = qp.read("NZDir_NZ.hdf5")

In [None]:
# Overlay the single N(z) result with two entries of the samples ensemble.
# The original called axs.legend() without labelling any artist, which leaves
# the legend empty; each curve now carries a label, and the axes are labelled.
fig, axs = plt.subplots(figsize=(10, 8))
curves = [
    (nzdir_nz[0], "single N(z)", [0, 0, 1, 0.01]),
    (nzd_ens[1], "sample 1", [0, 1, 0, 0.01]),
    (nzd_ens[4], "sample 4", [1, 0, 0, 0.01]),
]
for curve, curve_label, face_color in curves:
    # NOTE(review): assumes plot_native forwards `label` to matplotlib the same
    # way it forwards `fc` -- confirm against the qp plotting API.
    curve.plot_native(axes=axs, fc=face_color, label=curve_label)
axs.set_xlim(0, 3)  # same range as the summarizer's zmin/zmax
axs.set_xlabel("redshift")
axs.set_ylabel("N(z)")
axs.legend()