### Imports

In [None]:
# step 1
from yaw import UniformRandoms
from rail.yaw_rail.utils import get_dc2_test_data

from rail.yaw_rail import (
    YawCacheCreate,     # step 2
    YawAutoCorrelate,   # step 3
    YawCrossCorrelate,  # step 4
    YawSummarize,       # step 5
    YawCacheDrop,       # step 6
)  # equivalent: from rail.yaw_rail import *
from rail.yaw_rail.cache import stage_helper  # utility for YawCacheCreate

from rail.core.data import TableHandle
# configure RAIL datastore to allow overwriting data
from rail.core.stage import RailStage
# The data store is a class-level attribute of RailStage; keep a short handle.
DS = RailStage.data_store
# The flag is set on the data store's class, not only on this one instance.
DS.__class__.allow_overwrite = True

In [None]:
# Passed as `verbose=` to the stage configurations further down in this notebook.
VERBOSE = "debug"  # verbosity level of built-in logger, disable with "error"

In [None]:
# Fetch the test sample (downloaded once, cached for future calls) and record
# its size and redshift range, which are reused by the cells below.
mock_data = get_dc2_test_data()
n_data = len(mock_data)
redshifts = mock_data["z"].to_numpy()
zmin, zmax = redshifts.min(), redshifts.max()
# Bare expression on the last line so that the notebook displays the summary.
f"N={n_data}, {zmin:.1f}<z<{zmax:.1f}"

In [None]:
# Random generator constructed from the min/max of the mock data's "ra" and
# "dec" columns, with a fixed seed.
angular_rng = UniformRandoms(
    mock_data["ra"].min(), mock_data["ra"].max(),
    mock_data["dec"].min(), mock_data["dec"].max(),
    seed=12345,
)
# Request ten times as many randoms as data objects; the data redshifts are
# handed over as the "z" entry of `draw_from`.
mock_rand = angular_rng.generate(10 * n_data, draw_from={"z": redshifts})

In [None]:
# Keyword arguments shared by the auto- and cross-correlation stages below.
# Commented-out entries are further options that are left unset here.
corr_config = {
    "rmin": 100,  # in kpc
    "rmax": 1000,  # in kpc
    # "rweight": None,
    # "rbin_num": 50,
    "zmin": zmin,
    "zmax": zmax,
    "zbin_num": 8,  # default: 30
    # "method": "linear",
    # "zbins": np.linspace(zmin, zmax, zbin_num + 1),
    # "crosspatch": True,
    # "thread_num": None,
    "verbose": VERBOSE,  # default: "info"
}

## Build the stages we are going to use, making sure to give each one a unique name

In [None]:
# Cache-creation stage named "ref", writing to ./test_ref; its output is used
# as the reference sample by the correlation cells further down.
# NOTE(review): stage_helper("ref") presumably maps the stage's input/output
# tags to "ref"-specific names so that two YawCacheCreate stages can coexist in
# one pipeline — confirm against rail.yaw_rail.cache.stage_helper.
cache_create_ref_stage = YawCacheCreate.make_stage(
    name="ref",
    aliases=stage_helper("ref"),
    path="./test_ref",
    overwrite=True,  # default: False
    ra_name="ra",
    dec_name="dec",
    redshift_name="z",
    # weight_name=,
    # patches=,
    # patch_name=,
    n_patches=5,
    verbose=VERBOSE,  # default: "info"
)

In [None]:
# Cache-creation stage named "unk", writing to ./test_unk. No redshift column
# is configured, and `patches` is given the reference cache path in place of
# an `n_patches` value.
# NOTE(review): presumably this makes the unknown cache reuse the patch layout
# of the reference cache — confirm against the YawCacheCreate documentation.
cache_create_unk_stage = YawCacheCreate.make_stage(
    name="unk",
    aliases=stage_helper("unk"),
    path="./test_unk",
    overwrite=True,  # default: False
    ra_name="ra",
    dec_name="dec",
    # redshift_name=,
    # weight_name=,
    patches="./test_ref",
    # patch_name=,
    # n_patches=,
    verbose=VERBOSE,  # default: "info"
)

In [None]:
# Autocorrelation stage, configured with the shared correlation settings.
correlate_ss_stage = YawAutoCorrelate.make_stage(name="corr_ss", **corr_config)

In [None]:
# Cross-correlation stage, configured with the same shared correlation settings.
correlate_sp_stage = YawCrossCorrelate.make_stage(name="corr_sp", **corr_config)

In [None]:
# Summary stage; only the name and verbosity are set, the commented-out
# options are left unset.
estimate_stage = YawSummarize.make_stage(
    name="estimate",
    # cross_est=,
    # ref_est=,
    # unk_est=,
    # crosspatch=True,
    verbose=VERBOSE,  # default: "info"
)

In [None]:
# Stage that is later handed the reference cache for dropping.
drop_ref_stage = YawCacheDrop.make_stage(name="drop_ref")

In [None]:
# Stage that is later handed the unknown cache for dropping.
drop_unk_stage = YawCacheDrop.make_stage(name="drop_unk")

### Get handles for the input data, so that we can track connections

In [None]:
# Register the in-memory data and randoms in the data store as TableHandle
# entries under the tags "input_data" and "input_random".
mock_data_handle = DS.add_data("input_data", mock_data, TableHandle)
mock_random_handle = DS.add_data("input_random", mock_rand, TableHandle)

### Run the pipeline in the notebook, using handles to establish connections

In [None]:
# The reference cache is built from the mock data together with the randoms.
cache_ref = cache_create_ref_stage.create(data=mock_data_handle, rand=mock_random_handle)

In [None]:
# The unknown cache is built from the same mock data handle; no randoms are passed.
cache_unk = cache_create_unk_stage.create(data=mock_data_handle)

In [None]:
# Run the autocorrelation stage on the reference cache.
w_ss = correlate_ss_stage.correlate(sample=cache_ref)

In [None]:
# Run the cross-correlation stage on the reference and unknown caches.
w_sp = correlate_sp_stage.correlate(reference=cache_ref, unknown=cache_unk)

In [None]:
# Summarize from the cross-correlation and the reference autocorrelation. The
# optional `unk_corr` input (default: None) is not supplied here.
estimate = estimate_stage.summarize(cross_corr=w_sp, ref_corr=w_ss)

In [None]:
# Hand the reference cache to its drop stage now that both correlations and
# the summary have been computed from it.
drop_ref_stage.drop(cache_ref)

In [None]:
# Hand the unknown cache to its drop stage; the cross-correlation that used it
# has already been computed above.
drop_unk_stage.drop(cache_unk)

### Make a ceci pipeline and add all the stages

In [None]:
import ceci

# Start from an empty interactive pipeline.
pipe = ceci.Pipeline.interactive()

# Stages are listed in the same order in which they were run in the cells above.
stages = [
    cache_create_ref_stage,
    cache_create_unk_stage,
    correlate_ss_stage,
    correlate_sp_stage,
    estimate_stage,
    drop_ref_stage,
    drop_unk_stage,
]

for stage_ in stages:
    pipe.add_stage(stage_)

### Initialize the pipeline, providing dummy / null values for unused inputs

In [None]:
# NOTE(review): the three positional arguments are presumably the overall
# inputs, the run configuration and the per-stage configuration — confirm
# against ceci.Pipeline.initialize.
pipe.initialize(
    # Placeholder file names for the two real inputs, and /dev/null for the
    # two inputs that this pipeline does not use.
    {
        "input_data": "dummy.in",
        "input_random": "dummy.in",
        "rand_unk": "/dev/null",
        "unk_corr": "/dev/null",
    },
    # Outputs and logs go to the current directory; resume is disabled.
    {
        "output_dir": ".",
        "log_dir": ".",
        "resume": False,
    },
    None,
)

In [None]:
# Save the initialized pipeline under the file name pipeline.yml in the
# current working directory.
pipe.save('pipeline.yml')