# Glider Toolbox Demo Notebook

### Using a Pre-existing config file for a complete pipeline run

Note: The toolbox is currently imported from the repository's local `src` directory. In the future it will be installable as a package via pip, and the path setup cell below will no longer be needed.

In [1]:
# Put the repository's src directory on sys.path so the toolbox can be imported
# (THIS WILL NOT BE REQUIRED ONCE DEPLOYED)
import sys
from pathlib import Path

# The relative path is resolved against the current working directory.
src_path = Path("../../src").resolve()
src_entry = str(src_path)

# Prepend only when absent, so re-running this cell never duplicates the entry.
if src_entry not in sys.path:
    sys.path.insert(0, src_entry)

### Pipeline Overview

In [2]:
# Disabled example: build a Pipeline from an existing YAML config and run it.
# Uncomment all four lines together to execute it.
# NOTE(review): the config path is relative to the working directory — confirm it resolves.
# from toolbox.pipeline import Pipeline
# pipeline = Pipeline(
#     "../configs/example_config_nelson.yaml")
# pipeline.run()

#### Building the pipeline one step at a time

In [3]:
# Disabled example: assemble a pipeline step by step instead of from a config file.
# NOTE(review): `Pipeline` is only imported in the (also commented-out) cell above,
# so that import must be enabled before this cell can run.
# pipeline2 = Pipeline()

# NOTE(review): myCustomStep is defined here but never passed to pipeline2.add_step below.
# class myCustomStep:
#     def __init__(self, param1, param2):
#         self.param1 = param1
#         self.param2 = param2

#     def run(self, data):
#         # Custom processing logic here
#         print(f"Running custom step with {self.param1} and {self.param2}")
#         return data  # Return processed data

# # add load step
# pipeline2.add_step(
#     "Load OG1",
#     parameters={
#         "file_path": "../../examples/data/OG1/Churchill_647_R.nc",  # Path to the input NetCDF file
#         "add_meta": False, # Add metadata to the data
#         "add_depth": True, # Add depth information to the data
#         "add_elapsed_time": False,
#         "lat_label": "DEPLOYMENT_LATITUDE"},
#     diagnostics=False,
#     run_immediately=True )# can run immediately upon creation

# # add export step
# pipeline2.add_step(
#     "Data Export",
#     parameters={
#         "export_format": "netcdf",  # Define the export format (e.g., OG1, CSV, etc.)
#         "output_path": "../../examples/data/OG1/exported_Churchill_647_R.nc"},
#     diagnostics=False,
#     run_immediately=False )# or run at a future time
# # run the export step that was added with run_immediately=False
# pipeline2.run_last_step()

In [4]:
# Disabled example: needs `pipeline2` from the commented-out step-by-step cell.
# conf = pipeline2.export_config("../../examples/data/OG1/Churchill_647_R_CONFIG.yaml")  # Export the pipeline configuration to a YAML file
# conf  # display whatever export_config returned

# Running Multiple Pipelines

The cells below use `PipelineManager` to load a mission-control YAML file and run every pipeline it defines.

In [2]:
### Running multiple pipelines using the PipelineManager class
from toolbox.pipeline_manager import PipelineManager

# Mission-control configuration file, relative to the working directory.
mission_control_path = "../configs/MissionControl.yaml"

# The manager is created empty and then given the mission-control configuration.
mngr = PipelineManager()
mngr.load_mission_control(mission_control_path)

# Run all pipelines defined in the mission control configuration.
# Originally described as: import data, run profiles, and export data for the
# Doombar and Churchill sources.
# NOTE(review): later cells also reference "Cabot" — confirm which sources the YAML lists.
mngr.run_all()

[Discovery] Scanning for step modules in C:\Users\ddab1n24\OneDrive - University of Southampton\Documents\PhD\RISC\toolbox\src\toolbox\steps\custom
[Discovery] Importing step module: toolbox.steps.custom.apply_qc
[Discovery] Importing step module: toolbox.steps.custom.blank_step
[Discovery] Importing step module: toolbox.steps.custom.calibration
[Discovery] Importing step module: toolbox.steps.custom.derive_ctd
[Discovery] Importing step module: toolbox.steps.custom.export
[Discovery] Importing step module: toolbox.steps.custom.find_profiles
[Discovery] Importing step module: toolbox.steps.custom.gen_data
[Discovery] Importing step module: toolbox.steps.custom.interpolate_data
[Discovery] Importing step module: toolbox.steps.custom.load_data
[Discovery] Importing step module: toolbox.steps.custom.profile_direction
[Discovery] Importing step module: toolbox.steps.custom.qc.archive
[Discovery] Importing step module: toolbox.steps.custom.qc.blank_test
[Discovery] Importing step module: to

In [None]:
# Uses the `mngr` created in the PipelineManager cell; run that cell first.
# NOTE(review): presumably reports a profile summary for every loaded source — confirm.
mngr.summarise_all_profiles()

In [None]:
# NOTE(review): the method name suggests this only previews the alignment — confirm
# that nothing is modified or saved by this call.
mngr.preview_alignment("Cabot")  # Preview alignment against the target source (here "Cabot")

In [None]:
# NOTE(review): presumably fits the other sources to the target and stores the
# result, as the method name suggests — confirm where the output is saved.
mngr.fit_and_save_to_target("Cabot")  # Fit data to the target source (here "Cabot")

In [None]:
# Passes the source names "Churchill" and "Cabot" for validation.
# NOTE(review): presumably compares these sources with the reference device data — confirm.
mngr.validate_with_device(["Churchill", "Cabot"])

In [3]:
# Called with no arguments. The recorded output below reports device 'CTD DY180',
# variables CNDC and TEMP, and one linear fit (device ≈ slope·target + intercept) per variable.
mngr.fit_to_device()

[Fit→Device] Using device='CTD DY180', variables=['CNDC', 'TEMP']
[Fit→Device] R² thresholds: {'CNDC': 0.95, 'TEMP': 0.9}
[Pipeline Manager] Interpolating missing DEPTH values by PROFILE_NUMBER...
[Device] Loaded 15 files → 7939 rows, 15 profiles.
Variables: n_unity, nrows1, ncols1, pad_variable, altimeter, asal1, asal2, cond1, cond2, fluor, latitude, longitude, oxygen1, oxygen2, par, potemp1, potemp2, press, psal1, psal2, scan, temp1, temp2, time, transmittance, turbidity, TEMP, CNDC, psal, potemp, asal, oxygen, depth, TIME, LATITUDE, LONGITUDE, DEPTH
[Validation] Matched 46 pairs with CTD DY180.
[Pipeline Manager] Interpolating missing DEPTH values by PROFILE_NUMBER...
[Filter] Aggregated case: 4986 → 44 profiles retained.
[Filter] Aggregated case: 15 → 2 profiles retained.
[Validation] Dropped 2 target profiles with no aggregated data.
[Validation] Merged data has 44 pairs.
[Fit→Device] CNDC: device ≈ 10.02·target + -0.02112 (R²=0.992, N=5700)
[Fit→Device] TEMP: device ≈ 0.9963·targ

In [3]:
# Linear-fit coefficients per variable, entered by hand. The CNDC values and the
# TEMP slope match the "[Fit→Device]" lines printed by the fit_to_device cell.
# NOTE(review): the saved output under this cell lists different coefficients,
# so it may come from another run — confirm which values are intended.
cabot_fit_coefficients = {
    "TEMP": {"slope": 0.9963, "intercept": 0.08155, "n": 5700},
    "CNDC": {"slope": 10.02, "intercept": -0.02112, "n": 5700},
}

# Last expression of the cell, so the value returned by apply_adjustment is displayed.
mngr.apply_adjustment("Cabot", cabot_fit_coefficients)

{'fits': {'TEMP': {'slope': 1.007, 'intercept': -0.01955, 'n': 2364},
  'CNDC': {'slope': 10.11, 'intercept': -0.3846, 'n': 2364}},
 'device_name': 'CTD DY180'}

In [6]:
# Save the manager's outputs into the current working directory instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
# When Jupyter is started from this notebook's folder (the layout the "../../src"
# setup cell already relies on), this is the same folder the original path named.
from pathlib import Path

output_dir = Path.cwd()

# save() was originally given a plain string, so keep passing one.
mngr.save(str(output_dir))

Saving raw outputs.
Saving processed outputs.


In [9]:
import xarray as xr
from pathlib import Path

# Read the processed file from the current working directory instead of a
# machine-specific absolute path. This is where the preceding save cell was
# pointed on the author's machine when the notebook is run from its own folder.
processed_path = Path.cwd() / "Churchill_Nelson_Doombar_Cabot_processed.nc"

# load_dataset reads the whole file into memory and closes it afterwards.
test = xr.load_dataset(processed_path)
test