---
title: State data pipeline
---

Here we define state data as the data that complements the magnetic field data and is useful for its analysis. It may include:

- Plasma data
- Satellite location data

In [None]:
#| hide
import polars as pl

from kedro.pipeline import Pipeline, node
from kedro.pipeline.modular_pipeline import pipeline

from typing import Any, Dict

## Pipeline

In [None]:
def create_state_data_pipeline(
    sat_id,
    ts: str = '1h',  # time resolution
) -> Pipeline:
    """Assemble the load -> preprocess -> process pipeline for state data.

    Three nodes are chained through the datasets ``raw_state``,
    ``inter_state_{ts}`` and ``primary_state_{ts}``, and the result is wrapped
    in a modular pipeline namespaced by ``sat_id``.

    Args:
        sat_id: Identifier used as the pipeline namespace and, upper-cased,
            inside each node name (so it must provide ``.upper()``).
        ts: Time resolution label embedded in the intermediate and primary
            dataset names.

    Returns:
        The namespaced pipeline containing the three nodes.
    """
    sat_label = sat_id.upper()
    inter_dataset = f"inter_state_{ts}"
    primary_dataset = f"primary_state_{ts}"

    # Both the download and the preprocessing step receive the same date range.
    date_range = {
        "start": "params:start_date",
        "end": "params:end_date",
    }

    steps = [
        node(
            load_state_data,
            inputs={**date_range},
            outputs="raw_state",
            name=f"download_{sat_label}_state_data",
        ),
        node(
            preprocess_state_data,
            inputs={"raw_data": "raw_state", **date_range},
            outputs=inter_dataset,
            name=f"preprocess_{sat_label}_state_data",
        ),
        node(
            process_state_data,
            inputs=inter_dataset,
            outputs=primary_dataset,
            name=f"process_{sat_label}_state_data",
        ),
    ]

    # NOTE(review): the date range is mapped to the `jno_*` parameters for
    # every `sat_id` -- confirm this is intentional.
    return pipeline(
        steps,
        namespace=sat_id,
        parameters={
            "params:start_date": "params:jno_start_date",
            "params:end_date": "params:jno_end_date",
        },
    )