---
title: Mission Pipeline
subtitle: Mission impossible
description: The mission pipeline processes data from a specific mission.
---

Generally, it includes the following steps:

- combine features from different sources/instruments (magnetic field, state data, etc.)
- generate new features

Additional components:

- `create_events_pipeline` | `create_candidate_pipeline` 
    - `create_sw_events_pipeline` : (Optional) pipeline for solar wind candidates (used in the `JUNO` and `ARTEMIS` missions) to exclude events that may be influenced by a planetary magnetosphere.



In [1]:
#| default_exp pipelines/default/mission

In [2]:
#| export
from ids_finder import PARAMS
import polars as pl
import polars.selectors as cs

from typing import Optional

## Combine features

In [3]:
# | export
def combine_features(candidates: pl.LazyFrame, states_data: pl.LazyFrame):
    """Attach state data to each candidate with an as-of join on `time`.

    Every datetime column of `candidates` is first cast to nanosecond
    precision, then both frames are sorted by `time` and joined with
    `join_asof` (polars' default strategy).
    """
    # issue: https://github.com/pola-rs/polars/issues/12023
    candidates_ns = candidates.with_columns(
        cs.datetime().dt.cast_time_unit("ns"),
    )

    # `join_asof` needs both sides ordered by the join key.
    sorted_candidates = candidates_ns.sort("time")
    sorted_states = states_data.sort("time")

    return sorted_candidates.join_asof(sorted_states, on="time")

## Combining magnetic field data and state data

With the combined dataset, we calculate additional features for each candidate.

Length

The length along the n direction of the LMN coordinate system.

$$L_{n} = v_{n}  T_{duration}$$

However, this may not be accurate due to the MVA (minimum variance analysis) method.

$$L_{mn} = v_{mn}  T_{duration}$$

If we have the normal vector of the current sheet, we can calculate the length along the normal direction.

$$L_{normal} = L_{k} = v_{normal}  T_{duration}$$

Additionally, we can calculate the length projected onto the radial direction of the RTN coordinate system.

$$L_{R} = L_{k} \cos \theta$$

$$ j_0 = \left(\frac{d B}{d t}\right)_{max} \frac{1}{\mu_0 v_{mn}}$$

### Calculating additional features for the combined dataset

In [4]:
#| export
import astropy.units as u
from astropy.constants import mu0, e
from plasmapy.formulary.lengths import inertial_length
from plasmapy.formulary.speeds import Alfven_speed
from xarray_einstats import linalg
from ids_finder.utils.basic import df2ts, pl_norm
import xarray as xr

In [5]:
#| export
def vector_project(v1,v2, dim="v_dim"):
    """Scalar projection of `v1` onto `v2` along dimension `dim`.

    Computed as the dot product of `v1` and `v2` divided by the norm of `v2`.
    """
    dot_product = xr.dot(v1, v2, dims=dim)
    v2_length = linalg.norm(v2, dims=dim)
    return dot_product / v2_length

def vector_project_pl(df: pl.DataFrame, v1_cols, v2_cols, name=None):
    """Add a column holding the scalar projection of one vector onto another.

    `v1_cols` and `v2_cols` are handed to `df2ts` to build the two vectors;
    the projection of the first onto the second is stored under `name`
    (`"v_proj"` when `name` is not given).

    NOTE(review): `calc_combined_features` pipes a LazyFrame into this
    function even though `df` is annotated as `pl.DataFrame` — confirm that
    `df2ts` accepts both.
    """
    # Both vectors get the same component labels so they align on `v_dim`.
    projected_vec = df2ts(df, v1_cols).assign_coords(v_dim=["r", "t", "n"])
    target_vec = df2ts(df, v2_cols).assign_coords(v_dim=["r", "t", "n"])

    projection = vector_project(projected_vec, target_vec, dim="v_dim")

    column_name = name or "v_proj"
    projection_series = pl.Series(projection.data).alias(column_name)
    return df.with_columns(projection_series)

### Inertial length

In [6]:
# | export
def compute_inertial_length(ldf: pl.LazyFrame, density_col = "plasma_density"):
    """Append the `H+` inertial length as an `ion_inertial_length` column.

    The frame is collected, `density_col` is read as a number density in
    cm^-3, and the resulting length is stored in km. The result is returned
    as a LazyFrame again.
    """
    frame = ldf.collect()

    number_density = frame[density_col].to_numpy() * u.cm ** (-3)
    length_km = inertial_length(number_density, "H+").to(u.km)

    # Strip the astropy unit before handing the values back to polars.
    length_series = pl.Series(length_km.value)
    return frame.with_columns(ion_inertial_length=length_series).lazy()

### Alfven current

In [7]:
#| export
def compute_Alfven_speed(ldf: pl.LazyFrame, density_col="plasma_density"):
    """Append the Alfvén speed (km/s) as an `Alfven_speed` column.

    The frame is collected. The magnetic field magnitude is read from column
    `B` (falling back to `b_mag`) and interpreted in nT; `density_col` is
    interpreted as a number density in cm^-3. The result is returned as a
    LazyFrame again.

    `density_col` defaults to `"plasma_density"`, matching
    `compute_inertial_length`, so existing callers are unaffected.
    """
    df = ldf.collect()

    # backwards compatibility: fall back to `b_mag` when `B` is absent
    B = df["B"] if "B" in df.columns else df["b_mag"]
    density = df[density_col].to_numpy() * u.cm ** (-3)
    result = Alfven_speed(B.to_numpy() * u.nT, density=density, ion="p+").to(u.km / u.s)

    return df.with_columns(Alfven_speed=pl.Series(result.value)).lazy()


def compute_Alfven_current(ldf: pl.LazyFrame, density_col="plasma_density"):
    """Append the Alfvén current density (nA/m^2) as a `j_Alfven` column.

    Computed as `e * v_A * n`, where `v_A` is the `Alfven_speed` column
    (interpreted in km/s) and `n` is `density_col` (interpreted in cm^-3).
    The frame is collected and returned as a LazyFrame again.

    `density_col` defaults to `"plasma_density"`, matching
    `compute_inertial_length`, so existing callers are unaffected.
    """
    df = ldf.collect()

    # Named `v_alfven` so the local does not shadow the imported
    # `Alfven_speed` function.
    v_alfven = df["Alfven_speed"].to_numpy() * u.km / u.s
    density = df[density_col].to_numpy() * u.cm ** (-3)

    result = (e.si * v_alfven * density)
    result = result.to(u.nA / u.m**2)

    return df.with_columns(j_Alfven=pl.Series(result.value)).lazy()

## Pipelines

In [None]:
# | export
def calc_combined_features(df: pl.LazyFrame):
    """Derive per-candidate features from the combined field/state dataset.

    Columns added, in order:

    - `duration` (`d_tstop - d_tstart`) and `k_x` (absolute first component
      of `normal_direction`)
    - `v_l`, `v_n`, `v_k`: projections of the velocity (`v_x`, `v_y`, `v_z`)
      onto `Vl`, `Vn` and `normal_direction`; `v_n` and `v_k` are made
      non-negative
    - `v_mn = sqrt(plasma_speed**2 - v_l**2)`
    - `L_n`, `L_mn`, `L_k`: the matching speed times the duration in seconds
    - `L_R = L_k * k_x`
    - `ion_inertial_length`, `Alfven_speed`, `j_Alfven` (via the
      `compute_*` helpers)
    - `j0 = d_star / v_mn` and `j0_k = d_star / v_k`, scaled to nA/m^2
    - `L_mn_norm = L_mn / ion_inertial_length` and `j0_norm = j0 / j_Alfven`
    """
    velocity_cols = ["v_x", "v_y", "v_z"]  # plasma velocity vector in any coordinate system
    direction_cols = ["Vl", "Vn", "normal_direction"]

    # Scale factor taking (nT/s) / (km/s) / mu0 to nA/m^2.
    j_factor = ((u.nT / u.s) * (1 / mu0 / (u.km / u.s))).to(u.nA / u.m**2)
    j_scale = j_factor.value

    duration_ns = pl.col("duration").dt.nanoseconds()

    # Stage 1: basic quantities; vector columns become fixed-size arrays.
    prepared = df.with_columns(
        duration=pl.col("d_tstop") - pl.col("d_tstart"),
        k_x=pl.col("normal_direction").list.get(0).abs(),
    ).with_columns(
        cs.by_name(direction_cols).list.to_array(3)
    )

    # Stage 2: project the velocity onto each direction vector.
    projected = vector_project_pl(prepared, velocity_cols, "Vl", name="v_l")  # major eigenvector in any coordinate system
    projected = vector_project_pl(projected, velocity_cols, "Vn", name="v_n")
    projected = vector_project_pl(projected, velocity_cols, "normal_direction", name="v_k")

    # Stage 3: speeds, lengths and unscaled current densities.
    with_speeds = projected.with_columns(
        pl.col("v_n").abs(),
        pl.col("v_k").abs(),
        v_mn=(pl.col("plasma_speed") ** 2 - pl.col("v_l") ** 2).sqrt(),
    )
    with_lengths = with_speeds.with_columns(
        L_n=pl.col("v_n") * duration_ns / 1e9,
        L_mn=pl.col("v_mn") * duration_ns / 1e9,
        L_k=pl.col("v_k") * duration_ns / 1e9,
        j0=pl.col("d_star") / pl.col("v_mn"),
        j0_k=pl.col("d_star") / pl.col("v_k"),
    ).with_columns(
        L_R=pl.col("L_k") * pl.col("k_x"),
    )

    # Stage 4: plasma scales used for normalisation.
    with_scales = compute_inertial_length(with_lengths)
    with_scales = compute_Alfven_speed(with_scales)
    with_scales = compute_Alfven_current(with_scales)

    # Stage 5: apply the unit factor, then normalise.
    normalised = with_scales.with_columns(
        j0=pl.col("j0") * j_scale,
        j0_k=pl.col("j0_k") * j_scale,
    ).with_columns(
        L_mn_norm=pl.col("L_mn") / pl.col("ion_inertial_length"),
        j0_norm=pl.col("j0") / pl.col("j_Alfven"),
    )

    # Back to lists before returning.
    # PanicException: not yet implemented: Writing FixedSizeList to parquet not yet implemented
    return normalised.with_columns(
        cs.by_name(direction_cols).arr.to_list()
    )

## Pipelines

In [1]:
#| export
from kedro.pipeline import Pipeline, node
from kedro.pipeline.modular_pipeline import pipeline
from ids_finder.utils.basic import load_params

In [None]:
# | export
def create_combined_data_pipeline(
    sat_id, # satellite id, used for namespace
    params : Optional[dict] = None,
    **kwargs
) -> Pipeline:
    """Build the namespaced pipeline that combines MAG features with STATE data.

    The pipeline has two nodes: `combine_features`, followed by
    `calc_combined_features`. Dataset names are derived from `params["tau"]`
    and the `time_resolution` of the `MAG` and `STATE` entries under
    `params[sat_id]`. When `params` is None the module-level `PARAMS` is
    used. Extra `**kwargs` are accepted but not used.
    """
    cfg = PARAMS if params is None else params

    tau = cfg["tau"]
    mag_resolution = cfg[sat_id]["MAG"]["time_resolution"]
    state_resolution = cfg[sat_id]["STATE"]["time_resolution"]

    mag_tag = f"ts_{mag_resolution}s"
    state_tag = f"ts_{state_resolution}s"
    tau_tag = f"tau_{tau}s"

    # Name of the final dataset inside the namespace; remapped below.
    events_name = f"events_{mag_tag}_{tau_tag}"

    return pipeline(
        [
            node(
                combine_features,
                inputs=[
                    f"MAG.feature_{mag_tag}_{tau_tag}",
                    f"STATE.primary_data_{state_tag}",
                ],
                outputs="combined_data",
            ),
            node(
                calc_combined_features,
                inputs="combined_data",
                outputs=events_name,
            ),
        ],
        namespace=sat_id,
        outputs={
            events_name: f"events.{sat_id}_{mag_tag}_{tau_tag}",
        },
    )