---
title: IDs from Wind
---

## Setup

Run the following command in a shell first, since `pipeline` is a project-specific Kedro command.

```{sh}
kedro pipeline create wind
```

In [4]:
#| default_exp pipelines/wind/pipeline

In [5]:
#| hide
%load_ext autoreload
%autoreload 2

In [6]:
#| export
import polars as pl

## Magnetic field data pipeline

- For convenience, we choose magnetic field data in the **RTN** coordinate system (the `h4-rtn` datatype, variable `BRTN`)
- The `fgs` data are in 3-4s resolution

In [9]:
# | export
import pyspedas
from ids_finder.utils.basic import cdf2pl, pmap, resample

In [18]:
#| export
def download_mag_data(trange: list[str], datatype) -> list[str]:
    """Download Wind MFI data files for a time range without loading them.

    Thin wrapper around ``pyspedas.wind.mfi`` called with
    ``downloadonly=True``; whatever that call returns is handed back
    unchanged (annotated here as a list of file paths).

    Args:
        trange: Time range forwarded to ``pyspedas.wind.mfi``.
        datatype: MFI datatype forwarded to ``pyspedas.wind.mfi``.
    """
    return pyspedas.wind.mfi(trange, datatype=datatype, downloadonly=True)

def load_mag_data(
    start: str = None,
    end: str = None,
    trange: list[str] = None,
    datatype="h4-rtn",
):
    """Download Wind MFI files for a time range and combine them into one frame.

    The time range is given either as ``trange`` or as the ``start``/``end``
    pair; ``trange`` takes precedence when both are supplied.

    Args:
        start: Start of the time range; only used when ``trange`` is None.
        end: End of the time range; only used when ``trange`` is None.
        trange: ``[start, end]`` pair forwarded to ``download_mag_data``.
        datatype: MFI datatype forwarded to ``download_mag_data``.

    Returns:
        The ``pl.concat`` of ``cdf2pl`` applied to every downloaded file,
        with ``var_names=["BRTN", "BF1"]``.

    Raises:
        ValueError: If ``trange`` is None and ``start`` or ``end`` is missing.
    """
    if trange is None:
        # Fail early with a clear message instead of forwarding [None, None]
        # to the downloader.
        if start is None or end is None:
            raise ValueError(
                "Provide either `trange` or both `start` and `end`."
            )
        trange = [start, end]

    files = download_mag_data(trange=trange, datatype=datatype)
    var_names = ["BRTN", "BF1"]
    # `pmap` applies `cdf2pl` to each file through the pipe operator.
    df: pl.LazyFrame = pl.concat(files | pmap(cdf2pl, var_names=var_names))

    return df

In [12]:
def preprocess_mag_data(
    raw_data: pl.LazyFrame,
) -> pl.LazyFrame:
    """
    Rename the raw magnetic field columns to the project's naming convention.

    - ``BRTN_0``, ``BRTN_1``, ``BRTN_2`` become ``B_x``, ``B_y``, ``B_z``
    - ``BF1`` becomes ``B_mag``

    No other transformation is applied here.
    """
    source_columns = ("BRTN_0", "BRTN_1", "BRTN_2", "BF1")
    target_columns = ("B_x", "B_y", "B_z", "B_mag")

    renamed = raw_data.rename(dict(zip(source_columns, target_columns)))
    return renamed

In [None]:
# | export
from datetime import timedelta
from ids_finder.utils.basic import partition_data_by_year

In [None]:
#| export
def process_mag_data(
    raw_data: pl.LazyFrame,
    ts: str = None,  # time resolution
    coord: str = None,
):
    """
    Corresponding to primary data layer, where source data models are transformed into domain data models

    - Partitioning data, for the sake of memory

    Args:
        raw_data: Data handed to ``partition_data_by_year``.
        ts: Time resolution. Currently unused; kept so existing callers
            that pass it keep working.
        coord: Currently unused; kept for the same reason.

    Returns:
        Whatever ``partition_data_by_year(raw_data)`` returns.
    """
    # The former `timedelta(seconds=ts)` computation was dropped: its result
    # was never used, and it raised TypeError for the default `ts=None` as
    # well as for any string value of `ts`.
    return partition_data_by_year(raw_data)

In [None]:
#| export
def create_pipeline(
    sat_id="sta",
    tau="60s",
    ts_mag="1s",  # time resolution of magnetic field data
    ts_state="1h",  # time resolution of state data
    **kwargs
):
    """Placeholder for the pipeline factory; not implemented yet.

    Accepts the configuration arguments (plus arbitrary keyword arguments),
    ignores all of them, and returns ``None``.
    """
    # NOTE(review): the default sat_id is "sta" although this module lives
    # under pipelines/wind — confirm the intended default.
    return None