---
title: STEREO Magnetic field data pipeline
---

STEREO magnetic field data are already provided in RTN coordinates, so no coordinate transformation is needed.

The data are downloaded with `pyspedas` but loaded with `pycdfpp`, because loading the data directly into `xarray` through `pyspedas` is very slow.

In [1]:
#| export
from datetime import timedelta

import polars as pl

from ids_finder import PARAMS
from ids_finder.utils.basic import cdf2pl, pmap, resample, partition_data_by_year
from ids_finder.pipelines.default.data_mag import create_pipeline_template

from typing import Iterable

In [None]:
#| hide
#| default_exp pipelines/stereo/mag
%load_ext autoreload
%autoreload 2

## Loading data

In [None]:
#| export
from pathlib import Path
import os

os.environ['SPEDAS_DATA_DIR'] = str(Path.home() / 'data')
import pyspedas

In [None]:
# | export
def download_data(
    start,
    end,
    probe: str = "a",
    datatype="8hz",
) -> Iterable[str]:
    """Download STEREO magnetometer CDF files and return what pyspedas reports.

    The time range ``[start, end]``, ``probe`` and ``datatype`` are forwarded
    to ``pyspedas.stereo.mag`` with ``downloadonly=True``, so the files are
    fetched but not loaded; the call's result (the list of CDF files) is
    returned unchanged.
    """
    return pyspedas.stereo.mag(
        [start, end],
        probe=probe,
        datatype=datatype,
        downloadonly=True,
    )


def load_data(
    start,
    end,
    datatype="8hz",
    probe: str = "a",
):
    """Download the CDF files for ``[start, end]`` and concatenate them.

    Each file returned by ``download_data`` is converted with ``cdf2pl``
    (called with ``var_names="BFIELD"``) through ``pmap``, and the
    per-file results are combined with ``pl.concat``.
    """
    # Keyword arguments make the probe/datatype hand-off explicit: this
    # function and `download_data` list the two parameters in opposite order.
    cdf_files = download_data(start, end, probe=probe, datatype=datatype)
    frames = cdf_files | pmap(cdf2pl, var_names="BFIELD")
    return pl.concat(frames)


## Preprocessing data

In [None]:
# | export
def preprocess_data(
    raw_data: pl.LazyFrame,
):
    """
    Preprocess the raw dataset (only minor transformations)

    - Rename the raw ``BFIELD_0``, ``BFIELD_1`` and ``BFIELD_2`` columns to the
      first three names configured in ``PARAMS["STEREO"]["MAG"]["bcols"]``
    """
    target_names = PARAMS["STEREO"]["MAG"]["bcols"]
    raw_names = ("BFIELD_0", "BFIELD_1", "BFIELD_2")

    # Index explicitly (rather than zip) so a too-short `bcols` still raises.
    renames = {raw: target_names[idx] for idx, raw in enumerate(raw_names)}
    return raw_data.rename(renames)

## Processing data

In [None]:
# | export
def process_data(
    raw_data: "pl.DataFrame",
    ts = None,  # time resolution, in seconds
):
    """Resample the data to a fixed time resolution and partition it by year.

    The data is piped through ``resample`` with ``every`` equal to ``ts``
    seconds and ``period`` equal to twice that, then through
    ``partition_data_by_year``; the result of the latter is returned.

    Raises:
        TypeError: if ``ts`` is not provided. The ``None`` default is only a
            placeholder; there is no resolution to fall back to.
    """
    # Fail early with an actionable message instead of the opaque
    # "unsupported type for timedelta seconds component: NoneType".
    if ts is None:
        raise TypeError(
            "process_data() requires `ts` (time resolution, in seconds), got None"
        )

    every = timedelta(seconds=ts)
    period = 2 * every  # the resampling window spans two output steps

    return raw_data.pipe(resample, every=every, period=period).pipe(
        partition_data_by_year
    )

## Pipeline

In [None]:
# | export
def create_pipeline(sat_id="STA", source="MAG"):
    """Build the STEREO magnetic-field pipeline from the shared template.

    Passes this module's ``load_data``, ``preprocess_data`` and
    ``process_data`` to ``create_pipeline_template`` together with
    ``sat_id`` and ``source``, and returns whatever the template returns.
    """
    stage_fns = {
        "load_data_fn": load_data,
        "preprocess_data_fn": preprocess_data,
        "process_data_fn": process_data,
    }
    return create_pipeline_template(sat_id=sat_id, source=source, **stage_fns)

## Obsolete code

NOTE: `speasy` can also be used to download the data; however, it is slower for `STEREO` data.

In [None]:
%%markdown
sat_fgm_product = cda_tree.STEREO.Ahead.IMPACT_MAG.STA_L1_MAG_RTN.BFIELD
sat_fgm_product = 'cda/STA_L1_MAG_RTN/BFIELD'
products = [sat_fgm_product]

dataset = spz.get_data(products, test_trange, disable_proxy=True)
sat_fgm_data  = dataset[0]
data_preview(sat_fgm_data)

Download data in a background thread

In [None]:
%%markdown

@threaded
def download_data(products, trange):
    logger.info("Downloading data")
    spz.get_data(products, trange, disable_proxy=True)
    logger.info("Data downloaded")
    
download_data(products, trange)

In [None]:
import speasy as spz

In [None]:
# Look up the STA_L1_MAG_RTN product in speasy's CDA inventory and log some of
# its metadata.
# NOTE(review): `logger` is not defined or imported in any visible cell —
# confirm where it comes from before running this cell.
cda_tree: spz.SpeasyIndex = spz.inventories.tree.cda
product = cda_tree.STEREO.Ahead.IMPACT_MAG.STA_L1_MAG_RTN

# Product-level description and ID, then the BFIELD variable's CATDESC and
# speasy uid.
logger.info(product.description)
logger.info(product.ID)
logger.info(product.BFIELD.CATDESC)
logger.info(product.BFIELD.spz_uid())

# Alternative inventory lookups kept for reference (commented out).
# NOTE(review): the lines below go through `spz.inventories.data_tree`, while
# the code above uses `spz.inventories.tree` — confirm which one is current.
# spz.inventories.data_tree.cda.STEREO.Ahead.IMPACT_MAG.STA_L1_MAG_RTN.
# spz.inventories.data_tree.cda.STEREO.STEREOA.IMPACT_MAG.STA_LB_MAG_RTN.description
# spz.inventories.data_tree.cda.STEREO.Ahead.IMPACT_MAG.STA_L1_MAG_RTN.MAGFLAGUC.CATDESC
# Bare expression: its value is only evaluated, not assigned (presumably shown
# as the notebook cell output).
spz.inventories.data_tree.cda.STEREO.Ahead.IMPACT_MAG.STA_L1_MAG_RTN.BFIELD.CATDESC
# spz.inventories.data_tree.cda.STEREO.Ahead.IMPACT_MAG.STA_L1_MAG_RTN.BFIELD.