---
title: IDs from STEREO
order: 11
---

See the following notebooks for details:

- [Magnetic field data](./mag.ipynb)
- [State data](./state.ipynb)

The STEREO magnetic field data is already in RTN coordinates, so no coordinate transformation is needed.

The data is downloaded with `pyspedas` but loaded with `pycdfpp`, because using `pyspedas` to load the data directly into `xarray` is very slow.

Note: the STEREO-A PLASTIC data file is reported as a "Non compliant ISTP file: No data variable found, this is suspicious", and it uses `-9.999999848243207e+30` instead of `-1e+31` as the fill value.

```
wget -r -np -nH -R "index.html*" https://spdf.gsfc.nasa.gov/pub/data/stereo/ahead/l2/plastic/1dmax/1min/201{1,2,3,4,5,6}/
```

In [None]:
import speasy as spz
from space_analysis.utils.speasy import Variables
from space_analysis.io.cdf import cdf2pl
from discontinuitypy.datasets import IDsDataset
from beforerr.polars import pl_norm
from discontinuitypy.utils.basic import resample

from datetime import timedelta
import polars as pl
import polars.selectors as cs

from sunpy.time import TimeRange
import pyspedas

from tqdm import tqdm

In [113]:
import speasy as spz
# Entries of the speasy cache, with the internal ones dropped
entries = [
    entry for entry in spz.core.cache.entries() if '__internal__' not in entry
]
# Print them in reverse order
print(list(reversed(entries)))

['https://cdaweb.gsfc.nasa.gov/tmp/wstdZ7pT/thb_l2s_fgm_20110516120000_20110521185813.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/ws7AC2YG/thb_l2s_fgm_20110510160109_20110516115959.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wsdWe6Y0/thb_l2s_fgm_20110503154613_20110508141013.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wspIHacZ/thb_l2s_fgm_20110427180245_20110428180909.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/ws8Y7VVX/thb_l2s_fgm_20110418120005_20110425000941.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wsVgYXlA/thb_l2s_fgm_20110411180245_20110418115757.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wsDf0Pkg/thb_l2s_fgm_20110404120000_20110411000941.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wsqeq6Bm/thb_l2s_fgm_20110330222053_20110404115959.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wsyP7JUE/thb_l2s_fgm_20110321120000_20110328000941.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wslkgA9S/thb_l2s_fgm_20110314180245_20110321115959.cdf', 'https://cdaweb.gsfc.nasa.gov/tmp/wsoPwsBS/thb_l2s_fgm_20110313180245_20110314000941.cdf'

In [None]:
# Refresh the speasy inventories before any data is requested.
# NOTE(review): presumably required so that the provider/dataset names
# configured in this notebook can be resolved — confirm.
spz.update_inventories()

In [None]:
# Time span to process and the mission label used in the output file name.
timerange = ["2011-08-25", "2016-06-30"]
# timerange = ["2011-08-25", "2011-09-01"]
mission = "STA"
# `ts` is the interval the magnetic field data is resampled to; both `ts` and
# `tau` are passed on to `IDsDataset`.
ts = timedelta(seconds=1)
tau = timedelta(seconds=60)

# Magnetic field data source.
provider = 'archive/local'
mag_dataset = "STA_L1_MAG_RTN"
mag_parameters = ["BFIELD"]

# Alternative plasma data source (not used):
# plasma_dataset = "STA_L2_MAGPLASMA_1M"
# plasma_parameters= ["Np", "Cone_Angle", "Vt_Over_V_RTN", "Vp_RTN", "Tp"]

# Plasma data source and the variables read from it.
plasma_dataset = 'STA_L2_PLA_1DMAX_1MIN'
density_col = 'proton_number_density'
vec_cols = ['proton_Vr_RTN', 'proton_Vt_RTN', 'proton_Vn_RTN']
temperature_col = 'proton_temperature'
plasma_parameters = [density_col] + vec_cols + [temperature_col]

# Extension of the converted plasma files and of the events file.
fmt = 'arrow'
# Both `ts` and `tau` are taken from the variables above, so the file name
# always reflects the parameters that were actually used.
fname = (
    f"../../../data/05_reporting/events.{mission}"
    f".ts_{ts.total_seconds():.2f}s_tau_{tau.total_seconds():g}s.{fmt}"
)

## Downloading files

In [None]:
def download(timerange):
    """Download the STEREO PLASTIC and MAG files and convert the plasma data.

    Every downloaded PLASTIC CDF file is read with `cdf2pl` (restricted to
    `plasma_parameters`) and written next to the CDF file, with the `.cdf`
    part of the path replaced by `.{fmt}`.

    Args:
        timerange: Time range forwarded to `pyspedas.stereo.plastic` and
            `pyspedas.stereo.mag`.

    Returns:
        The PLASTIC CDF file paths returned by `pyspedas.stereo.plastic`.
    """
    files = pyspedas.stereo.plastic(timerange, downloadonly=True)

    for file in files:
        # The file is always written as Arrow IPC; `fmt` only sets the extension.
        converted_file = file.replace('.cdf', f'.{fmt}')
        cdf2pl(file, plasma_parameters).collect().write_ipc(converted_file)

    # Called only for its download side effect; the returned paths are not used.
    pyspedas.stereo.mag(timerange, downloadonly=True)

    return files
    
        
# download(timerange)

## Loading data

In [None]:
def get_and_process_data(timerange):
    """Find events in `timerange` and attach plasma data to them.

    The magnetic field is retrieved through `Variables`; the plasma data is
    scanned lazily from the files that share their path with the PLASTIC CDF
    files (with `.cdf` replaced by `.{fmt}`). Both are handed to `IDsDataset`.

    Args:
        timerange: Time range forwarded to `Variables` and
            `pyspedas.stereo.plastic`.

    Returns:
        The result of calling `find_events(return_best_fit=False)` followed by
        `update_candidates_with_plasma_data()` on the `IDsDataset`.
    """
    mag_request = Variables(
        provider=provider,
        dataset=mag_dataset,
        parameters=mag_parameters,
        timerange=timerange,
    )
    mag_vars = mag_request.retrieve_data()

    # The first three columns of the first variable are passed on as `bcols`;
    # the 'BTotal' column is dropped from the data frame.
    bcols = mag_vars.data[0].columns[:3]
    mag_data = mag_vars.to_polars().drop('BTotal')

    # Only the file paths are needed here; the data itself is read from the
    # converted files.
    cdf_files = pyspedas.stereo.plastic(timerange, downloadonly=True, no_update=True)
    plasma_files = [cdf_file.replace('.cdf', f'.{fmt}') for cdf_file in cdf_files]

    # NOTE(review): only the density is masked where it is negative; velocity
    # and temperature are passed through unchanged — confirm that fill values
    # in those columns are handled elsewhere.
    density = pl.col(density_col)
    plasma_data = (
        pl.scan_ipc(plasma_files)
        .with_columns(
            plasma_density=pl.when(density < 0).then(None).otherwise(density),
            plasma_speed=pl_norm(vec_cols),
        )
        .rename({temperature_col: 'plasma_temperature'})
    )

    dataset = IDsDataset(
        mag_data=mag_data.pipe(resample, every=ts),
        plasma_data=plasma_data,
        tau=tau,
        ts=ts,
        bcols=bcols,
        vec_cols=vec_cols,
        density_col="plasma_density",
        speed_col="plasma_speed",
        temperature_col='plasma_temperature',
    )
    with_events = dataset.find_events(return_best_fit=False)
    return with_events.update_candidates_with_plasma_data()

In [None]:
# Process the time range in 8 sub-ranges and collect the events of each one.
ids: list[pl.DataFrame] = []
sub_ranges = TimeRange(timerange).split(8)
for _tr in tqdm(sub_ranges):
    _timerange = [_tr.start.value, _tr.end.value]
    ids.append(get_and_process_data(_timerange).events)

# Combine all events, keep only the datetime, duration and numeric columns,
# and write the result to `fname`.
all_events = pl.concat(ids, how='vertical_relaxed')
id = all_events.select(cs.datetime(), cs.duration(), cs.numeric())
id.write_ipc(fname)