---
title: IDs from Wind
order: 10
---

See the following notebooks for details:

- [Magnetic field data](wind/mag.ipynb)
- [State data](wind/state.ipynb)


References:

- [HPDE.io/Wind](https://hpde.io/NASA/NumericalData/Wind/index.html)
- [Wind Data Sources](https://wind.nasa.gov/data_sources.php)
- [WIND MFI high-resolution data file](https://cdaweb.gsfc.nasa.gov/misc/NotesW.html#WI_H4-RTN_MFI)
- [PySPEDAS](https://pyspedas.readthedocs.io/en/latest/wind.html)


- We use magnetic field data in the **RTN** coordinate system

Notes:

- December 2014: Wind Data Notice - Data between September 24 and November 30, 2014 are not included in this definitive data set because of an instrumental problem [*](https://omniweb.gsfc.nasa.gov/ftpbrowser/wind_swe_2m.html)

In [None]:
import speasy as spz

import polars as pl
import polars.selectors as cs
from space_analysis.utils.speasy import Variables
from discontinuitypy.datasets import IDsDataset
from discontinuitypy.utils.basic import resample
from beforerr.polars import pl_norm

from datetime import timedelta
from sunpy.time import TimeRange

from space_analysis.plasma.formulary import df_thermal_spd2temp

from tqdm import tqdm

In [None]:
# Ask speasy to update its inventories before any data is requested below.
spz.update_inventories()

In [None]:
# Overall analysis window as [start, end]; it is split into sub-ranges before processing.
timerange = ["2011-08-25", "2016-06-30"]
mission = "Wind"
ts = timedelta(seconds=1)  # passed to `resample(every=ts)` and to `IDsDataset(ts=...)`
tau = timedelta(seconds=60)  # passed to `IDsDataset(tau=...)`

provider = 'archive/local'
mag_dataset = "WI_H4-RTN_MFI"
mag_parameters = ["BRTN"]
plasma_dataset = "WI_K0_SWE"
# Order matters: `get_and_process_data` reads these by position
# (0: density, 1: velocity vector, 2: thermal speed / temperature).
plasma_parameters= ["Np", "V_GSM", "THERMAL_SPD"]

fmt = 'arrow'
# Derive the tau part of the file name from `tau` instead of hard-coding "60s",
# so the name stays truthful when `tau` is changed (60 s still renders as "tau_60s").
fname = f"../../data/05_reporting/events.{mission}.ts_{ts.total_seconds():.2f}s_tau_{tau.total_seconds():g}s.{fmt}"

## Downloading the data files first

In [None]:
import pyspedas

def download(timerange):
    """Fetch the Wind SWE and MFI files for ``timerange`` via pyspedas.

    Both loaders are called with ``downloadonly=True``; presumably this only
    stores the files locally without loading them - confirm against pyspedas.

    Args:
        timerange: Time range forwarded unchanged as the first argument of
            ``pyspedas.wind.swe`` and ``pyspedas.wind.mfi``.
    """
    # (loader, datatype) pairs, SWE first and MFI second.
    requests = (
        (pyspedas.wind.swe, 'k0'),
        (pyspedas.wind.mfi, "h4-rtn"),
    )
    for loader, datatype in requests:
        loader(timerange, datatype=datatype, downloadonly=True)

## Loading

In [None]:
def get_and_process_data(
    mag_dataset, mag_parameters, plasma_dataset, plasma_parameters, timerange, tau, ts,
    provider = 'archive/local'
):
    """Retrieve magnetic-field and plasma data for one time range and search it for events.

    Args:
        mag_dataset: Dataset identifier for the magnetic-field request.
        mag_parameters: Parameter names for the magnetic-field request; the
            columns of the first one are used as ``bcols``.
        plasma_dataset: Dataset identifier for the plasma request.
        plasma_parameters: Parameter names for the plasma request, read by
            position: 0 -> density, 1 -> velocity vector, 2 -> thermal
            speed or temperature.
        timerange: Time range handed to both ``Variables`` requests.
        tau: Forwarded to ``IDsDataset`` as ``tau``.
        ts: Resampling step for the magnetic-field data; also forwarded to
            ``IDsDataset`` as ``ts``.
        provider: Data provider handed to both ``Variables`` requests.

    Returns:
        The result of ``IDsDataset(...).find_events(...)
        .update_candidates_with_plasma_data()``; the caller in this notebook
        reads its ``.events`` attribute.
    """

    def _retrieve(dataset, parameters):
        # Both requests share the provider and the time range.
        request = Variables(
            provider=provider,
            dataset=dataset,
            parameters=parameters,
            timerange=timerange,
        )
        return request.retrieve_data()

    mag_vars = _retrieve(mag_dataset, mag_parameters)
    plasma_vars = _retrieve(plasma_dataset, plasma_parameters)

    # Column names, looked up by the position of each requested parameter.
    bcols = mag_vars.data[0].columns
    density_col = plasma_vars.data[0].columns[0]
    vec_cols = plasma_vars.data[1].columns
    temperature_col = plasma_vars.data[2].columns[0]

    # Tabular data: add the norm of the velocity vector and give the density
    # column the fixed name expected below.
    mag_data = mag_vars.to_polars()
    plasma_data = plasma_vars.to_polars()
    plasma_data = plasma_data.with_columns(plasma_speed=pl_norm(vec_cols))
    plasma_data = plasma_data.rename({density_col: "plasma_density"})

    # A third parameter in km/s is handed to `df_thermal_spd2temp` (presumably
    # it converts thermal speed into a "plasma_temperature" column - confirm);
    # any other unit is just renamed to "plasma_temperature".
    if plasma_vars.data[2].unit != "km/s":
        plasma_data = plasma_data.rename({temperature_col: "plasma_temperature"})
    else:
        plasma_data = df_thermal_spd2temp(plasma_data, temperature_col)

    ids_dataset = IDsDataset(
        mag_data=resample(mag_data, every=ts),
        plasma_data=plasma_data,
        tau=tau,
        ts=ts,
        bcols=bcols,
        vec_cols=vec_cols,
        density_col="plasma_density",
        speed_col="plasma_speed",
        temperature_col="plasma_temperature",
    )
    with_events = ids_dataset.find_events(return_best_fit=False)
    return with_events.update_candidates_with_plasma_data()

In [None]:
# Process the full `timerange` in 8 sub-ranges produced by `TimeRange.split`
# and collect the events found in each one (presumably chunked to bound the
# amount of data handled at once - confirm).
ids : list[pl.DataFrame] = []
for _tr in tqdm(TimeRange(timerange).split(8)):
    _timerange = [_tr.start.value, _tr.end.value]
    _id = get_and_process_data(
        mag_dataset, mag_parameters, plasma_dataset, plasma_parameters, _timerange, tau, ts,
        # Pass the notebook-level setting explicitly; otherwise editing
        # `provider` in the configuration cell silently has no effect.
        provider=provider,
    )
    ids.append(_id.events)

In [None]:
# Stack the per-sub-range event tables, keep only the datetime, duration and
# numeric columns, and write the result to `fname` in Arrow IPC format.
(
    pl.concat(ids)
    .select(cs.datetime(), cs.duration(), cs.numeric())
    .write_ipc(fname)
)

In [None]:
# from discontinuitypy.datasets import IDsDataset
# import polars as pl
# from fastcore.utils import walk

In [None]:
# ts = 1 # unit: seconds
# tau = 60 # unit: seconds

# mission = "Wind"
# data_dir = '../../data'
# dir_path = f'{data_dir}/03_primary/{mission}_MAG_ts_{ts}s'
# state_data_path = f'{data_dir}/03_primary/OMNI_LowRes_ts_3600s.parquet'
# vec_cols = ['v_x', 'v_y', 'v_z']

# format = 'arrow'
# fname = f'events.{mission}.ts_{ts}s_tau_{tau}s.{format}'
# output_path = f'{data_dir}/05_reporting/{fname}'

In [None]:
# events = []
# for mag_path in files[-1:]:

#     mag_data = pl.scan_parquet(mag_path).drop('X', 'Y', 'Z').sort('time')
#     plasma_data = pl.scan_parquet(state_data_path).sort('time')

#     _events = (
#         IDsDataset(
#             mag_data=mag_data,
#             plasma_data=plasma_data,
#             tau=tau,
#             ts=ts,
#             vec_cols=vec_cols,
#         )
#         .find_events(return_best_fit=False)
#         .update_candidates_with_plasma_data()
#         .events
#     )
    
#     events.append(_events)
    
# ids_dataset = IDsDataset(
#     events=pl.concat(events),
#     mag_data= pl.scan_parquet(list(walk(dir_path))).drop('X', 'Y', 'Z').sort('time')
# ).export(output_path)