# Events

In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
import polars as pl
from discontinuitypy.datasets import IDsDataset
from beforerr.polars import pl_norm

from sunpy.time import TimeRange
from datetime import timedelta
from space_analysis.utils.speasy import Variables

from bokeh.io import output_notebook
output_notebook()

In [17]:
import astropy.units as u
from astropy.constants import m_p

def thermal_spd2temp(speed, speed_unit=u.km / u.s):
    """Convert a proton thermal speed into the equivalent energy in eV.

    The result is ``m_p * v**2 / 2`` converted to electron-volts, where
    ``v = speed * speed_unit``.

    Args:
        speed: Unit-less speed value(s), e.g. the NumPy array passed in by
            ``df_thermal_spd2temp``.
        speed_unit: Astropy unit that ``speed`` is expressed in.

    Returns:
        The unit-stripped value(s) in eV.
    """
    velocity = speed * speed_unit
    energy = m_p * velocity**2 / 2
    return energy.to("eV").value


def df_thermal_spd2temp(ldf: pl.LazyFrame, speed_col, speed_unit=u.km / u.s):
    """Add a ``plasma_temperature`` column derived from a thermal-speed column.

    The lazy frame is collected first because the conversion runs on a NumPy
    array through ``thermal_spd2temp``; the result is handed back as a lazy
    frame again.

    Args:
        ldf: Lazy frame that contains ``speed_col``.
        speed_col: Name of the column holding the thermal speed.
        speed_unit: Astropy unit of the values in ``speed_col``.

    Returns:
        A lazy frame with the extra ``plasma_temperature`` column.
    """
    collected = ldf.collect()
    speeds = collected[speed_col].to_numpy()
    temperature = thermal_spd2temp(speeds, speed_unit)
    return collected.with_columns(plasma_temperature=temperature).lazy()

In [18]:
def get_and_process_data(
    mag_dataset, mag_parameters, plasma_dataset, plasma_parameters, timerange, tau, ts
):
    """Retrieve magnetic-field and plasma data and bundle them into an ``IDsDataset``.

    The plasma variables are read by position: the first supplies the density
    column, the second the vector columns that ``pl_norm`` combines into
    ``plasma_speed``, and the third the temperature column.  When the third
    variable reports its unit as ``"km/s"`` it is converted with
    ``df_thermal_spd2temp``; otherwise its column is renamed to
    ``plasma_temperature``.

    Args:
        mag_dataset: Dataset identifier for the magnetic-field request.
        mag_parameters: Parameter names requested from ``mag_dataset``.
        plasma_dataset: Dataset identifier for the plasma request.
        plasma_parameters: Parameter names requested from ``plasma_dataset``;
            at least three entries, in the order described above.
        timerange: Time range forwarded unchanged to both requests.
        tau: Forwarded unchanged to ``IDsDataset``.
        ts: Forwarded unchanged to ``IDsDataset``.

    Returns:
        The constructed ``IDsDataset``.
    """
    # Retrieve both products over the same time range (magnetic field first).
    mag_vars = Variables(
        dataset=mag_dataset, parameters=mag_parameters, timerange=timerange
    ).retrieve_data()
    plasma_vars = Variables(
        dataset=plasma_dataset, parameters=plasma_parameters, timerange=timerange
    ).retrieve_data()

    # Column names, looked up by position in the retrieved variables.
    field_cols = mag_vars.data[0].columns
    density_name = plasma_vars.data[0].columns[0]
    velocity_cols = plasma_vars.data[1].columns
    temperature_name = plasma_vars.data[2].columns[0]

    # Build the frames; the plasma frame gets the speed column and a
    # standardized density column name.
    mag_data = mag_vars.to_polars()
    plasma_data = plasma_vars.to_polars()
    plasma_data = plasma_data.with_columns(plasma_speed=pl_norm(velocity_cols))
    plasma_data = plasma_data.rename({density_name: "plasma_density"})

    # A "km/s" unit means the third variable is a thermal speed, not a temperature.
    given_as_thermal_speed = plasma_vars.data[2].unit == "km/s"
    if not given_as_thermal_speed:
        plasma_data = plasma_data.rename({temperature_name: "plasma_temperature"})
    else:
        plasma_data = df_thermal_spd2temp(plasma_data, temperature_name)

    return IDsDataset(
        mag_data=mag_data,
        plasma_data=plasma_data,
        tau=tau,
        ts=ts,
        bcols=field_cols,
        vec_cols=velocity_cols,
        density_col="plasma_density",
        speed_col="plasma_speed",
        temperature_col="plasma_temperature",
    )

In [19]:
# Bounds of the PSP interval (wrapped into `psp_timerange`).
start = "2019-04-07T01:00"
end = "2019-04-07T12:00"

# Bounds of the Earth-side interval (wrapped into `earth_timerange`, used for Wind).
earth_start = "2019-04-09"
earth_end = "2019-04-12"

In [20]:
# Wrap the interval bounds in sunpy TimeRange objects; `validate` turns these
# into [start, end] string pairs before the data requests are made.
psp_timerange = TimeRange(start, end)
earth_timerange = TimeRange(earth_start, earth_end)

In [21]:
def validate(timerange):
    """Normalize a time range into the form passed to the data requests.

    Args:
        timerange: Either a sunpy ``TimeRange`` or a value that is already in
            the form expected downstream (presumably a ``[start, end]`` pair
            of strings; confirm against ``Variables``).

    Returns:
        ``[start, end]`` as strings when given a ``TimeRange``; any other
        input is returned unchanged.
    """
    if isinstance(timerange, TimeRange):
        return [timerange.start.to_string(), timerange.end.to_string()]
    # Non-TimeRange input used to fall through and return None, which silently
    # discarded a range the caller had already prepared.
    return timerange

In [28]:
# PSP run configuration. `tau` and `ts` are forwarded by
# `get_and_process_data` to `IDsDataset`.
tau = timedelta(seconds=16)
ts = timedelta(seconds=1 / 180)

# Magnetic-field product and parameter.
mag_dataset = "PSP_FLD_L2_MAG_RTN"
mag_parameters = ["psp_fld_l2_mag_RTN"]

# Alternative plasma source (SPI moments), currently disabled.
# plasma_dataset = "PSP_SWP_SPI_SF00_L3_MOM"
# plasma_parameters = ["DENS", "VEL_RTN_SUN", "TEMP", "SUN_DIST"]
# fname = "../data/psp_ids_dataset.arrow"

# Active plasma source. Order matters: `get_and_process_data` reads the
# parameters by position as density, velocity vector, temperature. The third
# entry is presumably a thermal speed in km/s (handled by the unit check
# there) -- TODO confirm.
plasma_dataset = "PSP_SWP_SPC_L3I"
plasma_parameters = ["np_moment_gd", "vp_moment_RTN_gd", "wp_moment_gd"]


# Convert the TimeRange into the [start, end] string pair.
timerange = validate(psp_timerange)

# Output path handed to `.export(...)` by the PSP pipeline cell.
fname = "../data/psp_ids_dataset_spc.arrow"

In [31]:
# Build the PSP dataset, run find_events / update_candidates_with_plasma_data
# on it, and export it to `fname`; the value returned by the chain is kept for
# plotting and resampling in later cells.
psp_ids_dataset = (
    get_and_process_data(
        mag_dataset=mag_dataset,
        mag_parameters=mag_parameters,
        plasma_dataset=plasma_dataset,
        plasma_parameters=plasma_parameters,
        timerange=timerange,
        tau=tau,
        ts=ts,
    )
    .find_events()
    .update_candidates_with_plasma_data()
    .export(fname)
)




Distributing Dataframe:   0%           Elapsed time: 00:00, estimated remaining time: ?

Estimated completion of line 3:   0%           Elapsed time: 00:00, estimated remaining time: ?

In [33]:
# Plot from the PSP dataset; `offset` is passed through to `plot`
# (presumably padding around the event window -- TODO confirm).
psp_ids_dataset.plot(offset=timedelta(seconds=5))

[32m2024-02-10 17:59:20.159[0m | [1mINFO    [0m | [36mdiscontinuitypy.datasets[0m:[36moverview_plot[0m:[36m172[0m - [1mOverview plot: 2019-04-07 07:20:56 - 2019-04-07 07:21:12[0m
[32m2024-02-10 17:59:20.159[0m | [34m[1mDEBUG   [0m | [36mdiscontinuitypy.datasets[0m:[36mlog_event_change[0m:[36m79[0m - [34m[1mCHANGE INFO
        n.change: -15.301864624023438
        v.ion.change: -11.389994719711808
        T.change: -15.35660171508789
        v.Alfven.change: 11.979870753367976
        v.ion.change.l: 88.78097595793847
        v.Alfven.change.l: 133.87318099874705
        [0m


In [10]:
from discontinuitypy.utils.basic import resample

# Cell-specific names for the resampled run. Rebinding the globals `ts` and
# `fname` here would make a later re-run of the PSP pipeline cell pick up the
# resampled cadence and overwrite the resampled output file.
ts_r = timedelta(seconds=1 / 11)
fname_r = "../data/psp_ids_dataset_r.arrow"

# Work on a deep copy so the original PSP dataset is left untouched.
psp_ids_dataset_r = psp_ids_dataset.model_copy(deep=True)
psp_ids_dataset_r.data = resample(psp_ids_dataset_r.data, ts_r)
psp_ids_dataset_r.ts = ts_r
psp_ids_dataset_r.find_events().update_candidates_with_plasma_data().export(fname_r)




Distributing Dataframe:   0%           Elapsed time: 00:00, estimated remaining time: ?

[36m(_deploy_ray_func pid=60598)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/__init__.py", line 85, in <module>
[36m(_deploy_ray_func pid=60598)[0m     from . import skintegrate
[36m(_deploy_ray_func pid=60598)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/skintegrate.py", line 20, in <module>
[36m(_deploy_ray_func pid=60598)[0m     from sklearn.base import BaseEstimator
[36m(_deploy_ray_func pid=60598)[0m ModuleNotFoundError: No module named 'sklearn'
[36m(_deploy_ray_func pid=60598)[0m 
[36m(_deploy_ray_func pid=60598)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/__init__.py", line 105, in <module>
[36m(_deploy_ray_func pid=60598)[0m     from . import nltk_stages
[36m(_deploy_ray_func pid=60598)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/nltk_stages.py", line 19, in <module>
[36

Estimated completion of line 9:   0%           Elapsed time: 00:00, estimated remaining time: ?

IDsDataset(name=None, events=shape: (89, 105)
┌────────────┬───────┬───────────┬───────────┬───┬────────────┬───────────┬────────────┬───────────┐
│ time       ┆ count ┆ B_std     ┆ B_mean    ┆ … ┆ T.change   ┆ B.change  ┆ v.Alfven.c ┆ v.Alfven. │
│ ---        ┆ ---   ┆ ---       ┆ ---       ┆   ┆ ---        ┆ ---       ┆ hange      ┆ change.l  │
│ datetime[n ┆ u32   ┆ f64       ┆ f64       ┆   ┆ f64        ┆ f64       ┆ ---        ┆ ---       │
│ s]         ┆       ┆           ┆           ┆   ┆            ┆           ┆ f64        ┆ f64       │
╞════════════╪═══════╪═══════════╪═══════════╪═══╪════════════╪═══════════╪════════════╪═══════════╡
│ 2019-04-07 ┆ 176   ┆ 42.543101 ┆ 85.393798 ┆ … ┆ -20.301338 ┆ 0.083642  ┆ -141.27939 ┆ 317.01786 │
│ 01:05:28   ┆       ┆           ┆           ┆   ┆            ┆           ┆ 1          ┆ 6         │
│ 2019-04-07 ┆ 176   ┆ 37.298421 ┆ 85.808691 ┆ … ┆ 19.339516  ┆ 2.537321  ┆ 157.987252 ┆ -343.5237 │
│ 01:06:08   ┆       ┆           ┆           

## Wind dataset


In [10]:
# Wind run configuration. NOTE: this rebinds the same global names
# (`ts`, `tau`, `mag_dataset`, ..., `fname`) that the PSP configuration cell
# sets, so re-run the matching configuration cell before re-running a pipeline cell.
ts = timedelta(seconds=1 / 11)
tau = timedelta(seconds=30)

# Magnetic-field product and parameter (GSM alternative kept disabled).
mag_dataset = "WI_H2_MFI"
# mag_parameters = ["BGSM"]
mag_parameters = ["BGSE"]

# Alternative plasma source, currently disabled.
# plasma_dataset="WI_K0_SWE"
# plasma_parameters=["Np", "V_GSM", "THERMAL_SPD"]

# Active plasma source. Order matters: `get_and_process_data` reads the
# parameters by position as density, velocity vector, temperature.
plasma_dataset="WI_PM_3DP"
plasma_parameters=["P_DENS", "P_VELS", "P_TEMP"] # Proton velocity vector (GSE)

# Convert the TimeRange into the [start, end] string pair.
timerange = validate(earth_timerange)

# Output path handed to `.export(...)` by the Wind pipeline cell.
fname = "../data/wind_ids_dataset_r.arrow"

In [11]:
# Build the Wind dataset, run find_events / update_candidates_with_plasma_data
# on it, and export it to `fname`; the value returned by the chain is kept for
# plotting in a later cell.
wind_ids_dataset = (
    get_and_process_data(
        mag_dataset=mag_dataset,
        mag_parameters=mag_parameters,
        plasma_dataset=plasma_dataset,
        plasma_parameters=plasma_parameters,
        timerange=timerange,
        tau=tau,
        ts=ts,
    )
    .find_events()
    .update_candidates_with_plasma_data()
    .export(fname)
)




Distributing Dataframe:   0%           Elapsed time: 00:00, estimated remaining time: ?

Distributing Dataframe: 100%██████████ Elapsed time: 00:00, estimated remaining time: 00:00
[36m(_deploy_ray_func pid=58908)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/__init__.py", line 85, in <module>
[36m(_deploy_ray_func pid=58908)[0m     from . import skintegrate
[36m(_deploy_ray_func pid=58908)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/skintegrate.py", line 20, in <module>
[36m(_deploy_ray_func pid=58908)[0m     from sklearn.base import BaseEstimator
[36m(_deploy_ray_func pid=58908)[0m ModuleNotFoundError: No module named 'sklearn'
[36m(_deploy_ray_func pid=58908)[0m 
[36m(_deploy_ray_func pid=58908)[0m   File "/Users/zijin/micromamba/envs/psp_conjunction/lib/python3.11/site-packages/pdpipe/__init__.py", line 105, in <module>
[36m(_deploy_ray_func pid=58908)[0m     from . import nltk_stages
[36m(_deploy_ray_func pid=58908)[0m   File "/Users/zijin/micromamba/envs/ps

Estimated completion of line 3:   0%           Elapsed time: 00:00, estimated remaining time: ?

In [12]:
import hvplot.polars

In [16]:
# Plot from the Wind dataset using the default arguments of `plot`.
wind_ids_dataset.plot()

[32m2024-02-09 16:50:46.654[0m | [1mINFO    [0m | [36mdiscontinuitypy.datasets[0m:[36moverview_plot[0m:[36m167[0m - [1mOverview plot: 2019-04-11 16:45:00 - 2019-04-11 16:45:30[0m
[32m2024-02-09 16:50:46.654[0m | [34m[1mDEBUG   [0m | [36mdiscontinuitypy.datasets[0m:[36mlog_event_change[0m:[36m79[0m - [34m[1mCHANGE INFO
        n.change: -0.0016632080078125
        v.ion.change: -0.07276110613753417
        T.change: 0.10945892333984375
        v.Alfven.change: 0.16010592925675837
        v.ion.change.l: -7.858156252181972
        v.Alfven.change.l: -15.310337071702092
        [0m
