In [1]:
import os
import pandas as pd


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/traitlets/config/application.

AttributeError: _ARRAY_API not found

In [3]:
# Parameters: tune these before running the detection cell.

# Input CSV file or folder of CSVs, and the folder that receives the results.
INPUT_PATH = "processed_ts/train"
OUTPUT_PATH = "processed_ts/train_wake_intervals"

# Column names read from every input CSV: sample time and the measured signal.
TIME_COL = "t_s"
VALUE_COL = "z_m"

# Rolling-window lengths, expressed in the units of TIME_COL (seconds, going by
# the `_s` naming); they are converted to sample counts from the sampling rate.
BASELINE_WINDOW_S = 60.0  # window of the rolling-median baseline
NOISE_WINDOW_S = 30.0     # window of the rolling-MAD noise estimate
# Hysteresis thresholds on the robust z-score: an interval opens at
# z >= HIGH_Z and closes at z <= LOW_Z.
HIGH_Z = 2.5
LOW_Z = 1.2
# NOTE(review): presumably the minimum interval duration to keep and the
# largest gap across which neighbouring intervals are merged (both in
# seconds) -- confirm how the detection code is expected to apply them.
MIN_DURATION_S = 2.0
MERGE_GAP_S = 1.0

In [None]:
def _rolling_median(arr: pd.Series, win: int) -> pd.Series:
    return arr.rolling(win, min_periods=1, center=True).median()

def _rolling_mad(arr: pd.Series, win: int) -> pd.Series:
    med = arr.rolling(win, min_periods=1, center=True).median()
    mad = (arr - med).abs().rolling(win, min_periods=1, center=True).median()
    sigma = mad * 1.4826
    sigma = sigma.replace(0, None).bfill().ffill().fillna(1e-6)
    return sigma

def detect_wakes_rule_based(df: pd.DataFrame) -> pd.DataFrame:
    """Detect wake intervals in one time series with a hysteresis rule.

    The signal in ``VALUE_COL`` is detrended with a rolling median over
    ``BASELINE_WINDOW_S`` and divided by a rolling MAD-based noise estimate
    over ``NOISE_WINDOW_S``, giving a robust z-score. An interval opens when
    the z-score reaches ``HIGH_Z`` and closes at the first later sample where
    it drops to ``LOW_Z`` or below (that sample is included in the interval).
    Intervals separated by at most ``MERGE_GAP_S`` are then merged, and
    intervals shorter than ``MIN_DURATION_S`` are dropped.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns named by ``TIME_COL`` and ``VALUE_COL``.

    Returns
    -------
    pd.DataFrame
        One row per interval with columns ``start_s``, ``end_s``,
        ``duration_s``, ``peak_value`` and ``peak_z``. The columns are present
        even when no interval is found.

    Raises
    ------
    ValueError
        If the median step of ``TIME_COL`` is not a positive number, so no
        sampling rate can be derived from it.
    """
    columns = ["start_s", "end_s", "duration_s", "peak_value", "peak_z"]

    # With fewer than two samples there is no sampling interval to size the
    # rolling windows from, and nothing that could form an interval.
    if len(df) < 2:
        return pd.DataFrame(columns=columns)

    times = df[TIME_COL]
    t = times.values.astype(float)
    x = df[VALUE_COL].astype(float)

    dt = pd.Series(t).diff().median()
    if not dt > 0:  # also true when dt is NaN
        raise ValueError(
            f"Cannot derive a sampling rate from column {TIME_COL!r}: "
            f"median time step is {dt!r}"
        )
    fs = 1.0 / dt

    # Window lengths in samples, never shorter than 3.
    bw = max(int(round(BASELINE_WINDOW_S * fs)), 3)
    nw = max(int(round(NOISE_WINDOW_S * fs)), 3)

    baseline = _rolling_median(x, bw)
    detrended = x - baseline
    sigma = _rolling_mad(detrended, nw)
    # Force a float result so the threshold comparisons below do not depend
    # on the dtype the noise estimate happens to come back with.
    z = (detrended / sigma).astype(float)

    # Hysteresis pass: collect (start, end) positional index pairs. NaN
    # z-scores compare False against both thresholds and change no state.
    raw_intervals = []
    start_idx = None
    for i, zi in enumerate(z.to_numpy()):
        if start_idx is None:
            if zi >= HIGH_Z:
                start_idx = i
        elif zi <= LOW_Z:
            raw_intervals.append((start_idx, i))
            start_idx = None
    if start_idx is not None:
        # Series ended while still above the low threshold.
        raw_intervals.append((start_idx, len(z) - 1))

    # Merge neighbours whose gap (next start minus previous end) is small.
    merged = []
    for s, e in raw_intervals:
        if merged and t[s] - t[merged[-1][1]] <= MERGE_GAP_S:
            merged[-1] = (merged[-1][0], e)
        else:
            merged.append((s, e))

    rows = []
    for s, e in merged:
        # Discard intervals that are too short to count.
        if t[e] - t[s] < MIN_DURATION_S:
            continue
        start_time = times.iloc[s]
        end_time = times.iloc[e]
        rows.append(dict(
            start_s=start_time,
            end_s=end_time,
            duration_s=end_time - start_time,
            peak_value=df[VALUE_COL].iloc[s:e + 1].max(),
            peak_z=z.iloc[s:e + 1].max(),
        ))
    # Passing ``columns`` keeps the header even when ``rows`` is empty, so the
    # CSV written from this frame can always be read back.
    return pd.DataFrame(rows, columns=columns)

def process_file(in_path, out_path):
    """Run wake detection on one CSV file and write the intervals as CSV.

    Parameters
    ----------
    in_path : str
        Path of the input CSV, read with ``pd.read_csv``.
    out_path : str
        Path of the output CSV. Its parent directory is created if needed.
    """
    df = pd.read_csv(in_path)
    intervals_df = detect_wakes_rule_based(df)
    # ``os.path.dirname`` returns "" for a bare filename, and
    # ``os.makedirs("")`` raises, so only create a directory when there is one.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    intervals_df.to_csv(out_path, index=False)

def _wakes_output_name(path):
    """Turn a trailing ``.csv`` into ``_wakes.csv``; leave other paths as is."""
    suffix = ".csv"
    if path.endswith(suffix):
        return path[:-len(suffix)] + "_wakes.csv"
    return path


def main():
    """Process ``INPUT_PATH`` and write wake-interval CSVs under ``OUTPUT_PATH``.

    If ``INPUT_PATH`` is a file, it is processed into a single output file.
    If it is a directory, every ``*.csv`` below it is processed and the
    directory structure is mirrored under ``OUTPUT_PATH``.

    Raises
    ------
    FileNotFoundError
        If ``INPUT_PATH`` is neither an existing file nor a directory.
    """
    if os.path.isfile(INPUT_PATH):
        # Single file.
        out_name = _wakes_output_name(os.path.basename(INPUT_PATH))
        process_file(INPUT_PATH, os.path.join(OUTPUT_PATH, out_name))
        return

    if not os.path.isdir(INPUT_PATH):
        # ``os.walk`` yields nothing for a missing path, which would make a
        # mistyped INPUT_PATH look like a successful run.
        raise FileNotFoundError(f"INPUT_PATH does not exist: {INPUT_PATH!r}")

    # Folder -> mirror structure.
    for root, dirs, files in os.walk(INPUT_PATH):
        dirs.sort()  # in-place sort makes the traversal order deterministic
        for fn in sorted(files):
            if not fn.endswith(".csv"):
                continue
            in_fp = os.path.join(root, fn)
            rel = os.path.relpath(in_fp, INPUT_PATH)
            # Only the trailing extension is rewritten, so ".csv" appearing in
            # a directory name or in the middle of a file name is untouched.
            out_fp = os.path.join(OUTPUT_PATH, _wakes_output_name(rel))
            process_file(in_fp, out_fp)


if __name__ == "__main__":
    main()


Saved: processed_ts/train_wake_intervals/prince_rupert_02-1639881300-timeseries-1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/kitimat_arm_bish_cove-1700342400-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1716357000-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/hecate_strait_banks_island_deadman_islet-1722637200-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1635389340-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1634832240-timeseries-1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1645749300-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/tsouke_01-1642877400-timeseries-1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/bcip_04-1638643800-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/kitimat_arm_bish_cove-1722349200-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/hecate_strait_b

Saved: processed_ts/train_wake_intervals/prince_rupert_02-1643514600-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/kitimat_arm_bish_cove-1713834300-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/wright_sound_gil_island-fisherman_cove-1710100200-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_defence_island_north-1720880400-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1730764200-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1640284500-timeseries-1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1701201000-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/bcip_04-1644779100-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1711896600-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/tsouke_01-1639172100-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/kitimat_arm_bish_

Saved: processed_ts/train_wake_intervals/metlakatla_01-1640724600-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1642686600-v3-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/bcip_06-1641765900-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1648424100-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/kitimat_arm_bish_cove-1713016200-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_defence_island_north-1706277900-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1731800100-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/tsouke_02-1641658800-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1645405500-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1647135300-timeseries-1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/lewis_passage_fin_island_howard_islet-1700488200-

Saved: processed_ts/train_wake_intervals/howe_sound_defence_island_north-1701903900-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1643575800-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/tsouke_01-1642024200-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1635211140-timeseries-1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/prince_rupert_02-1636919400-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/wright_sound_gil_island-fisherman_cove-1720890300-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/bcip_05-1649715600-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1711480800-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/tsouke_03-1647022800-v1-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-1711241400-labeled_wakes.csv
Saved: processed_ts/train_wake_intervals/howe_sound_foulger_creek-171993