In [1]:
import os
import sys
import datetime
import pytz
import json

import numpy as np
from scipy.signal import medfilt
import matplotlib.pyplot as plt
import polars as pl
import pandas as pd

import synoptic

In [2]:
# Download KVEL data to look for strong wind events
stid_list = ['KVEL']

# Study window, expressed in UTC. The standard-library `datetime.timezone.utc` is
# used to tag the timestamps: it denotes a zero UTC offset, so the instants are
# unchanged, and no third-party package is needed just to mark a time as UTC.
# Alternative (disabled) start of the window:
# start_date = datetime.datetime(2025, 6, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
start_date = datetime.datetime(2018, 7, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
end_date = datetime.datetime(2022, 7, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)

# Variables requested from the time series service; the wind-gust entries are
# currently disabled.
synoptic_vrbls = [
    'wind_speed',
    'wind_direction',
    'air_temp',
    'dew_point_temperature',
    'pressure',
    # 'wind_gust',
    # 'wind_gust_set_1'
]

In [3]:
print("Downloading time series data...")
# Build the request for the configured stations, time window and variables, then
# convert the response to a DataFrame and reshape it with the `synoptic`
# namespace's `pivot()`.
timeseries_request = synoptic.TimeSeries(
    stid=stid_list,
    start=start_date,
    end=end_date,
    vars=synoptic_vrbls,
    verbose=True,
    # rename_set_1=False, rename_value_1=False,
)
df_data = timeseries_request.df().synoptic.pivot()
print("Downloaded time series data.")
# Last expression of the cell: show the first ten rows
df_data.head(10)

Downloading time series data...
🚚💨 Speedy delivery from Synoptic's [32mtimeseries[0m service.
📦 Received data from [36m1[0m stations (14.04 seconds).
Downloaded time series data.


date_time,stid,latitude,longitude,elevation,air_temp,wind_direction,wind_speed,dew_point_temperature,pressure
"datetime[μs, UTC]",str,f64,f64,f64,f64,f64,f64,f64,f64
2018-07-01 00:00:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,29.0,90.0,2.06,-5.83,83777.98
2018-07-01 00:05:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,0.0,0.0,-5.86,83777.98
2018-07-01 00:10:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,70.0,2.57,-4.87,83777.98
2018-07-01 00:15:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,110.0,2.57,-5.86,83777.98
2018-07-01 00:20:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,80.0,2.06,-5.86,83777.98
2018-07-01 00:25:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,20.0,1.54,-5.86,83777.98
2018-07-01 00:30:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,0.0,0.0,-4.87,83777.98
2018-07-01 00:35:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,20.0,4.12,-3.88,83805.89
2018-07-01 00:40:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,50.0,6.17,-3.88,83777.98
2018-07-01 00:45:00 UTC,"""KVEL""",40.44295,-109.51273,5262.0,28.0,20.0,6.69,-2.89,83777.98


In [4]:
import numpy as np

# Report the upper-tail wind speed percentiles, leaving NaN samples out of the calculation
wind_speed = df_data['wind_speed'].to_numpy()
percentiles = [100, 99, 98, 97, 96, 95]
wind_speed_valid = wind_speed[~np.isnan(wind_speed)]
ws_percentile_values = np.percentile(wind_speed_valid, percentiles)

# One line per requested percentile, in the order listed above
for idx, p in enumerate(percentiles):
    v = ws_percentile_values[idx]
    print(f"Wind speed at {p}th percentile: {v:.2f}")

Wind speed at 100th percentile: 18.52
Wind speed at 99th percentile: 9.77
Wind speed at 98th percentile: 8.75
Wind speed at 97th percentile: 7.72
Wind speed at 96th percentile: 7.20
Wind speed at 95th percentile: 6.69


In [5]:

# Function to find runs of consecutive rows where a column exceeds a given threshold value
def find_consecutive_periods(df, col, threshold, min_length=1,
                             time_col=None, min_duration=None):
    """
    Find runs of consecutive rows where df[col] > threshold.

    A run is kept only if it satisfies every active criterion:

    * it contains at least ``min_length`` rows, and
    * if ``time_col`` and ``min_duration`` are given, the time elapsed between
      its first and last row (``df[time_col][last] - df[time_col][first]``)
      is at least ``min_duration``.

    The time-based criterion lets callers express the minimum length in real
    time rather than in number of samples, so the result does not depend on
    the reporting interval of the data.

    Parameters
    ----------
    df : DataFrame-like
        Must support ``df[col]`` returning a series that can be compared with
        ``>`` and iterated, and ``len(df)`` giving the number of rows.
    col : str
        Name of the column compared against ``threshold``.
    threshold : number
        Rows with ``df[col] > threshold`` belong to a run.
    min_length : int, default 1
        Minimum number of rows in a run.
    time_col : str, optional
        Name of a column holding one timestamp per row; its series must
        provide ``to_list()``. Must be passed together with ``min_duration``.
    min_duration : timedelta-like, optional
        Minimum elapsed time between the first and last row of a run. Must be
        comparable with the difference of two ``time_col`` values.

    Returns
    -------
    list of (int, int)
        Inclusive ``(start_idx, end_idx)`` row positions of each kept run, in
        order of appearance.

    Raises
    ------
    ValueError
        If only one of ``time_col`` / ``min_duration`` is provided.
    """
    if (time_col is None) != (min_duration is None):
        raise ValueError("time_col and min_duration must be provided together")

    mask = df[col] > threshold
    # Materialise timestamps once so run boundaries can be looked up by position.
    times = df[time_col].to_list() if time_col is not None else None

    def _is_long_enough(first, last):
        """Return True if the inclusive run [first, last] meets all criteria."""
        if last - first + 1 < min_length:
            return False
        if times is not None and times[last] - times[first] < min_duration:
            return False
        return True

    periods = []
    start = None
    for i, val in enumerate(mask):
        if val and start is None:
            # First row of a new run
            start = i
        elif not val and start is not None:
            # Any non-truthy mask value closes the run at the previous row
            if _is_long_enough(start, i - 1):
                periods.append((start, i - 1))
            start = None
    # A run still open at the end of the data extends to the last row
    if start is not None and _is_long_enough(start, len(df) - 1):
        periods.append((start, len(df) - 1))
    return periods

In [6]:

# Exceedance threshold: the 99th percentile of the non-NaN wind speeds
valid_wind_speed = wind_speed[~np.isnan(wind_speed)]
threshold = np.percentile(valid_wind_speed, 99)
# TODO fix - min_length is an int count of rows, so it is not generalisable to different reporting intervals
periods = find_consecutive_periods(
    df_data,
    col='wind_speed',
    threshold=threshold,
    min_length=10,
)
print("Consecutive periods where wind speed exceeds 99th percentile:", periods)

Consecutive periods where wind speed exceeds 99th percentile: [(10264, 10276), (26562, 26572), (90148, 90163), (93900, 93915), (94459, 94469), (94471, 94480), (94482, 94498), (94521, 94539), (129205, 129214), (131306, 131315), (135604, 135615), (138434, 138444), (141902, 141913), (141915, 141951), (142015, 142026), (142116, 142125), (142172, 142184), (143793, 143832), (145341, 145356), (151571, 151584), (151587, 151599), (220898, 220909), (223035, 223047), (249893, 249929), (249931, 249959), (250096, 250106), (250109, 250119), (250121, 250136), (260958, 261002), (261768, 261779), (263204, 263224), (264145, 264180), (264182, 264215), (289552, 289577), (294578, 294601), (294605, 294614), (296444, 296453), (296455, 296470), (299202, 299230), (299245, 299260), (301519, 301528), (302271, 302288), (305642, 305654), (305656, 305667), (305669, 305682), (305712, 305732), (305740, 305751), (307548, 307558), (307768, 307783), (307802, 307814), (307861, 307872), (311540, 311550), (324853, 324881),

In [7]:
# Look up the timestamp at the first row index of every period
date_time_col = df_data['date_time']
period_starts = [date_time_col[bounds[0]] for bounds in periods]

print("Start dates of periods where wind speed exceeds 99th percentile:")
for start in period_starts:
    print(start)

Start dates of periods where wind speed exceeds 99th percentile:
2018-08-02 21:15:00+00:00
2018-09-24 20:40:00+00:00
2019-04-14 22:55:00+00:00
2019-04-26 23:40:00+00:00
2019-04-28 18:35:00+00:00
2019-04-28 19:30:00+00:00
2019-04-28 20:20:00+00:00
2019-04-28 23:20:00+00:00
2019-08-17 22:25:00+00:00
2019-08-25 22:35:00+00:00
2019-09-08 17:18:00+00:00
2019-09-17 17:40:00+00:00
2019-09-28 21:05:00+00:00
2019-09-28 22:05:00+00:00
2019-09-29 05:50:00+00:00
2019-09-29 13:30:00+00:00
2019-09-29 17:40:00+00:00
2019-10-04 22:10:00+00:00
2019-10-09 21:25:00+00:00
2019-10-29 19:10:00+00:00
2019-10-29 20:45:00+00:00
2020-06-06 20:25:00+00:00
2020-06-13 16:40:00+00:00
2020-09-08 04:50:00+00:00
2020-09-08 07:25:00+00:00
2020-09-08 20:05:00+00:00
2020-09-08 21:05:00+00:00
2020-09-08 22:00:00+00:00
2020-10-14 19:50:00+00:00
2020-10-17 22:15:00+00:00
2020-10-22 18:10:00+00:00
2020-10-25 18:20:00+00:00
2020-10-25 21:10:00+00:00
2021-02-26 20:45:00+00:00
2021-03-14 19:30:00+00:00
2021-03-14 21:40:00+00:00

In [8]:
import polars as pl

# Build one record per period: its first and last timestamps and the minutes between them
date_times = df_data['date_time']
events = []
for start, end in periods:
    start_time, end_time = date_times[start], date_times[end]
    elapsed = end_time - start_time
    duration = elapsed.total_seconds() / 60  # seconds -> minutes
    events.append(
        {
            "start_time": start_time,
            "end_time": end_time,
            "duration_minutes": duration,
        }
    )

# Assemble the records into a Polars DataFrame
events_df = pl.DataFrame(events)

# Print the "calendar of events"
print(events_df)

shape: (61, 3)
┌─────────────────────────┬─────────────────────────┬──────────────────┐
│ start_time              ┆ end_time                ┆ duration_minutes │
│ ---                     ┆ ---                     ┆ ---              │
│ datetime[μs, UTC]       ┆ datetime[μs, UTC]       ┆ f64              │
╞═════════════════════════╪═════════════════════════╪══════════════════╡
│ 2018-08-02 21:15:00 UTC ┆ 2018-08-02 22:10:00 UTC ┆ 55.0             │
│ 2018-09-24 20:40:00 UTC ┆ 2018-09-24 21:25:00 UTC ┆ 45.0             │
│ 2019-04-14 22:55:00 UTC ┆ 2019-04-15 00:05:00 UTC ┆ 70.0             │
│ 2019-04-26 23:40:00 UTC ┆ 2019-04-27 00:50:00 UTC ┆ 70.0             │
│ 2019-04-28 18:35:00 UTC ┆ 2019-04-28 19:20:00 UTC ┆ 45.0             │
│ …                       ┆ …                       ┆ …                │
│ 2022-04-15 18:15:00 UTC ┆ 2022-04-15 19:05:00 UTC ┆ 50.0             │
│ 2022-04-15 19:35:00 UTC ┆ 2022-04-15 20:50:00 UTC ┆ 75.0             │
│ 2022-04-22 17:30:00 UTC ┆ 2022-04-