In [None]:
import pandas as pd
import numpy as np
import os

from itertools import combinations
import math
import folium
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

from dash import Dash, dcc, html, Input, Output, dash_table
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import seaborn as sns
import plotly.express as px, plotly.graph_objects as go
import ipywidgets as W
from IPython.display import display


from scipy import stats
import requests, xmltodict
from datetime import datetime, timedelta, timezone

In [34]:
base = r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL LHT\Data"
files = os.listdir(base)
files

['LHT65005(JKL)-TEMP.csv',
 'LHT65006(JLK)-TEMP.csv',
 'LHT65007(JKL)-TEMP.csv',
 'LHT65008(JKL)-TEMP.csv',
 'LHT65009(JKL)-TEMP.csv',
 'LHT65010(JKL)-TEMP.csv',
 'LHT65013(JKL)-TEMP.csv']

In [35]:
file_path = [os.path.join(base, f) for f in os.listdir(base) if f.endswith('.csv')]
df = pd.concat((pd.read_csv(f, sep=';') for f in file_path), ignore_index=True)


df["Timestamp"] = pd.to_datetime(df["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", df["Timestamp"].isna().sum())
# Sort the ds by timestamp
df = df.sort_values(by="Timestamp").reset_index(drop=True)

NaT after parse: 0


In [70]:
lht_sensors = {
    'LHT65013': (62.234563, 25.672774),
    'LHT65010': (62.260777, 25.693876),
    'LHT65009': (62.222971, 25.804673),
    'LHT65008': (62.227604, 25.736853),
    'LHT65007': (62.286678, 25.74533),
    'LHT65006': (62.265198, 25.89008),
    'LHT65005': (62.197614, 25.720489),
    'LHT65004': (62.211111, 25.752563),
}

ws100_sensors = {
    'Saaritie':        (62.136788, 25.762473),
    'Tuulimyllyntie':  (62.221789, 25.695931),
    'Tähtiniementie':  (62.011127, 25.552755),
    'Kaakkovuorentie': (62.294362, 25.800196),
    'Kotaniementie':   (62.265705, 25.909542),
}

geolocator = Nominatim(user_agent="sensor_map")
reverse_geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1, swallow_exceptions=True)

# --- Helper: extract a reasonable street-like name from the OSM address dict ---
def extract_street(address_dict: dict) -> str:
    # Try common street keys; fall back to suburb/city if needed
    for key in ['road', 'pedestrian', 'footway', 'cycleway', 'path', 'residential']:
        if key in address_dict:
            return address_dict[key]
    for fallback in ['neighbourhood', 'suburb', 'hamlet', 'village', 'town', 'city', 'county']:
        if fallback in address_dict:
            return address_dict[fallback]
    return "Unknown street"

# Optional: simple in-memory cache to avoid re-querying the same coordinates repeatedly
geocode_cache = {}

def get_street_name(lat: float, lon: float, language: str = 'en') -> str:
    key = (round(lat, 6), round(lon, 6), language)
    if key in geocode_cache:
        return geocode_cache[key]
    location = reverse_geocode((lat, lon), language=language)  # use 'fi' for Finnish names if you prefer
    if location and hasattr(location, "raw"):
        street = extract_street(location.raw.get('address', {}))
    else:
        street = "Unknown street"
    geocode_cache[key] = street
    return street

# --- Create base map centered roughly over the area ---
map_center = [62.24, 25.75]
sensors_map = folium.Map(location=map_center, zoom_start=11, tiles="OpenStreetMap")

# Feature groups so you can toggle layers on/off
lht_layer = folium.FeatureGroup(name="LHT Sensors", show=True)
ws100_layer = folium.FeatureGroup(name="WS100 Sensors", show=True)

# --- Plot LHT sensors (blue pins) ---
for sensor_name, (lat, lon) in lht_sensors.items():
    street_name = get_street_name(lat, lon, language='en')  # change to 'fi' for Finnish
    popup_html = f"<b>{sensor_name}</b><br>{street_name}<br>({lat:.6f}, {lon:.6f})"
    tooltip_text = f"{sensor_name} – {street_name}"
    folium.Marker(
        location=[lat, lon],
        popup=popup_html,
        tooltip=tooltip_text,
        icon=folium.Icon(color="blue", icon="info-sign"),
    ).add_to(lht_layer)

# --- Plot WS100 sensors (red pins) ---
for site_name, (lat, lon) in ws100_sensors.items():
    street_name = get_street_name(lat, lon) 
    popup_html = f"<b>{site_name} (WS100)</b><br>{street_name}<br>({lat:.6f}, {lon:.6f})"
    tooltip_text = f"{site_name} – {street_name}"
    folium.Marker(
        location=[lat, lon],
        popup=popup_html,
        tooltip=tooltip_text,
        icon=folium.Icon(color="red", icon="info-sign"),
    ).add_to(ws100_layer)

# Add layers and layer control to the map
lht_layer.add_to(sensors_map)
ws100_layer.add_to(sensors_map)
folium.LayerControl(collapsed=False).add_to(sensors_map)

# Display in Jupyter
sensors_map


In [37]:
df.head()

Unnamed: 0,Timestamp,TempC_SHT,Hum_SHT
0,2020-10-27 15:00:02,23.62,35.7
1,2020-10-27 15:01:22,24.15,34.2
2,2020-10-27 15:04:38,23.66,34.0
3,2020-10-27 15:14:30,23.32,32.8
4,2020-10-27 15:20:00,20.95,36.2


In [38]:
df.tail()

Unnamed: 0,Timestamp,TempC_SHT,Hum_SHT
277075,2025-09-18 11:26:57,14.15,100.0
277076,2025-09-18 11:27:34,13.78,100.0
277077,2025-09-18 11:30:09,16.77,87.2
277078,2025-09-18 11:57:24,16.65,89.2
277079,2025-09-18 12:27:34,14.85,100.0


In [39]:
df.shape

(277080, 3)

In [40]:
df.describe()

Unnamed: 0,Timestamp,TempC_SHT,Hum_SHT
count,277080,277080.0,277080.0
mean,2023-05-01 01:09:53.334134784,5.816125,86.559604
min,2020-10-27 15:00:02,-33.75,13.0
25%,2022-02-19 09:59:29.249999872,-1.87,78.4
50%,2023-04-29 04:51:06,5.19,97.4
75%,2024-07-10 10:08:02.249999872,14.25,100.0
max,2025-09-18 12:27:34,327.67,3276.7
std,,11.147841,22.152068


In [41]:
ws100_df = pd.read_csv('Kotaniementie_cleaned.csv')

In [42]:
ws100_df.head()

Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:03:44,0.0,0.0,250.46,0.0,0
1,2021-01-01 00:13:44,0.1,0.002,250.46,0.0,70
2,2021-01-01 00:23:44,0.1,0.002,250.48,0.02,70
3,2021-01-01 00:33:45,0.1,0.002,250.49,0.01,70
4,2021-01-01 00:43:44,0.1,0.002,250.51,0.02,70


In [43]:
ws100_df.describe()

Unnamed: 0,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
count,249714.0,249714.0,249714.0,249714.0,249714.0
mean,0.058076,0.000974,161.056563,0.009374,8.100095
std,0.844741,0.014079,149.285852,0.104094,21.696574
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,46.69,0.0,0.0
50%,0.0,0.0,121.03,0.0,0.0
75%,0.0,0.0,231.51,0.0,0.0
max,184.2,3.07,676.74,13.68,70.0


In [44]:
ws100_df['precipitationType'].unique()

array([ 0, 70, 69, 67, 60])

<h1><center>Rainy day humidity behavior<h1>

**Helper Function**

In [45]:
# Mapping WS100 precipitation
def ws100_bucket(code):
    if code in (60,):
        return "Rain"
    if code in (67, 69):
        return "Mix"
    if code in (70,):
        return "Snow"
    return "Dry"

In [46]:
# Calculating temperature and humidity.
def dewpoint_C(temperature, humidity):
    # b for liquid water, Comes from the Sonntag fit to lab data for water vapor over liquid water
    # c for liquid water
    # For temperatures above freezing:
    # b = 17.625, c = 243.04
    
    # For temperatures below freezing:
    # b = 22.46, c = 272.62
    b = np.where(temperature >= 0, 17.625, 22.46)
    c = np.where(temperature >= 0, 243.04, 272.62)
    rh_frac = np.clip(humidity, 1e-6, 100) / 100.0
    gamma = np.log(rh_frac) + (b * temperature) / (c + temperature)
    return (c * gamma) / (b - gamma)

In [47]:

def dominant_bucket(series):
    order = {'Snow':3,'Mix':2,'Rain':1,'Dry':0}
    if series.empty:
        return 'Dry'
    return sorted(series, key=lambda k: order.get(k, -1), reverse=True)[0]

In [48]:
# Align multiple rain events in time (relative to start), then compute median RH and dew point spread at each hour offset.

def event_median(df_h, mask, pre_h=6, post_h=12):
    starts = df_h.index[(~mask.shift(fill_value=False)) & mask]
    wins = []
    for t0 in starts:
        idx = pd.date_range(t0 - pd.Timedelta(hours=pre_h),t0 + pd.Timedelta(hours=post_h),freq='1h')
        sub = df_h.reindex(idx).interpolate(limit_direction='both')
        sub = sub.assign(t_hours=(sub.index - t0).total_seconds()/3600)
        wins.append(sub[['t_hours','rh_pct','dp_spread_C']])
    if not wins:
        return None
    stack = pd.concat(wins, ignore_index=True)
    return (stack.groupby('t_hours').agg(RH=('rh_pct','median'),Spread=('dp_spread_C','median')).reset_index())

In [49]:
# Renaming columns, Converting types
# LHT sensor
lht = df.rename(columns={"Timestamp":"timestamp","TempC_SHT":"temp_C","Hum_SHT":"rh_pct"}).copy()
# WS100 sensor
ws = ws100_df.rename(columns={'Timestamp':'timestamp','precipitationType':'ptype','precipitationQuantityDiff_mm':'rain_mm'}).copy()

ws["timestamp"]  = pd.to_datetime(ws["timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")

lht["temp_C"]  = pd.to_numeric(lht["temp_C"], errors="coerce")
lht["rh_pct"]  = pd.to_numeric(lht["rh_pct"], errors="coerce")

In [50]:
# CLEAN DATA: Removing corrupted sensor values.
# In LHT data, I found some corrupted values like max value:327 which is not possible in reality.
bad_t  = (lht["temp_C"]<-50)|(lht["temp_C"]>60)|(lht["temp_C"].abs()>=300)  
bad_rh = (lht["rh_pct"]<0)|(lht["rh_pct"]>100)|(lht["rh_pct"].abs()>=300) 
lht.loc[bad_t,  "temp_C"] = np.nan
lht.loc[bad_rh, "rh_pct"] = np.nan


In [51]:
# Mapping types and hourly resamples

ws['ptype_bucket'] = ws['ptype'].apply(lambda c: ws100_bucket(int(c) if pd.notna(c) else 0))
ws['rain_mm'] = pd.to_numeric(ws['rain_mm'], errors='coerce').fillna(0.0)
# if amount>0 but type says dry, force Rain
ws.loc[(ws['ptype_bucket']=='Dry') & (ws['rain_mm']>0), 'ptype_bucket'] = 'Rain'

# LHT hourly means
lht_H = (lht.set_index('timestamp').resample('1h').agg(temp_C=('temp_C','mean'),rh_pct=('rh_pct','mean')).reset_index())

# Dew point + spread
lht_H['dewpoint_C']  = dewpoint_C(lht_H['temp_C'].values, lht_H['rh_pct'].values)
lht_H['dp_spread_C'] = lht_H['temp_C'] - lht_H['dewpoint_C']

# WS100 hourly rain sum + dominant type
rain_H  = (ws.set_index('timestamp')['rain_mm'].resample('1h').sum().to_frame('rain_mm_hour'))
ptype_H = (ws.set_index('timestamp')['ptype_bucket'].resample('1h').apply(dominant_bucket).to_frame('ptype_hour'))

# Join city-level hourly table
hourly = (lht_H.set_index('timestamp').join(rain_H,  how='left').join(ptype_H, how='left').reset_index())
hourly['rain_mm_hour'] = hourly['rain_mm_hour'].fillna(0.0)
hourly['ptype_hour']   = hourly['ptype_hour'].fillna('Dry')

# Add Y/M/D parts for easy filtering
hourly['year']  = hourly['timestamp'].dt.year
hourly['month'] = hourly['timestamp'].dt.month
hourly['day']   = hourly['timestamp'].dt.day

**Visualization functions**

In [52]:
def visualize(year=None, month=None, day=None):
    df = hourly.copy()
    if year is not None:  df = df[df['year']==year]
    if month is not None: df = df[df['month']==month]
    if day is not None:   df = df[df['day']==day]
    if df.empty:
        print("No data for this selection."); return

    # Helper cols
    df = df.sort_values('timestamp')
    df['date'] = df['timestamp'].dt.date
    df['hour'] = df['timestamp'].dt.hour

    # HOURLY RH HEATMAP — Rainy days only
    rainy_dates = (df.groupby('date')['ptype_hour']
                     .apply(lambda s: 'Rain' in set(s))).pipe(lambda s: s[s].index)
    if len(rainy_dates):
        pivot = df[df['date'].isin(rainy_dates)].pivot_table(index='date', columns='hour', values='rh_pct')
        fig_hm = px.imshow(pivot, origin='lower', aspect='auto',
                           labels=dict(x='Hour', y='Date', color='RH %'),
                           title='Hourly RH on Rainy Days — LHT')
        fig_hm.show()
    else:
        print("No rainy days in this selection — skipping heatmap.")

    # 18-h event-aligned median around rain onset
    h = df.set_index('timestamp')
    is_rain = h['ptype_hour'].eq('Rain') | (h['rain_mm_hour'] > 0)
    med = event_median(h, is_rain, pre_h=6, post_h=12)
    if med is not None:
        fig_ev = make_subplots(specs=[[{"secondary_y": True}]])
        fig_ev.add_trace(go.Scatter(x=med['t_hours'], y=med['RH'], mode='lines', name='RH %'),
                         secondary_y=False)
        fig_ev.add_trace(go.Scatter(x=med['t_hours'], y=med['Spread'], mode='lines', name='T−Td (°C)'),
                         secondary_y=True)
        fig_ev.add_vline(x=0, line_dash='dot')
        fig_ev.update_layout(title='Event-aligned median (t=0 = Rain start)')
        fig_ev.update_xaxes(title='Hours from rain start (− = before, + = after)')
        fig_ev.update_yaxes(title='RH %', secondary_y=False)
        fig_ev.update_yaxes(title='T−Td (°C)', secondary_y=True)
        fig_ev.show()
    else:
        print("No rain onsets found — skipping event-aligned plot.")

    # ---------------- (3) RH vs Rain cross-correlation (positive = RH leads)
    x = h[['rh_pct','rain_mm_hour']].dropna()
    if len(x) > 24:
        rows = []
        for k in range(-12, 13):  # lags in hours
            rows.append({'lag_h': k, 'corr': x['rh_pct'].corr(x['rain_mm_hour'].shift(-k))})
        ccf = pd.DataFrame(rows)
        fig_ccf = px.bar(ccf, x='lag_h', y='corr',
                         title='RH vs Rain cross-correlation (positive = RH leads)')
        fig_ccf.update_layout(xaxis_title='lag (hours)', yaxis_title='corr')
        fig_ccf.add_hline(y=0, line_dash='dot')
        fig_ccf.show()
    else:
        print("Too few points for cross-correlation (need > 24 hourly points).")

    # ---------------- (4) Drying time after rain (hours)
    df2 = h.copy()
    df2['is_rain'] = is_rain
    # label contiguous rainy blocks
    rain_block_id = ((df2['is_rain'] != df2['is_rain'].shift(fill_value=False)) & df2['is_rain']).cumsum()
    df2['event_id'] = np.where(df2['is_rain'], rain_block_id, np.nan)

    dryings = []
    for eid, g in df2.groupby('event_id', dropna=True):
        last_rain_t = g.index.max()
        look = df2.loc[last_rain_t + pd.Timedelta(hours=1): last_rain_t + pd.Timedelta(hours=24)]
        if look.empty: 
            continue
        # drying threshold: either RH < 80% OR T−Td > 2 °C
        cond = (look['rh_pct'] < 80) | (look['dp_spread_C'] > 2.0)
        if cond.any():
            dt = (cond.idxmax() - last_rain_t).total_seconds()/3600.0
            dryings.append(dt)

    if dryings:
        fig_dry = px.box(pd.DataFrame({'drying_h': dryings}), y='drying_h', points='all',
                         title='Drying time after rain (hours) — city-level (selection)')
        fig_dry.update_yaxes(title='hours')
        fig_dry.show()
    else:
        print("No complete rain events with a drying threshold in this selection.")

In [None]:
def visualize(year=None, month=None, day=None):
    df = hourly.copy()
    if year is not None:  df = df[df['year']==year]
    if month is not None: df = df[df['month']==month]
    if day is not None:   df = df[df['day']==day]
    if df.empty:
        print("No data for this selection."); return

    # Helper cols
    df = df.sort_values('timestamp')
    df['date'] = df['timestamp'].dt.date
    df['hour'] = df['timestamp'].dt.hour

    # HOURLY RH HEATMAP — Rainy days only
    rainy_dates = (df.groupby('date')['ptype_hour']
                     .apply(lambda s: 'Rain' in set(s))).pipe(lambda s: s[s].index)
    if len(rainy_dates):
        pivot = df[df['date'].isin(rainy_dates)].pivot_table(index='date', columns='hour', values='rh_pct')
        fig_hm = px.imshow(pivot, origin='lower', aspect='auto',
                           labels=dict(x='Hour', y='Date', color='RH %'),
                           title='Hourly RH on Rainy Days — LHT')
        fig_hm.show()
    else:
        print("No rainy days in this selection — skipping heatmap.")

    # 18-h event-aligned median around rain onset
    h = df.set_index('timestamp')
    is_rain = h['ptype_hour'].eq('Rain') | (h['rain_mm_hour'] > 0)
    med = event_median(h, is_rain, pre_h=6, post_h=12)
    if med is not None:
        fig_ev = make_subplots(specs=[[{"secondary_y": True}]])
        fig_ev.add_trace(go.Scatter(x=med['t_hours'], y=med['RH'], mode='lines', name='RH %'),
                         secondary_y=False)
        fig_ev.add_trace(go.Scatter(x=med['t_hours'], y=med['Spread'], mode='lines', name='T−Td (°C)'),
                         secondary_y=True)
        fig_ev.add_vline(x=0, line_dash='dot')
        fig_ev.update_layout(title='Event-aligned median (t=0 = Rain start)')
        fig_ev.update_xaxes(title='Hours from rain start (− = before, + = after)')
        fig_ev.update_yaxes(title='RH %', secondary_y=False)
        fig_ev.update_yaxes(title='T−Td (°C)', secondary_y=True)
        fig_ev.show()
    else:
        print("No rain onsets found — skipping event-aligned plot.")

    # ---------------- (3) RH vs Rain cross-correlation (positive = RH leads)
    x = h[['rh_pct','rain_mm_hour']].dropna()
    if len(x) > 24:
        rows = []
        for k in range(-12, 13):  # lags in hours
            rows.append({'lag_h': k, 'corr': x['rh_pct'].corr(x['rain_mm_hour'].shift(-k))})
        ccf = pd.DataFrame(rows)
        fig_ccf = px.bar(ccf, x='lag_h', y='corr',
                         title='RH vs Rain cross-correlation (positive = RH leads)')
        fig_ccf.update_layout(xaxis_title='lag (hours)', yaxis_title='corr')
        fig_ccf.add_hline(y=0, line_dash='dot')
        fig_ccf.show()
    else:
        print("Too few points for cross-correlation (need > 24 hourly points).")

    # ---------------- (4) Drying time after rain (hours)
    df2 = h.copy()
    df2['is_rain'] = is_rain
    # label contiguous rainy blocks
    rain_block_id = ((df2['is_rain'] != df2['is_rain'].shift(fill_value=False)) & df2['is_rain']).cumsum()
    df2['event_id'] = np.where(df2['is_rain'], rain_block_id, np.nan)

    dryings = []
    for eid, g in df2.groupby('event_id', dropna=True):
        last_rain_t = g.index.max()
        look = df2.loc[last_rain_t + pd.Timedelta(hours=1): last_rain_t + pd.Timedelta(hours=24)]
        if look.empty: 
            continue
        # drying threshold: either RH < 80% OR T−Td > 2 °C
        cond = (look['rh_pct'] < 80) | (look['dp_spread_C'] > 2.0)
        if cond.any():
            dt = (cond.idxmax() - last_rain_t).total_seconds()/3600.0
            dryings.append(dt)

    if dryings:
        fig_dry = px.box(pd.DataFrame({'drying_h': dryings}), y='drying_h', points='all',
                         title='Drying time after rain (hours) — city-level (selection)')
        fig_dry.update_yaxes(title='hours')
        fig_dry.show()

HBox(children=(Dropdown(description='Year', options=(2020, 2021, 2022, 2023, 2024, 2025), value=2020), Dropdow…

In [76]:
Wind_df = pd.read_csv('Jyväskylä airport_1.1.2023 -18.9.2025.csv')
Wind_df.head()

Unnamed: 0,Observation station,Year,Month,Day,Time [Local time],Wind direction mean [°],Gust speed mean [m/s],Wind speed mean [m/s]
0,Jyväskylä airport,2023,1,1,00:00,205.7,7.0,2.9
1,Jyväskylä airport,2023,1,1,00:10,204.8,7.7,3.0
2,Jyväskylä airport,2023,1,1,00:20,206.1,7.1,3.0
3,Jyväskylä airport,2023,1,1,00:30,195.6,5.4,2.5
4,Jyväskylä airport,2023,1,1,00:40,205.7,6.1,2.3


In [77]:
import re

In [84]:
tcol = next(c for c in Wind_df.columns if re.search(r"(time|date|aika|timestamp)", str(c), re.I))
Wind_df["timestamp"] = pd.to_datetime(Wind_df[tcol], errors="coerce", utc=True).dt.tz_convert(None)

# Helper to pick a column by regex from possible variants in the file
def pick_col(regex_list):
    for c in Wind_df.columns:
        for rgx in regex_list:
            if re.search(rgx, str(c), re.I):
                return c
    return None

# Typical FMI names are ws_10min, wg_10min, wd_10min; fallbacks included
c_ws = pick_col([r"\bws_?10", r"wind.*speed", r"mean.*10.?min"])
c_wg = pick_col([r"\bwg_?10", r"gust"])
c_wd = pick_col([r"\bwd_?10", r"dir(ection)?"])

wind = Wind_df[["timestamp", c_ws, c_wg, c_wd]].copy()
wind.columns = ["timestamp", "ws_10min", "wg_10min", "wd_10min"]

# Ensure numeric
for col in ["ws_10min", "wg_10min", "wd_10min"]:
    wind[col] = pd.to_numeric(wind[col], errors="coerce")

wind_10min = wind.dropna(subset=["timestamp"]).set_index("timestamp").sort_index()

print("\nClean 10-min wind:")
print(wind_10min.head())

# --- 3) Make hourly wind for merging with your `hourly` frame ---
def circ_mean_deg(series):
    """Circular mean for wind direction (degrees)."""
    x = np.deg2rad(series.dropna().values)
    if x.size == 0: 
        return np.nan
    return (np.rad2deg(np.arctan2(np.sin(x).mean(), np.cos(x).mean())) + 360) % 360

wind_hourly = (wind_10min
               .resample("1H")
               .agg(ws_ms=("ws_10min", "mean"),   # mean wind (m/s)
                    wg_ms=("wg_10min", "max"),    # max gust (m/s)
                    wd_deg=("wd_10min", circ_mean_deg))
               .reset_index())

print("\nHourly wind (for merge):")
wind_hourly.head()


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.




Clean 10-min wind:
            ws_10min  wg_10min  wd_10min
timestamp                               
2025-10-24       2.9       7.0     205.7
2025-10-24       NaN       NaN       NaN
2025-10-24       NaN       NaN       NaN
2025-10-24       NaN       NaN       NaN
2025-10-24       NaN       NaN       NaN

Hourly wind (for merge):



'H' is deprecated and will be removed in a future version, please use 'h' instead.



Unnamed: 0,timestamp,ws_ms,wg_ms,wd_deg
0,2025-10-24 00:00:00,2.349467,14.1,259.557834
1,2025-10-24 01:00:00,2.391067,13.1,269.995127
2,2025-10-24 02:00:00,2.3916,14.7,258.263312
3,2025-10-24 03:00:00,2.528226,15.8,267.45664
4,2025-10-24 04:00:00,2.521095,13.4,259.499372
