In [1]:
import pandas as pd
import numpy as np
import os

from itertools import combinations
import math
import folium
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

from dash import Dash, dcc, html, Input, Output, dash_table
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import seaborn as sns
import plotly.express as px, plotly.graph_objects as go
import ipywidgets as W
from IPython.display import display


from scipy import stats
import requests, xmltodict
from datetime import datetime
from io import StringIO
from matplotlib.patches import Patch

from IPython.display import clear_output
import re


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub.utils import disable_progress_bars
from transformers.utils import logging as hf_logging
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto")



`torch_dtype` is deprecated! Use `dtype` instead!


<h1><center>Functions<h1>

**Bucket funcction**

In [None]:
# Add readable bucket
def bucket(code):
        if pd.isna(code): return "NoData"
        c = int(code)
        if c == 0:return "Dry"
        if c == 60: return "Rain"
        if 60 < c < 70: return "Mix"
        if c == 70: return "Snow"
        return "Other"

**Sampling LHT sensors hourly**

In [5]:
def make_lht_hourly(df_raw):
    df = df_raw.copy()
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])
    df = df.sort_values("Timestamp").set_index("Timestamp")
    df = df[~df.index.duplicated(keep="first")]

    hourly = df.resample("1h").mean() 
    hourly = hourly.reset_index()
    return hourly

**Resampling WS100 sensors hourly**

In [6]:
def make_ws_hourly(df_raw):
    df = df_raw.copy()
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])
    df = df.sort_values("Timestamp").set_index("Timestamp")
    df = df[~df.index.duplicated(keep="first")]

    hourly = df.resample("1h").agg(
        precipitation_mm=("precipitationQuantityDiff_mm", "sum"),
        intensity_mm_h=("precipitationIntensity_mm_h", "mean"),
        samples=("precipitationType", "count"),
        precip_type=("precipitationType",
                     lambda x: x.mode().iloc[0] if not x.mode().empty else np.nan),)

 
    gap_mask = hourly["samples"] == 0
    hourly.loc[gap_mask, ["precipitation_mm", "intensity_mm_h", "precip_type"]] = np.nan
    hourly = hourly.drop(columns="samples").reset_index()
    hourly["precip_bucket"] = hourly["precip_type"].apply(bucket)
    hourly = hourly.interpolate(method="linear", limit=3, limit_direction="both", limit_area="inside")
    return hourly

**Resampling wind DS hourly**

In [7]:
def make_wind_hourly(df_raw):
    df = df_raw.copy()
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])
    df = df.sort_values("Timestamp").set_index("Timestamp")
    df = df[~df.index.duplicated(keep="first")]

  
    hourly = df.resample("1h").mean()
    # Remaming columns for clarity
    hourly = hourly.rename(columns={
        "wind_speed_10m (km/h)": "wind_speed_kmh",
        "wind_gusts_10m (km/h)": "wind_gust_kmh",
        "wind_direction_10m (°)": "wind_dir_deg",
        "surface_pressure (hPa)": "surface_pressure_hpa"}).reset_index()

    hourly["timestamp"] = hourly["Timestamp"]
    cols = ["timestamp", "wind_speed_kmh", "wind_gust_kmh", "wind_dir_deg", "surface_pressure_hpa"]
    return hourly[cols]


**Calculating dew point**

In [8]:
# Calculating dew point, temprature where cmondensation forms 
def dewpoint_C(temperature, humidity):
    # b for liquid water, Comes from the Sonntag fit to lab data for water vapor over liquid water
    # c for liquid water
    # For temperatures above freezing:
    # b = 17.625, c = 243.04
    
    # For temperatures below freezing:
    # b = 22.46, c = 272.62
    b = np.where(temperature >= 0, 17.625, 22.46)
    c = np.where(temperature >= 0, 243.04, 272.62)
    rh_frac = np.clip(humidity, 1e-6, 100) / 100.0
    gamma = np.log(rh_frac) + (b * temperature) / (c + temperature)
    return (c * gamma) / (b - gamma)

In [9]:
def svp_kpa_piecewise(Tc):
    # Saturation vapor pressure over water 
    es_water = 0.6108 * np.exp(17.27 * Tc / (Tc + 237.3))
    # Saturation vapor pressure over ice 
    es_ice   = 0.6108 * np.exp(21.875 * Tc / (Tc + 265.5))
    # Use water above , ice below
    return np.where(Tc >= 0, es_water, es_ice)

In [10]:
# Vapor Pressure Deficit VPD:
# how much more moisture the air could absorb before it is  completely full.
def vpd_kpa(Tc, RH):
    es = svp_kpa_piecewise(Tc)                          
    rh_frac = np.clip(RH, 0, 100) / 100.0               
    ea = es * rh_frac                                   
    return es - ea  

**Building pair hourly**

In [11]:
def build_pair_hourly(lht_hourly, ws_hourly):
    # LHT 
    lht = lht_hourly.copy()
    lht["timestamp"] = pd.to_datetime(lht["Timestamp"])
    lht = lht.sort_values("timestamp")
    lht = lht[~lht["timestamp"].duplicated(keep="first")]

    lht["temp_C"] = lht["Temperature_C"]
    lht["rh_pct"] = lht["Humidity"]
    lht["dewpoint_C"] = dewpoint_C(lht["temp_C"], lht["rh_pct"])
    lht["dp_spread_C"] = lht["temp_C"] - lht["dewpoint_C"]
    lht = lht[["timestamp", "temp_C", "rh_pct", "dewpoint_C", "dp_spread_C"]]

    # WS100
    ws = ws_hourly.copy()
    ws["timestamp"] = pd.to_datetime(ws["Timestamp"])
    ws = ws.sort_values("timestamp")
    ws = ws[~ws["timestamp"].duplicated(keep="first")]

    ws["rain_mm_hour"] = ws["precipitation_mm"]
    ws["ptype_code"] = ws["precip_type"]
    ws["ptype_hour"] = ws["ptype_code"].apply(bucket)

    mask_mis = (ws["rain_mm_hour"] > 0) & (~ws["ptype_hour"].isin(["Rain", "Mix", "Snow"]))
    ws.loc[mask_mis, "ptype_hour"] = "Rain"

    ws = ws[["timestamp", "rain_mm_hour", "ptype_hour"]]

    # Merge only hours where BOTH sensors have data
    hourly = pd.merge(lht, ws, on="timestamp", how="inner")

    hourly = hourly.dropna(subset=["rh_pct", "ptype_hour"])

    hourly["year"] = hourly["timestamp"].dt.year
    hourly["month"] = hourly["timestamp"].dt.month
    hourly["day"] = hourly["timestamp"].dt.day
    hourly["date"] = hourly["timestamp"].dt.date
    hourly["hour"] = hourly["timestamp"].dt.hour

    return hourly


**Funcion for merging wind datasets into hourly**

In [12]:
def add_wind_to_hourly(hourly_pair, wind_hourly, on = "timestamp" ):
    df = hourly_pair.copy()
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    w = wind_hourly.copy()
    w["timestamp"] = pd.to_datetime(w["timestamp"])
    df = df.sort_values("timestamp")
    w = w.sort_values("timestamp")
    merged = pd.merge_asof(df, w, on=on)
    return merged

In [None]:
def environment_conditions(hourly_pair):
    df = hourly_pair.copy()
    rain = df["rain_mm_hour"].fillna(0.0)
    rh = df["rh_pct"].astype(float)
    dp_spread = df["dp_spread_C"].astype(float)
    wind = df["wind_speed_kmh"].astype(float)
    vpd = df["vpd_kpa"].astype(float)
    
# To find the real rain condition, I am going to use both rain measurement and precipitation type.
# Because the WS100 sensor might detect heavy fogy as rain 
    df["is_raining"] = (rain > 0.02) & df["ptype_hour"].isin(["Rain", "Mix", "Snow"])
# High humidity and small dew point spread, when things get wet without rain
    df["Leaf wetness"] = (rh >= 90.0) & (dp_spread <= 2.0)

# rain OR strong saturation conditions, 
    df["wet_or_rain"] = df["is_raining"] | df["Leaf wetness"]

# Rules for dry enough:
    # No rain
    # Low humidity
    # air not close to saturation
    # air has decent drying power (VPD)
    # enough wind
    no_rain_strict = rain == 0.0
    low_rh_strict = rh <= 80.0
    good_spread_str = dp_spread >= 3.0
    strong_vpd_str = vpd >= 0.7
    enough_wind_str = wind >= 5.0
    df["dry_enough_strict"] = (no_rain_strict & low_rh_strict & good_spread_str & strong_vpd_str & enough_wind_str)



    no_rain_mod = rain == 0.0
    low_rh_mod = rh <= 85.0
    good_spread_mod = dp_spread >= 1.5
    strong_vpd_mod = vpd >= 0.4
    enough_wind_mod = wind >= 2.0
    df["dry_enough_moderate"] = (no_rain_mod & low_rh_mod & good_spread_mod & strong_vpd_mod & enough_wind_mod)



    no_rain_city = rain == 0.0
    low_rh_city = rh <= 88.0
    good_spread_city = dp_spread >= 1.0
    vpd_ok_city = vpd >= 0.3
    enough_wind_city= wind >= 1.0
    df["dry_enough_city"] = (no_rain_city & low_rh_city & good_spread_city & vpd_ok_city& enough_wind_city)
    

    return df


**Creating function to find rain, mix and snow**

In [14]:
def detect_events(hourly_pair_conditions, min_mm=0.2):
 
    hour = hourly_pair_conditions.sort_values("timestamp").set_index("timestamp").copy()

    is_precip_type = hour["ptype_hour"].isin(["Rain", "Mix", "Snow"])
    is_amount = hour["rain_mm_hour"] >= min_mm
    is_event = is_precip_type & is_amount

    # Identify start of a new event (False -> True transition)
    start_new = is_event & (~is_event.shift(fill_value=False))

    # Increment event_id at each new start
    event_id = start_new.cumsum()
    event_id[~is_event] = 0  # non-event hours as 0
    hour["event_id"] = event_id.replace(0, np.nan)

    # Build summary table
    events = []
    for eid, g in hour.groupby("event_id", dropna=True):
        start = g.index.min()
        end   = g.index.max()
        duration_h = (end - start).total_seconds()/3600.0 + 1.0  # inclusive

        mm_total = g["rain_mm_hour"].sum()

        # Main type by mm contribution (better than mode)
        type_mm = g.groupby("ptype_hour")["rain_mm_hour"].sum()
        if not type_mm.empty:
            ptype_main = type_mm.sort_values(ascending=False).index[0]
        else:
            ptype_main = g["ptype_hour"].mode().iloc[0]

        events.append({
            "event_id": int(eid),
            "start": start,
            "end": end,
            "duration_h": duration_h,
            "mm_total": mm_total,
            "ptype_main": ptype_main})

    events_df = pd.DataFrame(events).sort_values("start").reset_index(drop=True)
    hourly_with_events = hour.reset_index()

    return hourly_with_events, events_df


**Creating function to calculate how many consecutive dry hours have occurred up to each moment**

In [None]:
def compute_dry_streak(dry_flag):
    dry_int = dry_flag.astype(int)
    groups = (dry_int != dry_int.shift()).cumsum()
    streak = dry_int.groupby(groups).cumsum()
    streak = streak.where(dry_flag, 0)
    return streak


**Creating function to calculates, after each rain storm ends, how many hours does it take until conditions become dry enough?**

In [None]:
def compute_drying_times(hourly_events, events_df, dry_flag_col="dry_enough_city",min_dry_hours=2, max_lookahead_hours=200):
    df = hourly_events.sort_values("timestamp").set_index("timestamp")

    drying_rows = []

    for _, ev in events_df.iterrows():
        eid   = ev["event_id"]
        t_end = ev["end"]

        # Period after the event
        t_start_dry = t_end + pd.Timedelta(hours=1)
        t_end_dry   = t_end + pd.Timedelta(hours=max_lookahead_hours)

        post = df.loc[t_start_dry:t_end_dry].copy()
        if post.empty:
            continue
        dry_flag = post[dry_flag_col].astype(bool)

        rolling_mean = (dry_flag .astype(float) .rolling(window=min_dry_hours, min_periods=min_dry_hours).mean())
        full_dry_idx = rolling_mean[rolling_mean == 1.0].index.min()
        if pd.isna(full_dry_idx):

            continue

        drying_hours = (full_dry_idx - t_end).total_seconds() / 3600.0

        drying_rows.append({
            "event_id": eid,
            "start": ev["start"],
            "end": ev["end"],
            "duration_h": ev["duration_h"],
            "mm_total": ev["mm_total"],
            "ptype_main": ev["ptype_main"],
            "dry_flag": dry_flag_col,
            "drying_hours": drying_hours})

    return pd.DataFrame(drying_rows)
    


**Creating function for what happens to humidity and temperature before and after rain starts**

In [17]:
def event_composite_environment(hourly_events, events_df, pre_h=6, post_h=120, max_interp_gap=2):
    
    df = hourly_events.sort_values("timestamp").set_index("timestamp").copy()

    composite_windows = []

    for _, ev in events_df.iterrows():
        t0 = ev["start"]

        # Time window around the event onset
        idx = pd.date_range(t0 - pd.Timedelta(hours=pre_h),t0 + pd.Timedelta(hours=post_h),freq="1h")

        sub = df.reindex(idx)

        # Interpolate continuous variables only
        cont_cols = ["rh_pct", "dp_spread_C", "vpd_kpa", "wind_speed_kmh"]
        sub[cont_cols] = (
            sub[cont_cols]
            .interpolate(limit=max_interp_gap, limit_direction="both"))

        # Drop rows where we still miss basics
        sub = sub.dropna(subset=cont_cols)
        if sub.empty:
            continue

        # Relative time in hours from event start
        sub = sub.assign(
            t_hours=(sub.index - t0).total_seconds() / 3600.0)

        # We keep continuous and boolean flags
        composite_windows.append(sub[[
            "t_hours",
            "rh_pct",
            "dp_spread_C",
            "vpd_kpa",
            "wind_speed_kmh",
            "wet_or_rain",
            "dry_enough_strict",
            "dry_enough_moderate",
            "dry_enough_city"]])

    if not composite_windows:
        return None

    stack = pd.concat(composite_windows, ignore_index=True)

    # Group by t_hours and compute averages
    comp = (
        stack
        .groupby("t_hours")
        .agg(
            RH=("rh_pct", "mean"),
            Spread=("dp_spread_C", "mean"),
            VPD=("vpd_kpa", "mean"),
            Wind=("wind_speed_kmh", "mean"),
            frac_wet=("wet_or_rain", "mean"),
            frac_dry_strict=("dry_enough_strict", "mean"),
            frac_dry_moderate=("dry_enough_moderate", "mean"),
            frac_dry_city=("dry_enough_city", "mean"),
            n_samples=("rh_pct", "count")).reset_index().sort_values("t_hours"))

    return comp


In [18]:

def visualize_one_selection(df_sel, events_sel, drying_sel, comp_sel, title_prefix=""):
 

    events_view = events_sel.merge(drying_sel[["event_id", "drying_hours"]], on="event_id", how="left")

    print(f"=== {title_prefix} Events summary (type, duration, drying) ===")
    display(events_view[["event_id", "start", "end","duration_h", "mm_total", "ptype_main", "drying_hours" ]].head(20))

    # RH heatmap for event days only 
    is_precip_hour = ((df_sel["rain_mm_hour"] > 0) & df_sel["ptype_hour"].isin(["Rain", "Mix", "Snow"]))
    event_dates = sorted(df_sel.loc[is_precip_hour, "date"].unique())

    df_event_days = df_sel[df_sel["date"].isin(event_dates)].copy()
    if df_event_days.empty:
        print("No event days in this selection.")
    else:
        pivot = df_event_days.pivot_table( index="date", columns="hour", values="rh_pct")

        fig_hm = px.imshow( pivot, origin="lower", aspect="auto", labels=dict(x="Hour of day", y="Date", color="RH %"), title=f"{title_prefix} RH on days with Rain/Mix/Snow events")
        fig_hm.show()

    if comp_sel is None or comp_sel.empty:
        print("No composite data for this selection.")
        return

    # RH, Spread, VPD, Wind
    fig_env = make_subplots(specs=[[{"secondary_y": True}]])

    fig_env.add_trace(go.Scatter(
            x=comp_sel["t_hours"], y=comp_sel["RH"],
            mode="lines", name="RH %"),secondary_y=False)
    
    fig_env.add_trace(
        go.Scatter(
            x=comp_sel["t_hours"], y=comp_sel["Spread"],
            mode="lines", name="T−Td (°C)"),secondary_y=True)
    
    fig_env.add_trace(
        go.Scatter(
            x=comp_sel["t_hours"], y=comp_sel["VPD"],
            mode="lines", name="VPD (kPa)"), secondary_y=True)
    
    
    fig_env.add_trace(
        go.Scatter(
            x=comp_sel["t_hours"], y=comp_sel["Wind"],
            mode="lines", name="Wind (km/h)"),secondary_y=True)

    fig_env.add_vline(x=0, line_dash="dot", annotation_text="Event start")

    fig_env.update_layout(
    title=f"{title_prefix} Mean environment 6 h before to 12 h after event onset",
    xaxis_title="Hours from event start (t=0 = first precip hour)")
    
    fig_env.update_yaxes(title_text="RH %", secondary_y=False)
    fig_env.update_yaxes(title_text="T−Td (°C), VPD (kPa), Wind (km/h)", secondary_y=True)
    fig_env.show()

 
    # Dryness states as fractions
    fig_state = go.Figure()
    fig_state.add_trace(go.Scatter(
        x=comp_sel["t_hours"], y=comp_sel["frac_wet"],
        mode="lines", name="Wet or rain"))
    
    fig_state.add_trace(go.Scatter(
        x=comp_sel["t_hours"], y=comp_sel["frac_dry_city"],
        mode="lines", name="Dry enough (city)"))
    
    #fig_state.add_trace(go.Scatter(
    #    x=comp_sel["t_hours"], y=comp_sel["frac_dry_moderate"],
    #    mode="lines", name="Dry enough (moderate)"))
    
    #fig_state.add_trace(go.Scatter(
    #    x=comp_sel["t_hours"], y=comp_sel["frac_dry_strict"],
    #    mode="lines", name="Dry enough (strict)"))
    
    
    median_dry = drying_sel["drying_hours"].median()
    fig_state.add_vline(x=median_dry, line_dash="dash", line_color = 'black', annotation_text="Median drying hours")

    fig_state.update_layout(
        title=f"{title_prefix} Fraction of events that are wet/dry vs hours from onset",
        xaxis_title="Hours from event start",
        yaxis_title="Fraction of events")
    
    fig_state.show()


In [None]:
def build_weather_explanation_prompt(site_name, events_sel, comp_sel, drying_sel):


    ev = events_sel.sort_values("mm_total", ascending=False).iloc[0]
    event_start = ev["start"]
    event_end   = ev["end"]
    event_hours = float(ev["duration_h"])
    event_mm    = float(ev["mm_total"])
    event_type  = str(ev["ptype_main"])
    date_str    = event_start.strftime("%Y-%m-%d")

    median_drying = None
    if drying_sel is not None and not drying_sel.empty:
        median_drying = float(drying_sel["drying_hours"].median())

    before = comp_sel[comp_sel["t_hours"] < 0]
    during = comp_sel[(comp_sel["t_hours"] >= 0) & (comp_sel["t_hours"] <= 6)]
    after  = comp_sel[comp_sel["t_hours"] > 6]

    def mean_or_none(df, col):
        if len(df) == 0 or df[col].isna().all():
            return None
        return float(df[col].mean())

    rh_before   = mean_or_none(before, "RH")
    rh_during   = mean_or_none(during, "RH")
    rh_after    = mean_or_none(after,  "RH")

    vpd_before  = mean_or_none(before, "VPD")
    vpd_during  = mean_or_none(during, "VPD")
    vpd_after   = mean_or_none(after,  "VPD")

    wind_before = mean_or_none(before, "Wind")
    wind_during = mean_or_none(during, "Wind")
    wind_after  = mean_or_none(after,  "Wind")



    drying_comment = None
    if median_drying is not None:
        if median_drying >= 72:
            drying_comment = (
                f"This is a very slow drying time, so surfaces stayed wet for several days "
                f"after the rain ended.")
        elif median_drying >= 24:
            drying_comment = (
                f"This is a moderate drying time, so surfaces stayed wet for about one "
                f"to three days after the rain ended."
)
        else:
            drying_comment = (
                f"This is a fairly fast drying time, so most surfaces dried within about "
                f"a day after the rain ended."
            )

    # VPD: drying power
    vpd_comment = None
    if (vpd_before is not None and
        vpd_during is not None and
        vpd_after is not None):
        vpd_min = min(vpd_before, vpd_during, vpd_after)
        vpd_max = max(vpd_before, vpd_during, vpd_after)

        if vpd_max < 0.2:
            vpd_comment = (
                f"VPD stayed between about {vpd_min:.2f} and {vpd_max:.2f} kPa, "
                f"so the air had almost no drying power and wet surfaces tended to stay wet."
            )
        elif vpd_max < 0.6:
            vpd_comment = (
                f"VPD was between about {vpd_min:.2f} and {vpd_max:.2f} kPa, "
                f"which means the air only had weak drying power."
            )
        else:
            vpd_comment = (
                f"VPD ranged from about {vpd_min:.2f} to {vpd_max:.2f} kPa, "
                f"so the air could dry wet surfaces more effectively."
            )

    # Wind: airflow
    wind_comment = None
    if (wind_before is not None and
        wind_during is not None and
        wind_after is not None):
        w_min = min(wind_before, wind_during, wind_after)
        w_max = max(wind_before, wind_during, wind_after)
        if w_max < 5:
            wind_comment = (
                f"Wind speeds were mostly light, between about {w_min:.1f} and {w_max:.1f} km/h, "
                f"so air did not move very quickly over wet surfaces."
            )
        elif w_max < 15:
            wind_comment = (
                f"Wind speeds were gentle to moderate, between about {w_min:.1f} and {w_max:.1f} km/h, "
                f"giving some airflow to help drying."
            )
        else:
            wind_comment = (
                f"Wind speeds often reached {w_max:.1f} km/h (from a minimum of about {w_min:.1f} km/h), "
                f"so there was plenty of airflow to help move humid air away from surfaces."
            )

    parts = []

    parts.append(
        f"On {date_str} at {site_name}, a {event_type.lower()} event lasted about "
        f"{event_hours:.1f} hours and brought around {event_mm:.1f} mm of rain.")

    if rh_before is not None and rh_during is not None and rh_after is not None:
        parts.append(
            f"Relative humidity was very high, around {rh_before:.0f}% before the rain, "
            f"about {rh_during:.0f}% during the event, and roughly {rh_after:.0f}% afterwards, "
            f"so the air stayed close to saturated with moisture.")

    if vpd_comment is not None:
        parts.append(vpd_comment)

    if wind_comment is not None:
        parts.append(wind_comment)

    if median_drying is not None and drying_comment is not None:
        parts.append(
            f"In this dataset, surfaces typically became dry enough about {median_drying:.0f} hours "
            f"after the rain stopped. {drying_comment}")

    base_explanation = " ".join(parts)
    return base_explanation


In [20]:
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use cuda:0


In [21]:
def build_deepseek_prompt_from_summary(base_explanation):
    prompt = (
        "You are an expert weather data analyst. Use the provided explanation to generate a detailed, simple analysis of the event.\n"
        "- First, in <think> tags, reason step-by-step: list all key values (duration, mm, RH, VPD, wind, drying time), explain their meanings simply, then analyze how they interact to reason the event's causes and effects.\n"
        "- Then, output 2-3 paragraphs (no strict limit): 1) Summarize values clearly and shortly in simple terms. 2) Explain what each value means (e.g., high RH means saturated air slowing evaporation). 3) Analyze and reason the event in a few words (e.g., why drying was slow due to low VPD despite wind).\n"
        "- Keep physics accurate: high RH/low VPD slow drying; wind helps airflow.\n"
        "- Use all numbers exactly; base on sensor data implications for surfaces like grounds or buildings.\n"
        "- Friendly, non-technical tone; end with practical tip.\n"
        "- NO meta-comments or inventions.\n\n"
        "EXPLANATION:\n"
        f"{base_explanation}\n\n"
        "<think>\n")
    return prompt


In [22]:
def deepseek_explain(prompt: str, max_new_tokens: int = 320) -> str:
    out = generator(
        prompt,
        max_new_tokens=max_new_tokens, repetition_penalty=1.05, temperature=0.5, do_sample=False)

    full = out[0]["generated_text"]
    gen = full[len(prompt):]
    gen = gen.replace("<think>", "").replace("</think>", "")

    # Final cleanup
    return gen.strip()


In [None]:
def create_event_viewer(
    hourly_events,
    events_df,
    drying_city,
    pre_h=6,
    post_h=120,
    max_interp_gap=2,
    site_name="My Site",
):
    df_all = hourly_events.copy()
    is_precip_hour = (
        (df_all["rain_mm_hour"] > 0)
        & df_all["ptype_hour"].isin(["Rain", "Mix", "Snow"])
    )
    wet = df_all[is_precip_hour].copy()

    years = sorted(wet["year"].unique().tolist())
    dd_year  = W.Dropdown(options=years, description="Year", value=years[0])
    dd_month = W.Dropdown(options=["All"], description="Month", value="All")
    dd_day   = W.Dropdown(options=["All"], description="Day", value="All")
    btn      = W.Button(description="Update", button_style="primary")

    out = W.Output()


    def _update_months(*_):
        y = dd_year.value
        months = sorted(wet.loc[wet["year"] == y, "month"].unique().tolist())
        dd_month.options = ["All"] + months
        dd_month.value = "All"
        dd_day.options = ["All"]
        dd_day.value = "All"

    def _update_days(*_):
        y = dd_year.value
        if dd_month.value == "All":
            days = sorted(wet.loc[wet["year"] == y, "day"].unique().tolist())
        else:
            m = dd_month.value
            days = sorted(
                wet.loc[
                    (wet["year"] == y) & (wet["month"] == m),
                    "day"
                ].unique().tolist()
            )
        dd_day.options = ["All"] + days if days else ["All"]
        dd_day.value = "All"

    def _run(*_):
        # Everything inside goes into the `out` box
        with out:
            clear_output(wait=True)

            y = dd_year.value
            m = dd_month.value
            d = dd_day.value

            # Filter hourly_events by year/month/day
            df_sel = df_all[df_all["year"] == y].copy()
            if m != "All":
                df_sel = df_sel[df_sel["month"] == m]
            if d != "All":
                df_sel = df_sel[df_sel["day"] == d]

            if df_sel.empty:
                print("No data for this selection (year/month/day).")
                return

            # Keep only event days (Rain/Mix/Snow & >0 mm)
            is_precip_sel = (
                (df_sel["rain_mm_hour"] > 0)
                & df_sel["ptype_hour"].isin(["Rain", "Mix", "Snow"])
            )
            event_dates = sorted(df_sel.loc[is_precip_sel, "date"].unique())

            df_sel = df_sel[df_sel["date"].isin(event_dates)].copy()
            if df_sel.empty:
                print("No Rain/Mix/Snow events in this selection.")
                return

            min_date = df_sel["date"].min()
            max_date = df_sel["date"].max()

            events_sel = events_df[
                (events_df["start"].dt.date >= min_date)
                & (events_df["start"].dt.date <= max_date)
            ].copy()

            if events_sel.empty:
                print("No events found in events_df for this period.")
                return

            # Select drying info for these events
            drying_sel = drying_city[
                drying_city["event_id"].isin(events_sel["event_id"])
            ].copy()

            # Build composite for this selection
            comp_sel = event_composite_environment(
                df_sel,
                events_sel,
                pre_h=pre_h,
                post_h=post_h,
                max_interp_gap=max_interp_gap,)

         
            print(
                f"\nSelection: year={y}, month={m}, day={d}\n"
                f"  Hours in df_sel: {len(df_sel)}\n"
                f"  Event days: {len(event_dates)} "
                f"({event_dates[0]} .. {event_dates[-1]})\n"
                f"  Events in period: {len(events_sel)}\n"
                f"  Events with drying info: {len(drying_sel)}")

            title_prefix = f"{y}"
            if m != "All":
                title_prefix += f"-{m:02d}"
            if d != "All":
                title_prefix += f"-{d:02d}"

            # plots
            visualize_one_selection(
                df_sel, events_sel, drying_sel,
                comp_sel, title_prefix=title_prefix)

            # LLM explanation
            base = build_weather_explanation_prompt(
              site_name=site_name,
              events_sel=events_sel,
              comp_sel=comp_sel,
              drying_sel=drying_sel,)
            
            prompt = build_deepseek_prompt_from_summary(base)

            print("\n=== DeepSeek explanation for this period ===\n")
            explanation = deepseek_explain(prompt, max_new_tokens=320)
            print(explanation)

    dd_year.observe(_update_months, names="value")
    dd_month.observe(_update_days, names="value")
    btn.on_click(_run)

    _update_months()


    ui = W.VBox([W.HBox([dd_year, dd_month, dd_day, btn]),out])
    display(ui)


In [24]:
""""
def create_event_viewer(hourly_events, events_df, drying_city,  pre_h=6, post_h=120, max_interp_gap=2, site_name="My Site"):

    df_all = hourly_events.copy()
    is_precip_hour = (
        (df_all["rain_mm_hour"] > 0)
        & df_all["ptype_hour"].isin(["Rain", "Mix", "Snow"]))
    wet = df_all[is_precip_hour].copy()

    # Widget definitions
    years = sorted(wet["year"].unique().tolist())
    dd_year  = W.Dropdown(options=years, description="Year", value=years[0])
    dd_month = W.Dropdown(options=["All"], description="Month", value="All")
    dd_day   = W.Dropdown(options=["All"], description="Day", value="All")
    btn      = W.Button(description="Update", button_style="primary")

   
    def _update_months(*_):
        y = dd_year.value
        months = sorted(wet.loc[wet["year"] == y, "month"].unique().tolist())
        dd_month.options = ["All"] + months
        dd_month.value = "All"
        dd_day.options = ["All"]
        dd_day.value = "All"


    def _update_days(*_):
        y = dd_year.value
        if dd_month.value == "All":
            days = sorted(wet.loc[wet["year"] == y, "day"].unique().tolist())
        else:
            m = dd_month.value
            days = sorted(
                wet.loc[
                    (wet["year"] == y) & (wet["month"] == m),
                    "day"
                ].unique().tolist()
            )
        dd_day.options = ["All"] + days if days else ["All"]
        dd_day.value = "All"

    def _run(*_):
        y = dd_year.value
        m = dd_month.value
        d = dd_day.value

        # Filter hourly_events by year/month/day ---
        df_sel = df_all[df_all["year"] == y].copy()
        if m != "All":
            df_sel = df_sel[df_sel["month"] == m]
        if d != "All":
            df_sel = df_sel[df_sel["day"] == d]

        if df_sel.empty:
            print("No data for this selection (year/month/day).")
            return

        # Keep only event days (Rain/Mix/Snow & >0 mm)
        is_precip_sel = (
            (df_sel["rain_mm_hour"] > 0) & df_sel["ptype_hour"].isin(["Rain", "Mix", "Snow"]))
        event_dates = sorted(df_sel.loc[is_precip_sel, "date"].unique())

        df_sel = df_sel[df_sel["date"].isin(event_dates)].copy()
        if df_sel.empty:
            print("No Rain/Mix/Snow events in this selection.")
            return

        min_date = df_sel["date"].min()
        max_date = df_sel["date"].max()

        events_sel = events_df[
            (events_df["start"].dt.date >= min_date)
            & (events_df["start"].dt.date <= max_date)].copy()

        if events_sel.empty:
            print("No events found in events_df for this period.")
            return

        # Select drying info for these events 
        drying_sel = drying_city[
            drying_city["event_id"].isin(events_sel["event_id"])].copy()

        #Build composite for this selection 
        comp_sel = event_composite_environment(
            df_sel,
            events_sel,
            pre_h=pre_h,
            post_h=post_h,
            max_interp_gap=max_interp_gap)
        
        # Small debug prints to check logic:
        print(
            f"\nSelection: year={y}, month={m}, day={d}\n"
            f"  Hours in df_sel: {len(df_sel)}\n"
            f"  Event days: {len(event_dates)} "
            f"({event_dates[0]} .. {event_dates[-1]})\n"
            f"  Events in period: {len(events_sel)}\n"
            f"  Events with drying info: {len(drying_sel)}")

        # Visualize for this selection ---
        title_prefix = f"{y}"
        if m != "All":
            title_prefix += f"-{m:02d}"
        if d != "All":
            title_prefix += f"-{d:02d}"
            
            
        visualize_one_selection(df_sel, events_sel, drying_sel,
                                comp_sel, title_prefix=title_prefix)
        
          # ---- LLM explanation part ----
    
        if comp_sel is not None and not comp_sel.empty:
            try:
                print("\n[DEBUG] Building prompt for DeepSeek...")
                prompt = build_weather_explanation_prompt(
                    site_name=site_name,
                    events_sel=events_sel,
                    comp_sel=comp_sel,
                    drying_sel=drying_sel,
                )
                print("[DEBUG] Prompt length:", len(prompt))

                print("\n=== DeepSeek explanation for this period ===\n")
                explanation = deepseek_explain(prompt)
                print("[DEBUG] Explanation length:", len(explanation))
                print(explanation)

            except Exception as e:
                # If anything goes wrong, you see the error in the cell
                print("\n[ERROR] DeepSeek explanation failed:\n", repr(e))
        else:
            print("\nNo composite data for this selection – skipping LLM explanation.")
       

            
    # Connect callbacks
    dd_year.observe(_update_months, names="value")
    dd_month.observe(_update_days, names="value")
    btn.on_click(_run)

    # Initialize
    _update_months()
    display(W.HBox([dd_year, dd_month, dd_day, btn]))
    
    """
    

'"\ndef create_event_viewer(hourly_events, events_df, drying_city,  pre_h=6, post_h=120, max_interp_gap=2, site_name="My Site"):\n\n    df_all = hourly_events.copy()\n    is_precip_hour = (\n        (df_all["rain_mm_hour"] > 0)\n        & df_all["ptype_hour"].isin(["Rain", "Mix", "Snow"]))\n    wet = df_all[is_precip_hour].copy()\n\n    # Widget definitions\n    years = sorted(wet["year"].unique().tolist())\n    dd_year  = W.Dropdown(options=years, description="Year", value=years[0])\n    dd_month = W.Dropdown(options=["All"], description="Month", value="All")\n    dd_day   = W.Dropdown(options=["All"], description="Day", value="All")\n    btn      = W.Button(description="Update", button_style="primary")\n\n\n    def _update_months(*_):\n        y = dd_year.value\n        months = sorted(wet.loc[wet["year"] == y, "month"].unique().tolist())\n        dd_month.options = ["All"] + months\n        dd_month.value = "All"\n        dd_day.options = ["All"]\n        dd_day.value = "All"\n\n\n 

<h1><center>Rainy day humidity behavior<h1>

<h3><center>Loading wind Data<h3>

In [25]:
# Loading the wind DS
wind_df = pd.read_csv("cleaned_datasets\cleaned_wind_data.csv")  

  wind_df = pd.read_csv("cleaned_datasets\cleaned_wind_data.csv")


In [26]:
wind_df

Unnamed: 0,snow_depth (m),wind_speed_10m (km/h),wind_direction_10m (°),wind_gusts_10m (km/h),surface_pressure (hPa),Timestamp
0,0.22,1.3,304,2.9,990.9,2021-01-01 00:00:00
1,0.22,2.5,225,5.0,991.2,2021-01-01 01:00:00
2,0.22,4.8,222,9.7,991.2,2021-01-01 02:00:00
3,0.22,6.1,230,11.9,991.2,2021-01-01 03:00:00
4,0.22,7.9,227,16.2,991.5,2021-01-01 04:00:00
...,...,...,...,...,...,...
41323,0.00,7.9,221,16.9,987.3,2025-09-18 19:00:00
41324,0.00,6.9,221,15.1,987.0,2025-09-18 20:00:00
41325,0.00,6.6,223,12.6,986.7,2025-09-18 21:00:00
41326,0.00,7.3,230,13.3,986.4,2025-09-18 22:00:00


In [27]:
wind_df["Timestamp"] = pd.to_datetime(wind_df["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", wind_df["Timestamp"].isna().sum())
# Sort the ds by timestamp
wind_df = wind_df.sort_values(by="Timestamp").reset_index(drop=True)
wind_df.head()

NaT after parse: 0


Unnamed: 0,snow_depth (m),wind_speed_10m (km/h),wind_direction_10m (°),wind_gusts_10m (km/h),surface_pressure (hPa),Timestamp
0,0.22,1.3,304,2.9,990.9,2021-01-01 00:00:00
1,0.22,2.5,225,5.0,991.2,2021-01-01 01:00:00
2,0.22,4.8,222,9.7,991.2,2021-01-01 02:00:00
3,0.22,6.1,230,11.9,991.2,2021-01-01 03:00:00
4,0.22,7.9,227,16.2,991.5,2021-01-01 04:00:00


In [28]:
tart_time_wind_data = wind_df["Timestamp"].min()
end_time_wind_data = wind_df["Timestamp"].max()
total_days_wind_data = (end_time_wind_data - tart_time_wind_data).days

print("Wind data at the airport :")
print(f"Data starts from: {tart_time_wind_data}")
print(f"Data ends at: {end_time_wind_data}")
print(f"Total days of data: {total_days_wind_data} days\n")

Wind data at the airport :
Data starts from: 2021-01-01 00:00:00
Data ends at: 2025-09-18 23:00:00
Total days of data: 1721 days



**Resampling wind data hourly**

In [29]:
#wind_hourly = make_wind_hourly(wind_df)
wind_hourly  = make_wind_hourly(wind_df)
wind_hourly.head()

Unnamed: 0,timestamp,wind_speed_kmh,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa
0,2021-01-01 00:00:00,1.3,2.9,304.0,990.9
1,2021-01-01 01:00:00,2.5,5.0,225.0,991.2
2,2021-01-01 02:00:00,4.8,9.7,222.0,991.2
3,2021-01-01 03:00:00,6.1,11.9,230.0,991.2
4,2021-01-01 04:00:00,7.9,16.2,227.0,991.5


<h3><center>LHT65006/Kaunisharjuntie VS Kotaniementie<h3>


In [30]:
#  LHT65006/Kaunisharjuntie 1 KM  from Kotaniementie

**Loading LHT dataset and WS100 sensors**

In [31]:
Kaunisharjuntie = pd.read_csv("cleaned_datasets/LHT/Kaunisharjuntie.csv")
Kaunisharjuntie.head()

Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-08 00:59:57,-5.24,53.7
1,2021-01-08 01:59:57,-5.61,54.1
2,2021-01-08 02:59:56,-5.91,54.4
3,2021-01-08 03:59:56,-6.13,54.6
4,2021-01-08 04:59:56,-6.29,55.0


In [32]:
Kotaniementie = pd.read_csv("cleaned_datasets\wes100\df_Kotaniementie.csv")
Kotaniementie.head()

  Kotaniementie = pd.read_csv("cleaned_datasets\wes100\df_Kotaniementie.csv")


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:04:00,0.0,0.0,250.46,0.0,0
1,2021-01-01 00:14:00,0.1,0.002,250.46,0.0,70
2,2021-01-01 00:24:00,0.1,0.002,250.48,0.02,70
3,2021-01-01 00:34:00,0.1,0.002,250.49,0.01,70
4,2021-01-01 00:44:00,0.1,0.002,250.51,0.02,70


In [33]:
Kaunisharjuntie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39147 entries, 0 to 39146
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Timestamp      39147 non-null  object 
 1   Temperature_C  39147 non-null  float64
 2   Humidity       39147 non-null  float64
dtypes: float64(2), object(1)
memory usage: 917.6+ KB


In [34]:
Kotaniementie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245223 entries, 0 to 245222
Data columns (total 6 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   Timestamp                      245223 non-null  object 
 1   precipitationIntensity_mm_h    245223 non-null  float64
 2   precipitationIntensity_mm_min  245223 non-null  float64
 3   precipitationQuantityAbs_mm    245223 non-null  float64
 4   precipitationQuantityDiff_mm   245223 non-null  float64
 5   precipitationType              245223 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 11.2+ MB


In [35]:
Kaunisharjuntie["Timestamp"] = pd.to_datetime(Kaunisharjuntie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Kaunisharjuntie["Timestamp"].isna().sum())
# Sort the ds by timestamp
Kaunisharjuntie = Kaunisharjuntie.sort_values(by="Timestamp").reset_index(drop=True)
Kaunisharjuntie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-08 00:59:57,-5.24,53.7
1,2021-01-08 01:59:57,-5.61,54.1
2,2021-01-08 02:59:56,-5.91,54.4
3,2021-01-08 03:59:56,-6.13,54.6
4,2021-01-08 04:59:56,-6.29,55.0


In [36]:
Kotaniementie["Timestamp"] = pd.to_datetime(Kotaniementie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Kotaniementie["Timestamp"].isna().sum())
# Sort the ds by timestamp
Kotaniementie = Kotaniementie.sort_values(by="Timestamp").reset_index(drop=True)
Kotaniementie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:04:00,0.0,0.0,250.46,0.0,0
1,2021-01-01 00:14:00,0.1,0.002,250.46,0.0,70
2,2021-01-01 00:24:00,0.1,0.002,250.48,0.02,70
3,2021-01-01 00:34:00,0.1,0.002,250.49,0.01,70
4,2021-01-01 00:44:00,0.1,0.002,250.51,0.02,70


**Applying function to resample LHT sensor**

In [37]:
Kaunisharjuntie_h = make_lht_hourly(Kaunisharjuntie)
Kaunisharjuntie_h.head()

Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-08 00:00:00,-5.24,53.7
1,2021-01-08 01:00:00,-5.61,54.1
2,2021-01-08 02:00:00,-5.91,54.4
3,2021-01-08 03:00:00,-6.13,54.6
4,2021-01-08 04:00:00,-6.29,55.0


**Applying function to resample WS100 sensor**

In [38]:
Kotaniementie_h = make_ws_hourly(Kotaniementie)
Kotaniementie_h.head()

  hourly = hourly.interpolate(method="linear", limit=3, limit_direction="both", limit_area="inside")


Unnamed: 0,Timestamp,precipitation_mm,intensity_mm_h,precip_type,precip_bucket
0,2021-01-01 00:00:00,0.06,0.083333,70.0,Snow
1,2021-01-01 01:00:00,0.13,0.133333,70.0,Snow
2,2021-01-01 02:00:00,0.05,0.05,70.0,Snow
3,2021-01-01 03:00:00,0.03,0.0,0.0,Dry
4,2021-01-01 04:00:00,0.1,0.15,0.0,Dry


In [39]:
# Let's find out start and end time of the data and the total days 
start_time_ws = Kotaniementie_h["Timestamp"].min()
end_time_ws = Kotaniementie_h["Timestamp"].max()
total_days_ws = (end_time_ws - start_time_ws).days


start_time_LHT = Kaunisharjuntie_h["Timestamp"].min()
end_time_LHT = Kaunisharjuntie_h["Timestamp"].max()
total_days_LHT = (end_time_LHT - start_time_LHT).days

print("WS100 Sensor at Kotaniementie:")
print(f"Data starts from: {start_time_ws}")
print(f"Data ends at: {end_time_ws}")
print(f"Total days of data: {total_days_ws} days\n")

print("LHT Sensor at Kaunisharjuntie:")
print(f"Data starts from: {start_time_LHT}")
print(f"Data ends at: {end_time_LHT}")
print(f"Total days of data: {total_days_LHT} days")

WS100 Sensor at Kotaniementie:
Data starts from: 2021-01-01 00:00:00
Data ends at: 2025-09-18 10:00:00
Total days of data: 1721 days

LHT Sensor at Kaunisharjuntie:
Data starts from: 2021-01-08 00:00:00
Data ends at: 2025-09-18 12:00:00
Total days of data: 1714 days


**Applying hourly function to merge WS100 DS with LHT DS** 

In [40]:
hourly_Kaunisharjuntie_Kotaniementie = build_pair_hourly(Kaunisharjuntie_h, Kotaniementie_h)
hourly_Kaunisharjuntie_Kotaniementie.head()

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour
0,2021-01-08 00:00:00,-5.24,53.7,-12.307682,7.067682,0.08,Snow,2021,1,8,2021-01-08,0
1,2021-01-08 01:00:00,-5.61,54.1,-12.576459,6.966459,0.07,Snow,2021,1,8,2021-01-08,1
2,2021-01-08 02:00:00,-5.91,54.4,-12.800064,6.890064,0.07,Snow,2021,1,8,2021-01-08,2
3,2021-01-08 03:00:00,-6.13,54.6,-12.968449,6.838449,0.05,Snow,2021,1,8,2021-01-08,3
4,2021-01-08 04:00:00,-6.29,55.0,-13.040043,6.750043,0.03,Snow,2021,1,8,2021-01-08,4


**Merging wind data with hourly**

In [41]:
hourly_wind_Kaunisharjuntie_Kotaniementie = add_wind_to_hourly(hourly_Kaunisharjuntie_Kotaniementie, wind_hourly, on = "timestamp")
hourly_wind_Kaunisharjuntie_Kotaniementie

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour,wind_speed_kmh,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa
0,2021-01-08 00:00:00,-5.24,53.7,-12.307682,7.067682e+00,0.08,Snow,2021,1,8,2021-01-08,0,7.9,16.9,66.0,1002.9
1,2021-01-08 01:00:00,-5.61,54.1,-12.576459,6.966459e+00,0.07,Snow,2021,1,8,2021-01-08,1,7.4,17.3,67.0,1003.1
2,2021-01-08 02:00:00,-5.91,54.4,-12.800064,6.890064e+00,0.07,Snow,2021,1,8,2021-01-08,2,7.2,15.8,63.0,1003.0
3,2021-01-08 03:00:00,-6.13,54.6,-12.968449,6.838449e+00,0.05,Snow,2021,1,8,2021-01-08,3,7.1,15.8,60.0,1003.0
4,2021-01-08 04:00:00,-6.29,55.0,-13.040043,6.750043e+00,0.03,Snow,2021,1,8,2021-01-08,4,6.5,15.1,56.0,1003.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39121,2025-09-18 05:00:00,11.29,100.0,11.290000,1.776357e-15,0.00,Dry,2025,9,18,2025-09-18,5,10.9,20.2,224.0,988.3
39122,2025-09-18 06:00:00,11.39,100.0,11.390000,1.776357e-15,0.00,Dry,2025,9,18,2025-09-18,6,12.2,25.6,225.0,988.7
39123,2025-09-18 07:00:00,11.96,100.0,11.960000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,7,11.3,25.9,214.0,988.4
39124,2025-09-18 08:00:00,12.44,100.0,12.440000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,8,11.4,26.3,207.0,988.6


In [42]:
print("Hours:", len(hourly_wind_Kaunisharjuntie_Kotaniementie), " Unique timestamps:", hourly_wind_Kaunisharjuntie_Kotaniementie["timestamp"].nunique())
print(hourly_wind_Kaunisharjuntie_Kotaniementie[["ptype_hour"]].value_counts().head())

Hours: 39126  Unique timestamps: 39126
ptype_hour
Dry           32004
Rain           4298
Snow           1656
Mix             948
NoData          220
Name: count, dtype: int64


**Adding VPD to  merged dataframe**

In [43]:
hourly_wind_Kaunisharjuntie_Kotaniementie['vpd_kpa'] = vpd_kpa(hourly_wind_Kaunisharjuntie_Kotaniementie['temp_C'], hourly_wind_Kaunisharjuntie_Kotaniementie['rh_pct'])
hourly_wind_Kaunisharjuntie_Kotaniementie

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour,wind_speed_kmh,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa,vpd_kpa
0,2021-01-08 00:00:00,-5.24,53.7,-12.307682,7.067682e+00,0.08,Snow,2021,1,8,2021-01-08,0,7.9,16.9,66.0,1002.9,0.182056
1,2021-01-08 01:00:00,-5.61,54.1,-12.576459,6.966459e+00,0.07,Snow,2021,1,8,2021-01-08,1,7.4,17.3,67.0,1003.1,0.174840
2,2021-01-08 02:00:00,-5.91,54.4,-12.800064,6.890064e+00,0.07,Snow,2021,1,8,2021-01-08,2,7.2,15.8,63.0,1003.0,0.169268
3,2021-01-08 03:00:00,-6.13,54.6,-12.968449,6.838449e+00,0.05,Snow,2021,1,8,2021-01-08,3,7.1,15.8,60.0,1003.0,0.165358
4,2021-01-08 04:00:00,-6.29,55.0,-13.040043,6.750043e+00,0.03,Snow,2021,1,8,2021-01-08,4,6.5,15.1,56.0,1003.0,0.161651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39121,2025-09-18 05:00:00,11.29,100.0,11.290000,1.776357e-15,0.00,Dry,2025,9,18,2025-09-18,5,10.9,20.2,224.0,988.3,0.000000
39122,2025-09-18 06:00:00,11.39,100.0,11.390000,1.776357e-15,0.00,Dry,2025,9,18,2025-09-18,6,12.2,25.6,225.0,988.7,0.000000
39123,2025-09-18 07:00:00,11.96,100.0,11.960000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,7,11.3,25.9,214.0,988.4,0.000000
39124,2025-09-18 08:00:00,12.44,100.0,12.440000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,8,11.4,26.3,207.0,988.6,0.000000


In [44]:
print(hourly_wind_Kaunisharjuntie_Kotaniementie[["temp_C", "rh_pct", "vpd_kpa"]].describe())

             temp_C        rh_pct       vpd_kpa
count  39126.000000  39126.000000  39126.000000
mean       4.923084     90.169868      0.171773
std       10.537910     15.910297      0.365386
min      -30.640000     22.500000      0.000000
25%       -2.250000     85.300000      0.000000
50%        4.190000    100.000000      0.000000
75%       13.400000    100.000000      0.154654
max       33.450000    100.000000      3.063714


**Appyling environment conditions function**

In [45]:
Kaunisharjuntie_Kotaniementie_conditions = environment_conditions(hourly_wind_Kaunisharjuntie_Kotaniementie)
Kaunisharjuntie_Kotaniementie_conditions

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,...,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa,vpd_kpa,is_raining,Leaf wetness,wet_or_rain,dry_enough_strict,dry_enough_moderate,dry_enough_city
0,2021-01-08 00:00:00,-5.24,53.7,-12.307682,7.067682e+00,0.08,Snow,2021,1,8,...,16.9,66.0,1002.9,0.182056,True,False,True,False,False,False
1,2021-01-08 01:00:00,-5.61,54.1,-12.576459,6.966459e+00,0.07,Snow,2021,1,8,...,17.3,67.0,1003.1,0.174840,True,False,True,False,False,False
2,2021-01-08 02:00:00,-5.91,54.4,-12.800064,6.890064e+00,0.07,Snow,2021,1,8,...,15.8,63.0,1003.0,0.169268,True,False,True,False,False,False
3,2021-01-08 03:00:00,-6.13,54.6,-12.968449,6.838449e+00,0.05,Snow,2021,1,8,...,15.8,60.0,1003.0,0.165358,True,False,True,False,False,False
4,2021-01-08 04:00:00,-6.29,55.0,-13.040043,6.750043e+00,0.03,Snow,2021,1,8,...,15.1,56.0,1003.0,0.161651,True,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39121,2025-09-18 05:00:00,11.29,100.0,11.290000,1.776357e-15,0.00,Dry,2025,9,18,...,20.2,224.0,988.3,0.000000,False,True,True,False,False,False
39122,2025-09-18 06:00:00,11.39,100.0,11.390000,1.776357e-15,0.00,Dry,2025,9,18,...,25.6,225.0,988.7,0.000000,False,True,True,False,False,False
39123,2025-09-18 07:00:00,11.96,100.0,11.960000,0.000000e+00,0.00,Dry,2025,9,18,...,25.9,214.0,988.4,0.000000,False,True,True,False,False,False
39124,2025-09-18 08:00:00,12.44,100.0,12.440000,0.000000e+00,0.00,Dry,2025,9,18,...,26.3,207.0,988.6,0.000000,False,True,True,False,False,False


**Applying Detect event function**

In [46]:
Hourly_events_Kaunisharjuntie_Kotaniementie, Events_Kaunisharjuntie_Kotaniementie = detect_events(Kaunisharjuntie_Kotaniementie_conditions, min_mm=0.2)

In [47]:
print("Total events:", len(Events_Kaunisharjuntie_Kotaniementie))
big_events = Events_Kaunisharjuntie_Kotaniementie.sort_values("mm_total", ascending=False).head(5)
print(big_events[["event_id","start","end","duration_h","mm_total","ptype_main"]])

Total events: 914
     event_id               start                 end  duration_h  mm_total  \
503       504 2023-07-28 08:00:00 2023-07-29 14:00:00        31.0     48.26   
741       742 2024-08-30 22:00:00 2024-08-31 01:00:00         4.0     31.22   
478       479 2023-06-30 20:00:00 2023-06-30 22:00:00         3.0     31.06   
87         88 2021-06-22 22:00:00 2021-06-22 23:00:00         2.0     30.53   
287       288 2022-07-13 10:00:00 2022-07-13 10:00:00         1.0     24.79   

    ptype_main  
503       Rain  
741       Rain  
478       Rain  
87        Rain  
287       Rain  


**Applying Computing drying time per event function**

In [48]:
drying_stats_Kaunisharjuntie_Kotaniementie = compute_drying_times(Hourly_events_Kaunisharjuntie_Kotaniementie, Events_Kaunisharjuntie_Kotaniementie, dry_flag_col="dry_enough_city", min_dry_hours=2, max_lookahead_hours=200)
drying_stats_Kaunisharjuntie_Kotaniementie.head()

Unnamed: 0,event_id,start,end,duration_h,mm_total,ptype_main,dry_flag,drying_hours
0,41,2021-03-29 06:00:00,2021-03-29 06:00:00,1.0,0.29,Snow,dry_enough_city,57.0
1,42,2021-03-29 19:00:00,2021-03-29 21:00:00,3.0,2.73,Mix,dry_enough_city,42.0
2,43,2021-03-30 17:00:00,2021-03-30 17:00:00,1.0,0.38,Rain,dry_enough_city,22.0
3,44,2021-04-02 23:00:00,2021-04-03 00:00:00,2.0,0.73,Snow,dry_enough_city,15.0
4,45,2021-04-05 05:00:00,2021-04-05 05:00:00,1.0,0.65,Mix,dry_enough_city,154.0


In [49]:
print("Events with drying:", len(drying_stats_Kaunisharjuntie_Kotaniementie))
print("Median drying (h):", drying_stats_Kaunisharjuntie_Kotaniementie["drying_hours"].median())

print("\nDrying by main type:")
print(drying_stats_Kaunisharjuntie_Kotaniementie.groupby("ptype_main")["drying_hours"].median())

Events with drying: 425
Median drying (h): 30.0

Drying by main type:
ptype_main
Mix     102.0
Rain     26.0
Snow    101.0
Name: drying_hours, dtype: float64


In [50]:
print("Events with city drying:", len(drying_stats_Kaunisharjuntie_Kotaniementie))
print(drying_stats_Kaunisharjuntie_Kotaniementie[["event_id","ptype_main","duration_h","drying_hours"]].head())

print("Median drying time (h):", drying_stats_Kaunisharjuntie_Kotaniementie["drying_hours"].median())

Events with city drying: 425
   event_id ptype_main  duration_h  drying_hours
0        41       Snow         1.0          57.0
1        42        Mix         3.0          42.0
2        43       Rain         1.0          22.0
3        44       Snow         2.0          15.0
4        45        Mix         1.0         154.0
Median drying time (h): 30.0


In [51]:
def add_season(df, ts_col="start"):
    df = df.copy()
    m = df[ts_col].dt.month
    season = pd.Series(index=df.index, dtype="object")
    season[(m == 12) | (m <= 2)] = "winter"
    season[(m >= 3) & (m <= 5)] = "spring"
    season[(m >= 6) & (m <= 8)] = "summer"
    season[(m >= 9) & (m <= 11)] = "autumn"
    df["season"] = season
    return df

drying_stats_season = add_season(drying_stats_Kaunisharjuntie_Kotaniementie)

median_strict_by_season = (drying_stats_season.groupby("season")["drying_hours"].median())


print("Strict median drying by season (h):")
print(median_strict_by_season)

Strict median drying by season (h):
season
autumn    88.5
spring    36.0
summer    26.0
Name: drying_hours, dtype: float64


In [52]:
#comp_all_comp_all_Kaunisharjuntie_Kotaniementie = event_composite_environment( 
#  Hourly_events_Kaunisharjuntie_Kotaniementie, Events_Kaunisharjuntie_Kotaniementie, pre_h=6, post_h=12, max_interp_gap=2)
# comp_all_comp_all_Kaunisharjuntie_Kotaniementie

In [53]:
Events_Kaunisharjuntie_Kotaniementie

Unnamed: 0,event_id,start,end,duration_h,mm_total,ptype_main
0,1,2021-01-11 04:00:00,2021-01-11 05:00:00,2.0,0.70,Mix
1,2,2021-01-11 09:00:00,2021-01-11 10:00:00,2.0,0.77,Mix
2,3,2021-01-12 13:00:00,2021-01-12 17:00:00,5.0,1.35,Snow
3,4,2021-01-13 04:00:00,2021-01-13 06:00:00,3.0,0.71,Mix
4,5,2021-01-18 02:00:00,2021-01-18 02:00:00,1.0,0.21,Snow
...,...,...,...,...,...,...
909,910,2025-09-14 14:00:00,2025-09-14 18:00:00,5.0,2.24,Rain
910,911,2025-09-15 14:00:00,2025-09-15 16:00:00,3.0,1.17,Rain
911,912,2025-09-16 13:00:00,2025-09-16 14:00:00,2.0,1.10,Rain
912,913,2025-09-16 18:00:00,2025-09-16 18:00:00,1.0,1.08,Rain


In [54]:
create_event_viewer(
 Hourly_events_Kaunisharjuntie_Kotaniementie,
    Events_Kaunisharjuntie_Kotaniementie, 
    drying_stats_Kaunisharjuntie_Kotaniementie, pre_h=6, post_h=120, max_interp_gap=2, site_name="Kaunisharjuntie–Kotaniementie")


VBox(children=(HBox(children=(Dropdown(description='Year', options=(2021, 2022, 2023, 2024, 2025), value=2021)…

<h3><center>Keltimaentie-LHT65013 VS TuulimyllyntieWS100<h3>

In [55]:
# These two sensors are close to each other, it is around 2 km distance.

# loading Keltimaantie LHT sensor data 
keltimaantie = pd.read_csv("cleaned_datasets\LHT\Keltimaentie.csv")
Survantie = pd.read_csv("cleaned_datasets/wes100/df_Saaritie.csv")
Keilonkankaantie = pd.read_csv("cleaned_datasets\LHT\Keilonkankaantie.csv")
# loading Tuulimyllyntie WS100 sensor data
Tuulimyllyntie = pd.read_csv("cleaned_datasets\wes100\df_Tuulimyllyntie.csv")

  keltimaantie = pd.read_csv("cleaned_datasets\LHT\Keltimaentie.csv")
  Keilonkankaantie = pd.read_csv("cleaned_datasets\LHT\Keilonkankaantie.csv")
  Tuulimyllyntie = pd.read_csv("cleaned_datasets\wes100\df_Tuulimyllyntie.csv")


In [56]:
Tuulimyllyntie ["Timestamp"] = pd.to_datetime(Tuulimyllyntie ["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Tuulimyllyntie ["Timestamp"].isna().sum())
# Sort the ds by timestamp
Tuulimyllyntie  = Tuulimyllyntie .sort_values(by="Timestamp").reset_index(drop=True)
Tuulimyllyntie .head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:04:00,0.1,0.002,507.53,0.02,70
1,2021-01-01 00:14:00,0.0,0.0,507.54,0.01,0
2,2021-01-01 00:24:00,0.0,0.0,507.54,0.0,0
3,2021-01-01 00:34:00,0.0,0.0,507.54,0.0,0
4,2021-01-01 00:44:00,0.0,0.0,507.54,0.0,0


In [57]:
keltimaantie["Timestamp"] = pd.to_datetime(keltimaantie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", keltimaantie["Timestamp"].isna().sum())
# Sort the ds by timestamp
keltimaantie = keltimaantie.sort_values(by="Timestamp").reset_index(drop=True)
keltimaantie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-20 00:00:00,-2.24,100.0
1,2021-01-20 01:00:00,-2.26,100.0
2,2021-01-20 02:00:00,-2.04,100.0
3,2021-01-20 03:00:00,-1.87,100.0
4,2021-01-20 04:00:00,-1.95,100.0


In [58]:
keltimaantie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40848 entries, 0 to 40847
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Timestamp      40848 non-null  datetime64[ns]
 1   Temperature_C  40769 non-null  float64       
 2   Humidity       40769 non-null  float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 957.5 KB


**Resampling the LHT and WS100 sensors hourly**

In [59]:
Keltimaantie_h = make_lht_hourly(keltimaantie)
Keltimaantie_h.head()

Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-20 00:00:00,-2.24,100.0
1,2021-01-20 01:00:00,-2.26,100.0
2,2021-01-20 02:00:00,-2.04,100.0
3,2021-01-20 03:00:00,-1.87,100.0
4,2021-01-20 04:00:00,-1.95,100.0


In [60]:
Tuulimyllyntie_h = make_ws_hourly(Tuulimyllyntie)
Tuulimyllyntie_h.head()

  hourly = hourly.interpolate(method="linear", limit=3, limit_direction="both", limit_area="inside")


Unnamed: 0,Timestamp,precipitation_mm,intensity_mm_h,precip_type,precip_bucket
0,2021-01-01 00:00:00,0.03,0.016667,0.0,Dry
1,2021-01-01 01:00:00,0.13,0.133333,70.0,Snow
2,2021-01-01 02:00:00,0.08,0.1,70.0,Snow
3,2021-01-01 03:00:00,0.01,0.0,0.0,Dry
4,2021-01-01 04:00:00,0.1,0.116667,70.0,Snow


In [61]:
# Let's find out start and end time of the data and the total days 
start_time_ws =Tuulimyllyntie_h["Timestamp"].min()
end_time_ws = Tuulimyllyntie_h["Timestamp"].max()
total_days_ws = (end_time_ws - start_time_ws).days


start_time_LHT = Keltimaantie_h["Timestamp"].min()
end_time_LHT = Keltimaantie_h["Timestamp"].max()
total_days_LHT = (end_time_LHT - start_time_LHT).days

print("WS100 Sensor at Tuulimyllyntie:")
print(f"Data starts from: {start_time_ws}")
print(f"Data ends at: {end_time_ws}")
print(f"Total days of data: {total_days_ws} days\n")

print("LHT Sensor at Keltimaantie:")
print(f"Data starts from: {start_time_LHT}")
print(f"Data ends at: {end_time_LHT}")
print(f"Total days of data: {total_days_LHT} days")

WS100 Sensor at Tuulimyllyntie:
Data starts from: 2021-01-01 00:00:00
Data ends at: 2025-09-18 10:00:00
Total days of data: 1721 days

LHT Sensor at Keltimaantie:
Data starts from: 2021-01-20 00:00:00
Data ends at: 2025-09-17 23:00:00
Total days of data: 1701 days


In [62]:
hourly_Keltimaantie_Tuulimyllyntie = build_pair_hourly(Keltimaantie_h, Tuulimyllyntie_h)
hourly_Keltimaantie_Tuulimyllyntie.head()


Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour
0,2021-01-20 00:00:00,-2.24,100.0,-2.24,4.440892e-16,0.0,Dry,2021,1,20,2021-01-20,0
1,2021-01-20 01:00:00,-2.26,100.0,-2.26,0.0,0.0,Dry,2021,1,20,2021-01-20,1
2,2021-01-20 02:00:00,-2.04,100.0,-2.04,0.0,0.28,Snow,2021,1,20,2021-01-20,2
3,2021-01-20 03:00:00,-1.87,100.0,-1.87,0.0,0.12,Rain,2021,1,20,2021-01-20,3
4,2021-01-20 04:00:00,-1.95,100.0,-1.95,2.220446e-16,0.07,Rain,2021,1,20,2021-01-20,4


In [63]:
hourly_wind_Keltimaantie_Tuulimyllyntie = add_wind_to_hourly(hourly_Keltimaantie_Tuulimyllyntie, wind_hourly, on = "timestamp")
hourly_wind_Keltimaantie_Tuulimyllyntie

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour,wind_speed_kmh,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa
0,2021-01-20 00:00:00,-2.24,100.0,-2.24,4.440892e-16,0.00,Dry,2021,1,20,2021-01-20,0,6.0,15.1,197.0,991.6
1,2021-01-20 01:00:00,-2.26,100.0,-2.26,0.000000e+00,0.00,Dry,2021,1,20,2021-01-20,1,5.8,13.3,202.0,991.5
2,2021-01-20 02:00:00,-2.04,100.0,-2.04,0.000000e+00,0.28,Snow,2021,1,20,2021-01-20,2,7.4,14.0,203.0,991.6
3,2021-01-20 03:00:00,-1.87,100.0,-1.87,0.000000e+00,0.12,Rain,2021,1,20,2021-01-20,3,7.0,15.8,201.0,991.6
4,2021-01-20 04:00:00,-1.95,100.0,-1.95,2.220446e-16,0.07,Rain,2021,1,20,2021-01-20,4,11.0,22.0,221.0,991.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40764,2025-09-17 19:00:00,13.27,100.0,13.27,0.000000e+00,0.02,Rain,2025,9,17,2025-09-17,19,11.8,23.4,234.0,985.1
40765,2025-09-17 20:00:00,12.39,100.0,12.39,0.000000e+00,0.00,Dry,2025,9,17,2025-09-17,20,11.7,22.7,238.0,985.7
40766,2025-09-17 21:00:00,11.20,100.0,11.20,1.776357e-15,0.00,Dry,2025,9,17,2025-09-17,21,11.2,22.7,240.0,986.1
40767,2025-09-17 22:00:00,12.05,100.0,12.05,0.000000e+00,0.00,Dry,2025,9,17,2025-09-17,22,10.2,21.6,236.0,986.4


In [64]:
print("Hours:", len(hourly_wind_Keltimaantie_Tuulimyllyntie), " Unique timestamps:", hourly_wind_Keltimaantie_Tuulimyllyntie["timestamp"].nunique())
print(hourly_wind_Keltimaantie_Tuulimyllyntie[["ptype_hour"]].value_counts().head())

Hours: 40769  Unique timestamps: 40769
ptype_hour
Dry           32059
Rain           5600
Mix            1474
Snow           1310
NoData          322
Name: count, dtype: int64


In [65]:
hourly_wind_Keltimaantie_Tuulimyllyntie['vpd_kpa'] = vpd_kpa(hourly_wind_Keltimaantie_Tuulimyllyntie['temp_C'], hourly_wind_Keltimaantie_Tuulimyllyntie['rh_pct'])

In [66]:
Keltimaantie_Tuulimyllyntie_conditions = environment_conditions(hourly_wind_Keltimaantie_Tuulimyllyntie)
Keltimaantie_Tuulimyllyntie_conditions

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,...,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa,vpd_kpa,is_raining,Leaf wetness,wet_or_rain,dry_enough_strict,dry_enough_moderate,dry_enough_city
0,2021-01-20 00:00:00,-2.24,100.0,-2.24,4.440892e-16,0.00,Dry,2021,1,20,...,15.1,197.0,991.6,0.0,False,True,True,False,False,False
1,2021-01-20 01:00:00,-2.26,100.0,-2.26,0.000000e+00,0.00,Dry,2021,1,20,...,13.3,202.0,991.5,0.0,False,True,True,False,False,False
2,2021-01-20 02:00:00,-2.04,100.0,-2.04,0.000000e+00,0.28,Snow,2021,1,20,...,14.0,203.0,991.6,0.0,True,True,True,False,False,False
3,2021-01-20 03:00:00,-1.87,100.0,-1.87,0.000000e+00,0.12,Rain,2021,1,20,...,15.8,201.0,991.6,0.0,True,True,True,False,False,False
4,2021-01-20 04:00:00,-1.95,100.0,-1.95,2.220446e-16,0.07,Rain,2021,1,20,...,22.0,221.0,991.7,0.0,True,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40764,2025-09-17 19:00:00,13.27,100.0,13.27,0.000000e+00,0.02,Rain,2025,9,17,...,23.4,234.0,985.1,0.0,False,True,True,False,False,False
40765,2025-09-17 20:00:00,12.39,100.0,12.39,0.000000e+00,0.00,Dry,2025,9,17,...,22.7,238.0,985.7,0.0,False,True,True,False,False,False
40766,2025-09-17 21:00:00,11.20,100.0,11.20,1.776357e-15,0.00,Dry,2025,9,17,...,22.7,240.0,986.1,0.0,False,True,True,False,False,False
40767,2025-09-17 22:00:00,12.05,100.0,12.05,0.000000e+00,0.00,Dry,2025,9,17,...,21.6,236.0,986.4,0.0,False,True,True,False,False,False


In [67]:
Hourly_events_Keltimaantie_Tuulimyllyntie, Events_Keltimaantie_Tuulimyllyntie = detect_events(Keltimaantie_Tuulimyllyntie_conditions, min_mm=0.2)
drying_stats_Keltimaantie_Tuulimyllyntie = compute_drying_times(Hourly_events_Keltimaantie_Tuulimyllyntie, Events_Keltimaantie_Tuulimyllyntie, dry_flag_col="dry_enough_city", min_dry_hours=2, max_lookahead_hours=120)

drying_stats_Keltimaantie_Tuulimyllyntie.head()

Unnamed: 0,event_id,start,end,duration_h,mm_total,ptype_main,dry_flag,drying_hours
0,49,2021-03-20 17:00:00,2021-03-20 18:00:00,2.0,0.87,Snow,dry_enough_city,43.0
1,50,2021-03-20 22:00:00,2021-03-20 23:00:00,2.0,0.69,Mix,dry_enough_city,38.0
2,51,2021-03-29 06:00:00,2021-03-29 06:00:00,1.0,0.38,Mix,dry_enough_city,32.0
3,52,2021-03-29 18:00:00,2021-03-29 21:00:00,4.0,2.81,Mix,dry_enough_city,17.0
4,53,2021-04-02 23:00:00,2021-04-03 00:00:00,2.0,1.08,Snow,dry_enough_city,14.0


In [68]:
print("Events with drying:", len(drying_stats_Keltimaantie_Tuulimyllyntie))
print("Median drying (h):", drying_stats_Keltimaantie_Tuulimyllyntie["drying_hours"].median())

print("\nDrying by main type:")
print(drying_stats_Keltimaantie_Tuulimyllyntie.groupby("ptype_main")["drying_hours"].median())

Events with drying: 487
Median drying (h): 20.0

Drying by main type:
ptype_main
Mix     35.0
Rain    20.0
Snow    37.0
Name: drying_hours, dtype: float64


In [69]:
print("Events with city drying:", len(drying_stats_Keltimaantie_Tuulimyllyntie))
print(drying_stats_Keltimaantie_Tuulimyllyntie[["event_id","ptype_main","duration_h","drying_hours"]].head())

print("Median drying time (h):", drying_stats_Keltimaantie_Tuulimyllyntie["drying_hours"].median())

Events with city drying: 487
   event_id ptype_main  duration_h  drying_hours
0        49       Snow         2.0          43.0
1        50        Mix         2.0          38.0
2        51        Mix         1.0          32.0
3        52        Mix         4.0          17.0
4        53       Snow         2.0          14.0
Median drying time (h): 20.0


In [70]:
def add_season(df, ts_col="start"):
    df = df.copy()
    m = df[ts_col].dt.month
    season = pd.Series(index=df.index, dtype="object")
    season[(m == 12) | (m <= 2)] = "winter"
    season[(m >= 3) & (m <= 5)] = "spring"
    season[(m >= 6) & (m <= 8)] = "summer"
    season[(m >= 9) & (m <= 11)] = "autumn"
    df["season"] = season
    return df

drying_stats_season = add_season(drying_stats_Keltimaantie_Tuulimyllyntie)

median_strict_by_season = (drying_stats_season.groupby("season")["drying_hours"].median())


print("Strict median drying by season (h):")
print(median_strict_by_season)

Strict median drying by season (h):
season
autumn    45.0
spring    25.0
summer    18.0
winter    95.0
Name: drying_hours, dtype: float64


In [1]:
create_event_viewer(
Hourly_events_Keltimaantie_Tuulimyllyntie,
    Events_Keltimaantie_Tuulimyllyntie,
    drying_stats_Keltimaantie_Tuulimyllyntie, pre_h=6, post_h=120, max_interp_gap=2, site_name="Keltimaantie–Tuulimyllyntie")

NameError: name 'create_event_viewer' is not defined

<h3><center>Ritopohantie-LHT65007 VS Kaakkovuorentie-WS100<h3>

In [72]:
# Ritopohantie LHT and Kaakkovuoentie WS100 are around  3 KM

In [73]:
# Loading Ritopohantie LHT sensor data and Kaakkovuoentie WS100 sensor data
Ritopohantie = pd.read_csv("cleaned_datasets\LHT\Ritopohantie.csv")
Kaakkovuoentie = pd.read_csv("cleaned_datasets\wes100\df_Kaakkovuorentie.csv")

  Ritopohantie = pd.read_csv("cleaned_datasets\LHT\Ritopohantie.csv")
  Kaakkovuoentie = pd.read_csv("cleaned_datasets\wes100\df_Kaakkovuorentie.csv")


In [74]:
Kaakkovuoentie["Timestamp"] = pd.to_datetime(Kaakkovuoentie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Kaakkovuoentie["Timestamp"].isna().sum())
# Sort the ds by timestamp
Kaakkovuoentie = Kaakkovuoentie .sort_values(by="Timestamp").reset_index(drop=True)
Kaakkovuoentie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2024-04-02 08:15:00,0.0,0.0,36.78,0.0,0
1,2024-04-02 08:25:00,0.0,0.0,36.78,0.0,0
2,2024-04-02 08:35:00,0.0,0.0,36.78,0.0,0
3,2024-04-02 08:45:00,0.0,0.0,36.78,0.0,0
4,2024-04-02 08:55:00,0.0,0.0,36.78,0.0,0


In [75]:
Ritopohantie["Timestamp"] = pd.to_datetime(Ritopohantie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Ritopohantie["Timestamp"].isna().sum())
# Sort the ds by timestamp
Ritopohantie = Ritopohantie.sort_values(by="Timestamp").reset_index(drop=True)
Ritopohantie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-20 00:00:00,-1.86,99.6
1,2021-01-20 01:00:00,-1.87,99.8
2,2021-01-20 02:00:00,-1.83,99.8
3,2021-01-20 03:00:00,-1.79,100.0
4,2021-01-20 04:00:00,-1.67,99.8


**Resampling the LHT and WS100 sensors hourly**

In [76]:
Ritopohantie_h = make_lht_hourly(Ritopohantie)
Ritopohantie_h.head()

Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-20 00:00:00,-1.86,99.6
1,2021-01-20 01:00:00,-1.87,99.8
2,2021-01-20 02:00:00,-1.83,99.8
3,2021-01-20 03:00:00,-1.79,100.0
4,2021-01-20 04:00:00,-1.67,99.8


In [77]:
Kaakkovuoentie_h = make_ws_hourly(Kaakkovuoentie)
Kaakkovuoentie_h.head()

  hourly = hourly.interpolate(method="linear", limit=3, limit_direction="both", limit_area="inside")


Unnamed: 0,Timestamp,precipitation_mm,intensity_mm_h,precip_type,precip_bucket
0,2024-04-02 08:00:00,0.0,0.0,0.0,Dry
1,2024-04-02 09:00:00,0.0,0.0,0.0,Dry
2,2024-04-02 10:00:00,0.0,0.0,0.0,Dry
3,2024-04-02 11:00:00,0.0,0.0,0.0,Dry
4,2024-04-02 12:00:00,0.0,0.0,0.0,Dry


In [78]:
# Let's find out start and end time of the data and the total days 
start_time_ws =Kaakkovuoentie_h ["Timestamp"].min()
end_time_ws = Kaakkovuoentie_h["Timestamp"].max()
total_days_ws = (end_time_ws - start_time_ws).days


start_time_LHT = Ritopohantie_h["Timestamp"].min()
end_time_LHT = Ritopohantie_h["Timestamp"].max()
total_days_LHT = (end_time_LHT - start_time_LHT).days

print("WS100 Sensor at Kaakkovuoentie:")
print(f"Data starts from: {start_time_ws}")
print(f"Data ends at: {end_time_ws}")
print(f"Total days of data: {total_days_ws} days\n")

print("LHT Sensor at Ritopohantie:")
print(f"Data starts from: {start_time_LHT}")
print(f"Data ends at: {end_time_LHT}")
print(f"Total days of data: {total_days_LHT} days")

WS100 Sensor at Kaakkovuoentie:
Data starts from: 2024-04-02 08:00:00
Data ends at: 2025-09-18 09:00:00
Total days of data: 534 days

LHT Sensor at Ritopohantie:
Data starts from: 2021-01-20 00:00:00
Data ends at: 2025-09-17 23:00:00
Total days of data: 1701 days


In [79]:
hourly_Ritopohantie_h_Kaakkovuoentie = build_pair_hourly(Ritopohantie_h, Kaakkovuoentie_h)
hourly_Ritopohantie_h_Kaakkovuoentie.head()


Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour
0,2024-04-02 08:00:00,-0.9,96.8,-1.2916,0.3916,0.0,Dry,2024,4,2,2024-04-02,8
1,2024-04-02 09:00:00,0.0,93.8,-0.879408,0.879408,0.0,Dry,2024,4,2,2024-04-02,9
2,2024-04-02 10:00:00,1.0,88.5,-0.686783,1.686783,0.0,Dry,2024,4,2,2024-04-02,10
3,2024-04-02 11:00:00,0.99,87.5,-0.852348,1.842348,0.0,Dry,2024,4,2,2024-04-02,11
4,2024-04-02 12:00:00,1.29,85.7,-0.841845,2.131845,0.0,Dry,2024,4,2,2024-04-02,12


In [80]:
hourly_wind_Ritopohantie_h_Kaakkovuoentie = add_wind_to_hourly(hourly_Ritopohantie_h_Kaakkovuoentie, wind_hourly, on = "timestamp")
hourly_wind_Ritopohantie_h_Kaakkovuoentie

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour,wind_speed_kmh,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa
0,2024-04-02 08:00:00,-0.90,96.8,-1.291600,3.915999e-01,0.00,Dry,2024,4,2,2024-04-02,8,16.4,37.4,40.0,991.4
1,2024-04-02 09:00:00,0.00,93.8,-0.879408,8.794081e-01,0.00,Dry,2024,4,2,2024-04-02,9,16.6,37.8,41.0,991.6
2,2024-04-02 10:00:00,1.00,88.5,-0.686783,1.686783e+00,0.00,Dry,2024,4,2,2024-04-02,10,16.1,38.2,40.0,991.7
3,2024-04-02 11:00:00,0.99,87.5,-0.852348,1.842348e+00,0.00,Dry,2024,4,2,2024-04-02,11,17.1,38.9,41.0,991.3
4,2024-04-02 12:00:00,1.29,85.7,-0.841845,2.131845e+00,0.00,Dry,2024,4,2,2024-04-02,12,18.4,41.0,42.0,990.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12803,2025-09-17 19:00:00,13.33,100.0,13.330000,1.776357e-15,0.04,Rain,2025,9,17,2025-09-17,19,11.8,23.4,234.0,985.1
12804,2025-09-17 20:00:00,12.62,100.0,12.620000,1.776357e-15,0.00,Dry,2025,9,17,2025-09-17,20,11.7,22.7,238.0,985.7
12805,2025-09-17 21:00:00,12.58,100.0,12.580000,1.776357e-15,0.27,Rain,2025,9,17,2025-09-17,21,11.2,22.7,240.0,986.1
12806,2025-09-17 22:00:00,12.34,100.0,12.340000,0.000000e+00,0.02,Rain,2025,9,17,2025-09-17,22,10.2,21.6,236.0,986.4


In [81]:
print("Hours:", len(hourly_wind_Ritopohantie_h_Kaakkovuoentie), " Unique timestamps:", hourly_wind_Ritopohantie_h_Kaakkovuoentie["timestamp"].nunique())
print(hourly_wind_Ritopohantie_h_Kaakkovuoentie[["ptype_hour"]].value_counts().head())

Hours: 12808  Unique timestamps: 12808
ptype_hour
Dry           10145
Rain           1574
NoData          519
Mix             432
Snow            138
Name: count, dtype: int64


In [82]:
hourly_wind_Ritopohantie_h_Kaakkovuoentie['vpd_kpa'] = vpd_kpa(hourly_wind_Ritopohantie_h_Kaakkovuoentie['temp_C'], hourly_wind_Ritopohantie_h_Kaakkovuoentie['rh_pct'])

In [83]:
Ritopohantie_Kaakkovuoentie_conditions = environment_conditions(hourly_wind_Ritopohantie_h_Kaakkovuoentie)
Hourly_event_Ritopohantie_Kaakkovuoentie, Events_Ritopohantie_Kaakkovuoentie = detect_events(Ritopohantie_Kaakkovuoentie_conditions, min_mm=0.2)

In [84]:
print("Total events:", len(Events_Ritopohantie_Kaakkovuoentie))
big_events = Events_Ritopohantie_Kaakkovuoentie.sort_values("mm_total", ascending=False).head(5)
print(big_events[["event_id","start","end","duration_h","mm_total","ptype_main"]])

Total events: 313
     event_id               start                 end  duration_h  mm_total  \
102       103 2024-08-30 20:00:00 2024-08-31 01:00:00         6.0     55.17   
302       303 2025-09-09 04:00:00 2025-09-09 08:00:00         5.0     35.92   
109       110 2024-09-13 10:00:00 2024-09-13 16:00:00         7.0     17.42   
28         29 2024-06-09 13:00:00 2024-06-09 17:00:00         5.0     16.14   
258       259 2025-06-24 16:00:00 2025-06-25 05:00:00        14.0     15.06   

    ptype_main  
102       Rain  
302       Rain  
109       Rain  
28        Rain  
258       Rain  


In [85]:
drying_stats_Ritopohantie_Kaakkovuoentie = compute_drying_times(Hourly_event_Ritopohantie_Kaakkovuoentie, Events_Ritopohantie_Kaakkovuoentie, dry_flag_col="dry_enough_city", min_dry_hours=2, max_lookahead_hours=120)

In [86]:
print("Events with drying:", len(drying_stats_Ritopohantie_Kaakkovuoentie))
print("Median drying (h):", drying_stats_Ritopohantie_Kaakkovuoentie["drying_hours"].median())

print("\nDrying by main type:")
print(drying_stats_Ritopohantie_Kaakkovuoentie.groupby("ptype_main")["drying_hours"].median())

Events with drying: 227
Median drying (h): 16.0

Drying by main type:
ptype_main
Mix     28.0
Rain    14.0
Snow    36.5
Name: drying_hours, dtype: float64


In [87]:
def add_season(df, ts_col="start"):
    df = df.copy()
    m = df[ts_col].dt.month
    season = pd.Series(index=df.index, dtype="object")
    season[(m == 12) | (m <= 2)] = "winter"
    season[(m >= 3) & (m <= 5)] = "spring"
    season[(m >= 6) & (m <= 8)] = "summer"
    season[(m >= 9) & (m <= 11)] = "autumn"
    df["season"] = season
    return df

drying_stats_season = add_season(drying_stats_Ritopohantie_Kaakkovuoentie)

median_strict_by_season = (drying_stats_season.groupby("season")["drying_hours"].median())


print("Strict median drying by season (h):")
print(median_strict_by_season)

Strict median drying by season (h):
season
autumn    23.0
spring    20.0
summer    13.0
Name: drying_hours, dtype: float64


In [88]:
create_event_viewer(
Hourly_event_Ritopohantie_Kaakkovuoentie,
    Events_Ritopohantie_Kaakkovuoentie,
    drying_stats_Ritopohantie_Kaakkovuoentie, pre_h=6, post_h=120, max_interp_gap=2, site_name="Ritopohantie–Kaakkovuoentie")

VBox(children=(HBox(children=(Dropdown(description='Year', options=(2024, 2025), value=2024), Dropdown(descrip…

<h3><center>Keilonkankaantie-LHT65005 VS Saaritie-WS100<h3>

In [89]:
# These two sites are 7.1 KM far from each other.

In [90]:
Keilonkankaantie = pd.read_csv("cleaned_datasets\LHT\Keilonkankaantie.csv")
Saaritie = pd.read_csv('cleaned_datasets\wes100\df_Saaritie.csv')

  Keilonkankaantie = pd.read_csv("cleaned_datasets\LHT\Keilonkankaantie.csv")
  Saaritie = pd.read_csv('cleaned_datasets\wes100\df_Saaritie.csv')


In [91]:
Saaritie["Timestamp"] = pd.to_datetime(Saaritie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Saaritie["Timestamp"].isna().sum())
# Sort the ds by timestamp
Saaritie = Saaritie.sort_values(by="Timestamp").reset_index(drop=True)
Saaritie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:01:00,0.1,0.002,160.92,0.02,70
1,2021-01-01 00:11:00,0.3,0.005,160.94,0.02,70
2,2021-01-01 00:21:00,0.1,0.002,160.96,0.02,70
3,2021-01-01 00:31:00,0.0,0.0,160.97,0.01,69
4,2021-01-01 00:41:00,0.0,0.0,160.97,0.0,0


In [92]:
Keilonkankaantie["Timestamp"] = pd.to_datetime(Keilonkankaantie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Keilonkankaantie["Timestamp"].isna().sum())
# Sort the ds by timestamp
Keilonkankaantie = Keilonkankaantie.sort_values(by="Timestamp").reset_index(drop=True)
Keilonkankaantie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-08 00:58:17,-7.19,93.3
1,2021-01-08 01:58:17,-7.39,93.4
2,2021-01-08 02:58:18,-7.3,93.4
3,2021-01-08 03:58:17,-7.4,93.3
4,2021-01-08 04:58:17,-7.41,93.3


**Resampling the LHT and WS100 sensors hourly**

In [93]:
Keilonkankaantie_h = make_lht_hourly(Keilonkankaantie)
Keilonkankaantie_h.head()

Unnamed: 0,Timestamp,Temperature_C,Humidity
0,2021-01-08 00:00:00,-7.19,93.3
1,2021-01-08 01:00:00,-7.39,93.4
2,2021-01-08 02:00:00,-7.3,93.4
3,2021-01-08 03:00:00,-7.4,93.3
4,2021-01-08 04:00:00,-7.41,93.3


In [94]:
Saaritie_h = make_ws_hourly(Saaritie)
Saaritie_h.head()

  hourly = hourly.interpolate(method="linear", limit=3, limit_direction="both", limit_area="inside")


Unnamed: 0,Timestamp,precipitation_mm,intensity_mm_h,precip_type,precip_bucket
0,2021-01-01 00:00:00,0.08,0.083333,70.0,Snow
1,2021-01-01 01:00:00,0.01,0.0,0.0,Dry
2,2021-01-01 02:00:00,0.0,0.0,0.0,Dry
3,2021-01-01 03:00:00,0.01,0.016667,0.0,Dry
4,2021-01-01 04:00:00,0.09,0.1,70.0,Snow


In [95]:
# Let's find out start and end time of the data and the total days 
start_time_ws =Saaritie_h ["Timestamp"].min()
end_time_ws = Saaritie_h["Timestamp"].max()
total_days_ws = (end_time_ws - start_time_ws).days


start_time_LHT = Keilonkankaantie_h["Timestamp"].min()
end_time_LHT = Keilonkankaantie_h["Timestamp"].max()
total_days_LHT = (end_time_LHT - start_time_LHT).days

print("WS100 Sensor at Kaakkovuoentie:")
print(f"Data starts from: {start_time_ws}")
print(f"Data ends at: {end_time_ws}")
print(f"Total days of data: {total_days_ws} days\n")

print("LHT Sensor at Ritopohantie:")
print(f"Data starts from: {start_time_LHT}")
print(f"Data ends at: {end_time_LHT}")
print(f"Total days of data: {total_days_LHT} days")

WS100 Sensor at Kaakkovuoentie:
Data starts from: 2021-01-01 00:00:00
Data ends at: 2025-09-18 09:00:00
Total days of data: 1721 days

LHT Sensor at Ritopohantie:
Data starts from: 2021-01-08 00:00:00
Data ends at: 2025-09-18 11:00:00
Total days of data: 1714 days


In [96]:
hourly_Keilonkankaantie_Saaritie = build_pair_hourly(Keilonkankaantie_h, Saaritie_h)
hourly_Keilonkankaantie_Saaritie.head()

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour
0,2021-01-08 00:00:00,-7.19,93.3,-7.985565,0.795565,0.0,Dry,2021,1,8,2021-01-08,0
1,2021-01-08 01:00:00,-7.39,93.4,-8.172134,0.782134,0.02,Snow,2021,1,8,2021-01-08,1
2,2021-01-08 02:00:00,-7.3,93.4,-8.082665,0.782665,0.01,Rain,2021,1,8,2021-01-08,2
3,2021-01-08 03:00:00,-7.4,93.3,-8.194309,0.794309,0.01,Rain,2021,1,8,2021-01-08,3
4,2021-01-08 04:00:00,-7.41,93.3,-8.204249,0.794249,0.0,Dry,2021,1,8,2021-01-08,4


In [97]:
hourly_wind_Keilonkankaantie_Saaritie = add_wind_to_hourly(hourly_Keilonkankaantie_Saaritie, wind_hourly, on = "timestamp")

In [98]:
print("Hours:", len(hourly_wind_Keilonkankaantie_Saaritie), " Unique timestamps:", hourly_wind_Keilonkankaantie_Saaritie["timestamp"].nunique())
print(hourly_wind_Keilonkankaantie_Saaritie[["ptype_hour"]].value_counts().head())

Hours: 39977  Unique timestamps: 39977
ptype_hour
Dry           32655
Rain           4178
Mix            1113
Snow           1109
NoData          922
Name: count, dtype: int64


In [99]:
hourly_wind_Keilonkankaantie_Saaritie['vpd_kpa'] = vpd_kpa(hourly_wind_Keilonkankaantie_Saaritie['temp_C'], hourly_wind_Keilonkankaantie_Saaritie['rh_pct'])
hourly_wind_Keilonkankaantie_Saaritie

Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,date,hour,wind_speed_kmh,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa,vpd_kpa
0,2021-01-08 00:00:00,-7.19,93.3,-7.985565,7.955653e-01,0.00,Dry,2021,1,8,2021-01-08,0,7.9,16.9,66.0,1002.9,0.022261
1,2021-01-08 01:00:00,-7.39,93.4,-8.172134,7.821344e-01,0.02,Snow,2021,1,8,2021-01-08,1,7.4,17.3,67.0,1003.1,0.021550
2,2021-01-08 02:00:00,-7.30,93.4,-8.082665,7.826645e-01,0.01,Rain,2021,1,8,2021-01-08,2,7.2,15.8,63.0,1003.0,0.021719
3,2021-01-08 03:00:00,-7.40,93.3,-8.194309,7.943089e-01,0.01,Rain,2021,1,8,2021-01-08,3,7.1,15.8,60.0,1003.0,0.021857
4,2021-01-08 04:00:00,-7.41,93.3,-8.204249,7.942491e-01,0.00,Dry,2021,1,8,2021-01-08,4,6.5,15.1,56.0,1003.0,0.021838
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39972,2025-09-18 05:00:00,11.38,100.0,11.380000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,5,10.9,20.2,224.0,988.3,0.000000
39973,2025-09-18 06:00:00,11.58,100.0,11.580000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,6,12.2,25.6,225.0,988.7,0.000000
39974,2025-09-18 07:00:00,12.02,100.0,12.020000,0.000000e+00,0.00,Dry,2025,9,18,2025-09-18,7,11.3,25.9,214.0,988.4,0.000000
39975,2025-09-18 08:00:00,12.44,100.0,12.440000,0.000000e+00,0.03,Rain,2025,9,18,2025-09-18,8,11.4,26.3,207.0,988.6,0.000000


In [100]:
Keilonkankaantie_Saaritie_conditions = environment_conditions(hourly_wind_Keilonkankaantie_Saaritie)
Keilonkankaantie_Saaritie_conditions


Unnamed: 0,timestamp,temp_C,rh_pct,dewpoint_C,dp_spread_C,rain_mm_hour,ptype_hour,year,month,day,...,wind_gust_kmh,wind_dir_deg,surface_pressure_hpa,vpd_kpa,is_raining,Leaf wetness,wet_or_rain,dry_enough_strict,dry_enough_moderate,dry_enough_city
0,2021-01-08 00:00:00,-7.19,93.3,-7.985565,7.955653e-01,0.00,Dry,2021,1,8,...,16.9,66.0,1002.9,0.022261,False,True,True,False,False,False
1,2021-01-08 01:00:00,-7.39,93.4,-8.172134,7.821344e-01,0.02,Snow,2021,1,8,...,17.3,67.0,1003.1,0.021550,False,True,True,False,False,False
2,2021-01-08 02:00:00,-7.30,93.4,-8.082665,7.826645e-01,0.01,Rain,2021,1,8,...,15.8,63.0,1003.0,0.021719,False,True,True,False,False,False
3,2021-01-08 03:00:00,-7.40,93.3,-8.194309,7.943089e-01,0.01,Rain,2021,1,8,...,15.8,60.0,1003.0,0.021857,False,True,True,False,False,False
4,2021-01-08 04:00:00,-7.41,93.3,-8.204249,7.942491e-01,0.00,Dry,2021,1,8,...,15.1,56.0,1003.0,0.021838,False,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39972,2025-09-18 05:00:00,11.38,100.0,11.380000,0.000000e+00,0.00,Dry,2025,9,18,...,20.2,224.0,988.3,0.000000,False,True,True,False,False,False
39973,2025-09-18 06:00:00,11.58,100.0,11.580000,0.000000e+00,0.00,Dry,2025,9,18,...,25.6,225.0,988.7,0.000000,False,True,True,False,False,False
39974,2025-09-18 07:00:00,12.02,100.0,12.020000,0.000000e+00,0.00,Dry,2025,9,18,...,25.9,214.0,988.4,0.000000,False,True,True,False,False,False
39975,2025-09-18 08:00:00,12.44,100.0,12.440000,0.000000e+00,0.03,Rain,2025,9,18,...,26.3,207.0,988.6,0.000000,True,True,True,False,False,False


In [101]:
Hourly_events_Keilonkankaantie_Saaritie, Events_Keilonkankaantie_Saaritie = detect_events(Keilonkankaantie_Saaritie_conditions, min_mm=0.2)

In [102]:
print("Total events:", len(Events_Keilonkankaantie_Saaritie))
big_events = Events_Keilonkankaantie_Saaritie.sort_values("mm_total", ascending=False).head(5)
print(big_events[["event_id","start","end","duration_h","mm_total","ptype_main"]])

Total events: 861
     event_id               start                 end  duration_h  mm_total  \
434       435 2023-07-28 08:00:00 2023-07-29 15:00:00        32.0     43.68   
662       663 2024-08-30 20:00:00 2024-08-31 01:00:00         6.0     21.86   
412       413 2023-06-30 21:00:00 2023-06-30 22:00:00         2.0     21.19   
245       246 2022-07-13 09:00:00 2022-07-13 10:00:00         2.0     20.62   
604       605 2024-05-30 15:00:00 2024-05-30 16:00:00         2.0     18.77   

    ptype_main  
434       Rain  
662       Rain  
412       Rain  
245       Rain  
604       Rain  


In [103]:
drying_stats_Keilonkankaantie_Saaritie = compute_drying_times(Hourly_events_Keilonkankaantie_Saaritie, Events_Keilonkankaantie_Saaritie, dry_flag_col="dry_enough_city", min_dry_hours=2, max_lookahead_hours=120)
drying_stats_Keilonkankaantie_Saaritie.head()

Unnamed: 0,event_id,start,end,duration_h,mm_total,ptype_main,dry_flag,drying_hours
0,21,2021-02-23 11:00:00,2021-02-23 12:00:00,2.0,0.82,Mix,dry_enough_city,100.0
1,22,2021-02-24 11:00:00,2021-02-24 12:00:00,2.0,0.67,Snow,dry_enough_city,76.0
2,23,2021-02-26 00:00:00,2021-02-26 01:00:00,2.0,0.59,Mix,dry_enough_city,39.0
3,24,2021-02-26 03:00:00,2021-02-26 03:00:00,1.0,0.4,Rain,dry_enough_city,37.0
4,25,2021-02-27 21:00:00,2021-02-27 21:00:00,1.0,0.4,Snow,dry_enough_city,42.0


In [104]:
print("Events with drying:", len(drying_stats_Keilonkankaantie_Saaritie))
print("Median drying (h):", drying_stats_Keilonkankaantie_Saaritie["drying_hours"].median())

print("\nDrying by main type:")
print(drying_stats_Keilonkankaantie_Saaritie.groupby("ptype_main")["drying_hours"].median())

Events with drying: 509
Median drying (h): 21.0

Drying by main type:
ptype_main
Mix     40.5
Rain    19.0
Snow    41.5
Name: drying_hours, dtype: float64


In [105]:
def add_season(df, ts_col="start"):
    df = df.copy()
    m = df[ts_col].dt.month
    season = pd.Series(index=df.index, dtype="object")
    season[(m == 12) | (m <= 2)] = "winter"
    season[(m >= 3) & (m <= 5)] = "spring"
    season[(m >= 6) & (m <= 8)] = "summer"
    season[(m >= 9) & (m <= 11)] = "autumn"
    df["season"] = season
    return df

drying_stats_season = add_season(drying_stats_Keilonkankaantie_Saaritie)

median_strict_by_season = (drying_stats_season.groupby("season")["drying_hours"].median())


print("Strict median drying by season (h):")
print(median_strict_by_season)

Strict median drying by season (h):
season
autumn    32.5
spring    26.0
summer    17.0
winter    94.0
Name: drying_hours, dtype: float64
