In [None]:
import pandas as pd
import numpy as np
import os

import folium
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

import plotly.express as px
from dash import Dash, dcc, html, Input, Output, dash_table
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Sensor sites: display name -> (latitude, longitude).
locations = {
    'Saaritie': (62.136788, 25.762473),
    'Tuulimyllyntie': (62.221789, 25.695931),
    'Tähtiniementie': (62.011127, 25.552755),
    'Kaakkovuorentie': (62.294362, 25.800196),
    'Kotaniementie': (62.265705, 25.909542)}

# Geocoding setup: the reverse lookup is wrapped so consecutive calls are
# spaced at least one second apart.
geolocator = Nominatim(user_agent="sensor_map")
reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)

# Creating the map:
sensors_location = folium.Map(location=[62.24, 25.75], zoom_start=12)
# Adding markers with street names:
for name, (lat, lon) in locations.items():
    location = reverse((lat, lon))
    # The rate-limited lookup can come back as None (no match, or a geocoder
    # error swallowed by RateLimiter) and a result may lack an address, so
    # guard both before reading the road name. The popup text is unchanged:
    # a missing street still renders as "None".
    address = location.raw.get('address', {}) if location is not None else {}
    street = address.get('road')
    folium.Marker([lat, lon], popup=f"{name}: {street}").add_to(sensors_location)

# Display the finished map as the cell output.
sensors_location

<h2><center>Kaakkovuorentie</center></h2>

In [None]:
# Folder that holds this station's CSV exports.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this folder exists.
base = r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL WS100\Data\Kaakkovuorentie"
# List the folder contents to see which export files are available.
files = os.listdir(base)
files

['Kaakkovuorentie_202404-202406.csv',
 'Kaakkovuorentie_202407-202412.csv',
 'Kaakkovuorentie_202501-202506.csv',
 'Kaakkovuorentie_202507-202509.csv']

In [None]:
# Full path of every CSV export found in the station folder.
file_path = [os.path.join(base, name) for name in os.listdir(base) if name.endswith('.csv')]

# Read each semicolon-separated export and stack them into a single frame.
Kaakkovuorentie = pd.concat(
    map(lambda path: pd.read_csv(path, sep=';'), file_path),
    ignore_index=True,
)

# Parse timestamps against the fixed format; values that do not match become NaT.
Kaakkovuorentie["Timestamp"] = pd.to_datetime(
    Kaakkovuorentie["Timestamp"], format="%Y-%m-%d %H:%M:%S", errors="coerce"
)
print("NaT after parse:", Kaakkovuorentie["Timestamp"].isna().sum())

# Put the rows in chronological order and renumber them from zero.
Kaakkovuorentie = Kaakkovuorentie.sort_values(by="Timestamp", ignore_index=True)

NaT after parse: 0


In [None]:
# Preview the first rows of the combined, time-sorted frame.
Kaakkovuorentie.head()

Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2024-04-02 08:15:00,0.0,0.0,36.78,0.0,0
1,2024-04-02 08:25:02,0.0,0.0,36.78,0.0,0
2,2024-04-02 08:35:00,0.0,0.0,36.78,0.0,0
3,2024-04-02 08:45:02,0.0,0.0,36.78,0.0,0
4,2024-04-02 08:55:03,0.0,0.0,36.78,0.0,0


In [None]:
# Column dtypes, non-null counts and memory footprint.
Kaakkovuorentie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101772 entries, 0 to 101771
Data columns (total 6 columns):
 #   Column                         Non-Null Count   Dtype         
---  ------                         --------------   -----         
 0   Timestamp                      101772 non-null  datetime64[ns]
 1   precipitationIntensity_mm_h    101772 non-null  float64       
 2   precipitationIntensity_mm_min  101772 non-null  float64       
 3   precipitationQuantityAbs_mm    101772 non-null  float64       
 4   precipitationQuantityDiff_mm   101772 non-null  float64       
 5   precipitationType              101772 non-null  int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 4.7 MB


In [None]:
# (rows, columns) before duplicate removal.
Kaakkovuorentie.shape

(101772, 6)

In [None]:
# Check for duplicates: count rows that repeat an earlier row in every column.
Kaakkovuorentie.duplicated().sum()


np.int64(26976)

In [None]:
# Time span covered by the data: first reading, last reading, and the number
# of whole days between them.
start_time, end_time = Kaakkovuorentie["Timestamp"].agg(["min", "max"])
total_days = (end_time - start_time).days
print(
    f"Data starts from: {start_time}",
    f"Data ends at: {end_time}",
    f"Total days of data: {total_days} days",
    sep="\n",
)

Data starts from: 2024-04-02 08:15:00
Data ends at: 2025-09-18 09:49:27
Total days of data: 534 days


In [None]:
# Remove duplicates: keep one copy of each fully identical row, then show the
# new (rows, columns) to confirm how many were dropped.
Kaakkovuorentie = Kaakkovuorentie.drop_duplicates()
Kaakkovuorentie.shape


(74796, 6)

In [None]:
# Distinct precipitation type codes present in the data (input to map_event below).
Kaakkovuorentie['precipitationType'].unique()

array([ 0, 67, 70, 69, 60])

In [None]:
# Work on a copy so the station frame itself is not modified by the dashboard code.
df = Kaakkovuorentie.copy()

def map_event(code: int):
    """Translate a precipitation type code into an event label.

    0 -> "None", 60 -> "Rain", 61..69 -> "Mix", 70 -> "Snow";
    every other code -> "Other".
    """
    # The only range rule is checked first; the remaining codes are exact matches.
    if 61 <= code <= 69:
        return "Mix"
    return {0: "None", 60: "Rain", 70: "Snow"}.get(code, "Other")

def prepare(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, time-indexed copy of the raw readings.

    The input frame is left untouched. On the copy: timestamps are parsed and
    rows sorted chronologically, negative ``precipitationQuantityDiff_mm``
    values are raised to 0, ``precipitationType`` (missing -> 0) is labelled
    through ``map_event`` into ``event``, and ``duration_h`` holds the hours
    until the next row. Rows without a positive gap to a successor (the last
    row, repeated timestamps) receive the median positive gap, or 60 seconds
    when no positive gap exists.
    """
    out = df_raw.copy()
    out["Timestamp"] = pd.to_datetime(out["Timestamp"])
    out = out.sort_values("Timestamp")
    out["precipitationQuantityDiff_mm"] = out["precipitationQuantityDiff_mm"].clip(lower=0)
    out["event"] = out["precipitationType"].fillna(0).astype(int).map(map_event)

    # Seconds from each row to the following one; NaN for the final row.
    step_s = (out["Timestamp"].shift(-1) - out["Timestamp"]).dt.total_seconds()
    usable = step_s > 0  # False for zero/negative gaps and for NaN
    fallback_s = np.nanmedian(step_s[usable]) if np.any(usable) else 60.0

    # Keep usable gaps, substitute the fallback everywhere else.
    out["duration_h"] = np.where(usable, step_s, fallback_s) / 3600.0
    return out.set_index("Timestamp")

# Replace the working copy with its prepared form (Timestamp index, plus the
# event and duration_h columns added by prepare).
df = prepare(df)

# Helper: aggregate by freq within range
# Maps each frequency code offered in the dashboard to its display label.
FREQS = {"D": "Daily", "W-MON": "Weekly (Mon start)", "MS": "Monthly (Start)"}

def aggregate(df: pd.DataFrame, start: str, end: str, freq: str):
    """Summarise duration and precipitation per period (and event) in a date window.

    Args:
        df: Frame indexed by ``Timestamp`` with ``event``, ``duration_h`` and
            ``precipitationQuantityDiff_mm`` columns.
        start: Lower bound passed to ``df.loc[start:end]``.
        end: Upper bound passed to ``df.loc[start:end]``.
        freq: Pandas frequency alias used to bucket the index.

    Returns:
        A tuple ``(dur_event, mm_event, mm_total)``. ``dur_event`` has one row
        per period/event with ``duration_h``, ``dur_total`` (period sum) and
        ``share_%``; ``mm_event`` has ``precip_mm`` per period/event;
        ``mm_total`` has ``period`` and ``total_mm``. ``duration_h``,
        ``share_%``, ``precip_mm`` and ``total_mm`` are rounded to one decimal.
    """
    window = df.loc[start:end]
    per_event = window.groupby([pd.Grouper(freq=freq), "event"])

    # Hours per period and event, plus each event's share of its period's hours.
    dur_event = per_event["duration_h"].sum().reset_index()
    dur_event["dur_total"] = dur_event.groupby("Timestamp")["duration_h"].transform("sum")
    share = 100 * dur_event["duration_h"] / dur_event["dur_total"]
    # A period whose total is not positive gets a share of 0.
    dur_event["share_%"] = share.where(dur_event["dur_total"] > 0, 0)
    dur_event[["duration_h", "share_%"]] = dur_event[["duration_h", "share_%"]].round(1)

    # Millimetres per period and event.
    mm_event = (
        per_event["precipitationQuantityDiff_mm"].sum()
        .reset_index()
        .rename(columns={"precipitationQuantityDiff_mm": "precip_mm"})
    )
    mm_event["precip_mm"] = mm_event["precip_mm"].round(1)

    # Millimetres per period over all events (used for the trend line).
    mm_total = (
        window["precipitationQuantityDiff_mm"].resample(freq).sum()
        .reset_index()
        .rename(columns={"precipitationQuantityDiff_mm": "total_mm", "Timestamp": "period"})
    )
    mm_total["total_mm"] = mm_total["total_mm"].round(1)

    return dur_event, mm_event, mm_total

# ------------------------------------------------------------
# 2) Dash app
app = Dash(__name__)
# The date picker is bounded by the first and last calendar day on the prepared index.
min_date, max_date = df.index.min().date(), df.index.max().date()

app.layout = html.Div(
    style=dict(fontFamily="system-ui", padding="16px", maxWidth="1200px", margin="0 auto"),
    children=[
        html.H2("WS100 – Dynamic Precipitation Dashboard"),

        # Control row: date range, aggregation frequency and metric selector.
        html.Div(
            style=dict(marginBottom="12px"),
            children=[
                html.Div(
                    style=dict(display="inline-block", marginRight="24px"),
                    children=[
                        html.Label("Date range"),
                        dcc.DatePickerRange(
                            id="date-range",
                            start_date=min_date,
                            end_date=max_date,
                            min_date_allowed=min_date,
                            max_date_allowed=max_date,
                        ),
                    ],
                ),
                html.Div(
                    style=dict(display="inline-block", marginRight="24px"),
                    children=[
                        html.Label("Aggregation"),
                        dcc.Dropdown(
                            id="freq",
                            value="MS",
                            options=[dict(label=label, value=code) for code, label in FREQS.items()],
                            clearable=False,
                            style=dict(width="220px"),
                        ),
                    ],
                ),
                html.Div(
                    style=dict(display="inline-block"),
                    children=[
                        html.Label("Metric"),
                        dcc.RadioItems(
                            id="metric",
                            value="duration",
                            options=[
                                dict(label="Duration (hours)", value="duration"),
                                dict(label="Precipitation (mm)", value="precip"),
                                dict(label="Time share (%)", value="share"),
                            ],
                            inline=True,
                        ),
                    ],
                ),
            ],
        ),

        # Output area: two charts followed by the summary table.
        dcc.Graph(id="stacked-graph", style=dict(height="480px")),
        dcc.Graph(id="total-graph", style=dict(height="360px")),

        html.H3("Summary (by period × event)"),
        dash_table.DataTable(
            id="summary-table",
            page_size=12,
            style_table=dict(overflowX="auto"),
            style_cell=dict(padding="6px", fontSize="14px"),
            style_header=dict(fontWeight="600"),
        ),
        html.Div(
            "Notes: Duration is computed from the time difference to the next timestamp; "
            "‘Share’ is duration share per period. Precip negatives are clipped to 0.",
            style=dict(marginTop="8px", color="#555"),
        ),
    ],
)

# ------------------------------------------------------------
# 3) Callbacks
@app.callback(
    [Output("stacked-graph","figure"),
     Output("total-graph","figure"),
     Output("summary-table","data"),
     Output("summary-table","columns")],
    [Input("date-range","start_date"),
     Input("date-range","end_date"),
     Input("freq","value"),
     Input("metric","value")]
)
def update(start_date, end_date, freq, metric):
    """Rebuild both charts and the summary table for the current control values.

    Args:
        start_date: Start of the selected range, as delivered by the date picker.
        end_date: End of the selected range, as delivered by the date picker.
        freq: Frequency code chosen in the dropdown (a key of ``FREQS``).
        metric: ``"duration"`` or ``"precip"``; any other value selects time share.

    Returns:
        ``(fig_stack, fig_total, data, columns)``: the stacked bar figure, the
        total-precipitation line figure, the table rows as a list of dicts and
        the matching column definitions.

    Note:
        Reads the module-level ``df`` at call time.
    """
    dur_event, mm_event, mm_total = aggregate(df, start_date, end_date, freq)

    # Build stacked bar data & labels
    if metric == "duration":
        ycol, ytitle = "duration_h", "Duration (hours)"
        df_plot = dur_event.rename(columns={"Timestamp":"period"})
    elif metric == "precip":
        ycol, ytitle = "precip_mm", "Precipitation (mm)"
        df_plot = mm_event.rename(columns={"Timestamp":"period"})
    else:  # share
        ycol, ytitle = "share_%", "Time share (%)"
        df_plot = dur_event.rename(columns={"Timestamp":"period"})

    fig_stack = px.bar(
        df_plot, x="period", y=ycol, color="event",
        barmode="stack", text_auto=".1f",
        labels={"period":"Period", ycol:ytitle, "event":"Event"},
        title=f"{FREQS.get(freq, freq)} — {ytitle}"
    )
    if metric == "share":
        # Shares are percentages, so pin the axis to the full 0-100 range.
        fig_stack.update_yaxes(range=[0, 100])

    # Total precip trend
    fig_total = px.line(
        mm_total, x="period", y="total_mm",
        markers=True, labels={"period":"Period","total_mm":"Total Precip (mm)"},
        title=f"{FREQS.get(freq, freq)} — Total Precipitation"
    )

    # Summary table: pivot by period × event with both duration and precip
    # Merge duration + precip into one wide table
    pivot_dur = dur_event.pivot_table(index="Timestamp", columns="event", values="duration_h", fill_value=0)
    pivot_mm  = mm_event.pivot_table(index="Timestamp", columns="event", values="precip_mm",  fill_value=0)
    wide = pd.concat(
        {"Dur(h)": pivot_dur, "MM": pivot_mm},
        axis=1
    ).reset_index()

    # The concat gives two-level column labels and reset_index labels the
    # period column ("Timestamp", ""). Joining only the non-empty parts keeps
    # that column's id a clean "period" instead of "period / ".
    wide.columns = [
        " / ".join(part for part in col if part) if isinstance(col, tuple) else col
        for col in wide.columns
    ]

    # Sort chronologically, then format the period as a date string for display.
    out = wide.rename(columns={"Timestamp": "period"}).sort_values("period")
    out["period"] = out["period"].dt.strftime("%Y-%m-%d")

    columns = [{"name": col, "id": col} for col in out.columns]
    data = out.round(1).to_dict("records")

    return fig_stack, fig_total, data, columns


# ------------------------------------------------------------
# Start the Dash server in debug mode when this code runs as the main module
# (which is the case when the cell is executed in a notebook kernel).
if __name__ == "__main__":
    app.run(debug=True)


<h2><center>Kotaniementie</center></h2>

In [None]:
# Folder that holds this station's CSV exports.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this folder exists.
base = r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL WS100\Data\Kotaniementie"
# List the folder contents to see which export files are available.
files = os.listdir(base)
files

['Kotaniementie_202101-202106.csv',
 'Kotaniementie_202107-202112.csv',
 'Kotaniementie_202201-202206.csv',
 'Kotaniementie_202207-202212.csv',
 'Kotaniementie_202301-202306.csv',
 'Kotaniementie_202306-202312.csv',
 'Kotaniementie_202401-202406.csv',
 'Kotaniementie_202406-202412.csv',
 'Kotaniementie_202501-202509.csv']

In [None]:
# Full path of every CSV export found in the station folder.
file_path = [os.path.join(base, name) for name in os.listdir(base) if name.endswith('.csv')]

# Read each semicolon-separated export and stack them into a single frame.
Kotaniementie = pd.concat(
    map(lambda path: pd.read_csv(path, sep=';'), file_path),
    ignore_index=True,
)

# Parse timestamps against the fixed format; values that do not match become NaT.
Kotaniementie["Timestamp"] = pd.to_datetime(
    Kotaniementie["Timestamp"], format="%Y-%m-%d %H:%M:%S", errors="coerce"
)
print("NaT after parse:", Kotaniementie["Timestamp"].isna().sum())

# Put the rows in chronological order and renumber them from zero.
Kotaniementie = Kotaniementie.sort_values(by="Timestamp", ignore_index=True)

NaT after parse: 0


In [None]:
# Preview the first rows of the combined, time-sorted frame.
Kotaniementie.head()

Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:03:44,0.0,0.0,250.46,0.0,0
1,2021-01-01 00:13:44,0.1,0.002,250.46,0.0,70
2,2021-01-01 00:23:44,0.1,0.002,250.48,0.02,70
3,2021-01-01 00:33:45,0.1,0.002,250.49,0.01,70
4,2021-01-01 00:43:44,0.1,0.002,250.51,0.02,70


In [None]:
# (rows, columns) before duplicate removal.
Kotaniementie.shape

(330119, 6)

In [None]:
# Check for duplicates: count rows that repeat an earlier row in every column.
Kotaniementie.duplicated().sum()

np.int64(80405)

In [None]:
# Remove duplicates: keep one copy of each fully identical row, then show the
# new (rows, columns) to confirm how many were dropped.
Kotaniementie = Kotaniementie.drop_duplicates()
Kotaniementie.shape

(249714, 6)

In [None]:
# Time span covered by the data: first reading, last reading, and the number
# of whole days between them.
start_time, end_time = Kotaniementie["Timestamp"].agg(["min", "max"])
total_days = (end_time - start_time).days
print(
    f"Data starts from: {start_time}",
    f"Data ends at: {end_time}",
    f"Total days of data: {total_days} days",
    sep="\n",
)

Data starts from: 2021-01-01 00:03:44
Data ends at: 2025-09-18 10:06:27
Total days of data: 1721 days


In [None]:
# Distinct precipitation type codes present in the data (input to map_event below).
Kotaniementie['precipitationType'].unique()

array([ 0, 70, 69, 67, 60])

In [None]:
# Work on a copy so the station frame itself is not modified by the dashboard code.
df = Kotaniementie.copy()

def map_event(code: int):
    """Translate a precipitation type code into an event label.

    0 -> "None", 60 -> "Rain", 61..69 -> "Mix", 70 -> "Snow";
    every other code -> "Other".
    """
    # The only range rule is checked first; the remaining codes are exact matches.
    if 61 <= code <= 69:
        return "Mix"
    return {0: "None", 60: "Rain", 70: "Snow"}.get(code, "Other")

def prepare(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, time-indexed copy of the raw readings.

    The input frame is left untouched. On the copy: timestamps are parsed and
    rows sorted chronologically, negative ``precipitationQuantityDiff_mm``
    values are raised to 0, ``precipitationType`` (missing -> 0) is labelled
    through ``map_event`` into ``event``, and ``duration_h`` holds the hours
    until the next row. Rows without a positive gap to a successor (the last
    row, repeated timestamps) receive the median positive gap, or 60 seconds
    when no positive gap exists.
    """
    out = df_raw.copy()
    out["Timestamp"] = pd.to_datetime(out["Timestamp"])
    out = out.sort_values("Timestamp")
    out["precipitationQuantityDiff_mm"] = out["precipitationQuantityDiff_mm"].clip(lower=0)
    out["event"] = out["precipitationType"].fillna(0).astype(int).map(map_event)

    # Seconds from each row to the following one; NaN for the final row.
    step_s = (out["Timestamp"].shift(-1) - out["Timestamp"]).dt.total_seconds()
    usable = step_s > 0  # False for zero/negative gaps and for NaN
    fallback_s = np.nanmedian(step_s[usable]) if np.any(usable) else 60.0

    # Keep usable gaps, substitute the fallback everywhere else.
    out["duration_h"] = np.where(usable, step_s, fallback_s) / 3600.0
    return out.set_index("Timestamp")

# Replace the working copy with its prepared form (Timestamp index, plus the
# event and duration_h columns added by prepare).
df = prepare(df)

# Helper: aggregate by freq within range
# Maps each frequency code offered in the dashboard to its display label.
FREQS = {"D": "Daily", "W-MON": "Weekly (Mon start)", "MS": "Monthly (Start)"}

def aggregate(df: pd.DataFrame, start: str, end: str, freq: str):
    """Summarise duration and precipitation per period (and event) in a date window.

    Args:
        df: Frame indexed by ``Timestamp`` with ``event``, ``duration_h`` and
            ``precipitationQuantityDiff_mm`` columns.
        start: Lower bound passed to ``df.loc[start:end]``.
        end: Upper bound passed to ``df.loc[start:end]``.
        freq: Pandas frequency alias used to bucket the index.

    Returns:
        A tuple ``(dur_event, mm_event, mm_total)``. ``dur_event`` has one row
        per period/event with ``duration_h``, ``dur_total`` (period sum) and
        ``share_%``; ``mm_event`` has ``precip_mm`` per period/event;
        ``mm_total`` has ``period`` and ``total_mm``. ``duration_h``,
        ``share_%``, ``precip_mm`` and ``total_mm`` are rounded to one decimal.
    """
    window = df.loc[start:end]
    per_event = window.groupby([pd.Grouper(freq=freq), "event"])

    # Hours per period and event, plus each event's share of its period's hours.
    dur_event = per_event["duration_h"].sum().reset_index()
    dur_event["dur_total"] = dur_event.groupby("Timestamp")["duration_h"].transform("sum")
    share = 100 * dur_event["duration_h"] / dur_event["dur_total"]
    # A period whose total is not positive gets a share of 0.
    dur_event["share_%"] = share.where(dur_event["dur_total"] > 0, 0)
    dur_event[["duration_h", "share_%"]] = dur_event[["duration_h", "share_%"]].round(1)

    # Millimetres per period and event.
    mm_event = (
        per_event["precipitationQuantityDiff_mm"].sum()
        .reset_index()
        .rename(columns={"precipitationQuantityDiff_mm": "precip_mm"})
    )
    mm_event["precip_mm"] = mm_event["precip_mm"].round(1)

    # Millimetres per period over all events (used for the trend line).
    mm_total = (
        window["precipitationQuantityDiff_mm"].resample(freq).sum()
        .reset_index()
        .rename(columns={"precipitationQuantityDiff_mm": "total_mm", "Timestamp": "period"})
    )
    mm_total["total_mm"] = mm_total["total_mm"].round(1)

    return dur_event, mm_event, mm_total

# ------------------------------------------------------------
# 2) Dash app
app = Dash(__name__)
# The date picker is bounded by the first and last calendar day on the prepared index.
min_date, max_date = df.index.min().date(), df.index.max().date()

app.layout = html.Div(
    style=dict(fontFamily="system-ui", padding="16px", maxWidth="1200px", margin="0 auto"),
    children=[
        html.H2("WS100 – Dynamic Precipitation Dashboard"),

        # Control row: date range, aggregation frequency and metric selector.
        html.Div(
            style=dict(marginBottom="12px"),
            children=[
                html.Div(
                    style=dict(display="inline-block", marginRight="24px"),
                    children=[
                        html.Label("Date range"),
                        dcc.DatePickerRange(
                            id="date-range",
                            start_date=min_date,
                            end_date=max_date,
                            min_date_allowed=min_date,
                            max_date_allowed=max_date,
                        ),
                    ],
                ),
                html.Div(
                    style=dict(display="inline-block", marginRight="24px"),
                    children=[
                        html.Label("Aggregation"),
                        dcc.Dropdown(
                            id="freq",
                            value="MS",
                            options=[dict(label=label, value=code) for code, label in FREQS.items()],
                            clearable=False,
                            style=dict(width="220px"),
                        ),
                    ],
                ),
                html.Div(
                    style=dict(display="inline-block"),
                    children=[
                        html.Label("Metric"),
                        dcc.RadioItems(
                            id="metric",
                            value="duration",
                            options=[
                                dict(label="Duration (hours)", value="duration"),
                                dict(label="Precipitation (mm)", value="precip"),
                                dict(label="Time share (%)", value="share"),
                            ],
                            inline=True,
                        ),
                    ],
                ),
            ],
        ),

        # Output area: two charts followed by the summary table.
        dcc.Graph(id="stacked-graph", style=dict(height="480px")),
        dcc.Graph(id="total-graph", style=dict(height="360px")),

        html.H3("Summary (by period × event)"),
        dash_table.DataTable(
            id="summary-table",
            page_size=12,
            style_table=dict(overflowX="auto"),
            style_cell=dict(padding="6px", fontSize="14px"),
            style_header=dict(fontWeight="600"),
        ),
        html.Div(
            "Notes: Duration is computed from the time difference to the next timestamp; "
            "‘Share’ is duration share per period. Precip negatives are clipped to 0.",
            style=dict(marginTop="8px", color="#555"),
        ),
    ],
)

# ------------------------------------------------------------
# 3) Callbacks
@app.callback(
    [Output("stacked-graph","figure"),
     Output("total-graph","figure"),
     Output("summary-table","data"),
     Output("summary-table","columns")],
    [Input("date-range","start_date"),
     Input("date-range","end_date"),
     Input("freq","value"),
     Input("metric","value")]
)
def update(start_date, end_date, freq, metric):
    """Rebuild both charts and the summary table for the current control values.

    Args:
        start_date: Start of the selected range, as delivered by the date picker.
        end_date: End of the selected range, as delivered by the date picker.
        freq: Frequency code chosen in the dropdown (a key of ``FREQS``).
        metric: ``"duration"`` or ``"precip"``; any other value selects time share.

    Returns:
        ``(fig_stack, fig_total, data, columns)``: the stacked bar figure, the
        total-precipitation line figure, the table rows as a list of dicts and
        the matching column definitions.

    Note:
        Reads the module-level ``df`` at call time.
    """
    dur_event, mm_event, mm_total = aggregate(df, start_date, end_date, freq)

    # Build stacked bar data & labels
    if metric == "duration":
        ycol, ytitle = "duration_h", "Duration (hours)"
        df_plot = dur_event.rename(columns={"Timestamp":"period"})
    elif metric == "precip":
        ycol, ytitle = "precip_mm", "Precipitation (mm)"
        df_plot = mm_event.rename(columns={"Timestamp":"period"})
    else:  # share
        ycol, ytitle = "share_%", "Time share (%)"
        df_plot = dur_event.rename(columns={"Timestamp":"period"})

    fig_stack = px.bar(
        df_plot, x="period", y=ycol, color="event",
        barmode="stack", text_auto=".1f",
        labels={"period":"Period", ycol:ytitle, "event":"Event"},
        title=f"{FREQS.get(freq, freq)} — {ytitle}"
    )
    if metric == "share":
        # Shares are percentages, so pin the axis to the full 0-100 range.
        fig_stack.update_yaxes(range=[0, 100])

    # Total precip trend
    fig_total = px.line(
        mm_total, x="period", y="total_mm",
        markers=True, labels={"period":"Period","total_mm":"Total Precip (mm)"},
        title=f"{FREQS.get(freq, freq)} — Total Precipitation"
    )

    # Summary table: pivot by period × event with both duration and precip
    # Merge duration + precip into one wide table
    pivot_dur = dur_event.pivot_table(index="Timestamp", columns="event", values="duration_h", fill_value=0)
    pivot_mm  = mm_event.pivot_table(index="Timestamp", columns="event", values="precip_mm",  fill_value=0)
    wide = pd.concat(
        {"Dur(h)": pivot_dur, "MM": pivot_mm},
        axis=1
    ).reset_index()

    # The concat gives two-level column labels and reset_index labels the
    # period column ("Timestamp", ""). Joining only the non-empty parts keeps
    # that column's id a clean "period" instead of "period / ".
    wide.columns = [
        " / ".join(part for part in col if part) if isinstance(col, tuple) else col
        for col in wide.columns
    ]

    # Sort chronologically, then format the period as a date string for display.
    out = wide.rename(columns={"Timestamp": "period"}).sort_values("period")
    out["period"] = out["period"].dt.strftime("%Y-%m-%d")

    columns = [{"name": col, "id": col} for col in out.columns]
    data = out.round(1).to_dict("records")

    return fig_stack, fig_total, data, columns


# ------------------------------------------------------------
# Start the Dash server in debug mode when this code runs as the main module
# (which is the case when the cell is executed in a notebook kernel).
if __name__ == "__main__":
    app.run(debug=True)


<h2><center>Saaritie</center></h2>

In [None]:
# Folder that holds this station's CSV exports.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this folder exists.
base = r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL WS100\Data\Saaritie"
# List the folder contents to see which export files are available.
files = os.listdir(base)
files

['Saaritie_202101-202106.csv',
 'Saaritie_202107-202112.csv',
 'Saaritie_202201-202206.csv',
 'Saaritie_202207-202212.csv',
 'Saaritie_202301-202306.csv',
 'Saaritie_202307-202312.csv',
 'Saaritie_202401-202406.csv',
 'Saaritie_202407-202412.csv',
 'Saaritie_202501-202509.csv']

In [None]:
# Full path of every CSV export found in the station folder.
file_path = [os.path.join(base, name) for name in os.listdir(base) if name.endswith('.csv')]

# Read each semicolon-separated export and stack them into a single frame.
Saaritie = pd.concat(
    map(lambda path: pd.read_csv(path, sep=';'), file_path),
    ignore_index=True,
)

# Parse timestamps against the fixed format; values that do not match become NaT.
Saaritie["Timestamp"] = pd.to_datetime(
    Saaritie["Timestamp"], format="%Y-%m-%d %H:%M:%S", errors="coerce"
)
print("NaT after parse:", Saaritie["Timestamp"].isna().sum())

# Put the rows in chronological order, renumber them from zero, and preview the result.
Saaritie = Saaritie.sort_values(by="Timestamp", ignore_index=True)
Saaritie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:00:40,0.1,0.002,160.92,0.02,70
1,2021-01-01 00:10:46,0.3,0.005,160.94,0.02,70
2,2021-01-01 00:20:41,0.1,0.002,160.96,0.02,70
3,2021-01-01 00:30:50,0.0,0.0,160.97,0.01,69
4,2021-01-01 00:40:43,0.0,0.0,160.97,0.0,0


In [None]:
# (rows, columns) before duplicate removal.
Saaritie.shape

(326985, 6)

In [None]:
# Count rows that repeat an earlier row in every column.
Saaritie.duplicated().sum()

np.int64(82397)

In [None]:
# Keep one copy of each fully identical row, then show the new (rows, columns)
# to confirm how many were dropped.
Saaritie = Saaritie.drop_duplicates()
Saaritie.shape

(244588, 6)

In [None]:
# Time span covered by the data: first reading, last reading, and the number
# of whole days between them.
start_time, end_time = Saaritie["Timestamp"].agg(["min", "max"])
total_days = (end_time - start_time).days
print(
    f"Data starts from: {start_time}",
    f"Data ends at: {end_time}",
    f"Total days of data: {total_days} days",
    sep="\n",
)

Data starts from: 2021-01-01 00:00:40
Data ends at: 2025-09-18 09:29:53
Total days of data: 1721 days


In [None]:
# Work on a copy so the station frame itself is not modified by the dashboard code.
df = Saaritie.copy()

def map_event(code: int):
    """Translate a precipitation type code into an event label.

    0 -> "None", 60 -> "Rain", 61..69 -> "Mix", 70 -> "Snow";
    every other code -> "Other".
    """
    # The only range rule is checked first; the remaining codes are exact matches.
    if 61 <= code <= 69:
        return "Mix"
    return {0: "None", 60: "Rain", 70: "Snow"}.get(code, "Other")

def prepare(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, time-indexed copy of the raw readings.

    The input frame is left untouched. On the copy: timestamps are parsed and
    rows sorted chronologically, negative ``precipitationQuantityDiff_mm``
    values are raised to 0, ``precipitationType`` (missing -> 0) is labelled
    through ``map_event`` into ``event``, and ``duration_h`` holds the hours
    until the next row. Rows without a positive gap to a successor (the last
    row, repeated timestamps) receive the median positive gap, or 60 seconds
    when no positive gap exists.
    """
    out = df_raw.copy()
    out["Timestamp"] = pd.to_datetime(out["Timestamp"])
    out = out.sort_values("Timestamp")
    out["precipitationQuantityDiff_mm"] = out["precipitationQuantityDiff_mm"].clip(lower=0)
    out["event"] = out["precipitationType"].fillna(0).astype(int).map(map_event)

    # Seconds from each row to the following one; NaN for the final row.
    step_s = (out["Timestamp"].shift(-1) - out["Timestamp"]).dt.total_seconds()
    usable = step_s > 0  # False for zero/negative gaps and for NaN
    fallback_s = np.nanmedian(step_s[usable]) if np.any(usable) else 60.0

    # Keep usable gaps, substitute the fallback everywhere else.
    out["duration_h"] = np.where(usable, step_s, fallback_s) / 3600.0
    return out.set_index("Timestamp")

# Replace the working copy with its prepared form (Timestamp index, plus the
# event and duration_h columns added by prepare).
df = prepare(df)

# Helper: aggregate by freq within range
# Maps each frequency code offered in the dashboard to its display label.
FREQS = {"D": "Daily", "W-MON": "Weekly (Mon start)", "MS": "Monthly (Start)"}

def aggregate(df: pd.DataFrame, start: str, end: str, freq: str):
    """Summarise duration and precipitation per period (and event) in a date window.

    Args:
        df: Frame indexed by ``Timestamp`` with ``event``, ``duration_h`` and
            ``precipitationQuantityDiff_mm`` columns.
        start: Lower bound passed to ``df.loc[start:end]``.
        end: Upper bound passed to ``df.loc[start:end]``.
        freq: Pandas frequency alias used to bucket the index.

    Returns:
        A tuple ``(dur_event, mm_event, mm_total)``. ``dur_event`` has one row
        per period/event with ``duration_h``, ``dur_total`` (period sum) and
        ``share_%``; ``mm_event`` has ``precip_mm`` per period/event;
        ``mm_total`` has ``period`` and ``total_mm``. ``duration_h``,
        ``share_%``, ``precip_mm`` and ``total_mm`` are rounded to one decimal.
    """
    window = df.loc[start:end]
    per_event = window.groupby([pd.Grouper(freq=freq), "event"])

    # Hours per period and event, plus each event's share of its period's hours.
    dur_event = per_event["duration_h"].sum().reset_index()
    dur_event["dur_total"] = dur_event.groupby("Timestamp")["duration_h"].transform("sum")
    share = 100 * dur_event["duration_h"] / dur_event["dur_total"]
    # A period whose total is not positive gets a share of 0.
    dur_event["share_%"] = share.where(dur_event["dur_total"] > 0, 0)
    dur_event[["duration_h", "share_%"]] = dur_event[["duration_h", "share_%"]].round(1)

    # Millimetres per period and event.
    mm_event = (
        per_event["precipitationQuantityDiff_mm"].sum()
        .reset_index()
        .rename(columns={"precipitationQuantityDiff_mm": "precip_mm"})
    )
    mm_event["precip_mm"] = mm_event["precip_mm"].round(1)

    # Millimetres per period over all events (used for the trend line).
    mm_total = (
        window["precipitationQuantityDiff_mm"].resample(freq).sum()
        .reset_index()
        .rename(columns={"precipitationQuantityDiff_mm": "total_mm", "Timestamp": "period"})
    )
    mm_total["total_mm"] = mm_total["total_mm"].round(1)

    return dur_event, mm_event, mm_total

# ------------------------------------------------------------
# 2) Dash app
app = Dash(__name__)
# The date picker is bounded by the first and last calendar day on the prepared index.
min_date, max_date = df.index.min().date(), df.index.max().date()

app.layout = html.Div(
    style=dict(fontFamily="system-ui", padding="16px", maxWidth="1200px", margin="0 auto"),
    children=[
        html.H2("WS100 – Dynamic Precipitation Dashboard"),

        # Control row: date range, aggregation frequency and metric selector.
        html.Div(
            style=dict(marginBottom="12px"),
            children=[
                html.Div(
                    style=dict(display="inline-block", marginRight="24px"),
                    children=[
                        html.Label("Date range"),
                        dcc.DatePickerRange(
                            id="date-range",
                            start_date=min_date,
                            end_date=max_date,
                            min_date_allowed=min_date,
                            max_date_allowed=max_date,
                        ),
                    ],
                ),
                html.Div(
                    style=dict(display="inline-block", marginRight="24px"),
                    children=[
                        html.Label("Aggregation"),
                        dcc.Dropdown(
                            id="freq",
                            value="MS",
                            options=[dict(label=label, value=code) for code, label in FREQS.items()],
                            clearable=False,
                            style=dict(width="220px"),
                        ),
                    ],
                ),
                html.Div(
                    style=dict(display="inline-block"),
                    children=[
                        html.Label("Metric"),
                        dcc.RadioItems(
                            id="metric",
                            value="duration",
                            options=[
                                dict(label="Duration (hours)", value="duration"),
                                dict(label="Precipitation (mm)", value="precip"),
                                dict(label="Time share (%)", value="share"),
                            ],
                            inline=True,
                        ),
                    ],
                ),
            ],
        ),

        # Output area: two charts followed by the summary table.
        dcc.Graph(id="stacked-graph", style=dict(height="480px")),
        dcc.Graph(id="total-graph", style=dict(height="360px")),

        html.H3("Summary (by period × event)"),
        dash_table.DataTable(
            id="summary-table",
            page_size=12,
            style_table=dict(overflowX="auto"),
            style_cell=dict(padding="6px", fontSize="14px"),
            style_header=dict(fontWeight="600"),
        ),
        html.Div(
            "Notes: Duration is computed from the time difference to the next timestamp; "
            "‘Share’ is duration share per period. Precip negatives are clipped to 0.",
            style=dict(marginTop="8px", color="#555"),
        ),
    ],
)

# ------------------------------------------------------------
# 3) Callbacks
@app.callback(
    [Output("stacked-graph","figure"),
     Output("total-graph","figure"),
     Output("summary-table","data"),
     Output("summary-table","columns")],
    [Input("date-range","start_date"),
     Input("date-range","end_date"),
     Input("freq","value"),
     Input("metric","value")]
)
def update(start_date, end_date, freq, metric):
    """Rebuild both charts and the summary table for the current control values.

    Args:
        start_date: Start of the selected range, as delivered by the date picker.
        end_date: End of the selected range, as delivered by the date picker.
        freq: Frequency code chosen in the dropdown (a key of ``FREQS``).
        metric: ``"duration"`` or ``"precip"``; any other value selects time share.

    Returns:
        ``(fig_stack, fig_total, data, columns)``: the stacked bar figure, the
        total-precipitation line figure, the table rows as a list of dicts and
        the matching column definitions.

    Note:
        Reads the module-level ``df`` at call time.
    """
    dur_event, mm_event, mm_total = aggregate(df, start_date, end_date, freq)

    # Build stacked bar data & labels
    if metric == "duration":
        ycol, ytitle = "duration_h", "Duration (hours)"
        df_plot = dur_event.rename(columns={"Timestamp":"period"})
    elif metric == "precip":
        ycol, ytitle = "precip_mm", "Precipitation (mm)"
        df_plot = mm_event.rename(columns={"Timestamp":"period"})
    else:  # share
        ycol, ytitle = "share_%", "Time share (%)"
        df_plot = dur_event.rename(columns={"Timestamp":"period"})

    fig_stack = px.bar(
        df_plot, x="period", y=ycol, color="event",
        barmode="stack", text_auto=".1f",
        labels={"period":"Period", ycol:ytitle, "event":"Event"},
        title=f"{FREQS.get(freq, freq)} — {ytitle}"
    )
    if metric == "share":
        # Shares are percentages, so pin the axis to the full 0-100 range.
        fig_stack.update_yaxes(range=[0, 100])

    # Total precip trend
    fig_total = px.line(
        mm_total, x="period", y="total_mm",
        markers=True, labels={"period":"Period","total_mm":"Total Precip (mm)"},
        title=f"{FREQS.get(freq, freq)} — Total Precipitation"
    )

    # Summary table: pivot by period × event with both duration and precip
    # Merge duration + precip into one wide table
    pivot_dur = dur_event.pivot_table(index="Timestamp", columns="event", values="duration_h", fill_value=0)
    pivot_mm  = mm_event.pivot_table(index="Timestamp", columns="event", values="precip_mm",  fill_value=0)
    wide = pd.concat(
        {"Dur(h)": pivot_dur, "MM": pivot_mm},
        axis=1
    ).reset_index()

    # The concat gives two-level column labels and reset_index labels the
    # period column ("Timestamp", ""). Joining only the non-empty parts keeps
    # that column's id a clean "period" instead of "period / ".
    wide.columns = [
        " / ".join(part for part in col if part) if isinstance(col, tuple) else col
        for col in wide.columns
    ]

    # Sort chronologically, then format the period as a date string for display.
    out = wide.rename(columns={"Timestamp": "period"}).sort_values("period")
    out["period"] = out["period"].dt.strftime("%Y-%m-%d")

    columns = [{"name": col, "id": col} for col in out.columns]
    data = out.round(1).to_dict("records")

    return fig_stack, fig_total, data, columns


# ------------------------------------------------------------
# Start the Dash server in debug mode when this code runs as the main module
# (which is the case when the cell is executed in a notebook kernel).
if __name__ == "__main__":
    app.run(debug=True)


<h2><center>Tähtiniementie</center></h2>

In [None]:
# Folder that holds this station's CSV exports.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this folder exists.
base = r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL WS100\Data\Tähtiniementie"
# List the folder contents to see which export files are available.
files = os.listdir(base)
files

['Tähtiniementie_202101-202106.csv',
 'Tähtiniementie_202107-202112.csv',
 'Tähtiniementie_202201-202206.csv',
 'Tähtiniementie_202207-202212.csv',
 'Tähtiniementie_202301-202306.csv',
 'Tähtiniementie_202307-202312.csv',
 'Tähtiniementie_202401-202406.csv',
 'Tähtiniementie_202407-202412.csv',
 'Tähtiniementie_202501-202509.csv']

In [None]:
# Full path of every CSV export found in the station folder.
file_path = [os.path.join(base, name) for name in os.listdir(base) if name.endswith('.csv')]

# Read each semicolon-separated export and stack them into a single frame.
Tahtiniementie = pd.concat(
    map(lambda path: pd.read_csv(path, sep=';'), file_path),
    ignore_index=True,
)

# Parse timestamps against the fixed format; values that do not match become NaT.
Tahtiniementie["Timestamp"] = pd.to_datetime(
    Tahtiniementie["Timestamp"], format="%Y-%m-%d %H:%M:%S", errors="coerce"
)
print("NaT after parse:", Tahtiniementie["Timestamp"].isna().sum())

# Put the rows in chronological order, renumber them from zero, and preview the result.
Tahtiniementie = Tahtiniementie.sort_values(by="Timestamp", ignore_index=True)
Tahtiniementie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:00:43,0.1,0.002,33.73,0.02,70
1,2021-01-01 00:10:44,0.2,0.003,33.75,0.02,70
2,2021-01-01 00:20:43,0.0,0.0,33.76,0.01,70
3,2021-01-01 00:30:44,0.1,0.002,33.77,0.01,70
4,2021-01-01 00:40:44,0.2,0.003,33.78,0.01,70


In [None]:
# (rows, columns) before duplicate removal.
Tahtiniementie.shape

(304151, 6)

In [None]:
# Count rows that repeat an earlier row in every column.
Tahtiniementie.duplicated().sum()

np.int64(57908)

In [None]:
# Keep one copy of each fully identical row, then show the new (rows, columns)
# to confirm how many were dropped.
Tahtiniementie = Tahtiniementie.drop_duplicates()
Tahtiniementie.shape

(246243, 6)

In [None]:
# Time span covered by the data: first reading, last reading, and the number
# of whole days between them.
start_time, end_time = Tahtiniementie["Timestamp"].agg(["min", "max"])
total_days = (end_time - start_time).days
print(
    f"Data starts from: {start_time}",
    f"Data ends at: {end_time}",
    f"Total days of data: {total_days} days",
    sep="\n",
)

Data starts from: 2021-01-01 00:00:43
Data ends at: 2025-09-18 10:40:12
Total days of data: 1721 days


In [None]:
# Work on a copy so the station frame itself is not modified by the dashboard code.
df = Tahtiniementie.copy()

def map_event(code: int):
    """Translate a numeric precipitation-type code into an event label.

    Mapping: 0 -> "None", 60 -> "Rain", 61..69 -> "Mix", 70 -> "Snow";
    every other code -> "Other".
    """
    # Codes that map one-to-one onto a label.
    exact_labels = {0: "None", 60: "Rain", 70: "Snow"}
    label = exact_labels.get(code)
    if label is not None:
        return label
    # The remaining 60-series codes are grouped as mixed precipitation.
    return "Mix" if 61 <= code <= 69 else "Other"

def prepare(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw sensor frame, indexed by timestamp.

    The copy is sorted by "Timestamp", negative values of
    "precipitationQuantityDiff_mm" are clipped to 0, an "event" label is
    derived from "precipitationType" via ``map_event`` (missing codes count
    as 0), and "duration_h" holds the hours until the next row. The input
    frame is not modified.
    """
    frame = df_raw.copy()
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])
    frame = frame.sort_values("Timestamp")
    frame["precipitationQuantityDiff_mm"] = frame["precipitationQuantityDiff_mm"].clip(lower=0)

    type_codes = frame["precipitationType"].fillna(0).astype(int)
    frame["event"] = type_codes.map(map_event)

    # Each row's event is treated as active until the next row's timestamp.
    step_s = (frame["Timestamp"].shift(-1) - frame["Timestamp"]).dt.total_seconds()

    # The last row has no successor (NaN) and repeated timestamps give a
    # non-positive step; both fall back to the median positive step, or to
    # 60 seconds when no positive step exists at all.
    positive = step_s > 0
    fallback_s = np.nanmedian(step_s[positive]) if np.any(positive) else 60.0
    needs_fallback = np.isnan(step_s) | (step_s <= 0)
    step_s = np.where(needs_fallback, fallback_s, step_s)

    frame["duration_h"] = step_s / 3600.0
    return frame.set_index("Timestamp")

# Replace the raw copy with the prepared frame (event labels, per-row durations, Timestamp index).
df = prepare(df)

# Helper: aggregate by freq within range
# FREQS maps each pandas frequency alias offered in the dashboard to its display label.
FREQS = {"D": "Daily", "W-MON": "Weekly (Mon start)", "MS": "Monthly (Start)"}

def aggregate(df: pd.DataFrame, start: str, end: str, freq: str):
    """Aggregate prepared sensor data per period and event inside a date range.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose index is a DatetimeIndex named "Timestamp" and which has
        the columns "event", "duration_h" and "precipitationQuantityDiff_mm".
    start, end : str
        Bounds used for label-based slicing of the index (``df.loc[start:end]``).
    freq : str
        pandas frequency alias that defines the periods, e.g. "D", "W-MON", "MS".

    Returns
    -------
    tuple of pd.DataFrame
        ``dur_event`` (Timestamp, event, duration_h, dur_total, share_%),
        ``mm_event`` (Timestamp, event, precip_mm) and
        ``mm_total`` (period, total_mm). Displayed values are rounded to one
        decimal; ``dur_total`` is left unrounded.
    """
    sub = df.loc[start:end]

    # pandas closes and labels weekly bins on the right by default, so "W-MON"
    # would yield Tuesday-to-Monday weeks stamped with their final Monday.
    # Left-closed, left-labelled bins make each week run Monday-to-Sunday and
    # carry its starting Monday, as the "Weekly (Mon start)" option promises.
    # All other aliases keep their pandas defaults.
    bin_kwargs = {"closed": "left", "label": "left"} if str(freq).upper().startswith("W") else {}
    g = pd.Grouper(freq=freq, **bin_kwargs)

    # Duration by event
    dur_event = sub.groupby([g, "event"])["duration_h"].sum().reset_index()

    # Precip by event, rounded for display
    mm_event = (
        sub.groupby([g, "event"])["precipitationQuantityDiff_mm"]
           .sum().round(1).rename("precip_mm").reset_index()
    )

    # Total precip (for trend); same binning as the per-event tables
    mm_total = (
        sub["precipitationQuantityDiff_mm"]
           .resample(freq, **bin_kwargs).sum().reset_index()
           .rename(columns={"precipitationQuantityDiff_mm":"total_mm", "Timestamp":"period"})
    )
    mm_total["total_mm"] = mm_total["total_mm"].round(1)

    # Share % by event (time share): each event's hours relative to the
    # period total, computed before rounding.
    dur_total = dur_event.groupby("Timestamp")["duration_h"].transform("sum")
    dur_event["dur_total"] = dur_total
    dur_event["share_%"] = np.where(dur_total > 0,
                                    100 * dur_event["duration_h"] / dur_total, 0)

    # Round for display
    for col in ["duration_h", "share_%"]:
        dur_event[col] = dur_event[col].round(1)

    return dur_event, mm_event, mm_total

# ------------------------------------------------------------
# 2) Dash app
# Create the Dash application and derive the selectable date bounds from the
# first and last timestamp in the prepared frame's index.
app = Dash(__name__)
min_date = df.index.min().date()
max_date = df.index.max().date()

# Page layout: a title, one row of controls (date range, aggregation
# frequency, metric), two graphs, a summary table and an explanatory note.
# The component ids declared here are the ones the callback's Input/Output
# declarations refer to.
app.layout = html.Div(
    style={"fontFamily":"system-ui","padding":"16px","maxWidth":"1200px","margin":"0 auto"},
    children=[
        html.H2("WS100 – Dynamic Precipitation Dashboard"),
        html.Div([
            # Control 1: date range, initially the full extent of the data.
            html.Div([
                html.Label("Date range"),
                dcc.DatePickerRange(
                    id="date-range", start_date=min_date, end_date=max_date,
                    min_date_allowed=min_date, max_date_allowed=max_date
                ),
            ], style={"display":"inline-block","marginRight":"24px"}),

            # Control 2: aggregation frequency; option values are the FREQS keys, default "MS".
            html.Div([
                html.Label("Aggregation"),
                dcc.Dropdown(
                    id="freq", value="MS",
                    options=[{"label":v, "value":k} for k,v in FREQS.items()],
                    clearable=False, style={"width":"220px"}
                )
            ], style={"display":"inline-block","marginRight":"24px"}),

            # Control 3: which quantity the stacked bar chart shows.
            html.Div([
                html.Label("Metric"),
                dcc.RadioItems(
                    id="metric", value="duration",
                    options=[
                        {"label":"Duration (hours)", "value":"duration"},
                        {"label":"Precipitation (mm)", "value":"precip"},
                        {"label":"Time share (%)", "value":"share"},
                    ],
                    inline=True
                )
            ], style={"display":"inline-block"}),
        ], style={"marginBottom":"12px"}),

        # Figures filled in by the callback.
        dcc.Graph(id="stacked-graph", style={"height":"480px"}),
        dcc.Graph(id="total-graph", style={"height":"360px"}),

        # Summary table; its data and columns are supplied by the callback.
        html.H3("Summary (by period × event)"),
        dash_table.DataTable(
            id="summary-table",
            page_size=12,
            style_table={"overflowX":"auto"},
            style_cell={"padding":"6px", "fontSize":"14px"},
            style_header={"fontWeight":"600"}
        ),
        html.Div("Notes: Duration is computed from the time difference to the next timestamp; "
                 "‘Share’ is duration share per period. Precip negatives are clipped to 0.",
                 style={"marginTop":"8px","color":"#555"})
    ]
)

# ------------------------------------------------------------
# 3) Callbacks
@app.callback(
    [Output("stacked-graph","figure"),
     Output("total-graph","figure"),
     Output("summary-table","data"),
     Output("summary-table","columns")],
    [Input("date-range","start_date"),
     Input("date-range","end_date"),
     Input("freq","value"),
     Input("metric","value")]
)
def update(start_date, end_date, freq, metric):
    """Rebuild both figures and the summary table for the current control values.

    Parameters
    ----------
    start_date, end_date
        Bounds of the selected date range, passed through to ``aggregate``.
    freq
        Frequency alias chosen in the dropdown (a key of ``FREQS``).
    metric
        "duration", "precip" or anything else (treated as "share").

    Returns
    -------
    tuple
        ``(fig_stack, fig_total, data, columns)`` in the order of the
        callback's ``Output`` declarations.
    """
    dur_event, mm_event, mm_total = aggregate(df, start_date, end_date, freq)

    # Pick the value column, axis title and source table for the stacked bars.
    if metric == "duration":
        ycol, ytitle, source = "duration_h", "Duration (hours)", dur_event
    elif metric == "precip":
        ycol, ytitle, source = "precip_mm", "Precipitation (mm)", mm_event
    else:  # share
        ycol, ytitle, source = "share_%", "Time share (%)", dur_event
    df_plot = source.rename(columns={"Timestamp":"period"})

    fig_stack = px.bar(
        df_plot, x="period", y=ycol, color="event",
        barmode="stack", text_auto=".1f",
        labels={"period":"Period", ycol:ytitle, "event":"Event"},
        title=f"{FREQS.get(freq, freq)} — {ytitle}"
    )
    if metric == "share":
        fig_stack.update_yaxes(range=[0, 100])

    # Total precip trend
    fig_total = px.line(
        mm_total, x="period", y="total_mm",
        markers=True, labels={"period":"Period","total_mm":"Total Precip (mm)"},
        title=f"{FREQS.get(freq, freq)} — Total Precipitation"
    )

    # Summary table: one row per period, one column per (metric, event) pair.
    pivot_dur = dur_event.pivot_table(index="Timestamp", columns="event", values="duration_h", fill_value=0)
    pivot_mm  = mm_event.pivot_table(index="Timestamp", columns="event", values="precip_mm",  fill_value=0)
    out = pd.concat({"Dur(h)": pivot_dur, "MM": pivot_mm}, axis=1)

    # Flatten the two-level header while the period is still the index. If the
    # index is reset first, the period column gets the tuple label
    # ("period", ""), which joins to "period / " and cannot be addressed
    # reliably by the plain name "period" for sorting and formatting.
    out.columns = [" / ".join(col) for col in out.columns]
    out = out.rename_axis("period").reset_index().sort_values("period")
    out["period"] = out["period"].dt.strftime("%Y-%m-%d")

    # DataTable column specs: display name and id are the flattened labels.
    columns = [{"name": col, "id": col} for col in out.columns]
    data = out.round(1).to_dict("records")

    return fig_stack, fig_total, data, columns

# Start the Dash server in debug mode when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=True)


<h2><center>Tuulimyllyntie</center></h2>

In [None]:
# Folder that holds the CSV exports for the Tuulimyllyntie sensor.
# NOTE(review): absolute, machine-specific path; the cell only runs where this drive layout exists.
base = r"D:\Fiveth\Project_semester_automn_2025\Marjetas_Data\Marjetas_Data\JKL WS100\Data\Tuulimyllyntie"
# List the folder contents; the bare `files` expression shows the list as the cell output.
files = os.listdir(base)
files

['Tuulimyllyntie_202101-202106.csv',
 'Tuulimyllyntie_202107-202112.csv',
 'Tuulimyllyntie_202201-202206.csv',
 'Tuulimyllyntie_202207-202212.csv',
 'Tuulimyllyntie_202301-202306.csv',
 'Tuulimyllyntie_202307-202312.csv',
 'Tuulimyllyntie_202401-202406.csv',
 'Tuulimyllyntie_202407-202412.csv',
 'Tuulimyllyntie_202501-202509.csv']

In [None]:
# Collect the CSV exports in sorted order: os.listdir() returns entries in
# arbitrary order, so sorting makes the concatenation order reproducible.
file_path = sorted(os.path.join(base, f) for f in os.listdir(base) if f.endswith('.csv'))
Tuulimyllyntie = pd.concat((pd.read_csv(f, sep=';') for f in file_path), ignore_index=True)

# Parse the timestamps; values that do not match the format become NaT
# (errors="coerce"), and the count printed below makes such rows visible.
Tuulimyllyntie["Timestamp"] = pd.to_datetime(Tuulimyllyntie["Timestamp"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", Tuulimyllyntie["Timestamp"].isna().sum())
# Sort chronologically; the stable mergesort keeps rows that share a timestamp
# in their (now deterministic) file order.
Tuulimyllyntie = Tuulimyllyntie.sort_values(by="Timestamp", kind="mergesort").reset_index(drop=True)
Tuulimyllyntie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2021-01-01 00:03:41,0.1,0.002,507.53,0.02,70
1,2021-01-01 00:13:42,0.0,0.0,507.54,0.01,0
2,2021-01-01 00:23:41,0.0,0.0,507.54,0.0,0
3,2021-01-01 00:33:41,0.0,0.0,507.54,0.0,0
4,2021-01-01 00:43:42,0.0,0.0,507.54,0.0,0


In [None]:
# (rows, columns) of the concatenated data before duplicates are removed.
Tuulimyllyntie.shape

(340081, 6)

In [None]:
# Number of rows that repeat an earlier row in every column.
Tuulimyllyntie.duplicated().sum()

np.int64(90494)

In [None]:
# Remove rows that are identical in every column and show the resulting shape.
# Rows that share a timestamp but differ in any other column are kept.
Tuulimyllyntie = Tuulimyllyntie.drop_duplicates()
Tuulimyllyntie.shape

(249587, 6)

In [None]:
# Time span covered by the data: first and last timestamp, and the whole days between them.
start_time, end_time = Tuulimyllyntie["Timestamp"].min(), Tuulimyllyntie["Timestamp"].max()
total_days = (end_time - start_time).days
print(
    f"Data starts from: {start_time}",
    f"Data ends at: {end_time}",
    f"Total days of data: {total_days} days",
    sep="\n",
)

Data starts from: 2021-01-01 00:03:41
Data ends at: 2025-09-18 10:24:11
Total days of data: 1721 days


In [None]:
# Work on a copy so the de-duplicated Tuulimyllyntie frame is not modified by the dashboard preparation.
df = Tuulimyllyntie.copy()

def map_event(code: int):
    """Translate a numeric precipitation-type code into an event label.

    Mapping: 0 -> "None", 60 -> "Rain", 61..69 -> "Mix", 70 -> "Snow";
    every other code -> "Other".
    """
    # Codes that map one-to-one onto a label.
    exact_labels = {0: "None", 60: "Rain", 70: "Snow"}
    label = exact_labels.get(code)
    if label is not None:
        return label
    # The remaining 60-series codes are grouped as mixed precipitation.
    return "Mix" if 61 <= code <= 69 else "Other"

def prepare(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw sensor frame, indexed by timestamp.

    The copy is sorted by "Timestamp", negative values of
    "precipitationQuantityDiff_mm" are clipped to 0, an "event" label is
    derived from "precipitationType" via ``map_event`` (missing codes count
    as 0), and "duration_h" holds the hours until the next row. The input
    frame is not modified.
    """
    frame = df_raw.copy()
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])
    frame = frame.sort_values("Timestamp")
    frame["precipitationQuantityDiff_mm"] = frame["precipitationQuantityDiff_mm"].clip(lower=0)

    type_codes = frame["precipitationType"].fillna(0).astype(int)
    frame["event"] = type_codes.map(map_event)

    # Each row's event is treated as active until the next row's timestamp.
    step_s = (frame["Timestamp"].shift(-1) - frame["Timestamp"]).dt.total_seconds()

    # The last row has no successor (NaN) and repeated timestamps give a
    # non-positive step; both fall back to the median positive step, or to
    # 60 seconds when no positive step exists at all.
    positive = step_s > 0
    fallback_s = np.nanmedian(step_s[positive]) if np.any(positive) else 60.0
    needs_fallback = np.isnan(step_s) | (step_s <= 0)
    step_s = np.where(needs_fallback, fallback_s, step_s)

    frame["duration_h"] = step_s / 3600.0
    return frame.set_index("Timestamp")

# Replace the raw copy with the prepared frame (event labels, per-row durations, Timestamp index).
df = prepare(df)

# Helper: aggregate by freq within range
# FREQS maps each pandas frequency alias offered in the dashboard to its display label.
FREQS = {"D": "Daily", "W-MON": "Weekly (Mon start)", "MS": "Monthly (Start)"}

def aggregate(df: pd.DataFrame, start: str, end: str, freq: str):
    """Aggregate prepared sensor data per period and event inside a date range.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose index is a DatetimeIndex named "Timestamp" and which has
        the columns "event", "duration_h" and "precipitationQuantityDiff_mm".
    start, end : str
        Bounds used for label-based slicing of the index (``df.loc[start:end]``).
    freq : str
        pandas frequency alias that defines the periods, e.g. "D", "W-MON", "MS".

    Returns
    -------
    tuple of pd.DataFrame
        ``dur_event`` (Timestamp, event, duration_h, dur_total, share_%),
        ``mm_event`` (Timestamp, event, precip_mm) and
        ``mm_total`` (period, total_mm). Displayed values are rounded to one
        decimal; ``dur_total`` is left unrounded.
    """
    sub = df.loc[start:end]

    # pandas closes and labels weekly bins on the right by default, so "W-MON"
    # would yield Tuesday-to-Monday weeks stamped with their final Monday.
    # Left-closed, left-labelled bins make each week run Monday-to-Sunday and
    # carry its starting Monday, as the "Weekly (Mon start)" option promises.
    # All other aliases keep their pandas defaults.
    bin_kwargs = {"closed": "left", "label": "left"} if str(freq).upper().startswith("W") else {}
    g = pd.Grouper(freq=freq, **bin_kwargs)

    # Duration by event
    dur_event = sub.groupby([g, "event"])["duration_h"].sum().reset_index()

    # Precip by event, rounded for display
    mm_event = (
        sub.groupby([g, "event"])["precipitationQuantityDiff_mm"]
           .sum().round(1).rename("precip_mm").reset_index()
    )

    # Total precip (for trend); same binning as the per-event tables
    mm_total = (
        sub["precipitationQuantityDiff_mm"]
           .resample(freq, **bin_kwargs).sum().reset_index()
           .rename(columns={"precipitationQuantityDiff_mm":"total_mm", "Timestamp":"period"})
    )
    mm_total["total_mm"] = mm_total["total_mm"].round(1)

    # Share % by event (time share): each event's hours relative to the
    # period total, computed before rounding.
    dur_total = dur_event.groupby("Timestamp")["duration_h"].transform("sum")
    dur_event["dur_total"] = dur_total
    dur_event["share_%"] = np.where(dur_total > 0,
                                    100 * dur_event["duration_h"] / dur_total, 0)

    # Round for display
    for col in ["duration_h", "share_%"]:
        dur_event[col] = dur_event[col].round(1)

    return dur_event, mm_event, mm_total

# ------------------------------------------------------------
# 2) Dash app
# Create the Dash application and derive the selectable date bounds from the
# first and last timestamp in the prepared frame's index.
app = Dash(__name__)
min_date = df.index.min().date()
max_date = df.index.max().date()

# Page layout: a title, one row of controls (date range, aggregation
# frequency, metric), two graphs, a summary table and an explanatory note.
# The component ids declared here are the ones the callback's Input/Output
# declarations refer to.
app.layout = html.Div(
    style={"fontFamily":"system-ui","padding":"16px","maxWidth":"1200px","margin":"0 auto"},
    children=[
        html.H2("WS100 – Dynamic Precipitation Dashboard"),
        html.Div([
            # Control 1: date range, initially the full extent of the data.
            html.Div([
                html.Label("Date range"),
                dcc.DatePickerRange(
                    id="date-range", start_date=min_date, end_date=max_date,
                    min_date_allowed=min_date, max_date_allowed=max_date
                ),
            ], style={"display":"inline-block","marginRight":"24px"}),

            # Control 2: aggregation frequency; option values are the FREQS keys, default "MS".
            html.Div([
                html.Label("Aggregation"),
                dcc.Dropdown(
                    id="freq", value="MS",
                    options=[{"label":v, "value":k} for k,v in FREQS.items()],
                    clearable=False, style={"width":"220px"}
                )
            ], style={"display":"inline-block","marginRight":"24px"}),

            # Control 3: which quantity the stacked bar chart shows.
            html.Div([
                html.Label("Metric"),
                dcc.RadioItems(
                    id="metric", value="duration",
                    options=[
                        {"label":"Duration (hours)", "value":"duration"},
                        {"label":"Precipitation (mm)", "value":"precip"},
                        {"label":"Time share (%)", "value":"share"},
                    ],
                    inline=True
                )
            ], style={"display":"inline-block"}),
        ], style={"marginBottom":"12px"}),

        # Figures filled in by the callback.
        dcc.Graph(id="stacked-graph", style={"height":"480px"}),
        dcc.Graph(id="total-graph", style={"height":"360px"}),

        # Summary table; its data and columns are supplied by the callback.
        html.H3("Summary (by period × event)"),
        dash_table.DataTable(
            id="summary-table",
            page_size=12,
            style_table={"overflowX":"auto"},
            style_cell={"padding":"6px", "fontSize":"14px"},
            style_header={"fontWeight":"600"}
        ),
        html.Div("Notes: Duration is computed from the time difference to the next timestamp; "
                 "‘Share’ is duration share per period. Precip negatives are clipped to 0.",
                 style={"marginTop":"8px","color":"#555"})
    ]
)

# ------------------------------------------------------------
# 3) Callbacks
@app.callback(
    [Output("stacked-graph","figure"),
     Output("total-graph","figure"),
     Output("summary-table","data"),
     Output("summary-table","columns")],
    [Input("date-range","start_date"),
     Input("date-range","end_date"),
     Input("freq","value"),
     Input("metric","value")]
)
def update(start_date, end_date, freq, metric):
    """Rebuild both figures and the summary table for the current control values.

    Parameters
    ----------
    start_date, end_date
        Bounds of the selected date range, passed through to ``aggregate``.
    freq
        Frequency alias chosen in the dropdown (a key of ``FREQS``).
    metric
        "duration", "precip" or anything else (treated as "share").

    Returns
    -------
    tuple
        ``(fig_stack, fig_total, data, columns)`` in the order of the
        callback's ``Output`` declarations.
    """
    dur_event, mm_event, mm_total = aggregate(df, start_date, end_date, freq)

    # Pick the value column, axis title and source table for the stacked bars.
    if metric == "duration":
        ycol, ytitle, source = "duration_h", "Duration (hours)", dur_event
    elif metric == "precip":
        ycol, ytitle, source = "precip_mm", "Precipitation (mm)", mm_event
    else:  # share
        ycol, ytitle, source = "share_%", "Time share (%)", dur_event
    df_plot = source.rename(columns={"Timestamp":"period"})

    fig_stack = px.bar(
        df_plot, x="period", y=ycol, color="event",
        barmode="stack", text_auto=".1f",
        labels={"period":"Period", ycol:ytitle, "event":"Event"},
        title=f"{FREQS.get(freq, freq)} — {ytitle}"
    )
    if metric == "share":
        fig_stack.update_yaxes(range=[0, 100])

    # Total precip trend
    fig_total = px.line(
        mm_total, x="period", y="total_mm",
        markers=True, labels={"period":"Period","total_mm":"Total Precip (mm)"},
        title=f"{FREQS.get(freq, freq)} — Total Precipitation"
    )

    # Summary table: one row per period, one column per (metric, event) pair.
    pivot_dur = dur_event.pivot_table(index="Timestamp", columns="event", values="duration_h", fill_value=0)
    pivot_mm  = mm_event.pivot_table(index="Timestamp", columns="event", values="precip_mm",  fill_value=0)
    out = pd.concat({"Dur(h)": pivot_dur, "MM": pivot_mm}, axis=1)

    # Flatten the two-level header while the period is still the index. If the
    # index is reset first, the period column gets the tuple label
    # ("period", ""), which joins to "period / " and cannot be addressed
    # reliably by the plain name "period" for sorting and formatting.
    out.columns = [" / ".join(col) for col in out.columns]
    out = out.rename_axis("period").reset_index().sort_values("period")
    out["period"] = out["period"].dt.strftime("%Y-%m-%d")

    # DataTable column specs: display name and id are the flattened labels.
    columns = [{"name": col, "id": col} for col in out.columns]
    data = out.round(1).to_dict("records")

    return fig_stack, fig_total, data, columns

# Start the Dash server in debug mode when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=True)
