# Water Demand Analysis - Composition and Time Series

This notebook analyzes scenario water demand in two parts:
1. Water demand composition by category (treemap)
2. Total water demand time series across scenarios


In [None]:
from scripts.query_scenarios import ScenarioQuery

# Location handed to ScenarioQuery (presumably the folder holding the
# parquet scenario data, relative to this notebook — confirm)
DATA_DIR = "../data_parquet"

# Build the query engine that the later cells read from
query = ScenarioQuery(DATA_DIR)

In [None]:
# Call list_variables() and show whatever it returns as the cell output
available_variables = query.list_variables()
available_variables

In [None]:
# The four water demand series requested from the query engine
demand_variables = [
    "OA water demand province sum",
    "domestic water demand province sum",
    "irrigation water demand province sum",
    "production water demand province sum",
]

# No filters are applied; include_params=False presumably leaves the
# scenario parameter columns out of the result (confirm against ScenarioQuery)
water_demand_data = query.get_series(
    variables=demand_variables,
    filters=None,
    include_params=False,
)

# Preview the first rows
water_demand_data.head()

In [None]:
# Query variable name -> short display name.
# Insertion order matters: WATER_CATEGORIES below is derived from it.
CATEGORY_NAMES = {
    "irrigation water demand province sum": "Agricultural Irrigation",
    "production water demand province sum": "Industrial Production",
    "OA water demand province sum": "Animal husbandry and fishery",
    "domestic water demand province sum": "Urban & Rural Domestic",
}

# Query variable names of the water demand categories, in mapping order
WATER_CATEGORIES = list(CATEGORY_NAMES)

# Display name -> hex color used when plotting that category
CATEGORY_COLORS = {
    "Agricultural Irrigation": "#1f77b4",
    "Industrial Production": "#ff7f0e",
    "Animal husbandry and fishery": "#2ca02c",
    "Urban & Rural Domestic": "#d62728",
}

In [None]:
import polars as pl
import plotly.graph_objects as go

# Sum demand per variable over every row of water_demand_data, map each
# variable to its display name, and express each total as a percentage of
# the grand total. Rows are ordered from largest to smallest total.
composition_data = (
    water_demand_data.group_by("variable")
    .agg(pl.col("value").sum().alias("total_demand"))
    .with_columns(
        pl.col("variable").cast(pl.Utf8).replace(CATEGORY_NAMES).alias("category")
    )
    .with_columns(
        (pl.col("total_demand") / pl.col("total_demand").sum() * 100).alias(
            "percentage"
        )
    )
    .sort("total_demand", descending=True)
)

print("Water Demand Composition:")
print(composition_data)

# Pull the columns out once; they feed labels, tile text and colors below
categories = composition_data["category"].to_list()
percentages = composition_data["percentage"].to_list()

# Treemap with every category as a top-level tile sized by its total demand
fig_composition = go.Figure(
    go.Treemap(
        labels=categories,
        parents=[""] * len(categories),  # All at root level
        values=composition_data["total_demand"].to_list(),
        text=[f"{cat}<br>{pct:.1f}%" for cat, pct in zip(categories, percentages)],
        # The text already starts with the category name. Without this,
        # plotly draws label + text and the name shows twice in each tile.
        textinfo="text",
        textposition="middle center",
        marker=dict(
            colors=[CATEGORY_COLORS[cat] for cat in categories],
            line=dict(width=2, color="white"),
        ),
        # percentRoot is a 0-1 fraction, so use the d3 percent format
        # (".1%" multiplies by 100 and appends "%") instead of ".1f" + "%".
        hovertemplate="<b>%{label}</b><br>Total Demand: %{value:.2e}<br>Percentage: %{percentRoot:.1%}<extra></extra>",
    )
)

fig_composition.update_layout(
    title="Composition", height=500, margin=dict(t=80, l=10, r=10, b=10)
)

fig_composition.show()

## 3. Water Demand Composition (Proportional Area Chart)

Sum water demand over all time periods and scenarios for each category, then visualize each category's share of the total as proportional rectangles (a treemap).


In [None]:
# Total demand (all categories summed) for each scenario at each time step
total_demand_ts = (
    water_demand_data.group_by(["scenario_name", "step", "time"])
    .agg(pl.col("value").sum().alias("total_demand"))
    .sort(["scenario_name", "step"])
)

# Summary across scenarios at each time point.
# ci_lower / ci_upper are mean ± 1.96 * std: an approximate 95% band of the
# spread between scenarios (under a normality assumption), not a confidence
# interval of the mean, which would divide std by sqrt(n_scenarios).
# NOTE: std (and therefore the band) is expected to be null where only one
# scenario contributes to a time point — confirm against polars' std default.
ts_stats = (
    total_demand_ts.group_by(["step", "time"])
    .agg(
        [
            pl.col("total_demand").mean().alias("mean"),
            pl.col("total_demand").std().alias("std"),
            pl.col("total_demand").min().alias("min"),
            pl.col("total_demand").max().alias("max"),
            pl.col("total_demand").quantile(0.05).alias("p05"),
            pl.col("total_demand").quantile(0.95).alias("p95"),
            # Non-null scenario totals contributing to this time point
            pl.col("total_demand").count().alias("n_scenarios"),
        ]
    )
    .with_columns(
        [
            (pl.col("mean") - 1.96 * pl.col("std")).alias("ci_lower"),
            (pl.col("mean") + 1.96 * pl.col("std")).alias("ci_upper"),
        ]
    )
    .sort("step")
)

# Count distinct scenarios directly. Indexing ts_stats["n_scenarios"][0]
# fails on an empty result and only reflects the first time step.
n_scenarios = total_demand_ts["scenario_name"].n_unique()

print(f"Time series statistics calculated for {ts_stats.height} time points")
print(f"Number of scenarios: {n_scenarios}")
ts_stats.head()

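## 4. Total Water Demand Time Series

Aggregate all categories to get total water demand over time. If multiple scenarios exist, summarize them at each time step by their mean with a ±1.96 standard deviation band (an approximate 95% range of the spread across scenarios).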


In [None]:
from scripts.viz_helpers import quick_plot

# Variable to plot and the bounds passed as time_range
consumption_variable = "water consumption of province in YRB sum"
plot_time_range = (2020, 2100)

# Plot the series through the shared query engine, with no filters applied
quick_plot(
    query,
    variable=consumption_variable,
    filters=None,
    time_range=plot_time_range,
)