# Scenario Query Examples

This notebook demonstrates how to use `scripts/query_scenarios.py` for exploratory analysis.


In [1]:
import json
import pathlib
import sys

# Make the repository root importable so `scripts.query_scenarios` resolves.
sys.path.insert(0, '..')

import polars as pl

from scripts.query_scenarios import ScenarioQuery, quick_query, compare_params


## Example 1: List available variables and parameters


In [2]:
# Open the scenario data directory; `query` is reused by the filtering cell below.
query = ScenarioQuery("../data_parquet")

# One bullet per variable name reported by the query object.
print("Available variables:")
for variable_name in query.list_variables():
    print(f"  - {variable_name}")

# One bullet per parameter column, followed by its sorted distinct values.
print("\nAvailable parameters:")
for param_name in query.param_cols:
    param_column = query.scenarios.get_column(param_name)
    distinct_values = param_column.unique().sort().to_list()
    print(f"  - {param_name}: {distinct_values}")


Available variables:
  - GDP per capita
  - OA water demand province sum
  - Total population
  - YRB WSI
  - YRB available surface water
  - domestic water demand province sum
  - hydrologic station discharge[lijin]
  - irrigation water demand province sum
  - production water demand province sum
  - sediment load[lijin]
  - water consumption of province in YRB sum

Available parameters:
  - Fertility Variation: [1.6, 1.65, 1.7, 1.75, 1.8]
  - water-saving irrigation efficiency ratio: [0.8, 0.85, 0.9, 0.95, 1.0]
  - fire generation share province target: [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]
  - Ecological water flow variable: [0.2, 0.25, 0.3]
  - Climate change scenario switch for water yield: [1, 2, 3]
  - Diet change scenario switch: [1, 2, 3]


## Example 2: Filter scenarios by constraints


In [3]:
# Pin fertility and the climate scenario; the parameters not listed here are
# left unconstrained, so the result enumerates their remaining combinations.
scenario_constraints = {
    "Fertility Variation": 1.6,
    "Climate change scenario switch for water yield": 1,
}
filtered = query.filter_scenarios(scenario_constraints)

print(f"Matching scenarios: {filtered.height}")
print(f"Columns: {filtered.columns}")
filtered.head()


Matching scenarios: 315
Columns: ['scenario_name', 'Fertility Variation', 'water-saving irrigation efficiency ratio', 'fire generation share province target', 'Ecological water flow variable', 'Climate change scenario switch for water yield', 'Diet change scenario switch']


scenario_name,Fertility Variation,water-saving irrigation efficiency ratio,fire generation share province target,Ecological water flow variable,Climate change scenario switch for water yield,Diet change scenario switch
cat,f64,f64,f64,f64,i64,i64
"""sc_0""",1.6,0.8,0.1,0.2,1,1
"""sc_1""",1.6,0.8,0.1,0.2,1,2
"""sc_2""",1.6,0.8,0.1,0.2,1,3
"""sc_9""",1.6,0.8,0.1,0.25,1,1
"""sc_10""",1.6,0.8,0.1,0.25,1,2


## Example 3: Get time series under constraints


In [4]:
# Fetch the "Total population" series for every scenario whose fertility is 1.6,
# passing time_range=(2020, 2050) to limit the period returned.
population_request = dict(
    variable="Total population",
    filters={"Fertility Variation": 1.6},
    data_dir="../data_parquet",
    time_range=(2020, 2050),
)
data = quick_query(**population_request)

print(f"Shape: {data.shape}")
data.head()


Shape: (454545, 11)


scenario_name,step,value,variable,time,Fertility Variation,water-saving irrigation efficiency ratio,fire generation share province target,Ecological water flow variable,Climate change scenario switch for water yield,Diet change scenario switch
cat,u32,f64,cat,f64,f64,f64,f64,f64,i64,i64
"""sc_0""",624,422875100.0,"""Total population""",2020.0,1.6,0.8,0.1,0.2,1,1
"""sc_1""",624,422875100.0,"""Total population""",2020.0,1.6,0.8,0.1,0.2,1,2
"""sc_2""",624,422875100.0,"""Total population""",2020.0,1.6,0.8,0.1,0.2,1,3
"""sc_3""",624,422875100.0,"""Total population""",2020.0,1.6,0.8,0.1,0.2,2,1
"""sc_4""",624,422875100.0,"""Total population""",2020.0,1.6,0.8,0.1,0.2,2,2


## Example 4: Compare parameter impact (wide pivot for easy plotting)


In [5]:
# Hold every parameter constant except irrigation efficiency, then pivot on
# that one parameter so each of its values becomes a column of the result.
wsi_fixed_params = {
    "Fertility Variation": 1.6,
    "fire generation share province target": 0.1,
    "Ecological water flow variable": 0.2,
    "Climate change scenario switch for water yield": 1,
    "Diet change scenario switch": 1,
}
comparison = compare_params(
    variable="YRB WSI",
    fixed_params=wsi_fixed_params,
    vary_param="water-saving irrigation efficiency ratio",
    data_dir="../data_parquet",
    time_range=(2020, 2100),
)

print(f"Shape: {comparison.shape}")
comparison.head()


Shape: (1281, 6)


time,0.8,0.85,0.9,0.95,1.0
f64,f64,f64,f64,f64,f64
2020.0,0.7122713,0.7122713,0.7122713,0.7122713,0.7122713
2020.0625,0.709625,0.709625,0.709625,0.709625,0.709625
2020.125,0.706997,0.706997,0.706997,0.706997,0.706997
2020.1875,0.704378,0.704378,0.704378,0.704378,0.704378
2020.25,0.7017682,0.7017682,0.7017682,0.7017682,0.7017682


## Example 5: Using safe variable names (original ↔ safe)

Safe variable names are generated for stable file/column keys:
- Lowercase; non-alphanumerics replaced by `_`; multiple `_` collapsed; leading/trailing `_` stripped (e.g. `sediment load[lijin]` → `sediment_load_lijin`).
- Mapping file: `data_parquet/variables_map.json` (original → safe).
- You can query by original names (human-friendly) or safe names (stable keys).


In [6]:
# Load the original -> safe variable-name mapping stored under ../data_parquet.
# The file is treated as optional: when it is absent the mapping is empty.
# (`json` and `pathlib` are imported in the notebook's first cell.)
map_path = pathlib.Path("../data_parquet/variables_map.json")
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding (the default for read_text()).
variables_map = json.loads(map_path.read_text(encoding="utf-8")) if map_path.exists() else {}
variables_map


{'hydrologic station discharge[lijin]': 'hydrologic_station_discharge_lijin',
 'YRB available surface water': 'yrb_available_surface_water',
 'sediment load[lijin]': 'sediment_load_lijin',
 'irrigation water demand province sum': 'irrigation_water_demand_province_sum',
 'production water demand province sum': 'production_water_demand_province_sum',
 'OA water demand province sum': 'oa_water_demand_province_sum',
 'domestic water demand province sum': 'domestic_water_demand_province_sum',
 'GDP per capita': 'gdp_per_capita',
 'Total population': 'total_population',
 'YRB WSI': 'yrb_wsi',
 'water consumption of province in YRB sum': 'water_consumption_of_province_in_yrb_sum'}

In [7]:
# Query by original name (human-friendly).
# quick_query receives the data directory itself, so no separate ScenarioQuery
# instance is needed in this cell.
orig = quick_query(
    variable="hydrologic station discharge[lijin]",
    filters={"Fertility Variation": 1.6},
    data_dir="../data_parquet",
    time_range=(2020, 2020)
)
# Show only the identifying columns and the value for a compact preview.
orig.select(["scenario_name", "variable", "time", "value"]).head()


scenario_name,variable,time,value
cat,cat,f64,f64
"""sc_0""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_1""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_2""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_3""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_4""","""hydrologic station discharge[l…",2020.0,38.932888


In [8]:
# Same lookup as the original-name query, but addressed through the safe key.
# The `variable` column of the result still shows the original name.
safe_request = dict(
    variable="hydrologic_station_discharge_lijin",
    filters={"Fertility Variation": 1.6},
    data_dir="../data_parquet",
    time_range=(2020, 2020),
)
safe = quick_query(**safe_request)

preview_columns = ["scenario_name", "variable", "time", "value"]
safe.select(preview_columns).head()


scenario_name,variable,time,value
cat,cat,f64,f64
"""sc_0""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_1""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_2""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_3""","""hydrologic station discharge[l…",2020.0,38.932888
"""sc_4""","""hydrologic station discharge[l…",2020.0,38.932888
