In [15]:
# aggregate_climate_by_adm3.py

import duckdb
import pandas as pd

# Build one row per (ADM3 region, date) from the gridded climate table and
# write the result to output/climate_agg_by_adm3.parquet.

# Grid points are matched to regions by exact latitude/longitude equality.
# NOTE(review): exact float equality silently drops points whose coordinates
# differ by rounding -- confirm both tables are built from the same grid.
# NOTE(review): SUM(c.tp) adds precipitation over every grid point mapped to
# a region, so the total scales with the number of points -- confirm intended.
#
# INNER JOIN replaces the original LEFT JOIN: unmatched climate rows were
# discarded by the WHERE clause anyway. The WHERE clause is kept so mapping
# rows that carry a NULL GID_3 are still excluded.
query = """
SELECT
    m.GID_3 as gid,
    m.NAME_3 as name,
    c.date,
    AVG(c.t2m) AS avg_temperature,
    SUM(c.tp) AS total_precipitation,
    AVG(c.swvl1) AS avg_soil_moisture,
    AVG(c.u10) AS avg_wind_u,
    AVG(c.v10) AS avg_wind_v,
    AVG(c.ssrd) AS avg_solar_radiation
FROM climate_data AS c
INNER JOIN point_to_region_mapping AS m
    ON c.latitude = m.latitude AND c.longitude = m.longitude
WHERE m.GID_3 IS NOT NULL
GROUP BY m.GID_3, m.NAME_3, c.date
ORDER BY m.GID_3, c.date
"""

# Open the database only for as long as the query needs it. The context
# manager closes the connection even if the query raises, so the database
# file is not left held open for the rest of the session.
with duckdb.connect('db/india_model.ddb') as con:
    con.execute("INSTALL spatial; LOAD spatial;")
    df = con.execute(query).fetchdf()

# Keep only the calendar date (drops any time-of-day component).
df['date'] = pd.to_datetime(df['date']).dt.date

# Convert temperature from Kelvin to Celsius. The column is always present
# because the query above defines it.
df['avg_temperature'] = df['avg_temperature'] - 273.15

# Add primary key ("<gid>_<date>") and human-readable title ("<name> - <date>").
date_text = df['date'].astype(str)
df['primary_key'] = df['gid'].astype(str) + '_' + date_text
df['title'] = df['name'].astype(str) + ' - ' + date_text

# Move primary_key to the first column and title to the second.
leading = ['primary_key', 'title']
cols = leading + [col for col in df.columns if col not in leading]
df = df[cols]

df.to_parquet("output/climate_agg_by_adm3.parquet")

print("Aggregated climate data per ADM3 saved to output/climate_agg_by_adm3.parquet with primary_key and title columns added. Temperature is now in Celsius.")

Aggregated climate data per ADM3 saved to output/climate_agg_by_adm3.parquet with primary_key and title columns added. Temperature is now in Celsius.
