# GRACE TWSA (0.25°) and Downscaled (0.05°) Basin-Wise Analysis
This notebook loads GRACE 0.25° and downscaled 0.05° datasets, overlays them on 12 hydrological basins, computes average time series, and visualizes them. It also exports basin-wise time series to Excel files.

In [1]:
import h5py
import xarray as xr
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point
from shapely.validation import make_valid
from matplotlib.ticker import MaxNLocator
import matplotlib.dates as mdates
import os

# Ensure output directory exists
os.makedirs("basin_series_output", exist_ok=True)

In [3]:
# Load GRACE 0.25° data (h5)
with h5py.File('grace025.h5', 'r') as f:
    grace025 = f['data'][:]  # (time, lon, lat)
grace025 = np.swapaxes(grace025, 1, 2)  # -> (time, lat, lon)

# Load downscaled 0.05° data (netCDF)
ds_down = xr.open_dataset('downscaled_grace_tws_data_with_uncertainty_gan_danet.nc')
downscaled005 = ds_down['data'].values  # (time, lat, lon)
lat005 = ds_down['lat'].values
lon005 = ds_down['lon'].values

# GRACE 0.25° grid setup
lat_start, lon_start = 24.125, 65.125
resolution = 0.25
lat_count, lon_count = grace025.shape[1:]
lat025 = np.round(lat_start + np.arange(lat_count) * resolution, 5)
lon025 = np.round(lon_start + np.arange(lon_count) * resolution, 5)

# Create mesh for spatial referencing
lon_grid025, lat_grid025 = np.meshgrid(lon025, lat025)
lon_grid005, lat_grid005 = np.meshgrid(lon005, lat005)

In [4]:
# Load shapefile of TP basins
gdf_polygons = gpd.read_file('Union/TP_basins.shp')
if gdf_polygons.crs != 'EPSG:4326':
    gdf_polygons = gdf_polygons.to_crs('EPSG:4326')
gdf_polygons = gdf_polygons[['BasinName', 'geometry']]
gdf_polygons = gdf_polygons.dropna(subset=['geometry'])

In [5]:
# Loop over each basin and extract time series
import datetime
dates = pd.date_range(start='2002-08', periods=grace025.shape[0], freq='M')
from tqdm import tqdm

for idx, basin in tqdm(gdf_polygons.iterrows(), total=len(gdf_polygons)):
    name = basin['BasinName']
    polygon = make_valid(basin['geometry'])

    # 0.25° grid points in basin
    points025 = [Point(x, y) for x, y in zip(lon_grid025.ravel(), lat_grid025.ravel())]
    mask025 = np.array([polygon.contains(pt) for pt in points025]).reshape(lat_grid025.shape)

    # 0.05° grid points in basin
    points005 = [Point(x, y) for x, y in zip(lon_grid005.ravel(), lat_grid005.ravel())]
    mask005 = np.array([polygon.contains(pt) for pt in points005]).reshape(lat_grid005.shape)

    # Extract mean time series
    series_grace = np.nanmean(grace025[:, mask025], axis=1)
    series_down = np.nanmean(downscaled005[:, mask005], axis=1)

    # Plot
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(dates, series_grace, label='GRACE 0.25°', color='blue')
    ax.plot(dates, series_down, label='Downscaled 0.05°', color='red')
    ax.set_title(f"Basin: {name}")
    ax.set_xlabel("Date")
    ax.set_ylabel("TWSA (cm)")
    ax.legend()
    ax.xaxis.set_major_locator(mdates.YearLocator(3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f"basin_series_output/{name.replace(' ', '_')}_timeseries.png", dpi=300)
    plt.close()

    # Save to Excel
    df_out = pd.DataFrame({
        'Date': dates,
        'GRACE_025': series_grace,
        'Downscaled_005': series_down
    })
    df_out.to_excel(f"basin_series_output/{name.replace(' ', '_')}_timeseries.xlsx", index=False)

  dates = pd.date_range(start='2002-08', periods=grace025.shape[0], freq='M')
100%|██████████| 12/12 [09:24<00:00, 47.03s/it]
