This notebook creates precipitation time series from the MSWEP dataset. It uses the basin shapefile to extract precipitation over each basin and then computes the spatial mean to obtain daily precipitation time series.

In [10]:
import os
import xarray as xr
import geopandas as gpd
import rioxarray
import glob
import pandas as pd
from pathlib import Path
from tqdm import tqdm

In [11]:
#Hides line: getfattr: /inputs/MSWEP_V280/Past/Daily/1989001.nc: Operation not supported

import os, sys 
sys.stderr = open(os.devnull, "w");

In [12]:
path_data = '/inputs/MSWEP_V280/Past/Daily'
path_shapefiles = Path('./shapefiles')

In [13]:
# List all files
all_files = sorted(glob.glob(f'{path_data}/*.nc'))

In [14]:
# gdf = gpd.read_file(path_shapefiles)
# gdf

In [15]:
# # Reproject if needed
# if gdf.crs != "EPSG:4326":
#     gdf = gdf.to_crs("EPSG:4326")

In [16]:
# Filter by year 1989–2008
files = [f for f in all_files if 1989 <= int(os.path.basename(f)[:4]) <= 2008]

print(f"Total files to process: {len(files)}")

Total files to process: 7305


In [17]:
basins_mapping = {
    'paso_mazangano': 'CAMELS_UY_10',
    'picada_de_coelho': 'CAMELS_UY_7',
    'sarandi_del_yi': 'CAMELS_UY_12',
    'paso_de_las_toscas': 'CAMELS_UY_8',
    'paso_de_las_piedras_rn': 'CAMELS_UY_15',
    'paso_del_borracho': 'CAMELS_UY_6',
    'bequelo': 'CAMELS_UY_16',
    'paso_de_las_piedras': 'CAMELS_UY_2',
    'paso_baltasar': 'CAMELS_UY_5',
    'fraile_muerto': 'CAMELS_UY_11',
    'paso_de_los_mellizos': 'CAMELS_UY_14',
    'paso_manuel_diaz': 'CAMELS_UY_3',
    'paso_aguiar': 'CAMELS_UY_9',
    'paso_de_la_compania': 'CAMELS_UY_1',
    'tacuarembo': 'CAMELS_UY_4',
    'durazno': 'CAMELS_UY_13'
}

In [22]:
shapefiles = os.listdir(path_shapefiles)
shapefiles=shapefiles[1:]
shapefiles

['paso_de_las_piedras.zip',
 'paso_de_la_compania.zip',
 'fraile_muerto.zip',
 'paso_aguiar.zip',
 'mercedes.zip',
 'bequelo.zip',
 'sarandi_del_yi.zip',
 'paso_manuel_diaz.zip',
 'paso_de_los_mellizos.zip',
 'paso_de_las_toscas.zip',
 'paso_del_borracho.zip',
 'picada_de_coelho.zip',
 'paso_mazangano.zip',
 'paso_de_las_piedras_rn.zip',
 'paso_baltasar.zip',
 'tacuarembo.zip']

In [23]:
# for shapefile in os.listdir(path_shapefiles):
for shapefile in shapefiles:
    print("Processing shapefile:", shapefile)
    results = []

    gdf = gpd.read_file(path_shapefiles / shapefile)

    # Reproject if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs("EPSG:4326")

    # for i, f in enumerate(files, start=1):
    for f in tqdm(files, desc=f"Files for {shapefile}", file=sys.stdout):
        # print("Opening:", f)
        ds = xr.open_dataset(f)
        da = ds["precipitation"]

        # Rename coordinates and assign CRS
        da = da.rename({"lat": "y", "lon": "x"})
        da = da.rio.write_crs("EPSG:4326")

        # Clip to basin
        da_clip = da.rio.clip(
            gdf.geometry,
            gdf.crs,
            drop=True,
            all_touched=True
        )

        # Compute basin mean
        mean_val = da_clip.mean(dim=("y", "x"), skipna=True).values.item()

        # Extract date
        date = pd.to_datetime(ds.time.values[0])

        results.append({"date": date, "precip_mm": mean_val})

        ds.close()

        # print(f"Processed file {i}/{len(files)}: {f.split('/')[-1]} | Basin mean: {mean_val:.2f} mm")
    
    df_results = pd.DataFrame(results)
    df_results = df_results.sort_values("date").set_index("date")

    basin_key = os.path.splitext(shapefile)[0].lower().replace(" ", "_")
    basin = basins_mapping.get(basin_key, basin_key)
    
    out_path = Path(f'./precip_timeseries/{basin}_precip.csv')
    df_results.to_csv(out_path)


Processing shapefile: paso_de_las_piedras.zip
Files for paso_de_las_piedras.zip: 100%|██████████| 7305/7305 [1:05:46<00:00,  1.85it/s]
Processing shapefile: paso_de_la_compania.zip
Files for paso_de_la_compania.zip: 100%|██████████| 7305/7305 [1:04:18<00:00,  1.89it/s]
Processing shapefile: fraile_muerto.zip
Files for fraile_muerto.zip: 100%|██████████| 7305/7305 [1:04:44<00:00,  1.88it/s]
Processing shapefile: paso_aguiar.zip
Files for paso_aguiar.zip: 100%|██████████| 7305/7305 [1:06:34<00:00,  1.83it/s]
Processing shapefile: mercedes.zip
Files for mercedes.zip: 100%|██████████| 7305/7305 [1:06:19<00:00,  1.84it/s]
Processing shapefile: bequelo.zip
Files for bequelo.zip: 100%|██████████| 7305/7305 [1:04:25<00:00,  1.89it/s]
Processing shapefile: sarandi_del_yi.zip
Files for sarandi_del_yi.zip: 100%|██████████| 7305/7305 [1:05:54<00:00,  1.85it/s]
Processing shapefile: paso_manuel_diaz.zip
Files for paso_manuel_diaz.zip: 100%|██████████| 7305/7305 [1:05:35<00:00,  1.86it/s]
Processing