# Add total basin area to lumped shapefiles
This brings them in line with the distributed files that have sub-basin areas in the `unitarea` field.

In [23]:
import warnings
import geopandas as gpd
from pathlib import Path

In [6]:
# Data location: root folder of the existing data. The lumped basin
# shapefiles are read from below this path and it is never written to here.
cs_main_folder = Path("/scratch/gwf/gwf_cmt/wknoben/camels-spat-upload")

In [7]:
# Destination location: root folder for the updated shapefiles. The
# sub-folder layout of the data location is reproduced below this path.
cs_update_folder = Path("/scratch/gwf/gwf_cmt/wknoben/camels-spat-upload-updates")

In [8]:
# Specify the folder structure. Basin folders are looked up under
# <root>/<part1>/<part2>/<part3>/<basin_id>, where <part2> takes each of the
# values listed in `shape_path_parts2` in turn.
shape_path_part1 = "shapefiles"
shape_path_parts2 = ["headwater", "macro-scale", "meso-scale"]
shape_path_part3 = "shapes-lumped"

In [9]:
# Specify the area-computation CRS. Basin geometries are reprojected to this
# CRS before their area is taken; the files themselves keep their original CRS.
# NOTE(review): ESRI:102008 should be North America Albers Equal Area Conic
# with units of metres -- confirm, since the km^2 conversion relies on it.
ea_crs = "ESRI:102008"

In [27]:
# For every lumped basin shapefile under the data location: compute the basin
# area, store it in a `unitarea` attribute, and write the result to the same
# relative path under the destination location. Source files are not modified.
for shape_path_part2 in shape_path_parts2:

    # 1. Find the basin folders for this category
    shape_middle = f"{shape_path_part1}/{shape_path_part2}/{shape_path_part3}"
    basin_paths = [f for f in (cs_main_folder / shape_middle).iterdir() if f.is_dir()]

    # 2. Loop over the identified folders
    for basin_path in basin_paths:

        # 2.1. Extract the basin ID
        basin_id = basin_path.name  # just the final part, e.g. USA_08164300

        # 2.2. Load the lumped basin shapefile
        bas = gpd.read_file(basin_path / f"{basin_id}_lumped.shp")

        # 2.3. Calculate the area in the equal-area CRS. Dividing by 10**6
        # converts m^2 to km^2 when the CRS units are metres.
        # NOTE(review): only the first feature's area is used; this assumes
        # each lumped file holds exactly one basin polygon -- confirm.
        area = (bas.to_crs(ea_crs).area / 10**6).iloc[0]

        # 2.4. Add to the geodataframe and keep only the wanted columns, in
        # order. The selection already discards every other attribute
        # (including 'FID'), so no separate drop is needed; an explicit
        # drop would raise KeyError for files that have no 'FID' column.
        bas['unitarea'] = area
        bas = bas[['unitarea', 'geometry']]

        # 2.5. Ensure the output directory exists, and save to file
        des_folder = cs_update_folder / shape_middle / basin_id
        des_folder.mkdir(exist_ok=True, parents=True)
        with warnings.catch_warnings():
            # Silence FutureWarnings raised while writing the file
            warnings.simplefilter("ignore", category=FutureWarning)
            bas.to_file(des_folder / f"{basin_id}_lumped.shp")