### Select lakes in HMA subregions/basins from the global dataset

In [50]:
import geopandas as gpd


In [51]:
# Input layers. All paths are relative, so they resolve against the kernel's
# current working directory.

# Global lake polygons (shapefile).
path_lakes_global = 'data/water-extent/lakes/HydroLAKES_polys_v10_shp/HydroLAKES_polys_v10.shp'

# HMA subregion polygons (GeoPackage).
path_hma_gtng = 'data/hma-extent/HMA/hma_gtng_202307_subregions.gpkg'

# HMA basin polygons (GeoPackage); presumably level-4 basins, judging by the file name.
path_hma_basins_l4 = 'data/hma-extent/basins/hma_Lehner2013_levels/hma_hybas_as_lev04_v1c.gpkg'


In [52]:
## Read data
# Load the three vector layers from the paths configured above.
hma_region_gdf = gpd.read_file(path_hma_gtng)
hma_basin_gdf = gpd.read_file(path_hma_basins_l4)
global_lakes_gdf = gpd.read_file(path_lakes_global)

# Print the (rows, columns) of the lake table, then preview its first two records.
n_rows, n_cols = global_lakes_gdf.shape
print((n_rows, n_cols))
global_lakes_gdf.head(2)


(1427688, 22)


Unnamed: 0,Hylak_id,Lake_name,Country,Continent,Poly_src,Lake_type,Grand_id,Lake_area,Shore_len,Shore_dev,...,Vol_src,Depth_avg,Dis_avg,Res_time,Elevation,Slope_100,Wshd_area,Pour_long,Pour_lat,geometry
0,1,Caspian Sea,Russia,Europe,SWBD,1,0,377001.91,15829.37,7.27,...,1,200.5,8110.642,107883.0,-29,-1.0,1404108.0,47.717708,45.591934,"POLYGON ((49.96181 37.43847, 49.96457 37.44022..."
1,2,Great Bear,Canada,North America,CanVec,1,0,30450.64,5331.72,8.62,...,1,72.2,535.187,47577.7,145,-1.0,147665.4,-123.505546,65.138384,"POLYGON ((-119.78782 67.03574, -119.78637 67.0..."


In [53]:
def _select_intersecting(features_gdf, zones_gdf):
    """Return the rows of ``features_gdf`` that intersect any geometry in ``zones_gdf``.

    Parameters
    ----------
    features_gdf : GeoDataFrame
        Layer to filter (here: the global lake polygons).
    zones_gdf : GeoDataFrame
        Layer whose dissolved extent defines the area of interest.

    Returns
    -------
    GeoDataFrame
        Subset of ``features_gdf`` in its original row order, with its original index.
    """
    # union_all() yields a bare shapely geometry that carries no CRS, so a CRS
    # mismatch between the two layers would be compared silently. Bring the
    # zones into the CRS of the features first (only when both are defined).
    if (
        features_gdf.crs is not None
        and zones_gdf.crs is not None
        and zones_gdf.crs != features_gdf.crs
    ):
        zones_gdf = zones_gdf.to_crs(features_gdf.crs)
    zone_union = zones_gdf.union_all()

    # Use the spatial index: only features whose bounding box hits the zone are
    # tested exactly, instead of evaluating every feature against the union.
    hit_positions = features_gdf.sindex.query(zone_union, predicate='intersects')

    # The index returns integer positions in no guaranteed order; sort them so
    # the result keeps the row order a boolean mask would have produced.
    return features_gdf.iloc[sorted(hit_positions)]


# Lakes intersecting the dissolved HMA basins / HMA subregions, respectively.
hma_basin_lake_gdf = _select_intersecting(global_lakes_gdf, hma_basin_gdf)
hma_region_lake_gdf = _select_intersecting(global_lakes_gdf, hma_region_gdf)


In [54]:
# Optional export of the two lake subsets to GeoPackage files, plus a preview of
# the basin subset. Every line is currently commented out, so running this cell
# writes nothing and displays nothing; uncomment the lines below to save.
# hma_basin_lake_gdf.to_file(filename='data/water-extent/lakes/HydroLakes_v10_hma_basin.gpkg', driver='GPKG')
# hma_region_lake_gdf.to_file(filename='data/water-extent/lakes/HydroLakes_v10_hma_region.gpkg', driver='GPKG')
# hma_basin_lake_gdf.head(2)


#### Statistics of the lakes in the HMA basins

In [57]:
# Pair every basin with each lake polygon it intersects. The inner join yields
# one row per (basin, lake) pair, so a lake that intersects several basins is
# listed (and later counted) once for each of them.
joined_gdf = hma_basin_gdf.sjoin(
    hma_basin_lake_gdf,
    how='inner',
    predicate='intersects',
)

# Per-basin totals: number of joined lakes and the sum of their 'Lake_area' values.
lakes_by_basin = joined_gdf.groupby('HYBAS_ID')
hma_basin_stats = lakes_by_basin.agg(
    lake_number=('Hylak_id', 'size'),
    lake_area=('Lake_area', 'sum'),
).reset_index()
hma_basin_stats.head()

Unnamed: 0,HYBAS_ID,lake_number,lake_area
0,4040023060,202,562.65
1,4040023810,426,1034.32
2,4040033430,532,1297.73
3,4040050210,13,7.88
4,4040050240,552,8289.21


In [None]:
# Attach the per-basin lake statistics to the basin polygons. The left merge
# keeps every basin, including those that have no row in hma_basin_stats.
basin_merged = hma_basin_gdf.merge(hma_basin_stats, on='HYBAS_ID', how='left')
basin_merged = basin_merged[['HYBAS_ID', 'lake_number', 'lake_area', 'geometry']]

# Basins without any joined lake come out of the left merge as NaN (which also
# turns the count into a float column). Record them as zero lakes / zero area
# and restore the integer dtype of the count.
hma_basin_lake_stats = basin_merged.assign(
    lake_number=basin_merged['lake_number'].fillna(0).astype('int64'),
    lake_area=basin_merged['lake_area'].fillna(0.0),
)
hma_basin_lake_stats.head()
# hma_basin_lake_stats.to_file(filename='data/water-extent/lakes/HydroLakes_v10_hma_basin_stats.gpkg', driver='GPKG') 


#### Statistics of the lakes in the HMA regions

In [61]:
# Pair every subregion with each lake polygon it intersects. The inner join
# yields one row per (region, lake) pair, so a lake that intersects several
# regions is listed (and later counted) once for each of them.
joined_gdf = hma_region_gdf.sjoin(
    hma_region_lake_gdf,
    how='inner',
    predicate='intersects',
)

# Per-region totals: number of joined lakes and the sum of their 'Lake_area' values.
lakes_by_region = joined_gdf.groupby('o2region')
hma_region_stats = lakes_by_region.agg(
    lake_number=('Hylak_id', 'size'),
    lake_area=('Lake_area', 'sum'),
).reset_index()
hma_region_stats.head()


Unnamed: 0,o2region,lake_number,lake_area
0,13-01,117,3376.28
1,13-02,127,695.86
2,13-03,442,7679.95
3,13-04,280,5371.13
4,13-05,102,1006.3


In [68]:
# Attach the per-region lake statistics to the subregion polygons. The left
# merge keeps every region, including any that have no row in hma_region_stats.
region_merged = hma_region_gdf.merge(hma_region_stats, on='o2region', how='left')
region_merged = region_merged[['o2region', 'full_name', 'lake_number', 'lake_area', 'geometry']]

# A region without any joined lake would come out of the left merge as NaN
# (turning the count into a float column). Record such regions as zero lakes /
# zero area and keep the count as an integer.
hma_region_lake_stats = region_merged.assign(
    lake_number=region_merged['lake_number'].fillna(0).astype('int64'),
    lake_area=region_merged['lake_area'].fillna(0.0),
)
hma_region_lake_stats.head()
# hma_region_lake_stats.to_file(filename='data/water-extent/lakes/HydroLakes_v10_hma_region_stats.gpkg', driver='GPKG')


Unnamed: 0,o2region,full_name,lake_number,lake_area,geometry
0,13-01,Hissar Alay,117,3376.28,"MULTIPOLYGON (((70 40.7, 71 40.7, 72.01 40.7, ..."
1,13-02,Pamir (Safed Khirs / West Tarim),127,695.86,"MULTIPOLYGON (((74.35547 39.80418, 74.37581 39..."
2,13-03,West Tien Shan,442,7679.95,"MULTIPOLYGON (((77.99937 43.69747, 78.5 43.749..."
3,13-04,East Tien Shan (Dzhungaria),280,5371.13,"MULTIPOLYGON (((78.5 43.74989, 78.5 44, 78.5 4..."
4,13-05,West Kun Lun,102,1006.3,"MULTIPOLYGON (((76.49466 37.98237, 76.50852 37..."
