# Identifying Streamgage Watersheds

This notebook walks through how to identify the watersheds, at different scales (hydrologic unit levels), that a target streamgage belongs to, by finding the watershed geometries that contain the gage's lat-long location.

In [1]:

import os
import warnings

import folium
import geopandas as gpd
from IPython.display import Image
import pandas as pd


# Relative path to the data directory, two levels above the working directory.
DATA_DIR = os.path.join("..", "..", "data")

In [2]:
# Locate the California NHD shapefile directory and pick out the "WBDHU*" layers:
ca_fp = os.path.join(DATA_DIR, "NHD", "NHD_H_California_State_Shape", "Shape")
ca_shapefiles = [name for name in os.listdir(ca_fp) if name.endswith(".shp")]
wbd_shapefiles = [name for name in ca_shapefiles if name.startswith("WBDHU")]

In [3]:
# Load each HU-level shapefile into a GeoDataFrame, keyed by its base file name (e.g. "WBDHU8"):
geo_dfs = {}
for wbd_filename in wbd_shapefiles:
    shapefile_name = wbd_filename.split(".")[0]
    layer = gpd.read_file(os.path.join(ca_fp, wbd_filename))
    # Tag every row with its source layer so the HU level can be recovered later.
    layer["shapefile_name"] = shapefile_name
    geo_dfs[shapefile_name] = layer

In [4]:
# Stack the per-level GeoDataFrames into one GeoDataFrame, reusing the first layer's CRS:
gdfs_list = [*geo_dfs.values()]
combined = pd.concat(gdfs_list, ignore_index=True)
gdf = gpd.GeoDataFrame(combined, crs=gdfs_list[0].crs)

In [5]:
# Load the streamgage dataset and collect the distinct gage ids it contains:
streamgage_fp = os.path.join(DATA_DIR, "streamgage-full.csv")
sg_df = pd.read_csv(streamgage_fp, encoding="utf-8")
gage_ids = sg_df["gage"].unique()

In [6]:
# Load the tab-separated gage inventory (which carries lat-long) and keep only our target gages:
inventory_fp = os.path.join(DATA_DIR, "inventory_clean")
inventory = pd.read_csv(inventory_fp, sep="\t")
is_target_gage = inventory["site_no"].isin(gage_ids)
target_gages = inventory[is_target_gage]

In [7]:
def get_streamgage_lat_long(gage_id: int):
    """For a streamgage ID, return the lat-long coordinates of its location.

    Args:
        gage_id: Site number to look up in the ``site_no`` column of ``target_gages``.

    Returns:
        A ``(latitude, longitude)`` tuple of floats taken from the
        ``dec_lat_va`` and ``dec_long_va`` columns.

    Raises:
        KeyError: if ``gage_id`` is not present in ``target_gages``.
    """
    # Filter with a mask instead of re-indexing the whole frame on every call.
    rows = target_gages[target_gages["site_no"] == gage_id]
    if rows.empty:
        raise KeyError(f"gage_id {gage_id} not found in target_gages")
    # If a site number appears on several rows, use the first so that scalars
    # (not Series) are always returned.
    site = rows.iloc[0]
    return float(site["dec_lat_va"]), float(site["dec_long_va"])

In [8]:
# Example: look up the (latitude, longitude) of gage 11185500.
get_streamgage_lat_long(11185500)

(35.90550439, -118.467586)

In [9]:
def get_lat_long_geometry(lat: float, long: float):
    """Build the point geometry for a lat-long coordinate in EPSG:4326.

    The result is a one-element geopandas.array.GeometryArray holding a
    shapely.geometry.point.Point; longitude is the x value and latitude the y value.
    """
    xs, ys = [long], [lat]
    return gpd.points_from_xy(xs, ys, crs="EPSG:4326")

In [10]:
# Example: turn the coordinates of gage 11185500 into a point geometry.
lat, long = get_streamgage_lat_long(11185500)
get_lat_long_geometry(lat, long)

<GeometryArray>
[<shapely.geometry.point.Point object at 0x7fa887d16910>]
Length: 1, dtype: geometry

In [11]:
def get_streamgage_overlapping_watersheds(gage_id: int):
    """Return every row of the combined watershed GeoDataFrame whose geometry
    contains the location of the given streamgage.

    A warning is emitted (and the empty subset is still returned) when no
    geometry contains the gage.
    """
    gage_point = get_lat_long_geometry(*get_streamgage_lat_long(gage_id))[0]
    contains_gage = gdf["geometry"].contains(gage_point)
    matches = gdf[contains_gage]
    if len(matches) == 0:
        warnings.warn(f"No overlapping watershed geometries found for gage_id: {gage_id}")
    return matches

In [12]:
# Example: all watershed rows (across HU levels) whose geometry contains gage 11185500.
get_streamgage_overlapping_watersheds(11185500)

Unnamed: 0,tnmid,metasource,sourcedata,sourceorig,sourcefeat,loaddate,referenceg,areaacres,areasqkm,states,...,ObjectID,geometry,shapefile_name,huc8,huc10,huc14,huc16,huc2,huc6,huc4
5128,{170C1E37-852D-44C5-917D-AC6E0BED46B6},,,,,2012-06-11,,699177.1,2829.47,CA,...,64,"POLYGON ((-118.40092 36.69851, -118.40006 36.6...",WBDHU8,18030001.0,,,,,,
5430,{CD96A6F3-D975-455A-9101-E8FB7C7AC555},,,,,2012-10-24,,141758.6,573.68,CA,...,226,"POLYGON ((-118.33786 35.94061, -118.33747 35.9...",WBDHU10,,1803000106.0,,,,,
6450,{D924BD6E-507C-4E5B-A86C-DDDDE00752A5},{FE4AD22E-A115-40E4-9EC7-8497E5725E31},,Natural Resources and Conservation Service and...,,2022-03-15,,107892400.0,436625.32,"CA,MX,NV,OR",...,4,"MULTIPOLYGON (((-123.01418 37.94333, -123.0148...",WBDHU2,,,,,18.0,,
6458,{C7AD4655-6535-43A8-8E11-6F5A55DD8963},{31AC21ED-1551-431B-963A-63D44B9887E8},,U.S. Geological Survey,,2022-01-02,,10501340.0,42497.47,CA,...,8,"POLYGON ((-118.90461 37.20802, -118.90404 37.2...",WBDHU6,,,,,,180300.0,
6488,{91CB7D75-4CD6-4EC4-ACB5-8524A6DFB363},{31AC21ED-1551-431B-963A-63D44B9887E8},,U.S. Geological Survey,,2022-01-02,,10501340.0,42497.47,CA,...,14,"POLYGON ((-118.90461 37.20802, -118.90404 37.2...",WBDHU4,,,,,,,1803.0


In [13]:
def get_streamgage_watershed(gage_id: int, hu: int):
    """For a given streamgage ID and hydrological unit resolution,
    return the watershed geometry for that point.

    Args:
        gage_id: Streamgage site number.
        hu: Hydrologic unit level; rows are kept when their ``shapefile_name``
            (case-insensitively) ends with ``hu{hu}``, e.g. ``2`` for "WBDHU2".

    Returns:
        The subset of the overlapping-watershed rows at the requested level;
        empty if there is no match.
    """
    # The gage location is resolved inside get_streamgage_overlapping_watersheds,
    # so no separate lat-long lookup is needed here.
    geos = get_streamgage_overlapping_watersheds(gage_id)
    is_requested_level = geos["shapefile_name"].str.lower().str.endswith(f"hu{hu}")
    return geos[is_requested_level]

In [14]:
# Example: keep only the HU2 watershed containing gage 11185500.
gdf_subset = get_streamgage_watershed(11185500, 2)
gdf_subset

Unnamed: 0,tnmid,metasource,sourcedata,sourceorig,sourcefeat,loaddate,referenceg,areaacres,areasqkm,states,...,ObjectID,geometry,shapefile_name,huc8,huc10,huc14,huc16,huc2,huc6,huc4
6450,{D924BD6E-507C-4E5B-A86C-DDDDE00752A5},{FE4AD22E-A115-40E4-9EC7-8497E5725E31},,Natural Resources and Conservation Service and...,,2022-03-15,,107892400.0,436625.32,"CA,MX,NV,OR",...,4,"MULTIPOLYGON (((-123.01418 37.94333, -123.0148...",WBDHU2,,,,,18,,


In [15]:
def map_streamgage_watershed(gage_id: int, hu: int, zoom_start: int = 5):
    """Map a streamgage and the overlapping watershed at the given HU resolution.

    Args:
        gage_id: Streamgage site number to plot.
        hu: Hydrologic unit level of the watershed to draw.
        zoom_start: Initial zoom level of the map.

    Returns:
        A folium.Map centred on the gage, with a marker for the gage and one
        orange-filled GeoJson layer per matching watershed row.

    Warns:
        UserWarning: if no watershed is found at the requested HU level (the
        map is still returned, with only the gage marker).
    """
    lat, lon = get_streamgage_lat_long(gage_id)

    my_map = folium.Map(location=[lat, lon], zoom_start=zoom_start, width="80%", height="100%")
    # A plain Marker has no radius (that is a CircleMarker option), so none is passed.
    folium.Marker(location=[lat, lon],
                  popup=f"{gage_id} ({lat}, {lon})").add_to(my_map)

    watershed = get_streamgage_watershed(gage_id, hu)
    if not len(watershed):
        warnings.warn(f"\n  No watershed found for HU{hu}")
    for _, r in watershed.iterrows():
        # Simplify the outline before serialising to keep the embedded GeoJSON small.
        sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.001)
        geo_j = folium.GeoJson(data=sim_geo.to_json(),
                               style_function=lambda x: {"fillColor": "orange"})
        # Label each polygon from its own row rather than always from the first row.
        label = r["shapefile_name"]
        folium.Popup(label).add_to(geo_j)
        geo_j.add_to(my_map)

    return my_map

In [16]:
# HU2 watershed for gage 11185500, starting at zoom level 5.
map_streamgage_watershed(11185500, 2, 5)

In [17]:
# HU4 watershed for gage 11185500, starting at zoom level 6.
map_streamgage_watershed(11185500, 4, 6)

In [18]:
# HU6 watershed for gage 11185500, starting at zoom level 7.
map_streamgage_watershed(11185500, 6, 7)

In [19]:
# HU8 watershed for gage 11185500, starting at zoom level 8.
map_streamgage_watershed(11185500, 8, 8)

In [20]:
# HU10 watershed for gage 11185500, starting at zoom level 9.
map_streamgage_watershed(11185500, 10, 9)