# Retrieve and Analyze SNOTEL data for a watershed of interest
Authors: Irene Garousi-Nejad (igarousi@cuahsi.org), Ryan Johnson (ryan.c.johnson@utah.edu)
Last updated: January 9th, 2025

This notebook supports the CIROH HydroLearn module Introduction to Seasonal Snow Observations, Modeling, and Analysis, demonstrating how to access NRCS SNOTEL data, in particular snow depth and snow water equivalent (SWE).

Supplementary Code:

To simplify this notebook, we developed several helper functions that can be imported from the supporting_scripts folder. 


# 1. Create Map for Watershed for USGS Station ID
The following code uses the pynhd and folium packages to create an interactive map of a watershed from a USGS gauge ID.

In our exercise, we are tasked with identifying all SNOTEL sites upstream of Hetch Hetchy Reservoir on the Tuolumne River. Searching for "USGS streamflow Tuolumne River" returns several locations; site [11274790](https://waterdata.usgs.gov/monitoring-location/11274790/#dataTypeId=continuous-00065-0&period=P7D&showMedian=false) is the site of interest for this assessment.

In [77]:
import folium
from folium.features import DivIcon
from folium.plugins import MousePosition
from pynhd import NLDI, WaterData, NHDPlusHR, GeoConnex
import pynhd
import geopandas as gpd
import pandas as pd
from supporting_scripts import getData
from shapely.geometry import box, Polygon

import datetime
import s3fs
import boto3
import fsspec
import numpy as np
import xarray as xr
import zarr
import glob
import rasterio
import pyproj
import matplotlib.pyplot as plt
import datetime

Define the watershed outlet using its NWIS site ID, and create the NLDI client that will be used to query the watershed.

In [99]:
# NWIS id for Tuolumne river at the mouth of Hetch Hetchy Reservoir
usgs_gage_id = "11274790"

# NLDI client (imported from pynhd) used to look up the basin for this gauge
nldi = NLDI()

Collect watershed and reach vectors using the pynhd module.

In [101]:
import os

# Create the NLDI client here as well so this cell can be run on its own.
nldi = NLDI()

# Fetch the basin geometry for the gauge and save it as a shapefile.
# flush=True makes the progress message appear before the (slow) web request.
print('Collecting basins...', end='', flush=True)
basin = nldi.get_basins(usgs_gage_id)

# Make sure the output folder exists first: writing the shapefile into a
# missing directory fails.
os.makedirs("files", exist_ok=True)
basin.to_file("files/TuolumneRiverBasin.shp")
print('done')

# Optional layers, left disabled: NHD flowlines and gauge locations within the
# bounding box of the basin.

# print('Collecting NHD...', end='')
# mr = WaterData("nhdflowline_network")
# nhd = mr.bybox(basin.geometry.iloc[0].bounds)
# print('done')

# print('Collecting gauge locations...', end='')
# gages = pynhd.GeoConnex(item="gages")
# gages = gages.bygeometry(basin.geometry.iloc[0].bounds)
# print('done')

Collecting basins...done


Create an interactive map to display the watershed.

In [23]:
# Base map: National Geographic tiles with a scale bar, plus a readout of the
# coordinates under the mouse cursor.
m = folium.Map(
    tiles='http://services.arcgisonline.com/arcgis/rest/services/NatGeo_World_Map/MapServer/tile/{z}/{y}/{x}',
    attr="Sources: National Geographic",
    zoom_start=8,
    control_scale=True,
)
_ = MousePosition().add_to(m)


def _watershed_style(feature):
    """Return the outline and fill colours used for the watershed polygon."""
    return {'color':'darkblue', 'fillColor':'blue'}


# Watershed boundary, reprojected to EPSG:4326 and added as a GeoJSON layer.
watershed_json = basin.to_crs(epsg='4326').to_json()
w = folium.GeoJson(data=watershed_json, style_function=_watershed_style)
w.add_to(m)

# Zoom the map to the extent of everything added so far (the watershed layer).
map_bounds = m.get_bounds()
m.fit_bounds(map_bounds)

# Last expression in the cell: renders the map in the notebook.
m

# 1. Identify SNOTEL sites within a spatial domain

In [102]:
# Load the station catalogue into a GeoDataFrame indexed by station code and
# keep only the stations flagged as having CSV data available.
all_stations_gdf = gpd.read_file('https://raw.githubusercontent.com/egagli/snotel_ccss_stations/main/all_stations.geojson').set_index('code')
all_stations_gdf = all_stations_gdf[all_stations_gdf['csvData']==True]

#filtered_all_stations_gdf = all_stations_gdf[all_stations_gdf.index.str.contains('_SNTL')]  # only select SNOTEL sites

# Use the basin polygon to select the stations that fall inside the domain.
# .iloc[0] takes the first basin geometry by position, so it does not depend
# on how the basin GeoDataFrame happens to be indexed.
basin_polygon = basin.geometry.iloc[0]
stations_in_basin = all_stations_gdf.geometry.within(basin_polygon)

# Work on an explicit copy so the edits below never write into a slice of
# all_stations_gdf (avoids pandas' SettingWithCopy warning).
gdf_in_bbox = all_stations_gdf[stations_in_basin].copy()

# Move the station code from the index back into a regular column.
gdf_in_bbox = gdf_in_bbox.reset_index(drop=False)

# Store begin and end dates as "YYYY-MM-DD" strings.
for date_column in ('beginDate', 'endDate'):
    gdf_in_bbox[date_column] = pd.to_datetime(gdf_in_bbox[date_column]).dt.strftime("%Y-%m-%d")

# Last expression in the cell: displays the selected stations.
gdf_in_bbox

  val = getattr(super(), mtd)(*args, **kwargs)


Unnamed: 0,code,name,network,elevation_m,latitude,longitude,state,HUC,mgrs,mountainRange,beginDate,endDate,csvData,geometry
0,TUM,Tuolumne Meadows,CCSS,2621.28,37.876406,-119.348096,California,180400090102,11SKB,Sierra Nevada,2004-10-01,2025-01-08,True,POINT (-119.3481 37.87641)
1,TES,Tioga Pass Entry Station,CCSS,3031.236,37.91087,-119.258507,California,180400090102,11SLB,Sierra Nevada,2004-10-01,2025-01-07,True,POINT (-119.25851 37.91087)
2,DAN,Dana Meadows,CCSS,2987.04,37.896162,-119.25726,California,180400090102,11SLB,Sierra Nevada,2004-10-01,2025-01-08,True,POINT (-119.25726 37.89616)
3,SLI,Slide Canyon,CCSS,2804.16,38.091234,-119.431881,California,180400090501,11SKC,Sierra Nevada,2005-10-01,2025-01-08,True,POINT (-119.43188 38.09123)


Add the SNOTEL monitoring station locations to the map

In [103]:
# Bounding box of the selected stations, used to centre the initial view
minx, miny, maxx, maxy = gdf_in_bbox.total_bounds

# Midpoint of the bounding box
center_lon = (minx + maxx) / 2
center_lat = (miny + maxy) / 2

# Serialise both layers to GeoJSON; the date columns are cast to str first
geojson2 = basin.to_json()
geojson1 = gdf_in_bbox.astype({'beginDate': str, 'endDate': str}).to_json()

# Base map centred on the stations, with National Geographic tiles, a scale
# bar, and a readout of the coordinates under the mouse cursor
m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=10,
    tiles='http://services.arcgisonline.com/arcgis/rest/services/NatGeo_World_Map/MapServer/tile/{z}/{y}/{x}',
    attr="Sources: National Geographic",
    control_scale=True,
)
_ = MousePosition().add_to(m)

# One GeoJSON layer per GeoDataFrame: stations first, then the basin
for layer_json, layer_name in ((geojson1, 'GeoDataFrame 1'), (geojson2, 'GeoDataFrame 2')):
    folium.GeoJson(layer_json, name=layer_name).add_to(m)

# Layer control lets the user toggle the two layers
folium.LayerControl().add_to(m)

# Last expression in the cell: renders the map in the notebook
m

# 2. Retrieve data for the selected sites

The following uses the getData.py script to download SNOTEL data for the sites within the domain. Inputs to this script include the station name and station code, which are retrieved from the geodataframe above. Additional inputs include the start and end dates (also taken from the geodataframe), as well as the path to save the outputs.

In [98]:
# Download the data for every station in the domain with the getData module
OutputFolder = 'files'

# Walk the needed columns in lockstep with the row index; the index is printed
# before each download as a simple progress marker.
station_fields = zip(
    gdf_in_bbox.index,
    gdf_in_bbox['name'],
    gdf_in_bbox['code'],
    gdf_in_bbox['beginDate'],
    gdf_in_bbox['endDate'],
)
for i, site_name, site_code, begin_date, end_date in station_fields:
    print(i)
    getData.getCaliSNOTELData(site_name, site_code, begin_date, end_date, OutputFolder)

0
Start retrieving data for Tuolumne Meadows, TUM
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/TUM:CA:MSNT%257Cid=%2522%2522%257Cname/2004-10-01,2025-01-08/WTEQ::value?fitToScreen=false
1
Start retrieving data for Tioga Pass Entry Station, TES
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/TES:CA:MSNT%257Cid=%2522%2522%257Cname/2004-10-01,2025-01-07/WTEQ::value?fitToScreen=false
2
Start retrieving data for Dana Meadows, DAN
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/DAN:CA:MSNT%257Cid=%2522%2522%257Cname/2004-10-01,2025-01-08/WTEQ::value?fitToScreen=false
3
Start retrieving data for Slide Canyon, SLI
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/SLI:CA:MSNT%257Cid=%2522%2522%257Cname/2005-10-01,2025-