# Query the CUAHSI Hydroportal for SNOTEL data

Adapted from the [2021 SnowEx Hackweek tutorial by David Shean](https://snowex-2021.hackweek.io/tutorials/geospatial/SNOTEL_query.html)

__Note: this notebook (specifically the `ulmo` package) requires a different version of Python than the other notebooks that use `XDEM`, so I ran this using a separate `mamba` environment.__

In [None]:
import os
import glob
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import ulmo

## Define paths to data and variables to fetch

In [None]:
# Paths in directory
site_name = 'MCS'
data_path = '/Volumes/LaCie/raineyaberle/Research/PhD/SkySat-Stereo/study-sites'
out_dir = os.path.join(data_path, site_name, 'snotel')

# Locate the AOI shapefile. Matches are sorted so the choice is reproducible
# when several files match, and a missing file is reported explicitly instead
# of surfacing as an IndexError on the empty glob result.
aoi_pattern = os.path.join(data_path, site_name, 'AOIs', '*bounds*.shp')
aoi_matches = sorted(glob.glob(aoi_pattern))
if not aoi_matches:
    raise FileNotFoundError(f'No AOI shapefile found matching: {aoi_pattern}')
aoi_fn = aoi_matches[0]

# Variable codes to download and the date range (YYYY-MM-DD) to request
fetch_vars = ['SNOTEL:PRCPSA_D', 'SNOTEL:SNWD_D', 'SNOTEL:TAVG_D', 'SNOTEL:TMAX_D', 'SNOTEL:TMIN_D', 'SNOTEL:TOBS_D', 'SNOTEL:WTEQ_D']
start_date = '2020-01-01'
end_date = '2024-06-07'

## Load SNOTEL sites from the CUAHSI Hydroportal

In [None]:
# Web service endpoint and the full site listing it returns
wsdlurl = 'https://hydroportal.cuahsi.org/Snotel/cuahsi_1_1.asmx?WSDL'
sites = ulmo.cuahsi.wof.get_sites(wsdlurl)

# One row per site (keyed by the dictionary keys); rows with missing values are dropped
sites_df = pd.DataFrame.from_dict(sites, orient='index').dropna()

# Build a point geometry (longitude, latitude) from each site's location record
site_points = []
for loc in sites_df['location']:
    lon = float(loc['longitude'])
    lat = float(loc['latitude'])
    site_points.append(Point(lon, lat))
sites_df['geometry'] = site_points

# The location record is now redundant; store elevation as a float
sites_df = sites_df.drop(columns='location').astype({"elevation_m": float})

# Convert to geopandas.GeoDataFrame
sites_gdf = gpd.GeoDataFrame(sites_df, crs='EPSG:4326')
sites_gdf.head()

## Filter SNOTEL sites by AOI

In [None]:
# Load site bounds and reproject to EPSG:4326, the CRS assigned to sites_gdf
aoi = gpd.read_file(aoi_fn)
aoi = aoi.to_crs('EPSG:4326')

# Filter SNOTEL sites using site bounds.
# The first AOI feature is selected by position so this does not depend on
# the AOI's index labels.
aoi_geom = aoi.geometry.iloc[0]
sites_filt_gdf = sites_gdf.loc[sites_gdf.intersects(aoi_geom)]
if sites_filt_gdf.empty:
    # Fail here with a clear message rather than an IndexError below
    raise ValueError(f'No SNOTEL sites intersect the AOI: {aoi_fn}')
# The site code is the index label of the last matching site
site_code = sites_filt_gdf['code'].index[-1]
print(f'Site code: {site_code}')

# Plot AOI and SNOTEL site location(s)
fig, ax = plt.subplots()
aoi.plot(ax=ax, facecolor='None', edgecolor='c')
sites_filt_gdf.plot(ax=ax, color='b')
plt.show()

# Save site information to file.
# The output directory may not exist yet at this point in the notebook, so
# create it (including any missing parents) before writing.
os.makedirs(out_dir, exist_ok=True)
site_fn = os.path.join(out_dir, f'{site_name}_SNOTEL_site_info.csv')
sites_filt_df = pd.DataFrame(sites_filt_gdf)
sites_filt_df.to_csv(site_fn)
print('Site info saved to file:', site_fn)

In [None]:
# Option to check variables available at site.
# Uses site_code so the listing describes the same site whose data are
# downloaded, even when more than one site falls inside the AOI.
var_info = ulmo.cuahsi.wof.get_site_info(wsdlurl, site_code)
var_info

## Fetch data, save to file

In [None]:
def snotel_fetch(sitecode, variablecode='SNOTEL:SNWD_D',
                 start_date='1950-10-01', end_date=datetime.today().strftime('%Y-%m-%d')):
    """Fetch one variable's time series for one site and return it as a DataFrame.

    The request is sent to the web service at the module-level ``wsdlurl``
    through ``ulmo.cuahsi.wof.get_values``.

    Parameters
    ----------
    sitecode : str
        Site identifier passed to the web service.
    variablecode : str
        Variable identifier passed to the web service.
    start_date, end_date : str
        Date range of the request. NOTE: the default ``end_date`` is evaluated
        once, when the function is defined, not on every call.

    Returns
    -------
    pandas.DataFrame or None
        Records indexed by UTC timestamp, with ``value`` converted to numeric
        (-9999 replaced by NaN) and restricted to rows whose
        ``quality_control_level_code`` equals ``'1'``. ``None`` is returned if
        the request or any processing step fails; the failure is printed.
    """
    try:
        # Request data from the server
        site_values = ulmo.cuahsi.wof.get_values(wsdlurl, sitecode, variablecode, start=start_date, end=end_date)
        # Convert to a Pandas DataFrame
        fetched = pd.DataFrame.from_dict(site_values['values'])
        # Parse the datetime values to Pandas Timestamp objects and index by them
        fetched['datetime'] = pd.to_datetime(fetched['datetime'], utc=True)
        fetched = fetched.set_index('datetime')
        # Convert values to numeric and replace -9999 nodata values with NaN
        fetched['value'] = pd.to_numeric(fetched['value']).replace(-9999, np.nan)
        # Keep only records with quality control level '1'; copy so callers can
        # add columns without touching a slice of the unfiltered frame
        fetched = fetched[fetched['quality_control_level_code'] == '1'].copy()
    except Exception as err:
        # Best-effort fetch: report the cause and return None, never a
        # half-processed frame. KeyboardInterrupt/SystemExit still propagate.
        print("Unable to fetch %s (%s: %s)" % (variablecode, type(err).__name__, err))
        return None

    return fetched

# Check if output directory exists; create it (and any missing parents) if not
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
    print('Made directory for outputs:', out_dir)

# Iterate over variables
for var in fetch_vars:
    # Define output file name (':' in the variable code is replaced with '_')
    out_fn = os.path.join(out_dir, f"{site_name}_{start_date}_{end_date}_{var.replace(':','_')}.csv")
    # Fetch data
    values_df = snotel_fetch(site_code, var, start_date, end_date)
    # snotel_fetch returns None when the request fails; skip that variable
    # instead of aborting the remaining downloads
    if values_df is None:
        print('No data returned, skipping variable:', var)
        continue
    values_df['variable'] = var
    # Plot variable
    plt.figure(figsize=(10,4))
    plt.plot(values_df.index, values_df['value'])
    plt.ylabel(var)
    plt.show()
    # Save to file
    values_df.to_csv(out_fn, index=True)
    print('Data saved to file:', out_fn)
    

## Adjust dataframe and variable units

In [None]:
out_fns = sorted(glob.glob(os.path.join(out_dir, f"{site_name}_{start_date}_{end_date}_SNO*.csv")))
if not out_fns:
    # Without this guard the save step below fails with a NameError on df_full
    raise FileNotFoundError(f'No fetched SNOTEL files found in: {out_dir}')

# Conversion factor used for inches -> meters (rounded; kept as-is so results
# match previously generated tables)
INCHES_PER_METER = 39.37


def _inches_to_meters(values):
    """Divide by the inches-per-meter factor."""
    return values / INCHES_PER_METER


def _fahrenheit_to_celsius(values):
    """Apply (F - 32) * 5 / 9."""
    return (values - 32) * 5 / 9


# File-name key -> (adjusted column name, conversion applied to 'value').
# Keys are tested in this order against the file's base name.
# Raw values are presumably inches / degrees Fahrenheit - TODO confirm against
# the service's variable metadata.
unit_conversions = {
    'PRCPSA': ('PRCPSA_m', _inches_to_meters),
    'SNWD': ('SNWD_m', _inches_to_meters),
    'TAVG': ('TAVG_C', _fahrenheit_to_celsius),
    'TMAX': ('TMAX_C', _fahrenheit_to_celsius),
    'TMIN': ('TMIN_C', _fahrenheit_to_celsius),
    'TOBS': ('TOBS_C', _fahrenheit_to_celsius),
    'WTEQ': ('SWE_m', _inches_to_meters),
}

df_full = None
for fn in out_fns:
    df = pd.read_csv(fn)
    df['datetime'] = pd.to_datetime(df['datetime'])
    df.set_index('datetime', inplace=True)

    # Find the first key contained in the file name; a file matching no key is
    # passed through unchanged
    base_name = os.path.basename(fn)
    for key, (adj_name, convert) in unit_conversions.items():
        if key in base_name:
            df[adj_name] = convert(df['value'])
            # Keep only the unit-adjusted column
            df = df[adj_name]
            break
    df = pd.DataFrame(df)

    # merge() defaults to an inner join, so only timestamps present for every
    # variable are kept in the combined table
    df_full = df if df_full is None else df_full.merge(df, on='datetime')

# Save to file
df_fn = os.path.join(out_dir, f"{site_name}_{start_date}_{end_date}_adj.csv")
df_full.to_csv(df_fn, index=True)
print('Adjusted data table saved to file:', df_fn)
df_full