# Lab 8-1: Predicting streamflow with the SWE regression method

In this lab, we will use SWE measurements from the East River Valley (SNOTEL sites, plus the Kettle Ponds measurements) as well as USGS streamflow measurments from gage number 09112200.

In [76]:
import pandas as pd

# info for this package here: https://doi-usgs.github.io/dataRetrieval/reference/readNWISdv.html
import dataretrieval.nwis as nwis

from metloom.pointdata import SnotelPointData
from datetime import datetime
import altair as alt
from scipy.stats import linregress
import numpy as np
import xarray as xr
from metpy.units import units

In [41]:
#pip install dataretrieval

In [3]:
start_date = '1990-01-01'
end_date = '2024-01-10'

## Download USGS Streamflow data

In [4]:
usgs_site_code = '09112200' # Replace with the desired USGS site number
# 00060 is the parameter code for streamflow,
# this function will return mean daily values of discharge in cubic feet per second
streamflow_df, metadata = nwis.get_dv(sites=usgs_site_code, start=start_date, end=end_date, parameterCd='00060') 
streamflow_df = streamflow_df.rename(columns={'00060_Mean': 'mean_daily_streamflow'})

Calculate the total April-July streamflow for each year

In [5]:
seconds_in_a_day = 60*60*24
streamflow_df['daily_total_streamflow'] = streamflow_df['mean_daily_streamflow'] * seconds_in_a_day

streamflow_df['year'] = streamflow_df.index.year
streamflow_df['month'] = streamflow_df.index.month
df_amjj = streamflow_df[streamflow_df.month.isin([4,5,6,7])]
df_amjj = df_amjj.groupby('year')[['daily_total_streamflow']].sum()

# Note that 1 cubic foot is equal to 2.30e-5 acre feet. Let's convert
df_amjj['daily_total_streamflow'] = df_amjj['daily_total_streamflow'] * 2.30e-5
df_amjj = df_amjj.rename(columns={'daily_total_streamflow': 'Seasonal total streamflow (acre-feet)'})
df_amjj

Unnamed: 0_level_0,Seasonal total streamflow (acre-feet)
year,Unnamed: 1_level_1
1994,160166.3328
1995,312791.2416
1996,236762.9568
1997,288948.816
1998,153918.576
1999,200737.008
2000,143599.0464
2001,138869.5104
2002,70821.42336
2003,157178.18016


## Download SNOTEL SWE data

In [27]:
snotel_point_butte = SnotelPointData("380:CO:SNTL", "Butte")
SNOTEL_VARS = [
    snotel_point_butte.ALLOWED_VARIABLES.SWE,
]
df_butte_longterm = snotel_point_butte.get_daily_data(
    datetime(df_amjj.index.min() - 1, 10, 1), datetime(df_amjj.index.max(), 7, 1),
    SNOTEL_VARS
)

In [29]:
df_april1_swe = df_butte_longterm[
    (df_butte_longterm.index.get_level_values(0).month == 4)
    &
    (df_butte_longterm.index.get_level_values(0).day == 1)
]

## Combine SWE and Streamflow data

In [33]:
df_april1_swe.index = df_april1_swe.index.get_level_values(0).year

In [37]:
df_swe_and_streamflow = df_april1_swe.join(df_amjj)

In [42]:
alt.Chart(df_swe_and_streamflow).mark_point().encode(
    alt.X('SWE:Q'),
    alt.Y('Seasonal total streamflow (acre-feet):Q')
)

Fit a line to the columns 'SWE' 'Seasonal total streamflow (acre-feet)'

In [55]:
(slope, intercept, r_value, p_value, std_err) = (
    linregress(df_swe_and_streamflow['SWE'], df_swe_and_streamflow['Seasonal total streamflow (acre-feet)'])
)

In [56]:
fit_line_x_values = np.linspace(0, 22, 100)
fit_lin_y_values = intercept + slope * fit_line_x_values
fit_line_df = pd.DataFrame({
    'x': fit_line_x_values,
    'y': fit_lin_y_values,
})

In [57]:
alt.Chart(df_swe_and_streamflow).mark_point().encode(
    alt.X('SWE:Q'),
    alt.Y('Seasonal total streamflow (acre-feet):Q')
) + alt.Chart(fit_line_df).mark_line(color='black').encode(
    alt.X('x:Q').title(''),
    alt.Y('y:Q').title('')
) 

Now pull in SWE data from the Kettle Ponds and see how the linear regression model works for that four SWE measurements from a single year.

In [58]:
sos_file = "../data/sos_full_dataset_30min.nc"
sos_dataset = xr.open_dataset(sos_file)

In [80]:
kps_swe_values = sos_dataset.sel(time = '20230401 0000')[['SWE_p1_c', 'SWE_p2_c', 'SWE_p3_c', 'SWE_p4_c']].to_array().values
kps_swe_values = (kps_swe_values * units("mm")).to(units("inches")).magnitude

In [83]:
streamflow_predictions_from_kps = kps_swe_values * slope + intercept

streamflow_predictions_from_kps = pd.DataFrame({
    'SWE': kps_swe_values,
    'Seasonal total streamflow (acre-feet)': streamflow_predictions_from_kps,
})

In [116]:
(alt.Chart(
    df_swe_and_streamflow.assign(label = 'Measured SWE (SNOTEL) and Streamflow')
).mark_point().encode(
    alt.X('SWE:Q'),
    alt.Y('Seasonal total streamflow (acre-feet):Q'),
    alt.Color('label:N')
) + alt.Chart(
    df_swe_and_streamflow.loc[2023:2023].assign(label = 'Measured SWE (SNOTEL) and Streamflow, 2023')
).mark_point(size=200, shape='square').encode(
    alt.X('SWE:Q'),
    alt.Y('Seasonal total streamflow (acre-feet):Q'),
    alt.Color('label:N').scale(range=['purple']).title('')
) + alt.Chart(
    fit_line_df.assign(label = 'Regression line')
).mark_line(color='black').encode(
    alt.X('x:Q').title(''),
    alt.Y('y:Q').title(''),
    alt.Color('label:N').scale(range=['black']).title('')
) + alt.Chart(
    streamflow_predictions_from_kps.assign(label = 'Measured SWE at Kettle Ponds, Predicted Streamflow')
).mark_point().encode(
    alt.X('SWE:Q'),
    alt.Y('Seasonal total streamflow (acre-feet):Q'),
    alt.Color('label:N').scale(range=['red']).title('')
)).resolve_scale(color='independent').configure_legend(labelLimit=300)