## Access NSRPS Analysis and Forecasts

This notebook is used to access hydrological data from the Meteorological Service of Canada WSC Service.
This includes:
- Retrieving model analysis
- Retrieving deterministic model results
- Retrieving ensemble model results

It was developed using helpful, open access information provided by [NHS NSRPS Streamflow Tutorials](https://github.com/NHS-Dev/geomet-nsrps-streamflow-tutorials/).

In [2]:
import configparser
import xarray as xr 
import pandas as pd
from pathlib import Path
import yaml

# Import local scripts
import sys
sys.path.append('../scripts')
from nsrps_data_access import query_wms_service_for_forecast_times, query_wms_service_for_analysis_times, query_wcs_service_for_forecast_data,query_wcs_service_for_analysis_data, extract_station_from_grid
from plot_hydrographs import plot_forecast_hydrograph_with_historical_ranges
# Set up logger
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

#Suppress info from matplotlib
logging.getLogger('matplotlib.category').disabled = True

# add autoreload to automatically reload modules when they change
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Read settings

In [9]:
config_file = "../settings/general_settings.yaml"

# Parse the YAML settings file into a nested dictionary
with open(config_file, 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

# API URL taken from the 'msc_open_data_settings' section of the settings
api_url = config['msc_open_data_settings']['api_url']

# Directory settings: GIS input folder and output root.
# The output root is created (including parents) when it does not exist yet.
paths_cfg = config['paths']
gis_data_dir = paths_cfg['gis_data']
output_base_dir = Path(paths_cfg['output_dir'])
output_base_dir.mkdir(parents=True, exist_ok=True)

# CSV tables from the 'gis_data' section, joined onto the GIS data directory
gis_cfg = config['gis_data']
stations_csv = Path(gis_data_dir) / gis_cfg['hydro_stns_csv']
nsrps_station_location_csv = Path(gis_data_dir) / gis_cfg['nsrps_stns_csv']

### Read in login settings

In [5]:

# Load the login information from the local config file.
# The file is opened in a `with` block so its handle is closed as soon as
# the parser has read it.
login_config = configparser.ConfigParser()
with open('../settings/config.cfg') as login_file:
    login_config.read_file(login_file)

# 'Login' section of the config; handed to the query helpers in later cells
login = login_config['Login']


### Retrieve Deterministic Forecast

In [10]:
# Layer to retrieve; its files go into a folder named after the layer
layer_name = 'DHPS_1km_RiverDischarge'
output_dir = output_base_dir / layer_name
output_dir.mkdir(exist_ok=True)

# Query the forecast times first (WMS helper), then the forecast data for
# those times (WCS helper)
newest_fcast, fcasthrs = query_wms_service_for_forecast_times(layer_name, login)
forecast_ds = query_wcs_service_for_forecast_data(layer_name, login, newest_fcast, fcasthrs)

# Save the dataset as NetCDF, using `newest_fcast` as the file name
forecast_nc_path = output_dir / f'{newest_fcast}.nc'
forecast_ds.to_netcdf(forecast_nc_path)

INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T13:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T14:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T15:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T16:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T17:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T18:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T19:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T20:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T21:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T22:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2024-01-28T23:00:00Z
INFO:nsrps_data_access:Querying 2024-01-28T12:00:00Zm lead time 2

In [7]:
# Load the NSRPS station locations and the list of hydrometric stations
nsrps_station_locations_df = pd.read_csv(nsrps_station_location_csv)
hydro_stations_df = pd.read_csv(stations_csv)

# Station IDs to look for among the NSRPS locations
search_stations = hydro_stations_df["ID"].tolist()

# Keep only the NSRPS locations whose STATION_NUMBER appears in that list
# (the hydrometric station list is described as covering the Bow watershed)
in_search_list = nsrps_station_locations_df['STATION_NUMBER'].isin(search_stations)
nsrps_stations = nsrps_station_locations_df[in_search_list]

In [8]:
# Re-open the forecast NetCDF file that is named after `newest_fcast`
forecast_ds = xr.open_dataset(Path(output_dir,f'{newest_fcast}.nc'))

# iterrows() yields (index, row) tuples. The whole tuple is handed to the
# extraction helper; the row part supplies the station number for the file name.
for station in nsrps_stations.iterrows():
    station_number = station[1]["STATION_NUMBER"]

    station_data = extract_station_from_grid(station, forecast_ds)

    # Export with the 'Band1' column named 'Discharge'
    station_data_df = station_data.to_dataframe().rename(columns={'Band1':'Discharge'})
    station_data_df.to_csv(f'{output_dir}/{station_number}_forecast.csv')




In [11]:
# Analysis layer; its files go into a folder named after the layer.
# Note that this rebinds `layer_name` and `output_dir` from the forecast cells.
layer_name = 'DHPS-Analysis_1km_RiverDischarge'
output_dir = output_base_dir / layer_name
output_dir.mkdir(exist_ok=True)

# Query the analysis time (WMS helper), then the analysis data for it (WCS helper)
analysis_time = query_wms_service_for_analysis_times(layer_name, login)
analysis_ds = query_wcs_service_for_analysis_data(layer_name, login,analysis_time)

# Save the dataset as NetCDF, using `analysis_time` as the file name
analysis_nc_path = output_dir / f'{analysis_time}.nc'
analysis_ds.to_netcdf(analysis_nc_path)

# iterrows() yields (index, row) tuples. The whole tuple is handed to the
# extraction helper; the row part supplies the station number for the file name.
for station in nsrps_stations.iterrows():
    station_number = station[1]["STATION_NUMBER"]

    station_data = extract_station_from_grid(station, analysis_ds)

    # Export with the 'Band1' column named 'Discharge'
    station_data_df = station_data.to_dataframe().rename(columns={'Band1':'Discharge'})
    station_data_df.to_csv(f'{output_dir}/{station_number}_analysis.csv')


In [None]:
# Variable to plot; it is part of the input CSV and output PNG file names
variable = 'LEVEL'

real_time_level_stations = ['05BH015']
historic_level_stations = ['05BH015']

for station_id in real_time_level_stations:
    # Skip stations that have no historic data to plot against
    if station_id not in historic_level_stations:
        continue

    # All paths are built from `output_base_dir`. `output_dir` must not be used
    # here: it points at the folder of the layer retrieved last, so
    # '<output_dir>/DHPS_1km_RiverDischarge/...' would not match the location
    # where the forecast and analysis CSVs were written.
    # NOTE(review): assumes the hydrometric-daily-mean / hydrometric-realtime
    # folders sit next to the layer folders in `output_base_dir` - confirm.
    historic_df = pd.read_csv(f'{output_base_dir}/hydrometric-daily-mean/{station_id}_{variable}.csv')
    realtime_df = pd.read_csv(f'{output_base_dir}/hydrometric-realtime/{station_id}_{variable}.csv')
    analysis_df = pd.read_csv(f'{output_base_dir}/DHPS-Analysis_1km_RiverDischarge/{station_id}_analysis.csv')
    forecast_df = pd.read_csv(f'{output_base_dir}/DHPS_1km_RiverDischarge/{station_id}_forecast.csv')
    png_path = f'{output_base_dir}/{station_id}_{variable}_forecast_hydrograph.png'

    plot_forecast_hydrograph_with_historical_ranges(station_id,variable, historic_df, realtime_df,analysis_df,forecast_df, save_png=True, png_path=png_path)