## Access NSRPS Analysis and Forecasts

This notebook is used to access hydrological data from the Meteorological Service of Canada WSC Service.
This includes:
- Retrieving model analysis
- Retrieving deterministic model results
- Retrieving ensemble model results

It was developed using helpful, open access information provided by [NHS NSRPS Streamflow Tutorials](https://github.com/NHS-Dev/geomet-nsrps-streamflow-tutorials/).

In [None]:
import configparser
import xarray as xr 
import pandas as pd
from pathlib import Path
import yaml

# Import local scripts
import sys
sys.path.append('../scripts')
from nsrps_data_access import query_wms_service_for_forecast_times, query_wms_service_for_analysis_times, query_wcs_service_for_forecast_data,query_wcs_service_for_analysis_data, extract_station_from_grid,bias_correct_forecast
from hydrograph_plotting import plot_detailed_hydrograph

# Set up logger
import logging
# Configure the root logger so that INFO-level (and higher) messages are shown
logging.basicConfig(level=logging.INFO)
# Logger used for messages emitted from this notebook itself
logger = logging.getLogger(__name__)

# Disable the 'matplotlib.category' logger so its messages do not clutter the output
logging.getLogger('matplotlib.category').disabled = True

# add autoreload to automatically reload modules when they change
%load_ext autoreload
%autoreload 2

### Read settings

In [None]:
# General (non-login) settings for the notebook live in this YAML file
config_file = "../settings/general_settings.yaml"

# Parse the YAML file into a nested dict.
# NOTE(review): FullLoader is acceptable for a trusted local file; consider
# yaml.safe_load if this file could ever come from an untrusted source.
with open(config_file) as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

# Base URL of the MSC open data API, as configured in the settings file
api_url = config['msc_open_data_settings']['api_url']

# Input (GIS) and output directories; the output directory is created on demand
gis_data_dir = config['paths']['gis_data']
output_base_dir = Path(config['paths']['output_dir'])
output_base_dir.mkdir(parents=True, exist_ok=True)

# Station tables are stored inside the GIS data directory
stations_csv = Path(gis_data_dir) / config['gis_data']['hydro_stns_csv']
nsrps_station_location_csv = Path(gis_data_dir) / config['gis_data']['nsrps_stns_csv']

### Read in login settings

In [None]:
# Load the login information from the INI-style config file.
login_config = configparser.ConfigParser()
# Open the file in a context manager so the handle is closed as soon as parsing
# finishes; passing a bare open() to read_file() leaves it open. A missing file
# still raises FileNotFoundError, exactly as before.
with open('../settings/config.cfg') as login_cfg_file:
    login_config.read_file(login_cfg_file)

# The [Login] section is what the WMS/WCS query helpers receive in later cells
login = login_config['Login']


### Retrieve Deterministic Forecast

In [None]:
# Layer to download; its name is also used as the output sub-folder
layer_name = 'DHPS_1km_RiverDischarge'
output_dir = output_base_dir / layer_name
output_dir.mkdir(exist_ok=True)

# Query WMS for the available forecast times, then fetch the matching data via WCS
newest_fcast, fcasthrs = query_wms_service_for_forecast_times(layer_name, login)
forecast_ds = query_wcs_service_for_forecast_data(
    layer_name,
    login,
    newest_fcast,
    fcasthrs,
)

# Keep a copy on disk, named after `newest_fcast`
forecast_ds.to_netcdf(output_dir / f'{newest_fcast}.nc')

### Read in NSRPS stations

In [None]:
# Load the NSRPS station locations and the table of hydrometric stations
nsrps_station_locations_df = pd.read_csv(nsrps_station_location_csv)
hydro_stations_df = pd.read_csv(stations_csv)

# IDs of the hydrometric stations to look for among the NSRPS stations
search_stations = hydro_stations_df["ID"].tolist()

# Keep only the NSRPS stations whose STATION_NUMBER appears in the hydrometric
# station table (per the original note, these are the Bow watershed stations)
nsrps_stations = nsrps_station_locations_df.loc[
    nsrps_station_locations_df['STATION_NUMBER'].isin(search_stations)
]

### Extract forecast data for stations

In [None]:
# Re-open the forecast file that the download cell wrote to disk
forecast_ds = xr.open_dataset(output_dir / f'{newest_fcast}.nc')

# iterrows() yields (index, row) tuples; the whole tuple is passed to the helper
# and the row part (station[1]) supplies the station ID for the file name
for station in hydro_stations_df.iterrows():
    station_data = extract_station_from_grid(station, forecast_ds)

    # Convert to a table and give the raster band a meaningful column name
    station_data_df = station_data.to_dataframe().rename(columns={'Band1': 'Discharge'})

    # One CSV per station
    station_data_df.to_csv(f'{output_dir}/{station[1]["ID"]}_forecast.csv')

### Download NSRPS Analysis and extract data for stations

In [None]:
# Analysis layer; as for the forecast, the layer name is also the output sub-folder
layer_name = 'DHPS-Analysis_1km_RiverDischarge'
output_dir = output_base_dir / layer_name
output_dir.mkdir(exist_ok=True)

# Query WMS for the analysis time, then fetch the corresponding data via WCS
analysis_time = query_wms_service_for_analysis_times(layer_name, login)
analysis_ds = query_wcs_service_for_analysis_data(layer_name, login, analysis_time)

# Keep a copy on disk, named after `analysis_time`
analysis_ds.to_netcdf(output_dir / f'{analysis_time}.nc')

# iterrows() yields (index, row) tuples; the whole tuple is passed to the helper
for station in hydro_stations_df.iterrows():
    station_data = extract_station_from_grid(station, analysis_ds)

    # Convert to a table and give the raster band a meaningful column name
    station_data_df = station_data.to_dataframe().rename(columns={'Band1': 'Discharge'})

    # One CSV per station
    station_data_df.to_csv(f'{output_dir}/{station[1]["ID"]}_analysis.csv')


### Plot stations

In [None]:
variable = 'DISCHARGE'
# NOTE(review): this flag shares its name with the bias_correct() function defined
# in a later cell; whichever cell ran last decides what the name refers to.
bias_correct = True

# Both lists currently hold every selected NSRPS station; replace with an
# explicit list such as ['05BB001'] to plot a single station.
real_time_level_stations = nsrps_stations['STATION_NUMBER'].values
historic_level_stations = nsrps_stations['STATION_NUMBER'].values

for station_id in real_time_level_stations:
    # Only stations present in both lists are plotted
    if station_id not in historic_level_stations:
        continue

    # Per-station inputs written by the earlier download/extraction steps
    historic_df = pd.read_csv(f'{output_base_dir}/hydrometric-daily-mean/{station_id}_{variable}.csv')
    realtime_df = pd.read_csv(f'{output_base_dir}/hydrometric-realtime/{station_id}_{variable}.csv')
    analysis_df = pd.read_csv(f'{output_base_dir}/DHPS-Analysis_1km_RiverDischarge/{station_id}_analysis.csv')
    forecast_df = pd.read_csv(f'{output_base_dir}/DHPS_1km_RiverDischarge/{station_id}_forecast.csv')
    png_path = f'{output_base_dir}/{station_id}_{variable}_forecast_hydrograph.png'

    # Optionally add a bias-corrected copy of the forecast to the plot
    forecast_bias_corrected_df = (
        bias_correct_forecast(realtime_df, forecast_df) if bias_correct else None
    )

    plot_detailed_hydrograph(
        station_id,
        variable,
        historic_df,
        realtime_df,
        forecast_df,
        analysis_df,
        threshold_df=None,
        forecast_bias_corrected_df=forecast_bias_corrected_df,
        save_png=True,
        png_path=png_path,
    )

In [None]:
# Inspect the real-time observations left over from the last station in the plotting loop
realtime_df

In [None]:
# Inspect the forecast table left over from the last station in the plotting loop
forecast_df

In [None]:
import pandas as pd

def bias_correct(measurements, model, measurement_col='DISCHARGE', model_col='Discharge'):
    """Shift a model series by a constant so it matches the latest measurement.

    The offset is the difference between the measurement and the model value at
    the last index label the two frames have in common. When the frames share no
    label, the last measurement is compared with the first model value instead.
    The same additive offset is then applied to every model value.

    Both frames are expected to be indexed by a comparable key (typically a
    timestamp), because the overlap is determined from the index.

    Args:
        measurements: DataFrame of observations containing ``measurement_col``.
        model: DataFrame of model output containing ``model_col``.
        measurement_col: Name of the value column in ``measurements``.
        model_col: Name of the value column in ``model``.

    Returns:
        An index-sorted copy of ``model`` with the offset added to ``model_col``.
        The input frames are not modified.

    Raises:
        ValueError: If either frame has no rows.
    """
    if len(measurements.index) == 0 or len(model.index) == 0:
        raise ValueError("bias_correct requires non-empty measurements and model frames")

    # sort_index() returns new objects, so the caller's frames stay untouched
    measurements = measurements.sort_index()
    corrected = model.sort_index()

    # Last label present in both frames; max() of an empty intersection is NaN/NaT
    last_overlap_time = measurements.index.intersection(corrected.index).max()

    # Without any overlap, fall back to the most recent measurement
    if pd.isna(last_overlap_time):
        last_overlap_time = measurements.index[-1]

    last_measurement_value = measurements.loc[last_overlap_time, measurement_col]

    # Use the model value at the overlap label, or the first model value when the
    # frames do not overlap
    if last_overlap_time in corrected.index:
        last_model_value = corrected.loc[last_overlap_time, model_col]
    else:
        last_model_value = corrected[model_col].iloc[0]

    # Additive offset that makes the model agree with the measurement
    offset = last_measurement_value - last_model_value

    # Vectorised addition instead of a per-element apply()
    corrected[model_col] = corrected[model_col] + offset

    return corrected

# Try bias_correct() on the frames left over from the last station in the plotting loop.
# NOTE(review): both frames were loaded with pd.read_csv() without index_col, so they
# presumably carry a default integer index here and the "overlap" is matched on row
# position rather than on timestamps — confirm this is intended.
test = bias_correct(realtime_df, forecast_df)

In [None]:
# Display the frame returned by bias_correct() in the previous cell
test