## Access Hydrological Observations

This notebook is used to access hydrological data from the Meteorological Service of Canada (MSC) API.
This includes:
- Real time gauge data for discharge and water level
- Historic daily discharge
- Annual statistics for discharge and water level

It was developed using helpful, open access information provided by [MSC GeoMet services](https://eccc-msc.github.io/open-data/usage/use-case_oafeat/use-case_oafeat-script_en/).

In [None]:
# Import needed modules
import sys
import pandas as pd
from pathlib import Path
import yaml

# Import local scripts
sys.path.append('../scripts')
from geospatial_plotting import plot_watershed_flowlines_stations
from scalar_data_access import retrieve_data_from_api
from hydrograph_plotting import plot_detailed_hydrograph
# Plot in notebook
%matplotlib inline

# Configure notebook logging at INFO level and create this notebook's logger
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Silence the 'matplotlib.category' logger so its messages do not clutter cell output
matplotlib_category_logger = logging.getLogger('matplotlib.category')
matplotlib_category_logger.disabled = True

# add autoreload to automatically reload modules when they change
%load_ext autoreload
%autoreload 2

### Read settings configuration file

In [None]:
config_file = "../settings/general_settings.yaml"

# Load the notebook settings from the YAML file.
# NOTE(review): FullLoader can construct more than plain data; if the settings
# file only holds plain mappings/scalars, yaml.safe_load would be the stricter
# choice -- confirm before switching.
with open(config_file) as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

# Base URL of the MSC open data API, passed to every download call
api_url = config['msc_open_data_settings']['api_url']

# Input (GIS) and output directories; the output directory is created if missing
paths_cfg = config['paths']
gis_data_dir = paths_cfg['gis_data']
output_dir = Path(paths_cfg['output_dir'])
output_dir.mkdir(parents=True, exist_ok=True)

# Full paths to the GIS inputs, resolved relative to the GIS data directory
gis_cfg = config['gis_data']
stations_csv = Path(gis_data_dir) / gis_cfg['hydro_stns_csv']
watershed_shp = Path(gis_data_dir) / gis_cfg['basins_shp']
flowlines_shp = Path(gis_data_dir) / gis_cfg['flowlines_shp']


### Plot watersheds and stations

In [None]:
# Pass the basin shapefile, flowline shapefile and station CSV paths (built in the
# settings cell) to the project's plotting helper.
# NOTE(review): presumably this draws all three layers on one map -- confirm in
# scripts/geospatial_plotting.py.
plot_watershed_flowlines_stations(watershed_shp, flowlines_shp, stations_csv)

## Define stations to be imported

Hydrometric station data can be accessed and downloaded.
This covers near-real-time data as well as the historic record.

In this notebook, the stations are read from a provided csv file.
This could be substituted with any list of stations. Station information can also be pulled from the MSC API.

In [None]:
# Read in the list of hydrometric stations; the "ID" column supplies the station
# identifiers that are passed to every download call in this notebook.
hydro_stations_df = pd.read_csv(stations_csv)
search_stations = hydro_stations_df["ID"].tolist()

# Log the identifiers that will actually be searched, not the whole station
# table, so the message matches its label and the output stays short.
logger.info(f'Search stations: {search_stations}')

### Download Real Time Hydrometric Data 

In [None]:
# Collection queried in this cell and the name of its timestamp column
collection = 'hydrometric-realtime'
datetime_column = "DATETIME"

# Run the same download once per variable: discharge first, then water level.
# NOTE(review): the return value is presumably the stations that have data for
# the variable -- confirm in scripts/scalar_data_access.py.
real_time_results = {}
for download_variable in ("DISCHARGE", "LEVEL"):
    real_time_results[download_variable] = retrieve_data_from_api(
        search_stations, collection, download_variable, datetime_column, api_url, output_dir
    )

real_time_discharge_stations = real_time_results["DISCHARGE"]
real_time_level_stations = real_time_results["LEVEL"]


### Download Historic Daily Mean Data

In [None]:
# Collection queried in this cell and the name of its date column
collection = 'hydrometric-daily-mean'
datetime_column = "DATE"

# Each request pairs the value column with its symbol column, which is passed
# as an extra variable. Discharge is requested first, then water level.
historic_requests = (
    ("DISCHARGE", ['DISCHARGE_SYMBOL_EN']),
    ("LEVEL", ['LEVEL_SYMBOL_EN']),
)

historic_results = {}
for download_variable, other_variables in historic_requests:
    historic_results[download_variable] = retrieve_data_from_api(
        search_stations, collection, download_variable, datetime_column, api_url, output_dir, other_variables
    )

historic_discharge_stations = historic_results["DISCHARGE"]
historic_level_stations = historic_results["LEVEL"]

### Download Annual Statistics

In [None]:

# Collection queried in this cell
collection = 'hydrometric-annual-statistics'

# Each request is (date column, value column, extra symbol column).
# The annual maximum is requested first, then the annual minimum.
annual_requests = (
    ('MAX_DATE', 'MAX_VALUE', ['MAX_SYMBOL_EN']),
    ('MIN_DATE', 'MIN_VALUE', ['MIN_SYMBOL_EN']),
)

annual_results = {}
for datetime_column, download_variable, other_variables in annual_requests:
    annual_results[download_variable] = retrieve_data_from_api(
        search_stations, collection, download_variable, datetime_column, api_url, output_dir, other_variables
    )

max_annual_stations = annual_results['MAX_VALUE']
min_annual_stations = annual_results['MIN_VALUE']


In [None]:
# Variable to plot; it selects which downloaded CSV files are read below.
variable = 'DISCHARGE'

# Stations to plot. They are kept in their own list so that the station lists
# produced by the download cells are not overwritten by this cell.
stations_to_plot = ['05BH004']

for station_id in stations_to_plot:
    historic_csv = Path(output_dir, 'hydrometric-daily-mean', f'{station_id}_{variable}.csv')
    realtime_csv = Path(output_dir, 'hydrometric-realtime', f'{station_id}_{variable}.csv')

    # The hydrograph needs both the historic and the real-time record. Skip the
    # station with a warning instead of failing on a missing file.
    missing_files = [str(csv_path) for csv_path in (historic_csv, realtime_csv) if not csv_path.exists()]
    if missing_files:
        logger.warning(f'Skipping station {station_id}: missing {variable} file(s): {missing_files}')
        continue

    historic_df = pd.read_csv(historic_csv)
    realtime_df = pd.read_csv(realtime_csv)
    png_path = f'{output_dir}/{station_id}_{variable}_hydrograph.png'

    # Plot the station's hydrograph and save it as a PNG in the output directory
    plot_detailed_hydrograph(station_id, variable, historic_df, realtime_df, save_png=True, png_path=png_path)