# Download WSC flow data
Flow data can be accessed through a URL request. 

Sources:
- GUI: https://wateroffice.ec.gc.ca/services/links_e.html
- Readme: https://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/Document/WebService_Guidelines.pdf

Parameters:
- Streamflow; ID = `47`

Approval flags:
- Either `provisional` or `final`

Data quality flags:
- `-1`: `UNSPECIFIED`: Automatically assigned during data recording
- `10`: `ICE`: Data may be affected by the presence of ice (backwater effects)
- `20`: `ESTIMATED`: Estimated value
- `30`: `PARTIAL DAY`: More than 120 minutes missing during calculation of daily mean
- `40`: `DRY`: Water level has dropped below what the sensor can measure
- `50`: `REVISED`: Previously approved data that were subsequently reviewed and edited

Data are returned as `.csv`, with timestamps in UTC.

### Reference
“Extracted from the Environment and Climate Change Canada Real-time Hydrometric Data web site (https://wateroffice.ec.gc.ca/mainmenu/real_time_data_index_e.html) on 2023-04-05”

In [1]:
import sys
import time
import pandas as pd
import urllib.request
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Relative path to the configuration file that the cells below read their settings from
config_file = "../0_config/config.txt"

In [3]:
# Pull the settings this notebook needs out of the config file
data_path = cs.read_from_config(config_file, 'data_path')

# CAMELS-spat metadata: folder, metadata file name, and unusable-stations file name
# NOTE(review): the metadata folder is read from the 'cs_basin_path' key, the same key
# used for the basin folder below — confirm this is intended
cs_meta_path, cs_meta_name, cs_unusable_name = (
    cs.read_from_config(config_file, key)
    for key in ('cs_basin_path', 'cs_meta_name', 'cs_unusable_name')
)

# Basin folder, resolved underneath the data root
cs_basin_folder = cs.read_from_config(config_file, 'cs_basin_path')
basins_path = Path(data_path).joinpath(cs_basin_folder)

# Data period: start and end values that are inserted into the download URL
# NOTE(review): the end value is read from a key named 'wsc_start_e' — verify against the config file
time_s, time_e = (
    cs.read_from_config(config_file, key)
    for key in ('wsc_start_t', 'wsc_start_e')
)

### Data loading

In [4]:
# Resolve the CAMELS-spat metadata folder underneath the data root, then load the metadata table
cs_meta_path = Path(data_path).joinpath(cs_meta_path)
cs_meta = pd.read_csv(cs_meta_path.joinpath(cs_meta_name))

In [5]:
# Load the list of unusable stations. The station IDs are read with dtype `object`
# so that they stay strings and keep their leading zeros.
cs_unusable = pd.read_csv(
    cs_meta_path.joinpath(cs_unusable_name),
    dtype=dict(Station_id=object),
)

### Loop over sites and download the flow record

In [6]:
# General download settings
main_url = "https://wateroffice.ec.gc.ca/services/real_time_data/csv/"  # base URL the request is built on
var = "47"  # parameter ID for streamflow
# NOTE(review): an earlier comment here mentioned "00065 for gage height"; that code is not
# among the parameter IDs listed at the top of this notebook — TODO confirm the water-level ID

In [20]:
# Download the flow record for every Canadian station in the metadata table.
# NOTE(review): this cell carries a later execution count than the cells after it and is a
# near-copy of the next download cell, which saves to the raw path instead — confirm which
# of the two is meant to be kept.
dnf = []  # stations that returned no data; kept so they can be printed and checked later
for ix, row in cs_meta.iterrows():

    # Stations outside Canada are not handled here
    if row.Country != 'CAN':
        continue

    # Resolve the station ID and the output file for this basin
    site, _, _, csv_path, _, _ = cs.prepare_flow_download_outputs(cs_meta, ix, basins_path)

    # Nothing to do when the file for this station is already on disk
    if csv_path.is_file():
        continue

    # Wait one second before each request so the server is not flooded
    time.sleep(1)

    # Build the request URL for this station, parameter and period
    url = f'{main_url}inline?stations[]={site}&parameters[]={var}&start_date={time_s}%2000:00:00&end_date={time_e}%2023:59:59'

    # Fetch the CSV straight into a dataframe (second column becomes the index) and save it;
    # the file is written even when the response contains no data rows
    flow = pd.read_csv(url, index_col=[1], parse_dates=True)
    flow.to_csv(csv_path)

    # Report the outcome; a response with only a header yields a dataframe of length zero
    if len(flow) > 0:
        print(f'Completed {site}')
    else:
        print(f'No data downloaded for {site}')
        dnf.append(site)

No data downloaded for 01AD002
Completed 01AD003
No data downloaded for 01AE001
Completed 01AF007
Completed 01AF009
Completed 01AJ003
Completed 01AJ004
Completed 01AJ010
Completed 01AK001
Completed 01AK006
Completed 01AK007
Completed 01AL002
Completed 01AL004
Completed 01AM001
Completed 01AN002
Completed 01AP002
Completed 01AP004
Completed 01AP006
Completed 01AQ001
Completed 01BC001
No data downloaded for 01BD008
Completed 01BE001
No data downloaded for 01BG005
No data downloaded for 01BG008
No data downloaded for 01BG009
No data downloaded for 01BH005
No data downloaded for 01BH010
Completed 01BJ003
Completed 01BJ007
Completed 01BJ010
Completed 01BJ012
Completed 01BL002
Completed 01BL003
Completed 01BO001
Completed 01BP001
Completed 01BP002
Completed 01BQ001
Completed 01BS001
Completed 01BU002
Completed 01BU009
Completed 01BV004
Completed 01BV006
Completed 01CA003
Completed 01CC005
Completed 01CD005
Completed 01DG003
Completed 01DJ005
Completed 01DL001
Completed 01DP004
Completed 01DR

Completed 05DB006
No data downloaded for 05DC006
Completed 05DC011
No data downloaded for 05DC012
Completed 05DD004
Completed 05DD007
Completed 05DD009
Completed 05DE007
Completed 05DF003
Completed 05DF004
Completed 05DF006
Completed 05DF007
Completed 05EA001
Completed 05EA002
Completed 05EA005
Completed 05EA010
No data downloaded for 05EB902
No data downloaded for 05EC002
Completed 05EC005
Completed 05ED002
Completed 05EE005
Completed 05EE006
Completed 05EE009
No data downloaded for 05EF004
No data downloaded for 05EF005
No data downloaded for 05EF006
No data downloaded for 05EG004
Completed 05FA001
Completed 05FA012
Completed 05FA014
Completed 05FA024
Completed 05FB002
Completed 05FC002
Completed 05FC004
Completed 05FC007
Completed 05FE002
No data downloaded for 05FF003
Completed 05GA008
Completed 05GA010
Completed 05GA012
No data downloaded for 05GB004
No data downloaded for 05GC007
No data downloaded for 05GD002
No data downloaded for 05GF001
No data downloaded for 05GF002
No data 

Completed 08JB003
Completed 08JD006
Completed 08JE001
Completed 08JE004
Completed 08KA001
Completed 08KA004
Completed 08KA005
Completed 08KA007
Completed 08KA009
Completed 08KB001
Completed 08KB003
Completed 08KB006
Completed 08KC001
Completed 08KD007
Completed 08KE016
Completed 08KE024
Completed 08KG001
Completed 08KH001
Completed 08KH006
Completed 08KH010
No data downloaded for 08KH011
Completed 08KH019
Completed 08LA001
Completed 08LB020
Completed 08LB024
Completed 08LB038
Completed 08LB064
No data downloaded for 08LB069
Completed 08LB076
Completed 08LC040
Completed 08LD001
No data downloaded for 08LD003
Completed 08LE024
Completed 08LE027
Completed 08LE031
Completed 08LE077
Completed 08LE108
No data downloaded for 08LF023
Completed 08LF094
Completed 08LG008
Completed 08LG016
Completed 08LG048
Completed 08LG056
Completed 08MA001
Completed 08MA002
Completed 08MA003
Completed 08MA006
No data downloaded for 08MB005
Completed 08MB006
Completed 08MB007
Completed 08ME023
Completed 08ME025

In [7]:
# Download the flow record for every Canadian station in the metadata table
dnf = []  # stations that returned no data; kept so they can be printed and checked later
for ix, row in cs_meta.iterrows():

    # Stations outside Canada are not handled here
    if row.Country != 'CAN':
        continue

    # Resolve the station ID and the raw output file for this basin
    site, _, raw_path, _, _, _ = cs.prepare_flow_download_outputs(cs_meta, ix, basins_path)

    # The service already returns csv, so the raw file is stored as csv rather than txt
    raw_name = str(raw_path).replace('_raw.txt','_raw.csv')
    raw_path = Path(raw_name)

    # Nothing to do when the raw file for this station is already on disk
    if raw_path.is_file():
        continue

    # Wait one second before each request so the server is not flooded
    time.sleep(1)

    # Build the request URL for this station, parameter and period
    url = f'{main_url}inline?stations[]={site}&parameters[]={var}&start_date={time_s}%2000:00:00&end_date={time_e}%2023:59:59'

    # Fetch the CSV straight into a dataframe (second column becomes the index) and save it;
    # the file is written even when the response contains no data rows
    flow = pd.read_csv(url, index_col=[1], parse_dates=True)
    flow.to_csv(raw_path)

    # Report the outcome; a response with only a header yields a dataframe of length zero
    if len(flow) > 0:
        print(f'Completed {site}')
    else:
        print(f'No data downloaded for {site}')
        dnf.append(site)

No data downloaded for 01AD002
Completed 01AD003
No data downloaded for 01AE001
Completed 01AF007
Completed 01AF009
Completed 01AJ003
Completed 01AJ004
Completed 01AJ010
Completed 01AK001
Completed 01AK006
Completed 01AK007
Completed 01AL002
Completed 01AL004
Completed 01AM001
Completed 01AN002
Completed 01AP002
Completed 01AP004
Completed 01AP006
Completed 01AQ001
Completed 01BC001
No data downloaded for 01BD008
Completed 01BE001
No data downloaded for 01BG005
No data downloaded for 01BG008
No data downloaded for 01BG009
No data downloaded for 01BH005
No data downloaded for 01BH010
Completed 01BJ003
Completed 01BJ007
Completed 01BJ010
Completed 01BJ012
Completed 01BL002
Completed 01BL003
Completed 01BO001
Completed 01BP001
Completed 01BP002
Completed 01BQ001
Completed 01BS001
Completed 01BU002
Completed 01BU009
Completed 01BV004
Completed 01BV006
Completed 01CA003
Completed 01CC005
Completed 01CD005
Completed 01DG003
Completed 01DJ005
Completed 01DL001
Completed 01DP004
Completed 01DR

Completed 05DB006
No data downloaded for 05DC006
Completed 05DC011
No data downloaded for 05DC012
Completed 05DD004
Completed 05DD007
Completed 05DD009
Completed 05DE007
Completed 05DF003
Completed 05DF004
Completed 05DF006
Completed 05DF007
Completed 05EA001
Completed 05EA002
Completed 05EA005
Completed 05EA010
No data downloaded for 05EB902
No data downloaded for 05EC002
Completed 05EC005
Completed 05ED002
Completed 05EE005
Completed 05EE006
Completed 05EE009
No data downloaded for 05EF004
No data downloaded for 05EF005
No data downloaded for 05EF006
No data downloaded for 05EG004
Completed 05FA001
Completed 05FA012
Completed 05FA014
Completed 05FA024
Completed 05FB002
Completed 05FC002
Completed 05FC004
Completed 05FC007
Completed 05FE002
No data downloaded for 05FF003
Completed 05GA008
Completed 05GA010
Completed 05GA012
No data downloaded for 05GB004
No data downloaded for 05GC007
No data downloaded for 05GD002
No data downloaded for 05GF001
No data downloaded for 05GF002
No data 

Completed 08JB003
Completed 08JD006
Completed 08JE001
Completed 08JE004
Completed 08KA001
Completed 08KA004
Completed 08KA005
Completed 08KA007
Completed 08KA009
Completed 08KB001
Completed 08KB003
Completed 08KB006
Completed 08KC001
Completed 08KD007
Completed 08KE016
Completed 08KE024
Completed 08KG001
Completed 08KH001
Completed 08KH006
Completed 08KH010
No data downloaded for 08KH011
Completed 08KH019
Completed 08LA001
Completed 08LB020
Completed 08LB024
Completed 08LB038
Completed 08LB064
No data downloaded for 08LB069
Completed 08LB076
Completed 08LC040
Completed 08LD001
No data downloaded for 08LD003
Completed 08LE024
Completed 08LE027
Completed 08LE031
Completed 08LE077
Completed 08LE108
No data downloaded for 08LF023
Completed 08LF094
Completed 08LG008
Completed 08LG016
Completed 08LG048
Completed 08LG056
Completed 08MA001
Completed 08MA002
Completed 08MA003
Completed 08MA006
No data downloaded for 08MB005
Completed 08MB006
Completed 08MB007
Completed 08ME023
Completed 08ME025

# Update the `unusable` file list

In [9]:
# List every station whose download came back empty, so these basins can be checked
for entry in dnf:
    message = f'No data downloaded for {entry}'
    print(message)

No data downloaded for 01AD002
No data downloaded for 01AE001
No data downloaded for 01BD008
No data downloaded for 01BG005
No data downloaded for 01BG008
No data downloaded for 01BG009
No data downloaded for 01BH005
No data downloaded for 01BH010
No data downloaded for 02BF005
No data downloaded for 02BF006
No data downloaded for 02BF007
No data downloaded for 02BF008
No data downloaded for 02BF009
No data downloaded for 02BF012
No data downloaded for 02BF013
No data downloaded for 02DB007
No data downloaded for 02ED014
No data downloaded for 02FD001
No data downloaded for 02FF004
No data downloaded for 02GB007
No data downloaded for 02GH003
No data downloaded for 02HG001
No data downloaded for 02JB013
No data downloaded for 02LB017
No data downloaded for 02LC027
No data downloaded for 02LC043
No data downloaded for 02LD005
No data downloaded for 02LG005
No data downloaded for 02NE011
No data downloaded for 02NF003
No data downloaded for 02OA057
No data downloaded for 02OB032
No data 

In [11]:
# Country label stored with every station added to the unusable list
country = "CAN"

In [10]:
# Reason stored with every station added to the unusable list (the text ends with a space)
reason = "No real-time discharge observations available "

In [13]:
# Make a dataframe that lists the basins we cannot use
tmp = pd.DataFrame({'Country': country,
                    'Station_id': dnf,
                    'Reason': reason})

In [18]:
# Append the stations without data to the existing list of unusable stations.
# ignore_index=True renumbers the rows 0..n-1 and discards the old row labels; a plain
# reset_index() would instead turn those labels into an extra 'index' column, which
# would then be written to the unusable-stations file as if it were data.
cs_unusable = pd.concat([cs_unusable, tmp], ignore_index=True)

In [19]:
# Write the updated list back to the file it was loaded from, without the row index
cs_unusable.to_csv(
    cs_meta_path.joinpath(cs_unusable_name),
    index=False,
    encoding='utf-8',
)