# Download Water Survey of Canada shapefiles
This notebook downloads the 2016 release of the shapefiles. A more recent release exists, but it does not cover exactly the same set of basins, so we need both.

In [1]:
import sys
import shutil
import requests
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Relative path to the configuration file that the cells below read their settings from
config_file = '../0_config/config.txt'

In [3]:
# Look up the settings this notebook needs in the config file.
# Each call returns a pair; only the first element is used here.
data_path, _ = cs.read_from_config(config_file, 'data_path')
shps_path, _ = cs.read_from_config(config_file, 'ref_shps_path')
file_url, _ = cs.read_from_config(config_file, 'can_wsc_2016_shps_url')

### Make output folder

In [6]:
# Build the folder the download is saved to: <data_path>/<shps_path>/RHBN-CAN/WSC2016
download_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN', 'WSC2016')

In [7]:
# Create the download folder, including any missing parent folders;
# an already-existing folder is not an error
download_folder.mkdir(exist_ok=True, parents=True)

### Get the data
Note: there appears to be no clean way to verify that a file was downloaded correctly other than comparing hashes, so we simply assume that the download is complete and correct.

In [8]:
# Retry settings: maximum number of download attempts before giving up
retries_max = 10

In [9]:
# Derive the local file name from the URL: take everything after the last '/'
# and remove any leading/trailing whitespace (e.g. a stray newline from the config file)
file_name = file_url.rsplit('/', 1)[-1].strip()

In [19]:
# The server blocks requests that do not specify a user agent, so we send one explicitly.
# The value below was found as follows:
# 1. Open the download page (https://donnees.ec.gc.ca/data/water/products/national-hydrometric-network-basin-polygons/) in Chrome
# 2. Open the developer tools (F12)
# 3. Start the download manually
# 4. Restart the request with the recording option enabled (ctrl + r)
# 5. Read the User-Agent value on the Network tab of the developer tools
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/104.0.0.0 Safari/537.36'
    ),
}

In [15]:
# Download the file, re-trying up to `retries_max` times if an attempt fails.
# A failed attempt is any exception raised while connecting, while reading the
# response, or while writing to disk, as well as any HTTP error status.

# (connect, read) timeouts in seconds. Without a timeout a stalled connection
# would hang forever and the retry logic would never get a chance to run.
# The read timeout limits the wait for the next chunk of data, not the total
# duration of the download.
request_timeout = (30, 120)

for retries_cur in range(1, retries_max + 1):
    try:

        # Send a HTTP request to the server and keep the HTTP response in `response`.
        # 'stream=True' ensures that only response headers are downloaded initially
        with requests.get(file_url.strip(), headers=headers, stream=True,
                          timeout=request_timeout) as response:

            # Turn HTTP error statuses (4xx/5xx) into exceptions. Otherwise the
            # server's error page would be written to disk under the archive's
            # name and the attempt would wrongly count as a success.
            response.raise_for_status()

            # Let the raw stream undo any content encoding (gzip/deflate) while reading
            response.raw.decode_content = True

            # Write to file; 'wb' truncates whatever an earlier failed attempt left behind
            with open(download_folder / file_name, 'wb') as data:
                shutil.copyfileobj(response.raw, data)

    except Exception as e:
        # Broad catch is deliberate: any failure (network, HTTP status, disk) triggers a retry
        print('Error downloading ' + file_url + ' on try ' + str(retries_cur) + ' with error: ' + str(e))
    else:
        # Download finished without errors, no further tries needed
        break
else:
    # Only reached if no attempt succeeded; report it instead of finishing silently.
    # Any file left in the download folder may be incomplete.
    print('Failed to download ' + file_url + ' after ' + str(retries_max) + ' tries')