# Download Water Survey of Canada shapefiles
This is the 2022 release of the shapefiles. An older release also exists, but the two releases do not cover exactly the same set of basins, so we need both.

In [1]:
import sys
import shutil
import requests
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Specify where the config file can be found.
# NOTE: this is a relative path, so it is resolved against the current working directory.
config_file = '../0_config/config.txt'

In [15]:
# Look up every setting this notebook needs, in the order the variables are unpacked below.
# Each lookup returns a pair; only its first element (the value) is kept.
config_keys = ('data_path',
               'ref_shps_path',
               'can_wsc_2022_shps_url',
               'can_wsc_2022_meta_url',
               'can_wsc_2022_rdme_url')
data_path, shps_path, file_url, meta_url, rdme_url = [
    value for value, _ in (cs.read_from_config(config_file, key) for key in config_keys)
]

### Make output folder

In [4]:
# Build the download location: <data_path>/<shps_path>/RHBN-CAN/WSC2022
download_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN', 'WSC2022')

In [5]:
# Make sure the download folder exists: create any missing parent folders too,
# and do not raise an error if the folder is already there
download_folder.mkdir(parents=True, exist_ok=True)

### Prep downloads

In [24]:
def download_file(file_url,download_folder,file_name,
                  retries_max = 10, timeout = 60):
    '''
    Attempts to download [file_url] into [download_folder]/[file_name]; [retries_max=10] attempts.

    Parameters
    ----------
    file_url : str
        URL to download. Leading/trailing whitespace is stripped before use.
    download_folder : str or pathlib.Path
        Existing folder in which the file is stored.
    file_name : str
        Name of the file to create inside [download_folder].
    retries_max : int, optional
        Maximum number of download attempts (default 10).
    timeout : float or tuple or None, optional
        Timeout in seconds passed on to requests.get (default 60). Use None to
        wait indefinitely.

    Returns
    -------
    None
        Failures are reported with print() and never raised. If an attempt fails
        part-way through the transfer, an incomplete file may be left on disk.
    '''

    url = file_url.strip() # tolerate stray whitespace around the configured URL
    destination = Path(download_folder) / file_name

    # Make sure the connection is re-tried if it fails
    for retries_cur in range(1, retries_max + 1):
        try:

            # Send a HTTP request to the server and save the HTTP response in a response object called response
            # 'stream = True' ensures that only response headers are downloaded initially
            # 'timeout' prevents a stalled connection from blocking forever
            with requests.get(url, stream=True, timeout=timeout) as response:

                # Treat HTTP error codes (4xx/5xx) as a failed attempt, so that the
                # server's error page is not saved to disk as if it were the data
                response.raise_for_status()

                # Have the raw stream undo any transfer compression (gzip/deflate)
                response.raw.decode_content = True

                # Write to file
                with open(destination, 'wb') as data:
                    shutil.copyfileobj(response.raw, data)

        # Deliberately broad: reading response.raw can raise urllib3 errors that
        # are not wrapped in requests' own exception types
        except Exception as e:
            print('Error downloading ' + url + ' on try ' + str(retries_cur) + ' with error: ' + str(e))
        else:
            return

    # Every attempt failed: say so explicitly instead of ending silently
    print('Failed to download ' + url + ' after ' + str(retries_max) + ' tries')
    return

### Get the meta data files

In [26]:
# Derive the local file names: keep whatever follows the final '/' of each URL
# and trim surrounding whitespace
meta_name, rdme_name = (url.rsplit('/', 1)[-1].strip() for url in (meta_url, rdme_url))

In [25]:
# Download the metadata file into the download folder
download_file(meta_url,download_folder,meta_name)

In [29]:
# Download the second supporting file into the download folder
# (presumably the readme, going by the 'rdme' config key - confirm against the config file)
download_file(rdme_url,download_folder,rdme_name)

### Get the data
Note: apparently there is no clean way to check if a file has been downloaded correctly without comparing hash strings, so we'll just assume that it downloads correctly.

In [35]:
# The server stores the archives as 01.zip, 02.zip, ..., 11.zip:
# build the zero-padded, two-digit stems for 1 through 11
file_names = [f"{number:02d}" for number in range(1, 12)]
file_names

['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11']

In [36]:
# Fetch every archive: the '*' placeholder in the configured URL is swapped
# for the two-digit stem, and the result is stored as <stem>.zip
for file_name in file_names:
    this_file = f'{file_name}.zip'
    this_url = file_url.replace('*', file_name)
    download_file(file_url=this_url,
                  download_folder=download_folder,
                  file_name=this_file)