# Subset Water Survey of Canada 2016 shapefiles
The source data contains 1675 stations. We only want to retain those that are part of the Reference Hydrometric Basin Network.

In [1]:
import sys
import fiona
import pandas as pd
import geopandas as gpd
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling

In [2]:
# Location of the config file, given relative to the current working directory.
# This value is passed to every cs.read_from_config() call below.
config_file = '../0_config/config.txt'

In [3]:
# Pull the settings this notebook needs out of the config file.
# read_from_config returns a pair; only the first element is used here.
data_path, _ = cs.read_from_config(config_file, 'data_path')
shps_path, _ = cs.read_from_config(config_file, 'ref_shps_path')
rhbn_meta, _ = cs.read_from_config(config_file, 'can_rhbn_meta_url')

### Define where the shapefile is

In [4]:
# Folder that holds the WSC 2016 shapefile: <data_path>/<shps_path>/RHBN-CAN/WSC2016
data_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN', 'WSC2016')

In [5]:
# Name of the WSC 2016 basin shapefile inside data_folder.
# The same name is used further down for both reading and writing.
file_name = 'WSC2016_basins.shp'

### Define where the metadata file is

In [6]:
# Folder that holds the RHBN metadata file: <data_path>/<shps_path>/RHBN-CAN
meta_folder = Path(data_path).joinpath(shps_path, 'RHBN-CAN')

In [7]:
# Derive the metadata file name from the URL: keep everything after the last '/',
# trim surrounding whitespace, then swap 'xlsx' for 'csv'.
# NOTE: str.replace substitutes every 'xlsx' occurrence in the name, not only the extension.
meta_name = rhbn_meta.rsplit('/', 1)[-1].strip().replace('xlsx', 'csv')

### Subset WSC 2016 shapes to RHBN 2020 basins

In [8]:
# Load the RHBN 2020 station list (CSV) into a DataFrame
df = pd.read_csv(str(meta_folder.joinpath(meta_name)))

In [9]:
# Load the WSC 2016 basin shapefile into a GeoDataFrame
gdf = gpd.read_file(data_folder.joinpath(file_name))

In [29]:
# Boolean mask over the shapefile rows: True where the row's 'Station' value
# also appears in the 'STATION_NUMBER' column of the metadata list
mask = gdf['Station'].isin(df['STATION_NUMBER'])

In [30]:
# Keep only the catchments whose station is on the RHBN list
rhbn_shp = gdf.loc[mask]

In [32]:
# Write the subset to disk. The target is the same path the shapefile was read
# from, so the original WSC 2016 file is replaced by the RHBN-only subset.
rhbn_shp.to_file(data_folder.joinpath(file_name))

In [15]:
# Report how many reference basins have a matching polygon in the subset
n_matched, n_reference = len(rhbn_shp), len(df)
print('Found {} shapefiles matching one of {} Reference Hydrometric Basins'.format(n_matched, n_reference))

Found 876 shapefiles matching one of 1027 Reference Hydrometric Basins
