# Download and process HydroLAKES

In [28]:
import geopandas as gpd
import os
import sys
import shutil
import zipfile
from pathlib import Path
# Put the directory above the current working directory on the module search path;
# presumably that is where the local python_cs_functions module imported next lives.
sys.path.append(str(Path.cwd().parent))
import python_cs_functions as cs

## Config handling

In [2]:
# Relative path to the configuration file that the following cells read their settings from
config_file = "../0_config/config.txt"

In [8]:
# Look up every setting this notebook needs from the config file; the keys are
# queried in the same order as the names on the left-hand side.
# NOTE(review): lake_path is read here but not used in the cells visible below — confirm it is still needed.
data_path, geospatial_temp_path, lake_path, lake_url = (
    cs.read_from_config(config_file, key)
    for key in ('data_path', 'geospatial_temp_path', 'lake_path', 'lake_url')
)

### Download data

In [9]:
# Folder the HydroLAKES download is placed in: <data_path>/<geospatial_temp_path>/hydrolakes/download
download_folder = Path(data_path, geospatial_temp_path, 'hydrolakes', 'download')

In [10]:
# Create the download folder together with any missing parent folders;
# exist_ok=True makes this a no-op when the folder is already there.
download_folder.mkdir(parents=True, exist_ok=True)

In [11]:
# Hand the URL and target folder to the project's download helper.
# NOTE(review): judging by its name and logged output it saves the file at lake_url into download_folder — confirm in python_cs_functions.
cs.download_url_into_folder(lake_url,download_folder)

Successfully downloaded https://data.hydrosheds.org/file/hydrolakes/HydroLAKES_polys_v10_shp.zip


### Unzip

In [45]:
# Path of the archive inside the download folder, named after the last component of the URL
# (assumes the download helper stored it under that name — TODO confirm).
file = download_folder.joinpath(os.path.basename(lake_url))

In [47]:
# Unpack every member of the archive into the download folder and remember where each one ended up
with zipfile.ZipFile(file, 'r') as zip_ref:
    # ZipFile.extract returns the normalized path it actually created, so record that
    # instead of re-joining the member name by hand: member names are sanitized on
    # extraction, and a hand-built path could point at a location that does not exist.
    extracted_files = [
        zip_ref.extract(member, download_folder)
        for member in zip_ref.infolist()
    ]

### Subset to area of interest

In [53]:
# Folder the subsetted shapefile is written to: <data_path>/<geospatial_temp_path>/hydrolakes/raw
raw_folder = Path(data_path, geospatial_temp_path, 'hydrolakes', 'raw')

In [54]:
# Create the output folder together with any missing parent folders;
# exist_ok=True makes this a no-op when the folder is already there.
raw_folder.mkdir(parents=True, exist_ok=True)

In [51]:
# Keep only the extracted paths that end in '.shp' (the shapefile itself, not its sidecar files)
shp_file = [path for path in extracted_files if path.endswith('.shp')]

# Stop here with a clear message rather than with a bare IndexError when shp_file[0] is used later
if not shp_file:
    raise FileNotFoundError("No .shp file found among the files extracted from the archive")

In [40]:
# Load the first shapefile found among the extracted files with geopandas
shp = gpd.read_file(shp_file[0])

In [42]:
# Keep only the rows whose 'Continent' attribute is exactly 'North America'
shp_na = shp.loc[shp['Continent'].eq('North America')]

In [57]:
# Name the output after the source shapefile, inserting '_NorthAmerica' before the extension.
# Path.stem strips only the final extension, whereas str.replace would rewrite every
# '.shp' occurrence that happens to appear in the file name.
file_name = Path(shp_file[0]).stem + '_NorthAmerica.shp'

# Write the North America subset into the raw folder
shp_na.to_file(raw_folder / file_name)

### Remove temporary data to save space

In [58]:
# Delete the download folder and everything in it, but only if it is actually present
if download_folder.is_dir():
    shutil.rmtree(download_folder)