# Download and process soil data into single geotiffs at native resolution
Code based on: https://git.wur.nl/isric/soilgrids/soilgrids.notebooks/-/blob/master/markdown/webdav_from_Python.md

In [1]:
import os
import shutil
import sys
import tarfile
from pathlib import Path

from osgeo import gdal,ogr,osr

sys.path.append(str(Path().absolute().parent))
import python_cs_functions as cs

### Config handling                                                                                        

In [2]:
# Specify where the config file can be found (relative path);
# this is the file every `cs.read_from_config` call in this notebook reads from
config_file = '../0_config/config.txt'

In [49]:
# Look up every setting this notebook needs in the config file, in one pass.
# The order of the keys matches the order of the variables on the left-hand side.
data_path, geospatial_temp_path, soil_path, soil_url, download_area = [
    cs.read_from_config(config_file, key)
    for key in ('data_path', 'geospatial_temp_path', 'soil_path', 'soil_url', 'geospatial_area')
]

### Download data

In [52]:
# Temporary location for the raw SoilGrids downloads
download_folder = Path(data_path, geospatial_temp_path, 'soilgrids', 'download')

In [53]:
# Create the download folder, including missing parent folders; no error if it already exists
download_folder.mkdir(parents=True, exist_ok=True)

In [54]:
# Convert the configured download area into the coordinate format used for the SoilGrids download.
# NOTE: the function called here is defined in the 'Functions' section further down, so that cell must be run first
download_coordinates = geospatial_coordinates_to_download_coordinates(download_area, 'soilgrids')

Returning coordinates as type <class 'tuple'> for use with soilgrids download code.


In [66]:
# Bounding box handed to GDAL as `projWin`: (ulx, uly, lrx, lry) in the Homolosine projection.
# Use the coordinates derived from the configured download area instead of a hard-coded window,
# so that the download actually follows the 'geospatial_area' setting in the config file.
bb = download_coordinates

In [63]:
# Settings shared by the SoilGrids download cells
res    = 250 # output resolution, passed to GDAL as both xRes and yRes
igh    = "+proj=igh +lat_0=0 +lon_0=0 +datum=WGS84 +units=m +no_defs" # proj string for Homolosine projection
sg_url = f"/vsicurl?max_retry=3&retry_delay=1&list_dir=no&url={soil_url}" # config URL behind a /vsicurl prefix

In [71]:
# Options for gdal.Translate: crop to the window `bb` (expressed in the CRS given by `igh`),
# at `res` resolution, and write a tiled, DEFLATE-compressed BigTIFF GeoTIFF
kwargs = {
    'format': 'GTiff',
    'projWin': bb,
    'projWinSRS': igh,
    'xRes': res,
    'yRes': res,
    'creationOptions': ["TILED=YES", "COMPRESS=DEFLATE", "PREDICTOR=2", "BIGTIFF=YES"],
}

# NOTE(review): the output is written to the current working directory and the source URL is
# hard-coded; `download_folder` and `sg_url` are defined in earlier cells but are not used here.
# Confirm whether that is intended.
file = './crop_roi_igh_py.tif'
ds = gdal.Translate(file,
                    '/vsicurl?max_retry=3&retry_delay=1&list_dir=no&url=https://files.isric.org/soilgrids/latest/data/ocs/ocs_0-30cm_mean.vrt',
                    **kwargs)

# When GDAL exceptions are not enabled, gdal.Translate signals failure by returning None.
# Fail loudly here instead of silently continuing without an output file.
if ds is None:
    raise RuntimeError(f'gdal.Translate failed to create {file}')
del ds # drop the dataset reference so GDAL flushes and closes the output file

In [69]:
file

'C:\\Globus endpoint\\CAMELS_spat\\geospatial_temp\\soilgrids\\download\\crop_roi_igh_py.tif'

### Reproject and save as GeoTIFF

In [59]:
download_folder

WindowsPath('C:/Globus endpoint/CAMELS_spat/geospatial_temp/soilgrids/download')

### Functions

In [27]:
import numpy as np
from pyproj import Transformer

In [43]:
def geospatial_coordinates_to_download_coordinates(coords, product):

    '''Converts general download coordinates (lon_min,lon_max,lat_min,lat_max) to the data-specific ones.

    Parameters
    ----------
    coords : str
        Comma-separated bounding box in the order 'lon_min,lon_max,lat_min,lat_max'.
    product : str
        Name of the data product (case-insensitive). 'merit' and 'soilgrids' have dedicated handling.

    Returns
    -------
    str or tuple
        - 'merit': string 'lon_min,lon_max,lat_min,lat_max', with every bound snapped down to the
          closest download edge at or below it.
        - 'soilgrids': tuple (x, y, x, y) holding the top-left corner followed by the bottom-right
          corner, both converted from EPSG:4326 to the Homolosine projection.
        - anything else: the unmodified `coords` input (a warning is printed).

    Raises
    ------
    IndexError
        If `coords` has fewer than four values, or (for 'merit') a bound lies below the lowest edge.
    ValueError
        If one of the first four values cannot be converted to float.
    '''

    # Store coordinates as floats in individual variables.
    # `coords` itself is left untouched so the fallback branch can really return the input as output.
    bounds = coords.split(',')
    domain_min_lon = float(bounds[0])
    domain_max_lon = float(bounds[1])
    domain_min_lat = float(bounds[2])
    domain_max_lat = float(bounds[3])

    product_name = product.lower()

    if product_name == 'merit':

        # Download edge values
        lon_left_edge   = np.array([-180,-150,-120,-90,-60,-30, 0,30,60, 90,120,150])
        lat_bottom_edge = np.array([-60,-30,0, 30,60]) # NOTE: latitudes -90 to -60 are NOT part of the MERIT domain

        # For every bound, take the closest edge that is lower than or equal to it.
        # The edges are sorted ascending, so that is the last element passing the `<=` filter.
        min_lon_edge = lon_left_edge[lon_left_edge <= domain_min_lon][-1]
        max_lon_edge = lon_left_edge[lon_left_edge <= domain_max_lon][-1]
        min_lat_edge = lat_bottom_edge[lat_bottom_edge <= domain_min_lat][-1]
        max_lat_edge = lat_bottom_edge[lat_bottom_edge <= domain_max_lat][-1]

        # Convert to coordinate output (string)
        out = f'{min_lon_edge},{max_lon_edge},{min_lat_edge},{max_lat_edge}'

    elif product_name == 'soilgrids':

        # Define the CRSs
        src_proj = 'epsg:4326' # Regular lat/lon
        des_proj = '+proj=igh +lat_0=0 +lon_0=0 +datum=WGS84 +units=m +no_defs' # Homolosine

        # Convert EPSG:4326 coordinates to Homolosine that Soilgrids uses.
        # The inputs are passed in (lat, lon) order because `always_xy` is not set on the transformer.
        # NOTE(review): only two corners are transformed; in an interrupted projection such as
        # Homolosine these may not bound the full lat/lon box for large areas - confirm for the domain.
        transformer  = Transformer.from_crs(src_proj, des_proj)
        top_left     = transformer.transform(domain_max_lat, domain_min_lon)
        bottom_right = transformer.transform(domain_min_lat, domain_max_lon)

        # Return in format that's good to go for downloading (tuple)
        out = (top_left[0], top_left[1], bottom_right[0], bottom_right[1])

    else:
        print(f'WARNING: geospatial_coordinates_to_download_coordinates(): no code found to process {product}. Returning input as output.')
        out = coords

    print(f'Returning coordinates as type {type(out)} for use with {product} download code.')
    return out

### Download data (MERIT Hydro)

In [29]:
# Credentials that are passed to the MERIT Hydro download call further down
usr,pwd = cs.read_merit_credentials()

In [30]:
# Longitude and latitude values that are looped over to build the download URLs.
# NOTE(review): `download_coordinates` was created earlier in this notebook for the 'soilgrids'
# product (a tuple) - confirm that this MERIT helper accepts that format
merit_lon,merit_lat = cs.convert_coordinates_to_merit_download_lists(download_coordinates)

In [34]:
# Temporary location for the raw MERIT downloads (replaces the earlier SoilGrids `download_folder`)
download_folder = Path(data_path, geospatial_temp_path, 'merit', 'download')

In [36]:
# Create the download folder, including missing parent folders; no error if it already exists
download_folder.mkdir(parents=True, exist_ok=True)

In [39]:
# Request one download for every longitude/latitude combination.
# NOTE(review): `merit_url` is not defined in any cell visible in this notebook - confirm that it
# is read from the config file before this cell is run
for dl_lon in merit_lon:
    for dl_lat in merit_lat:
        file_url = merit_url.format(dl_lat,dl_lon) # Replace placeholder values in url with download coordinates
        cs.download_merit_hydro_grid(file_url, usr, pwd, download_folder)

Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n00w180.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n30w180.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n60w180.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n30w150.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n60w150.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n00w120.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n30w120.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n60w120.tar
Successfully downloaded http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro/distribute/v1.0/elv_n00w090.tar
Successfully downloaded http

### Unzip

In [41]:
# Collect the names (not the full paths) of all regular files directly inside the download folder
files = []
for entry in os.listdir(download_folder):
    if os.path.isfile(os.path.join(download_folder, entry)):
        files.append(entry)

In [43]:
# Unpack every downloaded archive into the download folder, then delete the archive to save space.
# NOTE(review): extractall() trusts the member paths stored in the archive, so this should only be
# run on archives obtained from a trusted source.
for file in files:
    archive_path = download_folder/file

    # The context manager closes the archive even when extraction fails part-way through
    with tarfile.open(archive_path) as tar_file:
        tar_file.extractall(download_folder)

    os.remove(archive_path)

### Merge

In [44]:
# Define the output file: name of the merged raster, which is written inside `merged_folder`
merged_file = 'merit_hydro_elv.tif'

In [45]:
# Walk the download folder recursively and gather the full path of every GeoTIFF.
# Filtering on the '.tif' suffix keeps out side-car files (e.g. .aux files written by QGIS).
all_files = []
for root, _subfolders, names in os.walk(download_folder):
    all_files.extend(os.path.join(root, name) for name in names if name.endswith('.tif'))

In [47]:
# Destination folder for the merged raster; create it (and any missing parents) if needed
merged_folder = Path(data_path, geospatial_temp_path, 'merit', 'raw')
merged_folder.mkdir(exist_ok=True, parents=True)

In [54]:
# Re-order the configured area into the window layout that GDAL uses.
#   configured area: lon_min, lon_max, lat_min, lat_max   -> positions 0, 1, 2, 3
#   GDAL window    : ulx,     uly,     lrx,     lry
#                  = lon_min, lat_max, lon_max, lat_min   -> positions 0, 3, 1, 2
# The values are kept as the strings that come out of the split.
subset_coor = download_area.split(',')
window = [subset_coor[position] for position in (0, 3, 1, 2)]

In [56]:
# Hand the list of GeoTIFFs, the output path (as a string) and the GDAL window to the merge helper.
# NOTE(review): judging by its name the helper merges the files and crops them to `window` -
# confirm against python_cs_functions
cs.merge_merit_downloads_into_area_of_interest(all_files, str(merged_folder/merged_file), window)

### Delete the individual GeoTIFF files to save space

In [63]:
# Remove the whole download folder, including everything inside it.
# The isdir check skips the removal when the folder does not exist (e.g. on a re-run of this cell).
if os.path.isdir(download_folder): 
    shutil.rmtree(download_folder)