# 2. Resample
We resample all GeoTIFFs to the same resolution, and clip them to the same domain, so that everything can be processed consistently in later steps.

In [1]:
import sys
from pathlib import Path
sys.path.append(str(Path().absolute().parent))
import _functions as pmf

In [2]:
# Specify where the config file can be found (relative path).
#  The settings used in this notebook are read from this file with pmf.read_from_config()
config_file = '../0_config/config.txt'

In [3]:
# Read every setting this notebook needs from the config file, in one pass.
#  The keys are looked up in the order listed, one read_from_config() call per key.
raw_path, data_path, x_res, y_res, domain = (
    pmf.read_from_config(config_file, key)
    for key in ('raw_path', 'data_path', 'x_res', 'y_res', 'domain')
)

## 2.1 Find all GeoTIFF file paths
These are in their native resolution.

In [4]:
import os

In [5]:
# Walk the raw data folder and collect the full path of every GeoTIFF,
#  i.e. every file whose name ends in '.tif' (compared case-insensitively)
contents = []
for folder, _subfolders, names in os.walk(raw_path):
    contents.extend(os.path.join(folder, name)
                    for name in names
                    if name.lower().endswith('.tif'))

In [6]:
# Remove the extra LAI entries for the moment
#  This is unfortunately necessary because we're rerunning those
# lai_indices keeps the positions (in contents) of the paths that are dropped;
#  file_paths keeps everything else, in the original order.
# The filter tests the path itself rather than looking each index up in the
#  lai_indices list, which avoids a quadratic scan on large file lists.
lai_indices = [i for i, path in enumerate(contents) if '/lai/raw/' in path]
file_paths = [path for path in contents if '/lai/raw/' not in path]

## 2.2 Resample GeoTIFFs to a single resolution

In [7]:
from osgeo import gdal, gdalconst

In [8]:
# Output root for all resampled GeoTIFFs: <data_path>/data/geotiff_same_resolution
#  Created here (including missing parents) if it does not exist yet
main_path = Path(data_path).joinpath('data', 'geotiff_same_resolution')
main_path.mkdir(parents=True, exist_ok=True)

In [9]:
# Format the resampling window
#  The comma-separated domain entries are reordered from positions 0,1,2,3 to
#  0,2,1,3, giving [minX, minY, maxX, maxY]
#  (example values noted for this domain: -179.5, 5, -50, 85)
domain_parts = domain.split(',')
window = [domain_parts[position] for position in (0, 2, 1, 3)]

In [10]:
# Resampling method per data product (keyed by the product's folder name):
#  'average' for continuous data, 'nearest' (nearest neighbor) for discrete data
resample_method_dict = dict(
    forest_height='average',
    glclu2019='nearest',
    lai='average',
    lgrip30='nearest',
    merit='average',
    pelletier='average',       # 5/6 data sets are continuous but ...
    pelletier_mask='nearest',  # ... the land mask is discrete
    soilgrids='average',
    worldclim='average',
)

In [12]:
def resample_geotiff(in_file, outfile, window=None, x_res=None, y_res=None, method=None):
    """Resample a GeoTIFF with gdal.Warp and save the result as a compressed GeoTIFF.

    The warp is first created as an in-memory VRT and then written to disk with
    gdal.Translate, so that compression options can be applied on output.

    Parameters
    ----------
    in_file : str
        Path of the raster to resample. Opened read-only.
    outfile : str
        Path of the GeoTIFF to create (DEFLATE-compressed, BigTIFF).
    window : sequence, optional
        Output bounds as [minX, minY, maxX, maxY], in the units of the CRS.
        Passed to GDAL as `outputBounds`.
    x_res, y_res : optional
        Target resolution in x and y, in the units of the CRS.
        Passed to GDAL as `xRes` and `yRes`.
    method : str, optional
        'average' or 'nearest' (case-insensitive). Any other value, including
        the default None, prints a warning and falls back to averaging.

    Raises
    ------
    RuntimeError
        If GDAL hands back no dataset when opening, warping or writing. This
        guards the case where GDAL reports failure by returning None.
    """

    # Set resampling algorithm
    #  Non-string values (e.g. the default None) cannot be lower-cased, so they
    #  are routed to the warning branch instead of raising an AttributeError
    method_key = method.lower() if isinstance(method, str) else None
    if method_key == 'average':
        resample = gdal.GRA_Average
    elif method_key == 'nearest':
        resample = gdal.GRA_NearestNeighbour
    else:
        print(f'WARNING: resample_geotiff(): resampling method {method} not implemented. Defaulting to GRA_Average.')
        resample = gdal.GRA_Average

    # Define the options
    options = gdal.WarpOptions(format = 'VRT',
                               xRes = x_res, # same units as CRS
                               yRes = y_res, # same units as CRS
                               outputBounds = window, # same units as CRS [minX, minY, maxX, maxY]
                               resampleAlg = resample)

    # Load the input data
    src_dataset = gdal.Open(in_file, gdalconst.GA_ReadOnly)
    if src_dataset is None:
        raise RuntimeError(f'resample_geotiff(): GDAL could not open {in_file}')

    # Do the resampling (empty destination name: the VRT is kept in memory)
    vrt = gdal.Warp('', src_dataset, options=options)
    if vrt is None:
        raise RuntimeError(f'resample_geotiff(): GDAL could not warp {in_file}')

    # Write the VRT to file with translate, so we can use compression
    tif_options = gdal.TranslateOptions(format='GTiff', creationOptions=['COMPRESS=DEFLATE','BIGTIFF=YES'])
    out_dataset = gdal.Translate(outfile, vrt, options=tif_options)
    if out_dataset is None:
        raise RuntimeError(f'resample_geotiff(): GDAL could not write {outfile}')
    out_dataset.FlushCache()

    # Release the datasets (output first, input last) by dropping the references
    out_dataset = None
    vrt = None
    src_dataset = None

### 2.2.1 Processing

In [13]:
# Switch on GDAL's Python exceptions before the processing loop starts
gdal.UseExceptions() # Ensure we see it if something goes wrong

In [None]:
# Resample every GeoTIFF into main_path/<data_name>/<original name>_resampled<extension>
# NOTE(review): file_paths[1:] skips the first entry of file_paths. The reason
#  is not visible in this notebook - confirm that this is intentional.
for file in file_paths[1:]:

    # Find the main folder name - we need this to find the resampling method
    # NOTE(review): the fixed index only holds for the folder depth assumed below
    path_segments = file.split('/')
    data_name = path_segments[5] # Assumes '/Users/usr/data/NorthAmerica_geospatial/product/..'

    # Define the output locations
    #  splitext keeps the original extension as-is, so files found through the
    #  case-insensitive '.tif' search (e.g. '.TIF') also get the '_resampled' tag
    out_path = main_path / data_name
    out_path.mkdir(exist_ok=True, parents=True)
    base_name, extension = os.path.splitext(os.path.basename(file))
    out_file = str( out_path / f'{base_name}_resampled{extension}' )

    # Resume after interrupts
    if os.path.isfile(out_file):
        print(f'Resampling {os.path.basename(file)} already processed. Skipping to next.')
        continue

    # Check for the Pelletier special case
    if (data_name == 'pelletier') and ('land_cover_mask' in os.path.basename(file)):
        data_name = 'pelletier_mask'

    # Find the resampling method
    method = resample_method_dict[data_name]

    # Resample
    #  Write to a temporary name first and only rename once resampling has
    #  finished. An interrupted run then never leaves a partial file under the
    #  final name, which the resume check above would wrongly treat as done.
    print(f'Resampling {os.path.basename(file)} with method = {method}')
    tmp_file = out_file + '.part'
    resample_geotiff(file, tmp_file, window=window, x_res=x_res, y_res=y_res, method=method)
    os.replace(tmp_file, out_file)

Resampling forest_height_2020.tif with method = average
Resampling upland_hill-slope_soil_thickness.tif with method = average
Resampling upland_hill-slope_regolith_thickness.tif with method = average
Resampling average_soil_and_sedimentary-deposit_thickness.tif with method = average
Resampling hill-slope_valley-bottom.tif with method = average
Resampling land_cover_mask.tif with method = nearest
Resampling upland_valley-bottom_and_lowland_sedimentary_deposit_thickness.tif with method = average
Resampling wc2.1_30s_prec_09.tif with method = average
Resampling wc2.1_30s_prec_08.tif with method = average
