diff --git a/extract-gis.sh b/extract-gis.sh index a3eef6b..5301d0b 100755 --- a/extract-gis.sh +++ b/extract-gis.sh @@ -425,6 +425,11 @@ case "${geotiff,,}" in call_processing_func "$(dirname $0)/soil_class/soil_class.sh" ;; + # NHM layers + "nhm" | "tgf" | "gf" ) + call_processing_func "$(dirname $0)/nhm/nhm.sh" + ;; + # dataset not included above *) echo "$(basename $0): missing/unknown dataset"; diff --git a/gsde/gsde.sh b/gsde/gsde.sh index 415c389..6332291 100755 --- a/gsde/gsde.sh +++ b/gsde/gsde.sh @@ -275,7 +275,7 @@ if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then tempInstallPath="$cache/r-packages" mkdir -p "$tempInstallPath" export R_LIBS_USER="$tempInstallPath" - + # extract given stats for each variable for var in "${variablesMod[@]}"; do ## build renv and create stats diff --git a/landsat/landsat.sh b/landsat/landsat.sh index f7a25a1..fdd0e4f 100755 --- a/landsat/landsat.sh +++ b/landsat/landsat.sh @@ -176,6 +176,7 @@ logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; } # limits # lonLims: comma-delimited longitude # limits +# sourceProj4: the extents projection # # Arguments: # sourceVrt: source vrt file @@ -208,13 +209,14 @@ subset_geotiff () { lonMin="${sortedLons[0]}" lonMax="${sortedLons[1]}" - # subset based on lat/lon - flush to disk at 500MB + # subset based on lat/lon in their given projection - flush to disk at 500MB GDAL_CACHEMAX=500 gdal_translate --config GDAL_CACHEMAX 500 \ - -co COMPRESS="DEFLATE" \ - -co BIGTIFF="YES" \ - -projwin "$lonMin" "$latMax" "$lonMax" "$latMin" "${sourceVrt}" "${destPath}" \ - > /dev/null; + -co COMPRESS="DEFLATE" \ + -co BIGTIFF="YES" \ + -projwin "$lonMin" "$latMax" "$lonMax" "$latMin" "${sourceVrt}" "${destPath}" \ + -projwin_srs "$sourceProj4" \ + > /dev/null; } ####################################### @@ -239,10 +241,12 @@ subset_geotiff () { extract_shapefile_extents () { # local variables local shapefileExtents # ogrinfo output containing ESIR Shapefile extents - local sourceProj4 # projection in 
proj4 format local leftBottomLims # left bottom coordinates (min lon, min lat) local rightTopLims # top right coordinates (max lon, max lat) + # global variables: + # - $sourceProj4 + # reading arguments local shapefilePath=$1 local destProj4=$2 @@ -283,66 +287,6 @@ extract_shapefile_extents () { latLims="${lowerLeftLims[1]},${upperRightLims[1]}" } -####################################### -# Transform projection based on source -# Proj4 string -# -# Globals: -# None -# -# Arguments: -# sourceProj4: string describing -# source projection -# coords: comma-separated coordinate -# values -# coordsDelim: delimited in the -# $coords variable -# transformDelim: delimtied in the -# transformed values -# destEPSG: EPSG value of the -# destination projection -# (default 'EPSG:4326') -# -# Outputs: -# comma-delimited $coords in the -# $destEPSG format -####################################### -transform_coords () { - # local variables - local sourceProj4="$1" - local coords="$2" - local coordsDelim="$3" - local transformDelim="$4" - local destEPSG="$5" - # local variables assigned in the function - local coordsBlankDel - local coordsTrans - - # if $destEPSG not provided, use EPSG:4326 by default - if [[ -z $destEPSG ]]; then - destEPSG='EPSG:4326' - fi - - # if $coordsDelim not provided, use comma ',' by default - if [[ -z $coordsDelim ]]; then - coordsDelim=',' - fi - - # substituting comma with a blank space - coordsBlankDel=$(echo "$coords" | tr "${coordsDelim}" ' ') - - # transforming coords - coordsTrans=$(echo "$coordsBlankDel" | gdaltransform -s_srs "${sourceProj4}" -t_srs "${destEPSG}" -output_xy) - - # subtitute blank space with $transformDelim value - if [[ -n $transformDelim ]]; then - coordsTrans=$(echo "$coordsTrans" | tr ' ' $transformDelim) - fi - - # echo-ing $coordsTrans variable - echo "${coordsTrans}" -} - # =============== # Data Processing @@ -370,13 +314,13 @@ for var in "${variables[@]}"; do ### check if the $startDate and $endDate variables are set if 
[[ -z ${startDate} ]] || [[ -z ${endDate} ]]; then echo "$(logDate)$(basename $0): ERROR! Both"' `--start-date` and `--end-date` must be provided' - exit 1; + exit 1; fi # extract the entered years and populate $files array for y in "${validYears[@]}"; do if [[ "$y" -ge "${startDate}" ]] && [[ "$y" -le "${endDate}" ]]; then - files+=("${landcoverPrefix}${y}") - fi + files+=("${landcoverPrefix}${y}") + fi done ;; @@ -385,7 +329,7 @@ for var in "${variables[@]}"; do ### check if the $startDate and $endDate variables are provided if [[ -n ${startDate} ]] || [[ -n ${endDate} ]]; then echo "$(logDate)$(basename $0): WARNING! For land-cover-change, only the difference for the 2010-2015 period is available" - echo "$(logDate)$(basename $0)"': WARNING! The `--start-date` and `--end-date` arguments are ignored for '"${var}" + echo "$(logDate)$(basename $0)"': WARNING! The `--start-date` and `--end-date` arguments are ignored for '"${var}" fi # populate $files array files+=("$landcoverchangeFile") @@ -408,7 +352,7 @@ fi filesComplete=() # populating filesComplete array for f in "${files[@]}"; do - filesComplete+=($(ls -d ${geotiffDir}/${f}* | xargs -n 1 basename)) + filesComplete+=($(ls -d ${geotiffDir}/${f}* | xargs -n 1 basename)) done # extracting the .zip files @@ -432,6 +376,8 @@ rasterProj4="$(gdalsrsinfo "${tempTif}" | grep -e "PROJ.4" | cut -d ':' -f2)" if [[ -n $shapefile ]]; then # create latLims and lonLims variables specifying the limits of the ESRI Shapefile extract_shapefile_extents "${shapefile}" "${rasterProj4}" +else + sourceProj4="EPSG:4326" fi # subset and produce stats if needed @@ -441,7 +387,7 @@ if [[ "${printGeotiff,,}" == "true" ]]; then for tif in "${tiffs[@]}"; do # subset based on lat and lon values - subset_geotiff "${cache}/${tif}" "${outputDir}/${prefix}${tif}" "$latLims" "$lonLims" + subset_geotiff "${cache}/${tif}" "${outputDir}/${prefix}${tif}" "$latLims" "$lonLims" "$sourceProj4" done fi diff --git a/nhm/README.sh b/nhm/README.sh new file 
mode 100644 index 0000000..13c7076 --- /dev/null +++ b/nhm/README.sh @@ -0,0 +1,31 @@ +# `NHM` Geospatial Dataset +In this file, the necessary technical details of the dataset are explained. + +## Location of the `NHM` Dataset Files +The `NHM` geospatial dataset files are located under the following directory accessible from Digital Alliance (formerly Compute Canada) Graham cluster: + +```console +/project/rpp-kshook/Climate_Forcing_Data/geospatial-data/nhm/ # rpp-kshook allocation +/project/def-mclark-ab/data/geospatial-data/nhm/ # def-mclark-ab allocation +``` + +And the structure of the files is as follows: + +```console +/project/rpp-kshook/Climate_Forcing_Data/geospatial-data/nhm/ +└── dem.zip +``` + +## Spatial and Temporal Extents
The spatial coverage for each dataset is either the Canadian/American transboundary area, or the entire CONUS. + +## Dataset Variables +The variables of this dataset are detailed in the table below: + +|# |Variable Name (used in `gistool`) |Description |Comments | +|-------|---------------------------------------|---------------------------------------|---------------| +|1 |`dem` | Transboundary 30-m DEM |TGF dataset | + + +> [!WARNING] +> This dataset is being populated and the meta-data provided here will change. diff --git a/nhm/nhm.sh b/nhm/nhm.sh new file mode 100755 index 0000000..7709668 --- /dev/null +++ b/nhm/nhm.sh @@ -0,0 +1,302 @@ +#!/bin/bash +# GIS Data Processing Workflow +# Copyright (C) 2023-2024, University of Calgary +# Copyright (C) 2022-2024, University of Saskatchewan +# +# This file is part of GIS Data Processing Workflow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# ========================= +# Credits and contributions +# ========================= +# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html +# 2. General ideas of GeoTIFF subsetting are taken from https://github.com/CH-Earth/CWARHM +# developed mainly by Wouter Knoben (hence the header copyright credit). See the preprint +# at: https://www.essoar.org/doi/10.1002/essoar.10509195.1 + + +# ================ +# General comments +# ================ +# * All variables are camelCased for distinguishing from function names; +# * function names are all in lower_case with words seperated by underscore for legibility; +# * shell style is based on Google Open Source Projects' +# Style Guide: https://google.github.io/styleguide/shellguide.html + + +# =============== +# Usage Functions +# =============== +short_usage() { + echo "usage: $(basename $0) -cio DIR -v var1[,var2[...]] [-r INT] [-se DATE] [-ln REAL,REAL] [-f PATH] [-F STR] [-t BOOL] [-a stat1[,stat2,[...]] [-q q1[,q2[...]]]] [-p STR] " +} + + +# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT +parsedArguments=$(getopt -a -n nhm -o i:o:v:r:s:e:l:n:f:F:t:a:u:q:p:c:L: --long dataset-dir:,output-dir:,variable:,crs:,start-date:,end-date:,lat-lims:,lon-lims:,shape-file:,fid:,print-geotiff:,stat:,include-na:,quantile:,prefix:,cache:,lib-path: -- "$@") +validArguments=$? +if [ "$validArguments" != "0" ]; then + short_usage; + exit 1; +fi + +# check if no options were passed +if [ $# -eq 0 ]; then + echo "$(basename $0): ERROR! 
arguments missing"; + exit 1; +fi + +# check long and short options passed +eval set -- "$parsedArguments" +while : +do + case "$1" in + -i | --dataset-dir) geotiffDir="$2" ; shift 2 ;; # required + -o | --output-dir) outputDir="$2" ; shift 2 ;; # required + -v | --variable) variables="$2" ; shift 2 ;; # required + -r | --crs) crs="$2" ; shift 2 ;; # required + -s | --start-date) startDate="$2" ; shift 2 ;; # redundant - added for compatibility + -e | --end-date) endDate="$2" ; shift 2 ;; # redundant - added for compatibility + -l | --lat-lims) latLims="$2" ; shift 2 ;; # required - could be redundant + -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required - could be redundant + -f | --shape-file) shapefile="$2" ; shift 2 ;; # required - could be redundant + -F | --fid) fid="$2" ; shift 2 ;; # optional + -t | --print-geotiff) printGeotiff="$2" ; shift 2 ;; # required + -a | --stat) stats="$2" ; shift 2 ;; # optional + -u | --include-na) includeNA="$2" ; shift 2 ;; # required + -q | --quantile) quantiles="$2" ; shift 2 ;; # optional + -p | --prefix) prefix="$2" ; shift 2 ;; # optional + -c | --cache) cache="$2" ; shift 2 ;; # required + -L | --lib-path) renvCache="$2" ; shift 2 ;; # required + + # -- means the end of the arguments; drop this, and break out of the while loop + --) shift; break ;; + + # in case of invalid option + *) + echo "$(basename $0): ERROR! invalid option '$1'"; + short_usage; exit 1 ;; + esac +done + +# check if $ensemble is provided +if [[ -n "$startDate" ]] || [[ -n "$endDate" ]]; then + echo "$(basename $0): ERROR! 
redundant argument (time extents) provided"; + exit 1; +fi + +# check the prefix if not set +if [[ -z $prefix ]]; then + prefix="nhm_" +fi + +# parse comma-delimited variables +IFS=',' read -ra variables <<< "${variables}" + + +# ===================== +# Necessary Assumptions +# ===================== +# TZ to be set to UTC to avoid invalid dates due to Daylight Saving +alias date='TZ=UTC date' +# expand aliases for the one stated above +shopt -s expand_aliases + +# necessary hard-coded paths +exactextractrCache="${renvCache}/exact-extract-env" # exactextractr renv cache path +renvPackagePath="${renvCache}/renv_0.16.0.tar.gz" # renv_0.16.0 source path + + +# ========================== +# Necessary Global Variables +# ========================== +# the structure of the original file names are one of the following: +# * %{var}_M_sl%{soil_layer_depth_number}_250m_ll.tif +# * %{var}_250m_ll.tif +# but the user is expected to include complete variable (file) name in the +# `--variable` input argument comma-separated list. 
+ + +# =================== +# Necessary Functions +# =================== +# Modules below available on Compute Canada (CC) Graham Cluster Server +load_core_modules () { + module -q purge + module -q load StdEnv/2020 + module -q load gcc/9.3.0 + module -q load r/4.1.2 + module -q load gdal/3.0.4 + module -q load udunits/2.2.28 + module -q load geos/3.10.2 + module -q load proj/9.0.0 +} +load_core_modules + + +# ================= +# Useful One-liners +# ================= +# sorting a comma-delimited string of real numbers +sort_comma_delimited () { IFS=',' read -ra arr <<< "$*"; echo ${arr[*]} | tr " " "\n" | sort -n | tr "\n" " "; } + +# log date format +logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; } + + +####################################### +# subset GeoTIFFs +# +# Globals: +# latLims: comma-delimited latitude +# limits +# lonLims: comma-delimited longitude +# limits +# +# Arguments: +# sourceVrt: source vrt file +# destPath: destination path (inclu- +# ding file name) +# +# Outputs: +# one mosaiced (merged) GeoTIFF under +# the $destPath +####################################### +subset_geotiff () { + # local variables + local latMin + local latMax + local lonMin + local lonMax + local sortedLats + local sortedLons + # reading arguments + local sourceVrt="$1" + local destPath="$2" + + # extracting minimum and maximum of latitude and longitude respectively + ## latitude + sortedLats=($(sort_comma_delimited "$latLims")) + latMin="${sortedLats[0]}" + latMax="${sortedLats[1]}" + ## longitude + sortedLons=($(sort_comma_delimited "$lonLims")) + lonMin="${sortedLons[0]}" + lonMax="${sortedLons[1]}" + + # subset based on lat/lon - flush to disk at 500MB + GDAL_CACHEMAX=500 + gdal_translate --config GDAL_CACHEMAX 500 \ + -co COMPRESS="DEFLATE" \ + -co BIGTIFF="YES" \ + -projwin $lonMin $latMax $lonMax $latMin "${sourceVrt}" "${destPath}" \ + > /dev/null; +} + + +# =============== +# Data Processing +# =============== +# display info +echo "$(logDate)$(basename 
$0): processing USGS-NHM GeoTIFF(s)..." + +# make the output directory +echo "$(logDate)$(basename $0): creating output directory under $outputDir" +mkdir -p "$outputDir" # making the output directory +mkdir -p "$cache" # making the cache directory + +# if shapefile is provided extract the extents from it +if [[ -n $shapefile ]]; then + # extract the shapefile extent + IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)" + # transform the extents in case they are not in EPSG:4326 + IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value + if [[ -n $sourceProj4 ]]; then + : + else + echo "$(logDate)$(basename $0): WARNING! Assuming WGS84 CRS for the input ESRI shapefile" + sourceProj4=("PROJ.4" " +proj=longlat +datum=WGS84 +no_defs") # made an array for compatibility with the following statements + fi + + # transform limits and assign to variables + IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy) + IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy) + # define $latLims and $lonLims from $shapefileExtents + lonLims="${leftBottomLims[0]},${rightTopLims[0]}" + latLims="${leftBottomLims[1]},${rightTopLims[1]}" +fi + +# extracting contents of the .zip file +for var in "${variables[@]}"; do + # create temporary directories for each variable + mkdir -p "$cache/$var" + # unzip + unzip "$geotiffDir/${var}.zip" -d "$cache/${var}/" + # build vrt + gdalbuildvrt "${cache}/${var}.vrt" ${cache}/${var}/*.tif -resolution highest -sd 1 > /dev/null +done + +# subset and produce stats if needed +if [[ "$printGeotiff" == "true" ]]; then + echo "$(logDate)$(basename $0): subsetting GeoTIFFs under $outputDir" + for var in "${variables[@]}"; do + # subset based on lat and lon values + subset_geotiff 
"${cache}/${var}.vrt" "${outputDir}/${prefix}${var}.tif" + done +fi + +## make R renv project directory +if [[ -n "$shapefile" ]] && [[ -n $stats ]]; then + echo "$(logDate)$(basename $0): Extracting stats under $outputDir" + mkdir -p "$cache/r-virtual-env/" + ## make R renv in $cache + virtualEnvPath="$cache/r-virtual-env/" + cp "$(dirname $0)/../assets/renv.lock" "$virtualEnvPath" + ## load necessary modules - excessive, mainly for clarification + load_core_modules + + ## make the temporary directory for installing r packages + tempInstallPath="$cache/r-packages" + mkdir -p "$tempInstallPath" + export R_LIBS_USER="$tempInstallPath" + + # extract given stats for each variable + for var in "${variables[@]}"; do + ## build renv and create stats + Rscript "$(dirname $0)/../assets/stats.R" \ + "$tempInstallPath" \ + "$exactextractrCache" \ + "$renvPackagePath" \ + "$virtualEnvPath" \ + "$virtualEnvPath" \ + "${virtualEnvPath}/renv.lock" \ + "${cache}/${var}.vrt" \ + "$shapefile" \ + "$outputDir/${prefix}stats_${var}.csv" \ + "$stats" \ + "$includeNA" \ + "$quantiles" \ + "$fid" >> "${outputDir}/${prefix}stats_${var}.log" 2>&1; + done +fi + +# remove unnecessary files +mkdir "$HOME/empty_dir" +echo "$(logDate)$(basename $0): deleting temporary files from $cache" +rsync --quiet -aP --delete "$HOME/empty_dir/" "$cache" +rm -r "$cache" +echo "$(logDate)$(basename $0): temporary files from $cache are removed" +echo "$(logDate)$(basename $0): results are produced under $outputDir" +