Skip to content

Commit

Permalink
r.futures: update to version 2.3.0 (#714)
Browse files Browse the repository at this point in the history
* add missing snow package for running CI
  • Loading branch information
petrasovaa committed Mar 16, 2022
1 parent 89c2066 commit 240e15b
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/install_r_packages.R
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/usr/bin/env Rscript
install.packages(c("MuMIn", "lme4", "optparse"))
install.packages(c("MuMIn", "lme4", "optparse", "snow"))
125 changes: 90 additions & 35 deletions src/raster/r.futures/r.futures.calib/r.futures.calib.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,14 +283,52 @@
import atexit
import numpy as np
from io import StringIO
from multiprocessing import Process, Queue
from multiprocessing import Process, Queue, Pool

import grass.script.core as gcore
import grass.script.raster as grast
import grass.script.utils as gutils
from grass.exceptions import CalledModuleError


try:
    from grass.script.utils import append_random
except ImportError:
    # Fallback for GRASS versions that do not ship append_random yet.
    import random
    import string

    def append_random(name, suffix_length=None, total_length=None):
        """Add a random suffix of a specified length to a name (string).

        Exactly one of ``suffix_length`` (number of random characters
        appended after an underscore) or ``total_length`` (length of the
        entire resulting string, including the underscore) must be given.

        >>> append_random("tmp", 8)  # doctest: +SKIP
        >>> append_random("tmp", total_length=16)  # doctest: +SKIP

        .. note::
            This function is copied from grass79.

        :raises ValueError: when both or neither of the length arguments
            are provided, or when ``total_length`` leaves no room for at
            least one suffix character
        """
        if suffix_length and total_length:
            raise ValueError(
                "Either suffix_length or total_length can be provided, not both"
            )
        if not suffix_length and not total_length:
            raise ValueError("suffix_length or total_length has to be provided")
        if total_length:
            # remove len of name and one underscore
            name_length = len(name)
            suffix_length = total_length - name_length - 1
            if suffix_length <= 0:
                raise ValueError(
                    "No characters left for the suffix:"
                    " total_length <{total_length}> is too small"
                    " or name <{name}> ({name_length}) is too long".format(**locals())
                )
        # We don't do lower and upper case because that could cause conflicts in
        # contexts which are case-insensitive.
        # We use lowercase because that's what is in UUID4 hex string.
        allowed_chars = string.ascii_lowercase + string.digits
        # The following can be shorter with random.choices from Python 3.6.
        suffix = "".join(random.choice(allowed_chars) for _ in range(suffix_length))
        return "{name}_{suffix}".format(**locals())


TMP = []


Expand Down Expand Up @@ -482,8 +520,40 @@ def new_development(development_end, development_diff):
)


def patch_analysis_per_subregion(
development_diff, subregions, threshold, tmp_clump, tmp_clump_cat
def analyse_subregion(params):
    """Extract patch geometry statistics for one subregion category.

    Designed to run inside a multiprocessing pool worker, hence the single
    ``params`` tuple argument:
    ``(clump_cat_raster, regions_raster, region_cat, clump_raster, min_size)``.

    Returns a 2-column numpy array (one row per patch, columns taken from
    ``r.object.geometry -m`` output columns 1 and 2) filtered to rows whose
    first column exceeds ``min_size``; returns an empty (0, 2) array when
    the subregion contains no developed patches.
    """
    clump_cat_raster, regions_raster, region_cat, clump_raster, min_size = params
    # Mask the clump raster to the cells belonging to this subregion only.
    grast.mapcalc(
        "{new} = if ({reg} == {cat}, {clump}, null())".format(
            new=clump_cat_raster,
            reg=regions_raster,
            cat=region_cat,
            clump=clump_raster,
        ),
        overwrite=True,
    )
    # Zoom the computational region to the masked raster for this worker
    # via the environment, so parallel workers don't clash on region state.
    environ = os.environ.copy()
    environ["GRASS_REGION"] = gcore.region_env(zoom=clump_cat_raster)
    try:
        output = gcore.read_command(
            "r.object.geometry",
            input=clump_cat_raster,
            flags="m",
            separator="comma",
            env=environ,
            quiet=True,
        ).strip()
    except CalledModuleError:
        # r.object.geometry fails when the masked raster is all null.
        gcore.warning(
            "Subregion {cat} has no changes in development, no patches found.".format(
                cat=region_cat
            )
        )
        return np.empty([0, 2])
    table = np.loadtxt(StringIO(output), delimiter=",", usecols=(1, 2), skiprows=1)
    # in case there is just one record
    table = table.reshape((-1, 2))
    return table[table[:, 0] > min_size]


def patch_analysis_per_subregion_parallel(
development_diff, subregions, threshold, tmp_clump, tmp_name, nprocs
):
gcore.run_command(
"r.clump", input=development_diff, output=tmp_clump, overwrite=True, quiet=True
Expand All @@ -493,36 +563,16 @@ def patch_analysis_per_subregion(
.strip()
.splitlines()
)
subregions_data = {}
env = os.environ.copy()
params = []
toremove = []
for cat in cats:
grast.mapcalc(
"{new} = if ({reg} == {cat}, {clump}, null())".format(
new=tmp_clump_cat, reg=subregions, cat=cat, clump=tmp_clump
),
overwrite=True,
)
env["GRASS_REGION"] = gcore.region_env(zoom=tmp_clump_cat)
try:
data = gcore.read_command(
"r.object.geometry",
input=tmp_clump_cat,
flags="m",
separator="comma",
env=env,
quiet=True,
).strip()
data = np.loadtxt(StringIO(data), delimiter=",", usecols=(1, 2), skiprows=1)
# in case there is just one record
data = data.reshape((-1, 2))
subregions_data[cat] = data[data[:, 0] > threshold]
except CalledModuleError:
gcore.warning(
"Subregion {cat} has no changes in development, no patches found.".format(
cat=cat
)
)
subregions_data[cat] = np.empty([0, 2])
tmp_clump_cat = append_random(tmp_name, suffix_length=8)
toremove.append(tmp_clump_cat)
params.append((tmp_clump_cat, subregions, cat, tmp_clump, threshold))
with Pool(processes=nprocs) as pool:
results = pool.map_async(analyse_subregion, params).get()
subregions_data = dict(zip(cats, results))
gcore.run_command("g.remove", type="raster", flags="f", name=toremove, quiet=True)
return subregions_data


Expand Down Expand Up @@ -670,6 +720,7 @@ def main():
dev_start = options["development_start"]
dev_end = options["development_end"]
only_file = flags["l"]
nprocs = int(options["nprocs"])
patches_per_subregion = flags["s"]
if not only_file:
repeat = int(options["repeat"])
Expand Down Expand Up @@ -709,8 +760,13 @@ def main():
diff_development(dev_start, dev_end, options["subregions"], orig_patch_diff)
data = write_data = patch_analysis(orig_patch_diff, threshold, tmp_clump)
if patches_per_subregion:
subregions_data = patch_analysis_per_subregion(
orig_patch_diff, options["subregions"], threshold, tmp_clump, tmp_cat_clump
subregions_data = patch_analysis_per_subregion_parallel(
orig_patch_diff,
options["subregions"],
threshold,
tmp_clump,
tmp_name,
nprocs,
)
# if there is just one column, write the previous analysis result
if len(subregions_data.keys()) > 1:
Expand Down Expand Up @@ -750,7 +806,6 @@ def main():
) # to get percentage for readability

seed = int(options["random_seed"])
nprocs = int(options["nprocs"])
count = 0
proc_count = 0
queue_list = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def test_pga_calib_library_subregions(self):
patch_threshold=0,
subregions="zipcodes",
patch_sizes="data/out_library_subregion.csv",
nprocs=8,
)
self.assertTrue(
filecmp.cmp(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@
# % required: no
# % answer: 1
# %end
# %option G_OPT_M_NPROCS
# %end
# %flag
# % key: n
# % description: Do not propagate nulls
Expand All @@ -75,6 +77,8 @@
import grass.script.core as gcore
import grass.script.utils as gutils
import grass.script.raster as grast
from grass.script import task as gtask


TMPFILE = None
TMP = []
Expand All @@ -86,6 +90,11 @@ def cleanup():
gutils.try_remove(TMPFILE)


def module_has_parameter(module, parameter):
    """Check whether a GRASS module accepts a parameter of the given name.

    Queries the module's interface description, so it can detect options
    that exist only in newer GRASS versions (e.g. r.mfilter's nprocs).

    :param str module: module name, e.g. "r.mfilter"
    :param str parameter: option name to look for, e.g. "nprocs"
    :return bool: True when the module declares the parameter
    """
    module_info = gtask.command_info(module)
    return any(param["name"] == parameter for param in module_info["params"])


def main():
size = int(options["size"])
gamma = scale = None
Expand Down Expand Up @@ -154,7 +163,12 @@ def main():
with open(path, "w") as f:
f.write(write_filter(matrix))
gcore.message(_("Running development pressure filter..."))
gcore.run_command("r.mfilter", input=rmfilter_inp, output=rmfilter_out, filter=path)
params = {}
if module_has_parameter("r.mfilter", "nprocs"):
params["nprocs"] = options["nprocs"]
gcore.run_command(
"r.mfilter", input=rmfilter_inp, output=rmfilter_out, filter=path, **params
)

if flags["n"]:
gcore.run_command(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_devpressure_run(self):
method="gravity",
size=15,
flags="n",
nprocs=2,
)
self.assertRastersNoDifference(
actual=self.output, reference=self.result, precision=1e-6
Expand Down
2 changes: 1 addition & 1 deletion src/raster/r.futures/r.futures.html
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,4 @@ <h2>AUTHORS</h2>
<p>
<em>Development pressure, demand and calibration and preprocessing modules:</em><br>

Anna Petrasova, <a href="https://geospatial.ncsu.edu/geoforall/">NCSU GeoForAll</a><br>
Anna Petrasova, <a href="https://geospatial.ncsu.edu/geoforall/">NCSU GeoForAll</a>
1 change: 0 additions & 1 deletion src/raster/r.futures/r.futures.pga/r.futures.pga.html
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ <h2>AUTHORS</h2>
Anna Petrasova, <a href="https://geospatial.ncsu.edu/geoforall/">NCSU GeoForAll</a><br>
Vaclav Petras, <a href="https://geospatial.ncsu.edu/geoforall/">NCSU GeoForAll</a><br>


<!--
<p>
<i>Last changed: $Date: 2015-10-20 02:18:21 -0400 (Tue, 20 Oct 2015) $</i>
Expand Down
42 changes: 39 additions & 3 deletions src/raster/r.futures/r.futures.potential/r.futures.potential.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# PURPOSE: FUTURES Potential submodel
#
# COPYRIGHT: (C) 2016-2020 by the GRASS Development Team
# COPYRIGHT: (C) 2016-2021 by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
Expand Down Expand Up @@ -46,6 +46,13 @@
# %end
# %option
# % type: string
# % key: random_column
# % description: Random effect predictor
# % required: no
# % multiple: no
# %end
# %option
# % type: string
# % key: fixed_columns
# % description: Predictor columns that will be used for all models when dredging
# % required: no
Expand Down Expand Up @@ -118,6 +125,7 @@
make_option(c("-x","--maximum"), action="store", default=NA, type='integer', help="maximum number of variables for dredge"),
make_option(c("-n","--nprocs"), action="store", default=1, type='integer', help="number of processes for dredge"),
make_option(c("-f","--fixed"), action="store", default=NA, type='character', help="fixed predictors for dredge"),
make_option(c("-a","--random"), action="store", default=NA, type='character', help="random effect predictor"),
make_option(c("-e","--export_dredge"), action="store", default=NA, type='character', help="output CSV file of all models (when using dredge)")
)
opt = parse_args(OptionParser(option_list=option_list))
Expand All @@ -138,14 +146,21 @@
}
if (!is.na(opt$level)) {
predictors <- predictors[predictors != opt$level]
if (!is.na(opt$random)) {
predictors <- predictors[predictors != opt$random]
}
}
predictors <- predictors[predictors != opt$response]
if (is.na(opt$level)) {
fmla <- as.formula(paste(opt$response, " ~ ", paste(c(predictors), collapse= "+")))
model = glm(formula=fmla, family = binomial, data=input_data, na.action = "na.fail")
} else {
interc <- paste("(1|", opt$level, ")")
if (is.na(opt$random)) {
interc <- paste("(1|", opt$level, ")")
} else {
interc <- paste("(", opt$random, "|", opt$level, ")")
}
fmla <- as.formula(paste(opt$response, " ~ ", paste(c(predictors, interc), collapse= "+")))
model = glmer(formula=fmla, family = binomial, data=input_data, na.action = "na.fail")
}
Expand All @@ -161,7 +176,11 @@
fixed <- paste(" ~ ", fixed)
}
else {
fixed <- paste(" ~ ", fixed, " + ", paste("(1|", opt$level, ")", sep=""))
if (is.na(opt$random)) {
fixed <- paste(" ~ ", fixed, " + ", paste("(1|", opt$level, ")", sep=""))
} else {
fixed <- paste(" ~ ", fixed, " + ", paste("(", opt$random, "|", opt$level, ")", sep=""))
}
}
fmla_fixed <- as.formula(fixed)
}
Expand Down Expand Up @@ -213,6 +232,7 @@ def main():
columns = options["columns"].split(",")
binary = options["developed_column"]
level = options["subregions_column"]
random = options["random_column"]
sep = gutils.separator(options["separator"])
minim = int(options["min_variables"])
dredge = flags["d"]
Expand Down Expand Up @@ -265,6 +285,10 @@ def main():
columns += [binary]
if include_level:
columns += [level]
if random:
columns += [random]
# filter duplicates
columns = list(dict.fromkeys(columns))
where = "{c} IS NOT NULL".format(c=columns[0])
for c in columns[1:]:
where += " AND {c} IS NOT NULL".format(c=c)
Expand Down Expand Up @@ -304,6 +328,8 @@ def main():
]
if include_level:
cmd += ["-l", level]
if random:
cmd += ["-a", random]
if dredge and fixed_columns:
cmd += ["-f", ",".join(fixed_columns)]
if dredge and options["dredge_output"]:
Expand All @@ -319,16 +345,26 @@ def main():
gscript.info("-------------------------")
gscript.message(gscript.decode(stdout))

# note: this would be better with pandas, but adds dependency
with open(TMP_POT, "r") as fin, open(options["output"], "w") as fout:
i = 0
for line in fin.readlines():
row = line.strip().split("\t")
row = [each.strip('"') for each in row]
if i == 0:
row[0] = "ID"
if include_level and random:
row[2] = row[1]
row[1] = "Intercept"
if i == 1 and not include_level:
row[0] = single_level
if i >= 1:
if include_level:
# devpressure needs to be after intercept
if random:
row[2], row[1] = row[1], row[2]
else:
row[0] = single_level
fout.write(sep.join(row))
fout.write("\n")
i += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ <h2>EXAMPLES</h2>
r.futures.potsurface input=potential.csv subregions=counties output=pot_surface
</pre></div>
<center>
<img src="r_futures_potsurface.png" alt="Illustration of a potential
surface in 3D, with a raster of the developed cells (in red) and
undeveloped cells (in green) draped onto the surface.">
<img src="r_futures_potsurface.png" alt="Illustration of a potential
surface in 3D, with a raster of the developed cells (in red) and
undeveloped cells (in green) draped onto the surface.">
<p>
Figure: We can visualize the potential surface in 3D and drape raster
representing developed (red) and undeveloped (green) cells over it.
Expand Down

0 comments on commit 240e15b

Please sign in to comment.