t.rast.univar: Add support for percentiles (#3039)

* extended statistics * add back accidentally removed method
OSGeo · Jun 21, 2023 · 51e962a · 51e962a
1 parent 84fc38f
commit 51e962a
Show file tree

Hide file tree

Showing 3 changed files with 105 additions and 12 deletions.
diff --git a/python/grass/temporal/univar_statistics.py b/python/grass/temporal/univar_statistics.py
@@ -96,7 +96,13 @@ def compute_univar_stats(registered_map_info, stats_module, fs, rast_region=Fals
         string += f'{fs}{stats["n"]}'
         if "median" in stats:
             string += f'{fs}{stats["first_quartile"]}{fs}{stats["median"]}'
-            string += f'{fs}{stats["third_quartile"]}{fs}{stats["percentile_90"]}'
+            string += f'{fs}{stats["third_quartile"]}'
+            if stats_module.inputs.percentile:
+                for perc in stats_module.inputs.percentile:
+                    perc_value = stats[
+                        f"percentile_{str(perc).rstrip('0').rstrip('.').replace('.','_')}"
+                    ]
+                    string += f"{fs}{perc_value}"
         string += eol
     return string
 
@@ -111,6 +117,7 @@ def print_gridded_dataset_univar_statistics(
     fs="|",
     rast_region=False,
     zones=None,
+    percentile=None,
     nprocs=1,
 ):
     """Print univariate statistics for a space time raster or raster3d dataset
@@ -120,6 +127,7 @@ def print_gridded_dataset_univar_statistics(
     :param output: Name of the optional output file, if None stdout is used
     :param where: A temporal database where statement
     :param extended: If True compute extended statistics
+    :param percentile: List of percentiles to compute
     :param no_header: Suppress the printing of column names
     :param fs: Field separator
     :param nprocs: Number of cores to use for processing
@@ -148,7 +156,7 @@ def print_gridded_dataset_univar_statistics(
         dbif.close()
         err = "Space time %(sp)s dataset <%(i)s> is empty"
         if where:
-            err += " or where condition is wrong"
+            err += " or where condition does not return any maps"
         gs.fatal(
             _(err) % {"sp": sp.get_new_map_instance(None).get_type(), "i": sp.get_id()}
         )
@@ -177,7 +185,14 @@ def print_gridded_dataset_univar_statistics(
             ]
         )
         if extended is True:
-            cols.extend(["first_quartile", "median", "third_quartile", "percentile_90"])
+            cols.extend(["first_quartile", "median", "third_quartile"])
+            if percentile:
+                cols.extend(
+                    [
+                        f"percentile_{str(perc).rstrip('0').rstrip('.').replace('.','_')}"
+                        for perc in percentile
+                    ]
+                )
         string = fs.join(cols)
 
         if output is None:
@@ -197,6 +212,7 @@ def print_gridded_dataset_univar_statistics(
         "r.univar" if type == "strds" else "r3.univar",
         flags=flag,
         zones=zones,
+        percentile=percentile,
         stdout_=PIPE,
         run_=False,
     )

diff --git a/temporal/t.rast.univar/t.rast.univar.py b/temporal/t.rast.univar/t.rast.univar.py
@@ -2,11 +2,11 @@
 
 ############################################################################
 #
-# MODULE:	t.rast.univar
-# AUTHOR(S):	Soeren Gebbert
+# MODULE:    t.rast.univar
+# AUTHOR(S):    Soeren Gebbert
 #
-# PURPOSE:	Calculates univariate statistics from the non-null cells for each registered raster map of a space time raster dataset
-# COPYRIGHT:	(C) 2011-2017, Soeren Gebbert and the GRASS Development Team
+# PURPOSE:    Calculates univariate statistics from the non-null cells for each registered raster map of a space time raster dataset
+# COPYRIGHT:    (C) 2011-2017, Soeren Gebbert and the GRASS Development Team
 #
 #  This program is free software; you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
@@ -46,6 +46,15 @@
 # % required: no
 # %end
 
+# %option
+# % key: percentile
+# % type: double
+# % required: no
+# % multiple: yes
+# % options: 0-100
+# % description: Percentile to calculate (requires extended statistics flag)
+# %end
+
 # %option G_OPT_T_WHERE
 # % guisection: Selection
 # %end
@@ -71,6 +80,11 @@
 # % guisection: Formatting
 # %end
 
+# %rules
+# % requires: percentile,-e
+# % exclusive: zones,-r
+# %end
+
 import grass.script as gs
 
 ############################################################################
@@ -93,7 +107,16 @@ def main():
     no_header = flags["u"]
     rast_region = bool(flags["r"])
     separator = gs.separator(options["separator"])
-
+    percentile = None
+    if options["percentile"]:
+        try:
+            percentile = list(map(float, options["percentile"].split(",")))
+        except ValueError:
+            gs.fatal(
+                _("<{}> is not valid input to the percentile option").format(
+                    options["percentile"]
+                )
+            )
     # Make sure the temporal database exists
     tgis.init()
 
@@ -117,6 +140,7 @@ def main():
         fs=separator,
         rast_region=rast_region,
         zones=zones,
+        percentile=percentile,
         nprocs=nprocs,
     )
 

diff --git a/temporal/t.rast.univar/testsuite/test_t_rast_univar.py b/temporal/t.rast.univar/testsuite/test_t_rast_univar.py
@@ -173,6 +173,7 @@ def test_coarser_resolution(self):
 a_3@testing||2001-07-01 00:00:00|2001-10-01 00:00:00|300|300|300|300|0|0|0|28800|0|96|96
 a_4@testing||2001-10-01 00:00:00|2002-01-01 00:00:00|400|400|400|400|0|0|0|38400|0|96|96
 """
+
         for ref, res in zip(
             univar_text.split("\n"), t_rast_univar.outputs.stdout.split("\n")
         ):
@@ -204,8 +205,62 @@ def test_subset_with_output(self):
             if ref and res:
                 ref_line = ref.split("|", 1)[1]
                 res_line = res.split("|", 1)[1]
-                print(type(ref_line))
-                print(type(res_line))
+                self.assertLooksLike(ref_line, res_line)
+
+    def test_subset_with_extended_statistics_and_output(self):
+        self.runModule("g.region", res=10)
+        self.assertModule(
+            "t.rast.univar",
+            input="A",
+            flags="er",
+            output="univar_output.txt",
+            where="start_time >= '2001-03-01'",
+            percentile=[10.0, 97.5],
+            overwrite=True,
+            verbose=True,
+        )
+
+        univar_text = """id|semantic_label|start|end|mean|min|max|mean_of_abs|stddev|variance|coeff_var|sum|null_cells|cells|non_null_cells|first_quartile|median|third_quartile|percentile_10|percentile_97_5
+a_2@m2||2001-04-01 00:00:00|2001-07-01 00:00:00|200|200|200|200|0|0|0|1920000|0|9600|9600|200|200|200|200|200
+a_3@m2||2001-07-01 00:00:00|2001-10-01 00:00:00|300|300|300|300|0|0|0|2880000|0|9600|9600|300|300|300|300|300
+a_4@m2||2001-10-01 00:00:00|2002-01-01 00:00:00|400|400|400|400|0|0|0|3840000|0|9600|9600|400|400|400|400|400
+"""
+        univar_output = open("univar_output.txt", "r").read()
+
+        for ref, res in zip(univar_text.split("\n"), univar_output.split("\n")):
+            if ref and res:
+                ref_line = ref.split("|", 1)[1]
+                res_line = res.split("|", 1)[1]
+                self.assertLooksLike(ref_line, res_line)
+
+    def test_subset_with_extended_statistics_output_and_zones(self):
+        self.runModule("g.region", res=10)
+        self.assertModule(
+            "t.rast.univar",
+            input="A",
+            flags="e",
+            zones="zones",
+            output="univar_output.txt",
+            where="start_time >= '2001-03-01'",
+            percentile=[10.0, 97.5],
+            overwrite=True,
+            verbose=True,
+        )
+
+        univar_text = """id|semantic_label|start|end|zone|mean|min|max|mean_of_abs|stddev|variance|coeff_var|sum|null_cells|cells|non_null_cells|first_quartile|median|third_quartile|percentile_10|percentile_97_5
+a_2@m2||2001-04-01 00:00:00|2001-07-01 00:00:00|2|200|200|200|200|0|0|0|4800|0|24|24|200|200|200|200|200
+a_2@m2||2001-04-01 00:00:00|2001-07-01 00:00:00|3|200|200|200|200|0|0|0|14400|0|72|72|200|200|200|200|200
+a_3@m2||2001-07-01 00:00:00|2001-10-01 00:00:00|2|300|300|300|300|0|0|0|7200|0|24|24|300|300|300|300|300
+a_3@m2||2001-07-01 00:00:00|2001-10-01 00:00:00|3|300|300|300|300|0|0|0|21600|0|72|72|300|300|300|300|300
+a_4@m2||2001-10-01 00:00:00|2002-01-01 00:00:00|2|400|400|400|400|0|0|0|9600|0|24|24|400|400|400|400|400
+a_4@m2||2001-10-01 00:00:00|2002-01-01 00:00:00|3|400|400|400|400|0|0|0|28800|0|72|72|400|400|400|400|400
+"""
+        univar_output = open("univar_output.txt", "r").read()
+
+        for ref, res in zip(univar_text.split("\n"), univar_output.split("\n")):
+            if ref and res:
+                ref_line = ref.split("|", 1)[1]
+                res_line = res.split("|", 1)[1]
                 self.assertLooksLike(ref_line, res_line)
 
     def test_subset_with_output_coarse_resolution(self):
@@ -261,8 +316,6 @@ def test_with_zones(self):
         self.runModule("g.region", res=1)
         self.assertModule(t_rast_univar)
 
-        print(t_rast_univar.outputs.stdout.split("\n"))
-
         univar_text = """id|semantic_label|start|end|zone|mean|min|max|mean_of_abs|stddev|variance|coeff_var|sum|null_cells|cells|non_null_cells
 a_1@PERMANENT||2001-01-01 00:00:00|2001-04-01 00:00:00|1|100|100|100|100|0|0|0|60000|0|600|600
 a_1@PERMANENT||2001-01-01 00:00:00|2001-04-01 00:00:00|2|100|100|100|100|0|0|0|168000|0|1680|1680