Bug fixes to r.learn.ml2

OSGeo · Feb 26, 2021 · 07e69bf · 07e69bf
1 parent ab735f9
commit 07e69bf
Show file tree

Hide file tree

Showing 13 changed files with 256 additions and 416 deletions.
diff --git a/grass7/raster/r.learn.ml2/r.learn.predict/r.learn.predict.py b/grass7/raster/r.learn.ml2/r.learn.predict/r.learn.predict.py
@@ -95,7 +95,8 @@ def main():
         import joblib
 
         if sklearn.__version__ < "0.20":
-            gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed")
+            gs.fatal(
+                "Package python3-scikit-learn 0.20 or newer is not installed")
 
     except ImportError:
         gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed")

diff --git a/grass7/raster/r.learn.ml2/r.learn.train/r.learn.train.html b/grass7/raster/r.learn.ml2/r.learn.train/r.learn.train.html
@@ -97,7 +97,7 @@ <h3>Hyperparameters</h3>
 <p>The estimator settings tab provides access to the most pertinent parameters that affect the
 	previously described algorithms. The scikit-learn estimator defaults are generally supplied,
 	and these parameters can be tuned using a grid-search by inputting multiple comma-separated
-	parameters. The grid search is performed using a 2-fold cross validation. This tuning can also
+	parameters. The grid search is performed using a 3-fold cross validation. This tuning can also
 	be accomplished simultaneously with nested cross-validation by setting the <em>cv</em> option
 	to &gt; 1.</p>
 

diff --git a/grass7/raster/r.learn.ml2/r.learn.train/r.learn.train.py b/grass7/raster/r.learn.ml2/r.learn.train/r.learn.train.py
diff --git a/grass7/raster/r.learn.ml2/rlearnlib/raster.py b/grass7/raster/r.learn.ml2/rlearnlib/raster.py
@@ -68,14 +68,16 @@ def __init__(self, rasters=None, group=None):
         if group:
             groups_in_mapset = (
                 g.list(type="group", stdout_=PIPE)
-                .outputs.stdout.strip()
-                .split(os.linesep)
+                    .outputs.stdout.strip()
+                    .split(os.linesep)
             )
             groups_in_mapset = [i.split("@")[0] for i in groups_in_mapset]
             group = group.split("@")[0]
 
             if group not in groups_in_mapset:
-                gs.fatal("Imagery group {group} does not exist".format(group=group))
+                gs.fatal(
+                    "Imagery group {group} does not exist".format(group=group)
+                )
             else:
                 map_list = im.group(
                     group=group, flags=["l", "g"], quiet=True, stdout_=PIPE
@@ -310,12 +312,10 @@ def read(self, row=None, rows=None):
         -----
         Read an entire RasterStack into a numpy array
 
-        If the row parameter is used then a single row is read into a 3d numpy array
-
-        If the rows parameter is used, then a range of rows from (start_row, end_row) is read
-        into a 3d numpy array
-
-        If no additional arguments are supplied, then all of the maps within the RasterStack are
+        If the row parameter is used then a single row is read into a 3d numpy
+        array. If the rows parameter is used, then a range of rows from
+        (start_row, end_row) is read into a 3d numpy array. If no additional
+        arguments are supplied, then all of the maps within the RasterStack are
         read into a 3d numpy array (obeying the GRASS region settings)
 
         Parameters
@@ -329,7 +329,6 @@ def read(self, row=None, rows=None):
 
         Returns
         -------
-
         data : ndarray
             3d masked numpy array containing data from RasterStack rasters.
         """
@@ -508,7 +507,8 @@ def _predfun_multioutput(img, estimator):
         result = result.transpose(2, 0, 1)
 
         # repeat mask for n_bands
-        mask3d = np.repeat(a=mask2d[np.newaxis, :, :], repeats=result.shape[0], axis=0)
+        mask3d = np.repeat(a=mask2d[np.newaxis, :, :], repeats=result.shape[0],
+                           axis=0)
 
         # convert proba to masked array
         result = np.ma.masked_array(result, mask=mask3d, fill_value=np.nan)
@@ -570,19 +570,23 @@ def predict(self, estimator, output, height=None, overwrite=False):
 
         if len(indexes) > 1:
             result_stack = self._predict_multi(
-                estimator, reg, indexes, indexes, height, func, output, overwrite
+                estimator, reg, indexes, indexes, height, func, output,
+                overwrite
             )
         else:
             if height is not None:
 
                 with RasterRow(
-                    output, mode="w", mtype=mtype, overwrite=overwrite
-                ) as dst:
-                    n_windows = len([i for i in self.row_windows(height=height)])
+                        output, mode="w", mtype=mtype,
+                        overwrite=overwrite) as dst:
+                    n_windows = len(
+                        [i for i in self.row_windows(height=height)]
+                    )
 
                     data_gen = (
                         (wi, self.read(rows=rows))
-                        for wi, rows in enumerate(self.row_windows(height=height))
+                        for wi, rows in enumerate(
+                        self.row_windows(height=height))
                     )
 
                     for wi, arr in data_gen:
@@ -601,14 +605,16 @@ def predict(self, estimator, output, height=None, overwrite=False):
                 result = func(arr, estimator)
                 result = np.ma.filled(result, nodata)
                 numpy2raster(
-                    result[0, :, :], mtype=mtype, rastname=output, overwrite=overwrite
+                    result[0, :, :], mtype=mtype, rastname=output,
+                    overwrite=overwrite
                 )
 
             result_stack = RasterStack(output)
 
         return result_stack
 
-    def predict_proba(self, estimator, output, class_labels=None, height=None, overwrite=False):
+    def predict_proba(self, estimator, output, class_labels=None, height=None,
+                      overwrite=False):
         """Prediction method for RasterStack class
 
         Parameters
@@ -653,12 +659,14 @@ def predict_proba(self, estimator, output, class_labels=None, height=None, overw
 
         # create and open rasters for writing
         result_stack = self._predict_multi(
-            estimator, reg, indexes, class_labels, height, func, output, overwrite
+            estimator, reg, indexes, class_labels, height, func, output,
+            overwrite
         )
 
         return result_stack
 
-    def _predict_multi(self, estimator, region, indexes, class_labels, height, func, output, overwrite):
+    def _predict_multi(self, estimator, region, indexes, class_labels, height,
+                       func, output, overwrite):
         # create and open rasters for writing if incremental reading
         if height is not None:
             dst = []
@@ -728,7 +736,8 @@ def row_windows(self, region=None, height=25):
             region = Region()
 
         windows = (
-            (row, row + height) if row + height <= region.rows else (row, region.rows)
+            (row, row + height)
+            if row + height <= region.rows else (row, region.rows)
             for row in range(0, region.rows, height)
         )
 
@@ -773,7 +782,8 @@ def extract_pixels(self, rast_name, use_cats=False, as_df=False):
         ).outputs.stdout
 
         if data == "":
-            gs.fatal("The training pixel locations do not spatially intersect any raster datasets")
+            gs.fatal("The training pixel locations do not spatially "
+                     "intersect any raster datasets")
 
         data = data.strip().split(os.linesep)
         data = [i.split("|") for i in data]
@@ -837,21 +847,21 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
             Extracted raster values as Pandas DataFrame if as_df = True.
         """
         # some checks
-        if VectorTopo(vect_name).exist() is False:
+        try:
+            vname, mapset = vect_name.split("@")
+        except ValueError:
+            vname = vect_name
+            mapset = (
+                g.mapset(flags="p", stdout_=PIPE).
+                    outputs.stdout.split(os.linesep)[0]
+            )
+
+        if VectorTopo(name=vname, mapset=mapset).exist() is False:
             gs.fatal("The supplied vector map does not exist")
 
         if isinstance(fields, str):
             fields = [fields]
 
-        vname = vect_name.split("@")[0]
-
-        try:
-            mapset = vect_name.split("@")[1]
-        except IndexError:
-            mapset = g.mapset(flags="p", stdout_=PIPE).outputs.stdout.split(os.linesep)[
-                0
-            ]
-
         # open grass vector
         with VectorTopo(name=vname, mapset=mapset, mode="r") as points:
 
@@ -860,7 +870,8 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
 
             # read attribute table (ignores region)
             df = pd.read_sql_query(
-                sql="select * from {name}".format(name=points.table.name), con=points.table.conn
+                sql="select * from {name}".format(name=points.table.name),
+                con=points.table.conn
             )
 
             for i in fields:
@@ -890,9 +901,11 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
                         dtype = np.float32
 
                     if len(list(itertools.chain(*rast_data))) == 0:
-                        gs.fatal("There are no training point geometries in the supplied vector dataset")
+                        gs.fatal("There are no training point geometries in "
+                                 "the supplied vector dataset")
 
-                    X = [k.split("|")[1] if k.split("|")[1] != "*" else nodata for k in rast_data]
+                    X = [k.split("|")[1] if k.split("|")[1] != "*" else nodata
+                         for k in rast_data]
                     X = np.asarray(X)
                     cat = np.asarray([int(k.split("|")[0]) for k in rast_data])
 
@@ -901,7 +914,8 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
                     else:
                         X = [float(i) for i in X]
 
-                X = pd.DataFrame(data=np.column_stack((X, cat)), columns=[name, key_col])
+                X = pd.DataFrame(data=np.column_stack((X, cat)),
+                                 columns=[name, key_col])
                 X[name] = X[name].astype(dtype)
                 Xs.append(X)
 
@@ -916,7 +930,8 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
 
         # remove samples containing NaNs
         if na_rm is True:
-            gs.message("Removing samples with NaN values in the raster feature variables...")
+            gs.message("Removing samples with NaN values in the raster "
+                       "feature variables...")
             df = df.dropna()
 
         if as_df is False:

diff --git a/grass7/raster/r.learn.ml2/rlearnlib/stats.py b/grass7/raster/r.learn.ml2/rlearnlib/stats.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -- coding: utf-8 --
 
-"""The statistics module contains simple wrappers around GRASS modules for statistical functions
-on raster maps"""
+"""The statistics module contains simple wrappers around GRASS modules for
+statistical functions on raster maps"""
 
 import os
 import numpy as np
@@ -14,7 +14,8 @@
 class StatisticsMixin(object):
     def covar(self, correlation=False):
         """
-        Outputs a covariance or correlation matrix for the layers within the RasterStack object
+        Outputs a covariance or correlation matrix for the layers within the
+        RasterStack object
 
         Parameters
         ----------
@@ -24,8 +25,8 @@ def covar(self, correlation=False):
         Returns
         -------
         numpy.ndarray
-            Covariance/correlation matrix of the layers within the RasterStack with diagonal and
-            upper triangle positions set to nan.
+            Covariance/correlation matrix of the layers within the RasterStack
+            with diagonal and upper triangle positions set to nan.
         """
 
         if correlation is True:
@@ -51,8 +52,8 @@ def linear_regression(self, x, y):
         Parameters
         ----------
         x : str
-            Name of GRASS GIS raster map to use as the x-variable. Has to be within the RasterStack
-            object.
+            Name of GRASS GIS raster map to use as the x-variable. Has to be
+            within the RasterStack object.
 
         y : str
             Name of GRASS GIS raster map to use as the y-variable.
@@ -80,14 +81,15 @@ def multiple_regression(
         Parameters
         ----------
         x : str
-            Name of GRASS GIS raster map to use as the x-variable. Has to be within the RasterStack
-            object.
+            Name of GRASS GIS raster map to use as the x-variable. Has to be
+            within the RasterStack object.
 
         y : str
             Name of GRASS GIS raster map to use as the y-variable.
 
         estimates : str (opt)
-            Optionally specify a name to create a raster map of the regression estimate.
+            Optionally specify a name to create a raster map of the regression
+            estimate.
 
         residuals : str (opt)
             Optionally specify a name to create a raster map of the residuals.

diff --git a/grass7/raster/r.learn.ml2/rlearnlib/transformers.py b/grass7/raster/r.learn.ml2/rlearnlib/transformers.py
@@ -1,5 +1,4 @@
 import importlib
-
 import numpy as np
 
 
@@ -16,15 +15,15 @@ def __init__(self):
         self._encoding = None
         self._inverse = None
 
-    def fit(self, X, y = None):
+    def fit(self, X, y=None):
         self._encoding = {value: label for (label, value, mtype) in X}
         self._inverse = {label: value for (label, value, mtype) in X}
         return self
 
-    def transform(self, X, y = None):
+    def transform(self, X, y=None):
         """Takes integer values and returns the category label"""
         return np.asarray([self._encoding[x] for x in X]).astype(np.object)
 
-    def inverse_transform(self, X, y = None):
+    def inverse_transform(self, X, y=None):
         """Takes a category label and returns the category index"""
         return np.asarray([self._inverse[x] for x in X]).astype(np.object)