Skip to content

Commit

Permalink
Bug fixes to r.learn.ml2
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenpawley committed Feb 26, 2021
1 parent ab735f9 commit 07e69bf
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 416 deletions.
3 changes: 2 additions & 1 deletion grass7/raster/r.learn.ml2/r.learn.predict/r.learn.predict.py
Expand Up @@ -95,7 +95,8 @@ def main():
import joblib

if sklearn.__version__ < "0.20":
gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed")
gs.fatal(
"Package python3-scikit-learn 0.20 or newer is not installed")

except ImportError:
gs.fatal("Package python3-scikit-learn 0.20 or newer is not installed")
Expand Down
2 changes: 1 addition & 1 deletion grass7/raster/r.learn.ml2/r.learn.train/r.learn.train.html
Expand Up @@ -97,7 +97,7 @@ <h3>Hyperparameters</h3>
<p>The estimator settings tab provides access to the most pertinent parameters that affect the
previously described algorithms. The scikit-learn estimator defaults are generally supplied,
and these parameters can be tuned using a grid-search by inputting multiple comma-separated
parameters. The grid search is performed using a 2-fold cross validation. This tuning can also
parameters. The grid search is performed using a 3-fold cross validation. This tuning can also
be accomplished simultaneously with nested cross-validation by setting the <em>cv</em> option
to &gt; 1.</p>

Expand Down
191 changes: 104 additions & 87 deletions grass7/raster/r.learn.ml2/r.learn.train/r.learn.train.py

Large diffs are not rendered by default.

89 changes: 52 additions & 37 deletions grass7/raster/r.learn.ml2/rlearnlib/raster.py
Expand Up @@ -68,14 +68,16 @@ def __init__(self, rasters=None, group=None):
if group:
groups_in_mapset = (
g.list(type="group", stdout_=PIPE)
.outputs.stdout.strip()
.split(os.linesep)
.outputs.stdout.strip()
.split(os.linesep)
)
groups_in_mapset = [i.split("@")[0] for i in groups_in_mapset]
group = group.split("@")[0]

if group not in groups_in_mapset:
gs.fatal("Imagery group {group} does not exist".format(group=group))
gs.fatal(
"Imagery group {group} does not exist".format(group=group)
)
else:
map_list = im.group(
group=group, flags=["l", "g"], quiet=True, stdout_=PIPE
Expand Down Expand Up @@ -310,12 +312,10 @@ def read(self, row=None, rows=None):
-----
Read an entire RasterStack into a numpy array
If the row parameter is used then a single row is read into a 3d numpy array
If the rows parameter is used, then a range of rows from (start_row, end_row) is read
into a 3d numpy array
If no additional arguments are supplied, then all of the maps within the RasterStack are
If the row parameter is used then a single row is read into a 3d numpy
array. If the rows parameter is used, then a range of rows from
(start_row, end_row) is read into a 3d numpy array. If no additional
arguments are supplied, then all of the maps within the RasterStack are
read into a 3d numpy array (obeying the GRASS region settings)
Parameters
Expand All @@ -329,7 +329,6 @@ def read(self, row=None, rows=None):
Returns
-------
data : ndarray
3d masked numpy array containing data from RasterStack rasters.
"""
Expand Down Expand Up @@ -508,7 +507,8 @@ def _predfun_multioutput(img, estimator):
result = result.transpose(2, 0, 1)

# repeat mask for n_bands
mask3d = np.repeat(a=mask2d[np.newaxis, :, :], repeats=result.shape[0], axis=0)
mask3d = np.repeat(a=mask2d[np.newaxis, :, :], repeats=result.shape[0],
axis=0)

# convert proba to masked array
result = np.ma.masked_array(result, mask=mask3d, fill_value=np.nan)
Expand Down Expand Up @@ -570,19 +570,23 @@ def predict(self, estimator, output, height=None, overwrite=False):

if len(indexes) > 1:
result_stack = self._predict_multi(
estimator, reg, indexes, indexes, height, func, output, overwrite
estimator, reg, indexes, indexes, height, func, output,
overwrite
)
else:
if height is not None:

with RasterRow(
output, mode="w", mtype=mtype, overwrite=overwrite
) as dst:
n_windows = len([i for i in self.row_windows(height=height)])
output, mode="w", mtype=mtype,
overwrite=overwrite) as dst:
n_windows = len(
[i for i in self.row_windows(height=height)]
)

data_gen = (
(wi, self.read(rows=rows))
for wi, rows in enumerate(self.row_windows(height=height))
for wi, rows in enumerate(
self.row_windows(height=height))
)

for wi, arr in data_gen:
Expand All @@ -601,14 +605,16 @@ def predict(self, estimator, output, height=None, overwrite=False):
result = func(arr, estimator)
result = np.ma.filled(result, nodata)
numpy2raster(
result[0, :, :], mtype=mtype, rastname=output, overwrite=overwrite
result[0, :, :], mtype=mtype, rastname=output,
overwrite=overwrite
)

result_stack = RasterStack(output)

return result_stack

def predict_proba(self, estimator, output, class_labels=None, height=None, overwrite=False):
def predict_proba(self, estimator, output, class_labels=None, height=None,
overwrite=False):
"""Prediction method for RasterStack class
Parameters
Expand Down Expand Up @@ -653,12 +659,14 @@ def predict_proba(self, estimator, output, class_labels=None, height=None, overw

# create and open rasters for writing
result_stack = self._predict_multi(
estimator, reg, indexes, class_labels, height, func, output, overwrite
estimator, reg, indexes, class_labels, height, func, output,
overwrite
)

return result_stack

def _predict_multi(self, estimator, region, indexes, class_labels, height, func, output, overwrite):
def _predict_multi(self, estimator, region, indexes, class_labels, height,
func, output, overwrite):
# create and open rasters for writing if incremental reading
if height is not None:
dst = []
Expand Down Expand Up @@ -728,7 +736,8 @@ def row_windows(self, region=None, height=25):
region = Region()

windows = (
(row, row + height) if row + height <= region.rows else (row, region.rows)
(row, row + height)
if row + height <= region.rows else (row, region.rows)
for row in range(0, region.rows, height)
)

Expand Down Expand Up @@ -773,7 +782,8 @@ def extract_pixels(self, rast_name, use_cats=False, as_df=False):
).outputs.stdout

if data == "":
gs.fatal("The training pixel locations do not spatially intersect any raster datasets")
gs.fatal("The training pixel locations do not spatially "
"intersect any raster datasets")

data = data.strip().split(os.linesep)
data = [i.split("|") for i in data]
Expand Down Expand Up @@ -837,21 +847,21 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
Extracted raster values as Pandas DataFrame if as_df = True.
"""
# some checks
if VectorTopo(vect_name).exist() is False:
try:
vname, mapset = vect_name.split("@")
except ValueError:
vname = vect_name
mapset = (
g.mapset(flags="p", stdout_=PIPE).
outputs.stdout.split(os.linesep)[0]
)

if VectorTopo(name=vname, mapset=mapset).exist() is False:
gs.fatal("The supplied vector map does not exist")

if isinstance(fields, str):
fields = [fields]

vname = vect_name.split("@")[0]

try:
mapset = vect_name.split("@")[1]
except IndexError:
mapset = g.mapset(flags="p", stdout_=PIPE).outputs.stdout.split(os.linesep)[
0
]

# open grass vector
with VectorTopo(name=vname, mapset=mapset, mode="r") as points:

Expand All @@ -860,7 +870,8 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):

# read attribute table (ignores region)
df = pd.read_sql_query(
sql="select * from {name}".format(name=points.table.name), con=points.table.conn
sql="select * from {name}".format(name=points.table.name),
con=points.table.conn
)

for i in fields:
Expand Down Expand Up @@ -890,9 +901,11 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
dtype = np.float32

if len(list(itertools.chain(*rast_data))) == 0:
gs.fatal("There are no training point geometries in the supplied vector dataset")
gs.fatal("There are no training point geometries in "
"the supplied vector dataset")

X = [k.split("|")[1] if k.split("|")[1] != "*" else nodata for k in rast_data]
X = [k.split("|")[1] if k.split("|")[1] != "*" else nodata
for k in rast_data]
X = np.asarray(X)
cat = np.asarray([int(k.split("|")[0]) for k in rast_data])

Expand All @@ -901,7 +914,8 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):
else:
X = [float(i) for i in X]

X = pd.DataFrame(data=np.column_stack((X, cat)), columns=[name, key_col])
X = pd.DataFrame(data=np.column_stack((X, cat)),
columns=[name, key_col])
X[name] = X[name].astype(dtype)
Xs.append(X)

Expand All @@ -916,7 +930,8 @@ def extract_points(self, vect_name, fields, na_rm=True, as_df=False):

# remove samples containing NaNs
if na_rm is True:
gs.message("Removing samples with NaN values in the raster feature variables...")
gs.message("Removing samples with NaN values in the raster "
"feature variables...")
df = df.dropna()

if as_df is False:
Expand Down
22 changes: 12 additions & 10 deletions grass7/raster/r.learn.ml2/rlearnlib/stats.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""The statistics module contains simple wrappers around GRASS modules for statistical functions
on raster maps"""
"""The statistics module contains simple wrappers around GRASS modules for
statistical functions on raster maps"""

import os
import numpy as np
Expand All @@ -14,7 +14,8 @@
class StatisticsMixin(object):
def covar(self, correlation=False):
"""
Outputs a covariance or correlation matrix for the layers within the RasterStack object
Outputs a covariance or correlation matrix for the layers within the
RasterStack object
Parameters
----------
Expand All @@ -24,8 +25,8 @@ def covar(self, correlation=False):
Returns
-------
numpy.ndarray
Covariance/correlation matrix of the layers within the RasterStack with diagonal and
upper triangle positions set to nan.
Covariance/correlation matrix of the layers within the RasterStack
with diagonal and upper triangle positions set to nan.
"""

if correlation is True:
Expand All @@ -51,8 +52,8 @@ def linear_regression(self, x, y):
Parameters
----------
x : str
Name of GRASS GIS raster map to use as the x-variable. Has to be within the RasterStack
object.
Name of GRASS GIS raster map to use as the x-variable. Has to be
within the RasterStack object.
y : str
Name of GRASS GIS raster map to use as the y-variable.
Expand Down Expand Up @@ -80,14 +81,15 @@ def multiple_regression(
Parameters
----------
x : str
Name of GRASS GIS raster map to use as the x-variable. Has to be within the RasterStack
object.
Name of GRASS GIS raster map to use as the x-variable. Has to be
within the RasterStack object.
y : str
Name of GRASS GIS raster map to use as the y-variable.
estimates : str (opt)
Optionally specify a name to create a raster map of the regression estimate.
Optionally specify a name to create a raster map of the regression
estimate.
residuals : str (opt)
Optionally specify a name to create a raster map of the residuals.
Expand Down
7 changes: 3 additions & 4 deletions grass7/raster/r.learn.ml2/rlearnlib/transformers.py
@@ -1,5 +1,4 @@
import importlib

import numpy as np


Expand All @@ -16,15 +15,15 @@ def __init__(self):
self._encoding = None
self._inverse = None

def fit(self, X, y=None):
    """Build the value->label and label->value lookup tables.

    Parameters
    ----------
    X : iterable of 3-tuples
        Each item is unpacked as ``(label, value, mtype)``. Only ``label``
        and ``value`` are used; the third element is ignored here.
    y : ignored
        Present only so the signature accepts a second positional argument.

    Returns
    -------
    self
        The fitted instance, with ``_encoding`` (value -> label) and
        ``_inverse`` (label -> value) populated.
    """
    encoding = {}
    inverse = {}

    # Fill both tables in a single pass so that one-shot iterables
    # (generators, iterators) are not exhausted before the inverse
    # mapping is built.
    for label, value, _mtype in X:
        encoding[value] = label
        inverse[label] = value

    self._encoding = encoding
    self._inverse = inverse
    return self

def transform(self, X, y=None):
    """Takes integer values and returns the category label.

    Parameters
    ----------
    X : iterable
        Values to look up in the ``_encoding`` table built by ``fit``.
    y : ignored

    Returns
    -------
    numpy.ndarray
        Object-dtype array holding the label found for each item of ``X``.

    Raises
    ------
    KeyError
        If an item of ``X`` is not a key of ``_encoding``.
    """
    labels = [self._encoding[x] for x in X]
    # ``np.object`` was removed from NumPy (1.24); the builtin ``object``
    # is the equivalent dtype and works on every NumPy version.
    return np.asarray(labels).astype(object)

def inverse_transform(self, X, y=None):
    """Takes a category label and returns the category index.

    Parameters
    ----------
    X : iterable
        Labels to look up in the ``_inverse`` table built by ``fit``.
    y : ignored

    Returns
    -------
    numpy.ndarray
        Object-dtype array holding the value found for each item of ``X``.

    Raises
    ------
    KeyError
        If an item of ``X`` is not a key of ``_inverse``.
    """
    values = [self._inverse[x] for x in X]
    # ``np.object`` was removed from NumPy (1.24); the builtin ``object``
    # is the equivalent dtype and works on every NumPy version.
    return np.asarray(values).astype(object)

0 comments on commit 07e69bf

Please sign in to comment.