Merge pull request #846 from EnMAP-Box/845-classification-layer-accuracy-and-area-report-add-support-for-non-matching-category-names

resolved #845
EnMAP-Box · Mar 18, 2024 · 50f06ff · 50f06ff
2 parents fe67f1b + 4d7ae55
commit 50f06ff
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 16 deletions.
diff --git a/enmapboxprocessing/algorithm/classificationperformancesimplealgorithm.py b/enmapboxprocessing/algorithm/classificationperformancesimplealgorithm.py
@@ -28,7 +28,8 @@ def shortDescription(self) -> str:
                'Stehman (2014): https://doi.org/10.1080/01431161.2014.930207. ' \
                'Note that (simple) random sampling is a special case of stratified random sampling, ' \
                'with exactly one stratum. \n' \
-               'Observed and predicted categories are matched by name.'
+               'Observed and predicted categories are matched by name, if possible. ' \
+               'Otherwise, categories are matched by order (in this case, a warning message is logged).'
 
     def helpParameters(self) -> List[Tuple[str, str]]:
         return [

diff --git a/enmapboxprocessing/algorithm/classificationperformancestratifiedalgorithm.py b/enmapboxprocessing/algorithm/classificationperformancestratifiedalgorithm.py
@@ -9,14 +9,14 @@
 
 import numpy as np
 
+from enmapbox.typeguard import typechecked
 from enmapboxprocessing.algorithm.rasterizecategorizedvectoralgorithm import RasterizeCategorizedVectorAlgorithm
 from enmapboxprocessing.algorithm.translatecategorizedrasteralgorithm import TranslateCategorizedRasterAlgorithm
 from enmapboxprocessing.enmapalgorithm import EnMAPProcessingAlgorithm, Group
 from enmapboxprocessing.rasterreader import RasterReader
 from enmapboxprocessing.reportwriter import HtmlReportWriter, CsvReportWriter, MultiReportWriter
 from enmapboxprocessing.utils import Utils
 from qgis.core import (QgsProcessingContext, QgsProcessingFeedback, QgsVectorLayer, QgsRasterLayer, QgsUnitTypes)
-from enmapbox.typeguard import typechecked
 
 
 @typechecked
@@ -34,7 +34,8 @@ def displayName(cls) -> str:
     def shortDescription(self) -> str:
         return 'Estimates map accuracy and area proportions for stratified random sampling as described in ' \
                'Stehman (2014): https://doi.org/10.1080/01431161.2014.930207. \n' \
-               'Observed and predicted categories are matched by name.'
+               'Observed and predicted categories are matched by name, if possible. ' \
+               'Otherwise, categories are matched by order (in this case, a warning message is logged).'
 
     def helpParameters(self) -> List[Tuple[str, str]]:
         return [
@@ -59,22 +60,27 @@ def checkCategories(self, parameters: Dict[str, Any], context: QgsProcessingCont
             categoriesReference = Utils.categoriesFromPalettedRasterRenderer(reference.renderer())
         else:
             assert 0
-        categoriesPrediction = Utils.categoriesFromPalettedRasterRenderer(classification.renderer())
+        categoriesPrediction = Utils().categoriesFromRenderer(classification.renderer(), classification)
+        if len(categoriesReference) == len(categoriesPrediction):
+            return True, ''  # if the number of categories is equal, we can at least match by order (see #845)
         for cR in categoriesReference:
+            found = False
             for cP in categoriesPrediction:
                 if cR.name == cP.name:
-                    return True, ''  # good, we found the reference class
-            return False, f'Observed category "{cR.name}" not found in predicted categories.'
+                    found = True  # good, we found the reference class
+            if not found:
+                return False, f'Observed category "{cR.name}" not found in predicted categories.'
         for cP in categoriesPrediction:
+            found = False
             for cR in categoriesReference:
                 if cR.name == cP.name:
-                    return True, ''  # good, we found the map class
-            return False, f'Predicted category "{cP.name}" not found in observed categories.'
+                    found = True  # good, we found the map class
+            if not found:
+                return False, f'Predicted category "{cP.name}" not found in observed categories.'
         return False, 'Empty category list.'
 
     def checkParameterValues(self, parameters: Dict[str, Any], context: QgsProcessingContext) -> Tuple[bool, str]:
         checks = [
-            self.checkParameterRasterClassification(parameters, self.P_CLASSIFICATION, context),
             self.checkParameterMapClassification(parameters, self.P_REFERENCE, context),
             self.checkParameterRasterClassification(parameters, self.P_STRATIFICATION, context),
         ]
@@ -152,7 +158,7 @@ def processAlgorithm(
             arrayReference = RasterReader(reference).array()[0]
             categoriesReference = Utils.categoriesFromPalettedRasterRenderer(reference.renderer())
             arrayPrediction = RasterReader(classification).array()[0]
-            categoriesPrediction = Utils.categoriesFromPalettedRasterRenderer(classification.renderer())
+            categoriesPrediction = Utils().categoriesFromRenderer(classification.renderer(), classification)
             arrayStratification = RasterReader(stratification).array()[0]
             categoriesStratification = Utils.categoriesFromPalettedRasterRenderer(stratification.renderer())
             # - get valid reference location
@@ -162,11 +168,23 @@ def processAlgorithm(
             yReference = arrayReference[valid].astype(np.float32)
             yMap = arrayPrediction[valid].astype(np.float32)
             # - remap class ids by name
-            yMapRemapped = np.zeros_like(yMap)
-            for cP in categoriesPrediction:
+            yMapRemapped = yMap.copy()  # this initial state is correct for matching by order (see #845)
+            classNamesMatching = list()
+            for i, cP in enumerate(categoriesPrediction):
+                found = False
                 for cR in categoriesReference:
                     if cR.name == cP.name:
                         yMapRemapped[yMap == cP.value] = cR.value
+                        found = True
+                        classNamesMatching.append([cP.name, cR.name])
+                if not found:
+                    feedback.pushWarning(
+                        f'predicted class "{categoriesPrediction[i].name}" not found in reference classes. '
+                        f'class will be matched by order: '
+                        f'"{cP.name}" -> "{categoriesReference[i].name}".'
+                    )
+                    classNamesMatching.append([cP.name, categoriesReference[i].name])
+
             yMap = yMapRemapped
             # - prepare strata
             stratum = arrayStratification[valid]
@@ -185,7 +203,10 @@ def processAlgorithm(
             stats = stratifiedAccuracyAssessment(stratum, yReference, yMap, h, N_h, classValues, classNames)
             pixelUnits = QgsUnitTypes.toString(classification.crs().mapUnits())
             pixelArea = classification.rasterUnitsPerPixelX() * classification.rasterUnitsPerPixelY()
-            self.writeReport(filename, stats, pixelUnits=pixelUnits, pixelArea=pixelArea)
+
+            self.writeReport(
+                filename, stats, pixelUnits=pixelUnits, pixelArea=pixelArea, classNamesMatching=classNamesMatching
+            )
             # dump json
             with open(filename + '.json', 'w') as file:
                 file.write(json.dumps(stats.__dict__, indent=4))
@@ -199,7 +220,10 @@ def processAlgorithm(
         return result
 
     @classmethod
-    def writeReport(cls, filename: str, stats: 'StratifiedAccuracyAssessmentResult', pixelUnits='pixel', pixelArea=1.):
+    def writeReport(
+            cls, filename: str, stats: 'StratifiedAccuracyAssessmentResult', pixelUnits='pixel', pixelArea=1.,
+            classNamesMatching: list = None
+    ):
 
         def smartRound(obj, ndigits):
             if isinstance(obj, list):
@@ -230,6 +254,9 @@ def confidenceIntervall(mean, se):
             report.writeParagraph(f'Sample size: {stats.n} px')
             report.writeParagraph(f'Area size: {smartRound(stats.N, 2)} {pixelUnits}')
 
+            if classNamesMatching is not None:
+                report.writeTable(classNamesMatching, 'Class matching', ['predicted', 'observed'])
+
             values = smartRound(stats.confusion_matrix_counts, 2)
             report.writeTable(
                 values, 'Adjusted confusion matrix counts: predicted (rows) vs. observed (columns)',

diff --git a/enmapboxprocessing/utils.py b/enmapboxprocessing/utils.py
@@ -326,11 +326,14 @@ def categoriesFromCategorizedSymbolRenderer(cls, renderer: QgsCategorizedSymbolR
         return categories
 
     @classmethod
-    def categoriesFromRenderer(cls, renderer: Union[QgsFeatureRenderer, QgsRasterRenderer]) -> Optional[Categories]:
+    def categoriesFromRenderer(cls, renderer: Union[QgsFeatureRenderer, QgsRasterRenderer],
+                               layer: QgsMapLayer = None) -> Optional[Categories]:
         if isinstance(renderer, QgsPalettedRasterRenderer):
             return Utils.categoriesFromPalettedRasterRenderer(renderer)
         if isinstance(renderer, QgsCategorizedSymbolRenderer):
             return Utils.categoriesFromCategorizedSymbolRenderer(renderer)
+        if isinstance(renderer, QgsSingleBandGrayRenderer):
+            return Utils.categoriesFromRasterBand(layer, renderer.grayBand())
 
     @classmethod
     def categoriesFromRasterBand(cls, raster: QgsRasterLayer, bandNo: int) -> Categories:

diff --git a/...s/enmap-box/enmapboxprocessing/algorithm/test_ClassificationPerformanceSimpleAlgorithm.py b/...s/enmap-box/enmapboxprocessing/algorithm/test_ClassificationPerformanceSimpleAlgorithm.py
@@ -1,11 +1,13 @@
 from math import isnan
 
-from enmapboxtestdata import landcover_polygon
 from enmapboxprocessing.algorithm.classificationperformancesimplealgorithm import \
     ClassificationPerformanceSimpleAlgorithm
 from enmapboxprocessing.algorithm.testcase import TestCase
+from enmapboxprocessing.driver import Driver
+from enmapboxprocessing.rasterreader import RasterReader
 from enmapboxprocessing.utils import Utils
 from enmapboxtestdata import landcover_map_l3
+from enmapboxtestdata import landcover_polygon
 
 writeToDisk = True
 
@@ -36,3 +38,23 @@ def test_perfectMap(self):
         stats = Utils.jsonLoad(result[alg.P_OUTPUT_REPORT] + '.json')
         for v in stats['producers_accuracy_se'] + stats['users_accuracy_se']:
             self.assertFalse(isnan(v))  # previously we had NaN values, so better check this
+
+    def test_nonMatchingCategoryNames(self):
+        reader = RasterReader(landcover_map_l3)
+        writer = Driver(self.filename('copy')).createFromArray(reader.array(), reader.extent(), reader.crs())
+        writer.close()
+
+        alg = ClassificationPerformanceSimpleAlgorithm()
+        alg.initAlgorithm()
+        parameters = {
+            alg.P_CLASSIFICATION: self.filename('copy'),
+            alg.P_REFERENCE: landcover_map_l3,
+            alg.P_OPEN_REPORT: self.openReport,
+            alg.P_OUTPUT_REPORT: self.filename('report.html'),
+        }
+
+        result = self.runalg(alg, parameters)
+        return
+        stats = Utils.jsonLoad(result[alg.P_OUTPUT_REPORT] + '.json')
+        for v in stats['producers_accuracy_se'] + stats['users_accuracy_se']:
+            self.assertFalse(isnan(v))  # previously we had NaN values, so better check this