From 93703e7d6a201807c48bd0951359457c75fcf028 Mon Sep 17 00:00:00 2001
From: Even Rouault
Date: Wed, 15 May 2024 01:36:24 +0200
Subject: [PATCH] GDALNoDataMaskBand::IRasterIO(): speed optimization, and
 reduce copy&paste

The Byte in-place path with non-default pixel/line spacing keeps its
explicit loop: SetZeroOr255() reads its source as a packed array, which
does not hold when source and destination are the same strided buffer.

---
 gcore/gdalnodatamaskband.cpp | 187 +++++++++++++++++++++++------------
 1 file changed, 122 insertions(+), 65 deletions(-)

diff --git a/gcore/gdalnodatamaskband.cpp b/gcore/gdalnodatamaskband.cpp
index 635f936e7e4d..615336e6468e 100644
--- a/gcore/gdalnodatamaskband.cpp
+++ b/gcore/gdalnodatamaskband.cpp
@@ -111,13 +111,19 @@ static GDALDataType GetWorkDataType(GDALDataType eDataType)
             eWrkDT = GDT_Byte;
             break;
 
+        case GDT_Int16:
+            eWrkDT = GDT_Int16;
+            break;
+
         case GDT_UInt16:
+            eWrkDT = GDT_UInt16;
+            break;
+
         case GDT_UInt32:
             eWrkDT = GDT_UInt32;
             break;
 
         case GDT_Int8:
-        case GDT_Int16:
         case GDT_Int32:
         case GDT_CInt16:
         case GDT_CInt32:
@@ -162,6 +168,16 @@ bool GDALNoDataMaskBand::IsNoDataInRange(double dfNoDataValue,
             return GDALIsValueInRange<GByte>(dfNoDataValue);
         }
 
+        case GDT_Int16:
+        {
+            return GDALIsValueInRange<GInt16>(dfNoDataValue);
+        }
+
+        case GDT_UInt16:
+        {
+            return GDALIsValueInRange<GUInt16>(dfNoDataValue);
+        }
+
         case GDT_UInt32:
         {
             return GDALIsValueInRange<GUInt32>(dfNoDataValue);
@@ -224,6 +240,77 @@ CPLErr GDALNoDataMaskBand::IReadBlock(int nXBlockOff, int nYBlockOff,
                     nBlockXSize, &sExtraArg);
 }
 
+/************************************************************************/
+/*                          SetZeroOr255()                              */
+/************************************************************************/
+
+// In-place variant: turn each of the nBufSize bytes of a contiguous buffer
+// into a mask value, 0 where it equals byNoData and 255 otherwise.
+#if (defined(__GNUC__) && !defined(__clang__))
+__attribute__((optimize("tree-vectorize")))
+#endif
+static void
+SetZeroOr255(GByte *pabyDestAndSrc, size_t nBufSize, GByte byNoData)
+{
+    for (size_t i = 0; i < nBufSize; ++i)
+    {
+        pabyDestAndSrc[i] = (pabyDestAndSrc[i] == byNoData) ? 0 : 255;
+    }
+}
+
+// Write into the contiguous pabyDest buffer the mask (0 where the value
+// equals nNoData, 255 otherwise) of the nBufSize contiguous panSrc values.
+template <class T>
+#if (defined(__GNUC__) && !defined(__clang__))
+__attribute__((optimize("tree-vectorize")))
+#endif
+static void
+SetZeroOr255(GByte *pabyDest, const T *panSrc, size_t nBufSize, T nNoData)
+{
+    for (size_t i = 0; i < nBufSize; ++i)
+    {
+        pabyDest[i] = (panSrc[i] == nNoData) ? 0 : 255;
+    }
+}
+
+// Same as above, but with a destination addressed through nPixelSpace and
+// nLineSpace (in bytes). panSrc is always read as a packed array of
+// nBufXSize * nBufYSize values, so it must not alias a strided pabyDest.
+template <class T>
+static void SetZeroOr255(GByte *pabyDest, const T *panSrc, int nBufXSize,
+                         int nBufYSize, GSpacing nPixelSpace,
+                         GSpacing nLineSpace, T nNoData)
+{
+    if (nPixelSpace == 1 && nLineSpace == nBufXSize)
+    {
+        const size_t nBufSize = static_cast<size_t>(nBufXSize) * nBufYSize;
+        SetZeroOr255(pabyDest, panSrc, nBufSize, nNoData);
+    }
+    else if (nPixelSpace == 1)
+    {
+        for (int iY = 0; iY < nBufYSize; iY++)
+        {
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nNoData);
+            pabyDest += nLineSpace;
+            panSrc += nBufXSize;
+        }
+    }
+    else
+    {
+        size_t i = 0;
+        for (int iY = 0; iY < nBufYSize; iY++)
+        {
+            GByte *pabyLineDest = pabyDest + iY * nLineSpace;
+            for (int iX = 0; iX < nBufXSize; iX++)
+            {
+                *pabyLineDest = (panSrc[i] == nNoData) ? 0 : 255;
+                ++i;
+                pabyLineDest += nPixelSpace;
+            }
+        }
+    }
+}
+
 /************************************************************************/
 /*                             IRasterIO()                              */
 /************************************************************************/
@@ -259,13 +346,13 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
         if (nPixelSpace == 1 && nLineSpace == nBufXSize)
         {
             const size_t nBufSize = static_cast<size_t>(nBufXSize) * nBufYSize;
-            for (size_t i = 0; i < nBufSize; ++i)
-            {
-                pabyData[i] = pabyData[i] == byNoData ? 0 : 255;
-            }
+            SetZeroOr255(pabyData, nBufSize, byNoData);
         }
         else
         {
+            // Keep the explicit strided loop: source and destination are the
+            // same buffer here, whereas the generic SetZeroOr255() helper
+            // reads its source as a packed array.
             for (int iY = 0; iY < nBufYSize; iY++)
             {
                 GByte *pabyLine = pabyData + iY * nLineSpace;
@@ -311,41 +398,39 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
      */
     switch (eWrkDT)
     {
-        case GDT_UInt32:
+        case GDT_Int16:
         {
-            const GUInt32 nNoData = static_cast<GUInt32>(m_dfNoDataValue);
-            const GUInt32 *panSrc = static_cast<const GUInt32 *>(pTemp);
+            const auto nNoData = static_cast<GInt16>(m_dfNoDataValue);
+            const auto *panSrc = static_cast<const GInt16 *>(pTemp);
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize,
+                         nPixelSpace, nLineSpace, nNoData);
+        }
+        break;
 
-            size_t i = 0;
-            for (int iY = 0; iY < nBufYSize; iY++)
-            {
-                GByte *pabyLineDest = pabyDest + iY * nLineSpace;
-                for (int iX = 0; iX < nBufXSize; iX++)
-                {
-                    *pabyLineDest = panSrc[i] == nNoData ? 0 : 255;
-                    ++i;
-                    pabyLineDest += nPixelSpace;
-                }
-            }
+        case GDT_UInt16:
+        {
+            const auto nNoData = static_cast<GUInt16>(m_dfNoDataValue);
+            const auto *panSrc = static_cast<const GUInt16 *>(pTemp);
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize,
+                         nPixelSpace, nLineSpace, nNoData);
         }
         break;
 
-        case GDT_Int32:
+        case GDT_UInt32:
         {
-            const GInt32 nNoData = static_cast<GInt32>(m_dfNoDataValue);
-            const GInt32 *panSrc = static_cast<const GInt32 *>(pTemp);
+            const auto nNoData = static_cast<GUInt32>(m_dfNoDataValue);
+            const auto *panSrc = static_cast<const GUInt32 *>(pTemp);
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize,
+                         nPixelSpace, nLineSpace, nNoData);
+        }
+        break;
 
-            size_t i = 0;
-            for (int iY = 0; iY < nBufYSize; iY++)
-            {
-                GByte *pabyLineDest = pabyDest + iY * nLineSpace;
-                for (int iX = 0; iX < nBufXSize; iX++)
-                {
-                    *pabyLineDest = panSrc[i] == nNoData ? 0 : 255;
-                    ++i;
-                    pabyLineDest += nPixelSpace;
-                }
-            }
+        case GDT_Int32:
+        {
+            const auto nNoData = static_cast<GInt32>(m_dfNoDataValue);
+            const auto *panSrc = static_cast<const GInt32 *>(pTemp);
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize,
+                         nPixelSpace, nLineSpace, nNoData);
         }
         break;
 
@@ -401,44 +486,16 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
         case GDT_Int64:
         {
             const auto *panSrc = static_cast<const int64_t *>(pTemp);
-
-            size_t i = 0;
-            for (int iY = 0; iY < nBufYSize; iY++)
-            {
-                GByte *pabyLineDest = pabyDest + iY * nLineSpace;
-                for (int iX = 0; iX < nBufXSize; iX++)
-                {
-                    const auto nVal = panSrc[i];
-                    if (nVal == m_nNoDataValueInt64)
-                        *pabyLineDest = 0;
-                    else
-                        *pabyLineDest = 255;
-                    ++i;
-                    pabyLineDest += nPixelSpace;
-                }
-            }
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize,
+                         nPixelSpace, nLineSpace, m_nNoDataValueInt64);
         }
         break;
 
         case GDT_UInt64:
         {
             const auto *panSrc = static_cast<const uint64_t *>(pTemp);
-
-            size_t i = 0;
-            for (int iY = 0; iY < nBufYSize; iY++)
-            {
-                GByte *pabyLineDest = pabyDest + iY * nLineSpace;
-                for (int iX = 0; iX < nBufXSize; iX++)
-                {
-                    const auto nVal = panSrc[i];
-                    if (nVal == m_nNoDataValueUInt64)
-                        *pabyLineDest = 0;
-                    else
-                        *pabyLineDest = 255;
-                    ++i;
-                    pabyLineDest += nPixelSpace;
-                }
-            }
+            SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize,
+                         nPixelSpace, nLineSpace, m_nNoDataValueUInt64);
         }
         break;
 