Skip to content

Commit

Permalink
Merge pull request #10283 from rouault/sqlite_stddev
Browse files Browse the repository at this point in the history
SQLite/GPKG: add STDDEV_POP() and STDDEV_SAMP() aggregate functions
  • Loading branch information
rouault authored Jul 6, 2024
2 parents 8c0fcc3 + b012d9a commit 75ad908
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 0 deletions.
12 changes: 12 additions & 0 deletions autotest/ogr/ogr_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4095,3 +4095,15 @@ def test_ogr_sql_ST_Area_on_ellipsoid(tmp_vsimem, require_spatialite):
with ds.ExecuteSQL("SELECT ST_Area(null, 1) FROM my_layer") as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f[0] is None


def test_ogr_sqlite_stddev():
    """Check the STDDEV_POP() and STDDEV_SAMP() SQL aggregate functions.

    The table holds two numeric values (4 and 5) mixed with a NULL and a
    text value; the expected results are the population and sample standard
    deviations of the two numeric values alone.
    """

    mem_ds = ogr.Open(":memory:", update=1)
    for statement in (
        "CREATE TABLE test(v REAL)",
        "INSERT INTO test VALUES (4),(NULL),('invalid'),(5)",
    ):
        mem_ds.ExecuteSQL(statement)

    query = "SELECT STDDEV_POP(v), STDDEV_SAMP(v) FROM test"
    with mem_ds.ExecuteSQL(query) as result_lyr:
        feat = result_lyr.GetNextFeature()
        # Population standard deviation of {4, 5}
        assert feat.GetField(0) == pytest.approx(0.5, rel=1e-15)
        # Sample standard deviation of {4, 5}
        assert feat.GetField(1) == pytest.approx(0.5**0.5, rel=1e-15)
9 changes: 9 additions & 0 deletions doc/source/user/sql_sqlite_dialect.rst
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,15 @@ For example we can select the annotation features as:
SELECT * FROM nation WHERE OGR_STYLE LIKE 'LABEL%'
Statistics functions
++++++++++++++++++++

In addition to the standard COUNT(), SUM(), AVG(), MIN() and MAX() functions,
the following aggregate functions are available:

- STDDEV_POP: (GDAL >= 3.10) numerical population standard deviation.
- STDDEV_SAMP: (GDAL >= 3.10) numerical `sample standard deviation <https://en.wikipedia.org/wiki/Standard_deviation#Sample_standard_deviation>`__.

Spatialite SQL functions
++++++++++++++++++++++++

Expand Down
71 changes: 71 additions & 0 deletions ogr/ogrsf_frmts/sqlite/ogrsqlitesqlfunctionscommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,69 @@ static void OGRSQLITE_LIKE(sqlite3_context *pContext, int argc,
insensitive, bUTF8Strings));
}

/************************************************************************/
/* OGRSQLITE_STDDEV_Step() */
/************************************************************************/

// Running state of Welford's online algorithm for variance:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
//
// One instance is kept per aggregate invocation. The storage is obtained
// through sqlite3_aggregate_context(), which is documented to zero the memory
// on first allocation; no constructor runs, so the fields rely on that
// all-zero initial state and the struct must stay trivially constructible.
struct OGRSQLITE_STDDEV_Context
{
    int64_t nValues;  // Number of numeric values accumulated so far
    double dfMean;    // Running mean of the accumulated values
    double dfM2;  // Accumulator for squared distance from the mean
};

/**
 * Aggregate "step" callback shared by STDDEV_POP() and STDDEV_SAMP().
 *
 * Folds the value of the current row into the running mean / M2 accumulators
 * using Welford's online update. Values whose SQLite type is neither INTEGER
 * nor FLOAT (NULL, text, blob) are ignored.
 *
 * @param pContext SQLite function context, used to fetch the per-aggregate
 *                 OGRSQLITE_STDDEV_Context state.
 * @param argv     Function arguments; only argv[0] is read.
 */
static void OGRSQLITE_STDDEV_Step(sqlite3_context *pContext, int /* argc*/,
                                  sqlite3_value **argv)
{
    auto pAggCtxt =
        static_cast<OGRSQLITE_STDDEV_Context *>(sqlite3_aggregate_context(
            pContext, static_cast<int>(sizeof(OGRSQLITE_STDDEV_Context))));
    if (pAggCtxt == nullptr)
    {
        // sqlite3_aggregate_context() returns NULL when the allocation of
        // the aggregate state fails: report it instead of dereferencing.
        sqlite3_result_error_nomem(pContext);
        return;
    }

    const auto eType = sqlite3_value_type(argv[0]);
    if (eType != SQLITE_INTEGER && eType != SQLITE_FLOAT)
        return;

    const double dfValue = sqlite3_value_double(argv[0]);
    pAggCtxt->nValues++;
    // Distance to the mean *before* it is updated with the new value...
    const double dfDelta = dfValue - pAggCtxt->dfMean;
    pAggCtxt->dfMean += dfDelta / pAggCtxt->nValues;
    // ... and distance to the mean *after* the update. Their product is the
    // increment of the sum of squared deviations.
    const double dfDelta2 = dfValue - pAggCtxt->dfMean;
    pAggCtxt->dfM2 += dfDelta * dfDelta2;
}

/************************************************************************/
/* OGRSQLITE_STDDEV_POP_Finalize() */
/************************************************************************/

/**
 * Aggregate "final" callback of STDDEV_POP().
 *
 * Sets the result to sqrt(M2 / n), the population standard deviation of the
 * n numeric values seen by the step callback. When no numeric value was
 * accumulated, no result is set, which SQLite reports as NULL.
 *
 * @param pContext SQLite function context holding the aggregate state and
 *                 receiving the result.
 */
static void OGRSQLITE_STDDEV_POP_Finalize(sqlite3_context *pContext)
{
    // A size of 0 only retrieves the state allocated by a previous step
    // call: nothing is allocated here, and NULL is returned if the step
    // callback never ran (or if its allocation failed).
    const auto pAggCtxt = static_cast<const OGRSQLITE_STDDEV_Context *>(
        sqlite3_aggregate_context(pContext, 0));
    if (pAggCtxt != nullptr && pAggCtxt->nValues > 0)
    {
        sqlite3_result_double(pContext,
                              sqrt(pAggCtxt->dfM2 / pAggCtxt->nValues));
    }
}

/************************************************************************/
/* OGRSQLITE_STDDEV_SAMP_Finalize() */
/************************************************************************/

/**
 * Aggregate "final" callback of STDDEV_SAMP().
 *
 * Sets the result to sqrt(M2 / (n - 1)), the sample standard deviation of
 * the n numeric values seen by the step callback. At least two values are
 * needed; otherwise no result is set, which SQLite reports as NULL.
 *
 * @param pContext SQLite function context holding the aggregate state and
 *                 receiving the result.
 */
static void OGRSQLITE_STDDEV_SAMP_Finalize(sqlite3_context *pContext)
{
    // A size of 0 only retrieves the state allocated by a previous step
    // call: nothing is allocated here, and NULL is returned if the step
    // callback never ran (or if its allocation failed).
    const auto pAggCtxt = static_cast<const OGRSQLITE_STDDEV_Context *>(
        sqlite3_aggregate_context(pContext, 0));
    if (pAggCtxt != nullptr && pAggCtxt->nValues > 1)
    {
        sqlite3_result_double(pContext,
                              sqrt(pAggCtxt->dfM2 / (pAggCtxt->nValues - 1)));
    }
}

/************************************************************************/
/* OGRSQLiteRegisterSQLFunctionsCommon() */
/************************************************************************/
Expand Down Expand Up @@ -365,6 +428,14 @@ static OGRSQLiteExtensionData *OGRSQLiteRegisterSQLFunctionsCommon(sqlite3 *hDB)
OGRSQLITE_LIKE, nullptr, nullptr);
}

sqlite3_create_function(hDB, "STDDEV_POP", 1, UTF8_INNOCUOUS, nullptr,
nullptr, OGRSQLITE_STDDEV_Step,
OGRSQLITE_STDDEV_POP_Finalize);

sqlite3_create_function(hDB, "STDDEV_SAMP", 1, UTF8_INNOCUOUS, nullptr,
nullptr, OGRSQLITE_STDDEV_Step,
OGRSQLITE_STDDEV_SAMP_Finalize);

pData->SetRegExpCache(OGRSQLiteRegisterRegExpFunction(hDB));

return pData;
Expand Down

0 comments on commit 75ad908

Please sign in to comment.