Skip to content

Commit

Permalink
Improve filters.stats (#2034)
Browse files Browse the repository at this point in the history
* Fix comment.

* Enhance stats filter test to check higher-order statistics.

* Only compute average once.
Simplify a little math.
Add comments.

* Only run skewness/kurtosis in advanced mode.
  • Loading branch information
abellgithub authored and hobu committed May 24, 2018
1 parent 408a5f5 commit 7570b0a
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 40 deletions.
4 changes: 2 additions & 2 deletions filters/ColorinterpFilter.cpp
Expand Up @@ -181,7 +181,7 @@ void ColorinterpFilter::filter(PointView& view)
std::vector<double> values(view.size());

stats::Summary summary(Dimension::name(m_interpDim),
stats::Summary::NoEnum);
stats::Summary::NoEnum, false);
for (PointId idx = 0; idx < view.size(); ++idx)
{
double v = view.getFieldAs<double>(m_interpDim, idx);
Expand Down Expand Up @@ -239,7 +239,7 @@ void ColorinterpFilter::filter(PointView& view)
else if (std::isnan(m_min) || std::isnan(m_max))
{
stats::Summary summary(Dimension::name(m_interpDim),
stats::Summary::NoEnum);
stats::Summary::NoEnum, false);
for (PointId idx = 0; idx < view.size(); ++idx)
{
double v = view.getFieldAs<double>(m_interpDim, idx);
Expand Down
23 changes: 14 additions & 9 deletions filters/StatsFilter.cpp
Expand Up @@ -74,18 +74,22 @@ void Summary::extractMetadata(MetadataNode &m)
if (!std::isinf(std) && !std::isnan(std))
m.add("stddev", std, "standard deviation");

double k = kurtosis();
if (!std::isinf(k) && !std::isnan(k))
m.add("kurtosis", k, "kurtosis");

double sk = skewness();
if (!std::isinf(sk) && !std::isnan(sk))
m.add("skewness", skewness(), "skewness");

double v = variance();
if (!std::isinf(v) && !std::isnan(v))
m.add("variance", v, "variance");
m.add("name", m_name, "name");

if (m_advanced)
{
double k = kurtosis();
if (!std::isinf(k) && !std::isnan(k))
m.add("kurtosis", k, "kurtosis");

double sk = skewness();
if (!std::isinf(sk) && !std::isnan(sk))
m.add("skewness", skewness(), "skewness");
}

if (m_enumerate == Enumerate)
{
for (auto& v : m_values)
Expand Down Expand Up @@ -169,6 +173,7 @@ void StatsFilter::addArgs(ProgramArgs& args)
args.add("global", "Dimensions to compute global stats (median, mad, mode)",
m_global);
args.add("count", "Dimensions whose values should be counted", m_counts);
args.add("advanced", "Calculate skewness and kurtosis", m_advanced);
}


Expand Down Expand Up @@ -232,7 +237,7 @@ void StatsFilter::prepared(PointTableRef table)
// Create the summary objects.
for (auto& dv : dims)
m_stats.insert(std::make_pair(layout->findDim(dv.first),
Summary(dv.first, dv.second)));
Summary(dv.first, dv.second, m_advanced)));
}


Expand Down
55 changes: 33 additions & 22 deletions filters/StatsFilter.hpp
Expand Up @@ -57,24 +57,26 @@ typedef std::map<double, point_count_t> EnumMap;
typedef std::vector<double> DataVector;

public:
Summary(std::string name, EnumType enumerate) :
m_name(name), m_enumerate(enumerate)
Summary(std::string name, EnumType enumerate, bool advanced = true) :
m_name(name), m_enumerate(enumerate), m_advanced(advanced)
{ reset(); }

double minimum() const
{ return m_min; }
double maximum() const
{ return m_max; }
double average() const
{ return m_avg; }
{ return M1; }
double variance() const
{ return M2/(m_cnt - 1.0); }
double stddev() const
{ return std::sqrt(variance()); }
double skewness() const
{ return std::sqrt(double(m_cnt)) * M3 / std::pow(M2, 1.5); }
{ return (M2 && m_advanced) ?
std::sqrt(double(m_cnt)) * M3 / std::pow(M2, 1.5) : 0.0; }
double kurtosis() const
{ return double(m_cnt)*M4 / (M2*M2) - 3.0; }
{ return (M2 && m_advanced) ?
double(m_cnt)*M4 / (M2*M2) - 3.0 : 0.0; }
double median() const
{ return m_median; }
double mad() const
Expand All @@ -94,23 +96,17 @@ typedef std::vector<double> DataVector;
m_max = (std::numeric_limits<double>::lowest)();
m_min = (std::numeric_limits<double>::max)();
m_cnt = 0;
m_avg = 0.0;
m_median = 0.0;
m_mad = 0.0;
M1 = M2 = M3 = M4 = 0.0;
}

void insert(double value)
{
double delta, delta_n, delta_n2, term1;

point_count_t n1(m_cnt);

m_cnt++;
point_count_t n(m_cnt);
m_min = (std::min)(m_min, value);
m_max = (std::max)(m_max, value);
m_avg += (value - m_avg) / m_cnt;

if (m_enumerate != NoEnum)
m_values[value]++;
if (m_enumerate == Global)
Expand All @@ -122,24 +118,38 @@ typedef std::vector<double> DataVector;

// stolen from http://www.johndcook.com/blog/skewness_kurtosis/

n1 = n;
delta = value - M1;
delta_n = delta / n;
delta_n2 = delta_n * delta_n;
term1 = delta * delta_n * n1;
point_count_t n(m_cnt);

// Difference from the mean
double delta = value - M1;
// Portion that this point's difference from the mean that it
// contributes to the mean.
double delta_n = delta / n;

// First moment - average.
M1 += delta_n;
M4 += term1 * delta_n2 * (n*n - 3*n + 3) +
(6 * delta_n2 * M2) - (4 * delta_n * M3);
M3 += term1 * delta_n * (n - 2) - 3 * delta_n * M2;
M2 += term1;

double delta_2 = pow(delta, 2.0);

if (m_advanced)
{
double delta_n2 = pow(delta_n, 2.0);
// Fourth moment - kurtosis (sum part)
M4 += delta_2 * delta_n2 * (n*n - 3*n + 3) +
(6 * delta_n2 * M2) - (4 * delta_n * M3);
// Third moment - skewness (sum part)
M3 += delta_2 * delta_n * (n - 2) - 3 * delta_n * M2;
}
// Second moment - variance (sum part)
M2 += delta_2;
}

private:
std::string m_name;
EnumType m_enumerate;
bool m_advanced;
double m_max;
double m_min;
double m_avg;
double m_mad;
double m_median;
EnumMap m_values;
Expand Down Expand Up @@ -177,6 +187,7 @@ class PDAL_DLL StatsFilter : public Filter, public Streamable
StringList m_enums;
StringList m_counts;
StringList m_global;
bool m_advanced;
std::map<Dimension::Id, stats::Summary> m_stats;
};

Expand Down
69 changes: 62 additions & 7 deletions test/unit/filters/StatsFilterTest.cpp
Expand Up @@ -49,7 +49,7 @@ TEST(Stats, simple)
Options ops;
ops.add("bounds", bounds);
ops.add("count", 1000);
ops.add("mode", "constant");
ops.add("mode", "ramp");

StageFactory f;

Expand Down Expand Up @@ -77,13 +77,68 @@ TEST(Stats, simple)
EXPECT_FLOAT_EQ(statsY.minimum(), 2.0);
EXPECT_FLOAT_EQ(statsZ.minimum(), 3.0);

EXPECT_FLOAT_EQ(statsX.maximum(), 1.0);
EXPECT_FLOAT_EQ(statsY.maximum(), 2.0);
EXPECT_FLOAT_EQ(statsZ.maximum(), 3.0);
EXPECT_FLOAT_EQ(statsX.maximum(), 101.0);
EXPECT_FLOAT_EQ(statsY.maximum(), 102.0);
EXPECT_FLOAT_EQ(statsZ.maximum(), 103.0);

EXPECT_FLOAT_EQ(statsX.average(), 1.0);
EXPECT_FLOAT_EQ(statsY.average(), 2.0);
EXPECT_FLOAT_EQ(statsZ.average(), 3.0);
EXPECT_FLOAT_EQ(statsX.average(), 51.0);
EXPECT_FLOAT_EQ(statsY.average(), 52.0);
EXPECT_FLOAT_EQ(statsZ.average(), 53.0);

EXPECT_FLOAT_EQ(statsX.variance(), 837.09351);
EXPECT_FLOAT_EQ(statsY.variance(), 837.0965);
EXPECT_FLOAT_EQ(statsZ.variance(), 837.1015);

EXPECT_DOUBLE_EQ(statsX.skewness(), 0.0);
EXPECT_DOUBLE_EQ(statsY.skewness(), 0.0);
EXPECT_DOUBLE_EQ(statsZ.skewness(), 0.0);

EXPECT_DOUBLE_EQ(statsX.kurtosis(), 0.0);
EXPECT_DOUBLE_EQ(statsY.kurtosis(), 0.0);
EXPECT_DOUBLE_EQ(statsZ.kurtosis(), 0.0);
}

TEST(Stats, advanced)
{
BOX3D bounds(1.0, 2.0, 3.0, 101.0, 102.0, 103.0);
Options ops;
ops.add("bounds", bounds);
ops.add("count", 1000);
ops.add("mode", "ramp");

StageFactory f;

Stage* reader(f.createStage("readers.faux"));
EXPECT_TRUE(reader);
reader->setOptions(ops);

StatsFilter filter;
Options so;
so.add("advanced", true);

filter.setInput(*reader);
filter.setOptions(so);
EXPECT_EQ(filter.getName(), "filters.stats");

PointTable table;
filter.prepare(table);
filter.execute(table);

const stats::Summary& statsX = filter.getStats(Dimension::Id::X);
const stats::Summary& statsY = filter.getStats(Dimension::Id::Y);
const stats::Summary& statsZ = filter.getStats(Dimension::Id::Z);

EXPECT_EQ(statsX.count(), 1000u);
EXPECT_EQ(statsY.count(), 1000u);
EXPECT_EQ(statsZ.count(), 1000u);

EXPECT_NEAR(statsX.skewness(), 7.6279972e+11, 10000);
EXPECT_NEAR(statsY.skewness(), 6.1023649e+12, 100000);
EXPECT_NEAR(statsZ.skewness(), 2.0595297e+13, 1000000);

EXPECT_NEAR(statsX.kurtosis(), -527558696e+4, 10000);
EXPECT_NEAR(statsY.kurtosis(), -422043928e+5, 100000);
EXPECT_NEAR(statsZ.kurtosis(), -142438122e+6, 1000000);
}


Expand Down

0 comments on commit 7570b0a

Please sign in to comment.