-
Notifications
You must be signed in to change notification settings - Fork 73
fix variance for identical values #554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -397,6 +397,19 @@ class Variance | |
| void add( double x ) | ||
| { | ||
| m_count++; | ||
| // Track consecutive same values to avoid numerical errors when all values are identical | ||
| if( m_count == 1 ) | ||
| { | ||
| m_lastValue = x; | ||
| m_consecutiveValueCount = 1; | ||
| } | ||
| if( x == m_lastValue && m_count > 1) | ||
| m_consecutiveValueCount++; | ||
| else | ||
| { | ||
| m_lastValue = x; | ||
| m_consecutiveValueCount = 1; | ||
| } | ||
| m_dx = x - m_mean; | ||
| m_mean += m_dx / m_count; | ||
| m_unnormVar += ( x - m_mean ) * m_dx; | ||
|
|
@@ -405,9 +418,14 @@ class Variance | |
| void remove( double x ) | ||
| { | ||
| m_count--; | ||
| // Note: For sliding windows, we cannot accurately track consecutive values when removing from the beginning, | ||
| // and we don't need to. Since we are checking m_consecutiveValueCount >= m_count in compute, | ||
| // the only events needs reset is a new value comes in at the end. | ||
| if( m_count == 0 ) | ||
| { | ||
| m_mean = m_unnormVar = 0; | ||
| m_consecutiveValueCount = 0; | ||
| m_lastValue = 0; | ||
| return; | ||
| } | ||
| m_dx = x - m_mean; | ||
|
|
@@ -418,12 +436,19 @@ class Variance | |
| void reset() | ||
| { | ||
| m_mean = m_unnormVar = m_count = 0; | ||
| m_consecutiveValueCount = 0; | ||
| m_lastValue = 0; | ||
| } | ||
|
|
||
| double compute() const | ||
| { | ||
| if( m_count > m_ddof ) | ||
| { | ||
| // Check if all values are identical | ||
| if( m_consecutiveValueCount >= m_count [[unlikely]]) | ||
| return 0.0; | ||
| return ( m_unnormVar < 0 ? 0 : m_unnormVar / ( m_count - m_ddof ) ); | ||
| } | ||
|
|
||
| return std::numeric_limits<double>::quiet_NaN(); | ||
| } | ||
|
|
@@ -435,6 +460,9 @@ class Variance | |
| double m_dx; | ||
| double m_count; | ||
| int64_t m_ddof; | ||
| // Below variables are used to eliminate numerical errors when all values in the window are identical | ||
| double m_lastValue; | ||
| int64_t m_consecutiveValueCount; | ||
| }; | ||
|
|
||
| class WeightedVariance | ||
|
|
@@ -455,6 +483,20 @@ class WeightedVariance | |
| { | ||
| if( w <= 0 ) | ||
| return; | ||
| // Track consecutive same values and observation count | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the same comments apply for |
||
| m_count++; | ||
| if( m_count == 1 ) | ||
| { | ||
| m_lastValue = x; | ||
| m_consecutiveValueCount = 1; | ||
| } | ||
| if( x == m_lastValue && m_count > 1 ) | ||
| m_consecutiveValueCount++; | ||
| else | ||
| { | ||
| m_lastValue = x; | ||
| m_consecutiveValueCount = 1; | ||
| } | ||
| m_wsum += w; | ||
| m_dx = x - m_wmean; | ||
| m_wmean += ( w / m_wsum ) * m_dx; | ||
|
|
@@ -463,10 +505,14 @@ class WeightedVariance | |
|
|
||
| void remove( double x, double w ) | ||
| { | ||
| m_count--; | ||
| m_wsum -= w; | ||
| if( m_wsum < EPSILON ) | ||
| { | ||
| m_wsum = m_wmean = m_unnormWVar = 0; | ||
| m_count = 0; | ||
| m_consecutiveValueCount = 0; | ||
| m_lastValue = 0; | ||
| return; | ||
| } | ||
| m_dx = x - m_wmean; | ||
|
|
@@ -477,12 +523,20 @@ class WeightedVariance | |
| void reset() | ||
| { | ||
| m_wsum = m_wmean = m_unnormWVar = 0; | ||
| m_count = 0; | ||
| m_consecutiveValueCount = 0; | ||
| m_lastValue = 0; | ||
| } | ||
|
|
||
| double compute() const | ||
| { | ||
| if( m_wsum > m_ddof ) | ||
| { | ||
| // Check if all values are identical | ||
| if( m_consecutiveValueCount >= m_count [[unlikely]]) | ||
| return 0.0; | ||
| return ( m_unnormWVar < 0 ? 0 : m_unnormWVar / ( m_wsum - m_ddof ) ); | ||
| } | ||
|
|
||
| return std::numeric_limits<double>::quiet_NaN(); | ||
| } | ||
|
|
@@ -494,6 +548,10 @@ class WeightedVariance | |
| double m_unnormWVar; | ||
| double m_dx; | ||
| int64_t m_ddof; | ||
| // Below variables are used to eliminate numerical errors when all values in the window are identical | ||
| int64_t m_count; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can remove
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have updated weighted variance to match variance. m_count as a variable is still needed in weighted variance because we need it to initialize m_lastValue and m_consecutiveValueCount. |
||
| double m_lastValue; | ||
| int64_t m_consecutiveValueCount; | ||
| }; | ||
|
|
||
| class Covariance | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.