Skip to content

Commit

Permalink
Implements most remaining statistical functions (correlation, covaria…
Browse files Browse the repository at this point in the history
…nce, variance). Some work remains with corner cases and linear regression.

[git-p4: depot-paths = "//open/mondrian/": change = 239]
  • Loading branch information
krave committed Nov 28, 2002
1 parent 6f25ef8 commit a6bf9da
Show file tree
Hide file tree
Showing 2 changed files with 253 additions and 19 deletions.
157 changes: 146 additions & 11 deletions src/main/mondrian/olap/fun/BuiltinFunTable.java
Expand Up @@ -1295,7 +1295,21 @@ public void testAvg(FoodMartTestCase test) {
}
//todo: testAvgWithNulls
}));
if (false) define(new FunDefBase("Correlation", "Correlation(<Set>, <Numeric Expression>[, <Numeric Expression>])", "Returns the correlation of two series evaluated over a set.", "fn*"));
define(new MultiResolver(
    "Correlation",
    "Correlation(<Set>, <Numeric Expression>[, <Numeric Expression>])",
    "Returns the correlation of two series evaluated over a set.",
    new String[] {"fnxN", "fnxNN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the two series expressions
         * (arguments 1 and 2) and hands them to <code>correlation</code>,
         * together with the evaluator returned by
         * <code>evaluator.push()</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase firstSeries = (ExpBase) getArg(evaluator, args, 1);
            final ExpBase secondSeries = (ExpBase) getArg(evaluator, args, 2);
            return correlation(evaluator.push(), set, firstSeries, secondSeries);
        }

        /** Correlates Unit Sales with Store Sales over the children of USA. */
        public void testCorrelation(FoodMartTestCase test) {
            final String expr = "Correlation({[Store].[All Stores].[USA].children}, [Measures].[Unit Sales], [Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("0.9999063938016924", actual);
        }
    }));
define(new MultiResolver(
"Count", "Count(<Set>[, EXCLUDEEMPTY | INCLUDEEMPTY])", "Returns the number of tuples in a set, empty cells included unless the optional EXCLUDEEMPTY flag is used.",
new String[]{"fnx", "fnxy"},
Expand Down Expand Up @@ -1323,8 +1337,36 @@ public void testCount(FoodMartTestCase test) {
}
//todo: testCountNull, testCountNoExp
}));
if (false) define(new FunDefBase("Covariance", "Covariance(<Set>, <Numeric Expression>[, <Numeric Expression>])", "Returns the covariance of two series evaluated over a set (biased).", "fn*"));
if (false) define(new FunDefBase("CovarianceN", "CovarianceN(<Set>, <Numeric Expression>[, <Numeric Expression>])", "Returns the covariance of two series evaluated over a set (unbiased).", "fn*"));
define(new MultiResolver(
    "Covariance",
    "Covariance(<Set>, <Numeric Expression>[, <Numeric Expression>])",
    "Returns the covariance of two series evaluated over a set (biased).",
    new String[] {"fnxN", "fnxNN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the two series expressions
         * (arguments 1 and 2) and delegates to <code>covariance</code>
         * with <code>biased = true</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase firstSeries = (ExpBase) getArg(evaluator, args, 1);
            final ExpBase secondSeries = (ExpBase) getArg(evaluator, args, 2);
            return covariance(evaluator.push(), set, firstSeries, secondSeries, true);
        }

        /** Biased covariance of Unit Sales and Store Sales over the children of USA. */
        public void testCovariance(FoodMartTestCase test) {
            final String expr = "Covariance({[Store].[All Stores].[USA].children}, [Measures].[Unit Sales], [Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("1.3557618990466664E9", actual);
        }
    }));
define(new MultiResolver(
    "CovarianceN",
    "CovarianceN(<Set>, <Numeric Expression>[, <Numeric Expression>])",
    "Returns the covariance of two series evaluated over a set (unbiased).",
    new String[] {"fnxN", "fnxNN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the two series expressions
         * (arguments 1 and 2) and delegates to <code>covariance</code>
         * with <code>biased = false</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase firstSeries = (ExpBase) getArg(evaluator, args, 1);
            final ExpBase secondSeries = (ExpBase) getArg(evaluator, args, 2);
            return covariance(evaluator.push(), set, firstSeries, secondSeries, false);
        }

        /** Unbiased covariance of Unit Sales and Store Sales over the children of USA. */
        public void testCovarianceN(FoodMartTestCase test) {
            final String expr = "CovarianceN({[Store].[All Stores].[USA].children}, [Measures].[Unit Sales], [Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("2.0336428485699995E9", actual);
        }
    }));
// IIf is registered as a plain FunDefBase; no evaluate() override is supplied at this call site.
define(new FunDefBase("IIf", "IIf(<Logical Expression>, <Numeric Expression1>, <Numeric Expression2>)", "Returns one of two numeric values determined by a logical test.", "fnbnn"));
// The if (false) guard means the two linear-regression registrations below never execute:
// the signatures and descriptions are recorded here, but the functions are not defined.
if (false) define(new FunDefBase("LinRegIntercept", "LinRegIntercept(<Set>, <Numeric Expression>[, <Numeric Expression>])", "Calculates the linear regression of a set and returns the value of b in the regression line y = ax + b.", "fn*"));
if (false) define(new FunDefBase("LinRegPoint", "LinRegPoint(<Numeric Expression>, <Set>, <Numeric Expression>[, <Numeric Expression>])", "Calculates the linear regression of a set and returns the value of y in the regression line y = ax + b.", "fn*"));
Expand Down Expand Up @@ -1381,10 +1423,56 @@ public void testMin(FoodMartTestCase test) {
}));
// Ordinal is registered as a plain FunDefBase; no evaluate() override is supplied at this call site.
define(new FunDefBase("Ordinal", "<Level>.Ordinal", "Returns the zero-based ordinal value associated with a level.", "pnl"));
// Guarded by if (false): Rank is described here but never actually defined.
if (false) define(new FunDefBase("Rank", "Rank(<Tuple>, <Set>)", "Returns the one-based rank of a tuple in a set.", "fn*"));
if (false) define(new FunDefBase("Stddev", "Stddev(<Set>[, <Numeric Expression>])", "Alias for Stdev.", "fn*"));
if (false) define(new FunDefBase("StddevP", "StddevP(<Set>[, <Numeric Expression>])", "Alias for StdevP.", "fn*"));
if (false) define(new FunDefBase("Stdev", "Stdev(<Set>[, <Numeric Expression>])", "Returns the standard deviation of a numeric expression evaluated over a set (unbiased).", "fn*"));
if (false) define(new FunDefBase("StdevP", "StdevP(<Set>[, <Numeric Expression>])", "Returns the standard deviation of a numeric expression evaluated over a set (biased).", "fn*"));
define(new MultiResolver(
    "Stddev",
    "Stddev(<Set>[, <Numeric Expression>])",
    "Alias for Stdev.",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>stdev</code> with
         * <code>biased = false</code>, the same flag the Stdev
         * registration passes.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return stdev(evaluator.push(), set, measure, false);
        }
    }));
define(new MultiResolver(
    "Stdev",
    "Stdev(<Set>[, <Numeric Expression>])",
    "Returns the standard deviation of a numeric expression evaluated over a set (unbiased).",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>stdev</code> with
         * <code>biased = false</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return stdev(evaluator.push(), set, measure, false);
        }

        /** Unbiased standard deviation of Store Sales over the children of USA. */
        public void testStdev(FoodMartTestCase test) {
            final String expr = "STDEV({[Store].[All Stores].[USA].children},[Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("65825.4547549297", actual);
        }
    }));
define(new MultiResolver(
    "StddevP",
    "StddevP(<Set>[, <Numeric Expression>])",
    "Alias for StdevP.",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>stdev</code> with
         * <code>biased = true</code>, the same flag the StdevP
         * registration passes.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return stdev(evaluator.push(), set, measure, true);
        }
    }));
define(new MultiResolver(
    "StdevP",
    "StdevP(<Set>[, <Numeric Expression>])",
    "Returns the standard deviation of a numeric expression evaluated over a set (biased).",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>stdev</code> with
         * <code>biased = true</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return stdev(evaluator.push(), set, measure, true);
        }

        /** Biased standard deviation of Store Sales over the children of USA. */
        public void testStdevP(FoodMartTestCase test) {
            final String expr = "STDEVP({[Store].[All Stores].[USA].children},[Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("53746.25874541283", actual);
        }
    }));
define(new MultiResolver(
"Sum", "Sum(<Set>[, <Numeric Expression>])", "Returns the sum of a numeric expression evaluated over a set.",
new String[]{"fnx", "fnxN"},
Expand Down Expand Up @@ -1435,10 +1523,57 @@ public FunDef resolve(Exp[] args, int[] conversionCount) {
return new ValueFunDef(argTypes);
}
});
if (false) define(new FunDefBase("Var", "Var(<Set>[, <Numeric Expression>])", "Returns the variance of a numeric expression evaluated over a set (unbiased).", "fn*"));
if (false) define(new FunDefBase("Variance", "Variance(<Set>[, <Numeric Expression>])", "Alias for Var.", "fn*"));
if (false) define(new FunDefBase("VarianceP", "VarianceP(<Set>[, <Numeric Expression>])", "Alias for VarP.", "fn*"));
if (false) define(new FunDefBase("VarP", "VarP(<Set>[, <Numeric Expression>])", "Returns the variance of a numeric expression evaluated over a set (biased).", "fn*"));
define(new MultiResolver(
    "Var",
    "Var(<Set>[, <Numeric Expression>])",
    "Returns the variance of a numeric expression evaluated over a set (unbiased).",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>var</code> with
         * <code>biased = false</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return var(evaluator.push(), set, measure, false);
        }

        /** Unbiased variance of Store Sales over the children of USA. */
        public void testVar(FoodMartTestCase test) {
            final String expr = "VAR({[Store].[All Stores].[USA].children},[Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("4.332990493693297E9", actual);
        }
    }));
define(new MultiResolver(
    "Variance",
    "Variance(<Set>[, <Numeric Expression>])",
    "Alias for Var.",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>var</code> with
         * <code>biased = false</code>, the same flag the Var
         * registration passes.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return var(evaluator.push(), set, measure, false);
        }
    }));
define(new MultiResolver(
    "VarianceP",
    "VarianceP(<Set>[, <Numeric Expression>])",
    "Alias for VarP.",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>var</code> with
         * <code>biased = true</code>, the same flag the VarP
         * registration passes.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return var(evaluator.push(), set, measure, true);
        }
    }));
define(new MultiResolver(
    "VarP",
    "VarP(<Set>[, <Numeric Expression>])",
    "Returns the variance of a numeric expression evaluated over a set (biased).",
    new String[] {"fnx", "fnxN"},
    new FunkBase() {
        /**
         * Reads the set (argument 0) and the numeric expression
         * (argument 1) and delegates to <code>var</code> with
         * <code>biased = true</code>.
         */
        public Object evaluate(Evaluator evaluator, Exp[] args) {
            final Vector set = (Vector) getArg(evaluator, args, 0);
            final ExpBase measure = (ExpBase) getArg(evaluator, args, 1);
            return var(evaluator.push(), set, measure, true);
        }

        /** Biased variance of Store Sales over the children of USA. */
        public void testVarP(FoodMartTestCase test) {
            final String expr = "VARP({[Store].[All Stores].[USA].children},[Measures].[Store Sales])";
            final String actual = test.executeExpr(expr);
            test.assertEquals("2.888660329128865E9", actual);
        }
    }));

//
// SET FUNCTIONS
if (false) define(new FunDefBase("AddCalculatedMembers", "AddCalculatedMembers(<Set>)", "Adds calculated members to a set.", "fx*"));
Expand Down
115 changes: 107 additions & 8 deletions src/main/mondrian/olap/fun/FunUtil.java
Expand Up @@ -450,6 +450,22 @@ static Object topOrBottom (Evaluator evaluator, Vector members, ExpBase exp, boo
/**
 * Result holder returned by <code>evaluateSet</code>: the collected values
 * plus counters describing problem cells.
 */
static class SetWrapper {
// Collected values; the aggregate helpers in this file cast every element to Double.
Vector v = new Vector();
// errorCount > 0 makes avg/_var report NaN instead of a result.
// NOTE(review): both counters are presumably maintained by evaluateSet - confirm, its body is not shown here.
public int errorCount = 0, nullCount = 0;

// Disabled draft of a cached average (see _avg for the live implementation).
// NOTE(review): if this is ever revived, the guard must be Double.isNaN(avg);
// "avg == Double.NaN" is always false in Java, so the draft as written would
// never compute the average and would always return NaN.
//private double avg = Double.NaN;
//todo: parameterize inclusion of nulls
//by making this a method of the SetWrapper, we can cache the result
//this allows its reuse in Correlation
// public double getAverage() {
// if (avg == Double.NaN) {
// double sum = 0.0;
// for (int i = 0; i < v.size(); i++) {
// sum += ((Double) v.elementAt(i)).doubleValue();
// }
// //todo: should look at context and optionally include nulls
// avg = sum / v.size();
// }
// return avg;
// }
}

static Object median(Evaluator evaluator, Vector members, ExpBase exp) {
Expand Down Expand Up @@ -502,23 +518,106 @@ static Object max(Evaluator evaluator, Vector members, ExpBase exp) {
}
}

static Object avg(Evaluator evaluator, Vector members, ExpBase exp) {
/**
 * Evaluates <code>exp</code> over <code>members</code> and returns the
 * variance of the resulting values.
 *
 * @param biased passed through to <code>_var</code>: true divides by n,
 *   false by n - 1
 * @return whatever <code>_var</code> yields for the evaluated set
 */
static Object var(Evaluator evaluator, Vector members, ExpBase exp, boolean biased) {
    return _var(evaluateSet(evaluator, members, exp), biased);
}

/**
 * Computes the variance of the values already collected in <code>sw</code>.
 *
 * @param sw evaluated set; <code>sw.v</code> holds the <code>Double</code>
 *   values to aggregate
 * @param biased if true the sum of squared deviations is divided by n,
 *   otherwise by n - 1
 * @return <code>Double(NaN)</code> if any cell failed to evaluate,
 *   <code>Util.nullValue</code> if no values were collected, otherwise the
 *   variance as a <code>Double</code>
 */
private static Object _var(SetWrapper sw, boolean biased) {
    if (sw.errorCount > 0) {
        return new Double(Double.NaN);
    } else if (sw.v.size() == 0) {
        return Util.nullValue;
    } else {
        double sumOfSquares = 0.0;
        double avg = _avg(sw);
        for (int i = 0; i < sw.v.size(); i++) {
            double value = ((Double) sw.v.elementAt(i)).doubleValue();
            sumOfSquares += Math.pow(value - avg, 2);
        }
        int n = sw.v.size();
        if (!biased) {
            n--;
        }
        // A single value with biased == false gives 0.0 / 0, i.e. NaN.
        return new Double(sumOfSquares / n);
    }
}

/**
 * Evaluates both expressions over <code>members</code> and returns their
 * correlation: covariance divided by the square root of the product of
 * the two variances.
 *
 * <p>Covariance and both variances are computed with
 * <code>biased = false</code>. Because all three use the same flag, the
 * n - 1 divisor cancels in the ratio.
 *
 * @return the correlation as a <code>Double</code>, or
 *   <code>Util.nullValue</code> when the covariance or either variance is
 *   not a <code>Double</code>
 */
static Object correlation(Evaluator evaluator, Vector members, ExpBase exp1, ExpBase exp2) {
    final SetWrapper first = evaluateSet(evaluator, members, exp1);
    final SetWrapper second = evaluateSet(evaluator, members, exp2);
    final Object covar = _covariance(first, second, false);
    final Object firstVar = _var(first, false);
    final Object secondVar = _var(second, false);

    final boolean allNumeric = (covar instanceof Double)
        && (firstVar instanceof Double)
        && (secondVar instanceof Double);
    if (!allNumeric) {
        return Util.nullValue;
    }

    final double numerator = ((Double) covar).doubleValue();
    final double varianceProduct =
        ((Double) firstVar).doubleValue() * ((Double) secondVar).doubleValue();
    return new Double(numerator / Math.sqrt(varianceProduct));
}

/**
 * Evaluates both expressions over <code>members</code>, each on its own
 * <code>evaluator.push()</code>, and returns their covariance.
 *
 * @param biased passed through to <code>_covariance</code>: true divides
 *   by n, false by n - 1
 */
static Object covariance(Evaluator evaluator, Vector members, ExpBase exp1, ExpBase exp2, boolean biased) {
    final SetWrapper first = evaluateSet(evaluator.push(), members, exp1);
    final SetWrapper second = evaluateSet(evaluator.push(), members, exp2);
    //todo: because evaluateSet does not add nulls to the SetWrapper, the two
    //value vectors may end up mismatched, so this solution is not robust
    final Object result = _covariance(first, second, biased);
    return result;
}


/**
 * Computes the covariance of two already-evaluated series.
 *
 * <p>Error and empty-input handling mirrors <code>_var</code> and
 * <code>avg</code>. Without these guards an empty pair of series divides
 * by 0 (biased) or -1 (unbiased) and produces <code>NaN</code> or
 * <code>-0.0</code>.
 *
 * @param sw1 first series; <code>sw1.v</code> holds <code>Double</code> values
 * @param sw2 second series, paired with <code>sw1</code> by position
 * @param biased if true the sum of products is divided by n, otherwise
 *   by n - 1
 * @return <code>Double(NaN)</code> if either series recorded an error,
 *   <code>Util.nullValue</code> if the series differ in length or are
 *   empty, otherwise the covariance as a <code>Double</code>
 */
private static Object _covariance(SetWrapper sw1, SetWrapper sw2, boolean biased) {
    if (sw1.errorCount > 0 || sw2.errorCount > 0) {
        return new Double(Double.NaN);
    }
    final int count = sw1.v.size();
    if (count != sw2.v.size() || count == 0) {
        return Util.nullValue;
    }
    double avg1 = _avg(sw1);
    double avg2 = _avg(sw2);
    double covar = 0.0;
    for (int i = 0; i < count; i++) {
        //all of this casting seems inefficient - can we make SetWrapper contain an array of double instead?
        double diff1 = ((Double) sw1.v.elementAt(i)).doubleValue() - avg1;
        double diff2 = ((Double) sw2.v.elementAt(i)).doubleValue() - avg2;
        covar += diff1 * diff2;
    }
    int n = count;
    if (!biased) {
        n--;
    }
    // A single pair with biased == false gives 0.0 / 0, i.e. NaN.
    return new Double(covar / n);
}

/**
 * Returns the standard deviation of <code>exp</code> over
 * <code>members</code>: the square root of <code>var(...)</code>.
 *
 * @param biased passed through to <code>var</code>
 * @return the square root as a <code>Double</code>; any non-Double result
 *   from <code>var</code> is returned unchanged
 */
static Object stdev(Evaluator evaluator, Vector members, ExpBase exp, boolean biased) {
    final Object variance = var(evaluator, members, exp, biased);
    if (!(variance instanceof Double)) {
        return variance;
    }
    final double value = ((Double) variance).doubleValue();
    return new Double(Math.sqrt(value));
}

/**
 * Evaluates <code>exp</code> over <code>members</code> and returns the
 * arithmetic mean of the collected values.
 *
 * @return <code>Double(NaN)</code> if any cell failed to evaluate,
 *   <code>Util.nullValue</code> if no values were collected, otherwise the
 *   mean as a <code>Double</code>
 */
static Object avg(Evaluator evaluator, Vector members, ExpBase exp) {
    final SetWrapper evaluated = evaluateSet(evaluator, members, exp);
    if (evaluated.errorCount > 0) {
        return new Double(Double.NaN);
    }
    if (evaluated.v.isEmpty()) {
        return Util.nullValue;
    }
    return new Double(_avg(evaluated));
}

/**
 * Returns the arithmetic mean of the <code>Double</code> values held in
 * <code>sw.v</code>. Callers are expected to have handled the empty case;
 * with no values this evaluates 0.0 / 0 and yields NaN.
 */
//todo: parameterize inclusion of nulls (should look at context and optionally include them)
//todo: maybe make _avg a method of SetWrapper, so the result can be cached (i.e. for correl)
private static double _avg(SetWrapper sw) {
    final int count = sw.v.size();
    double total = 0.0;
    int index = 0;
    while (index < count) {
        final Double boxed = (Double) sw.v.elementAt(index);
        total += boxed.doubleValue();
        index++;
    }
    return total / count;
}


static Object sum(Evaluator evaluator, Vector members, ExpBase exp) {
SetWrapper sw = evaluateSet(evaluator, members, exp);
if (sw.errorCount > 0) {
Expand All @@ -540,9 +639,9 @@ static Object sum(Evaluator evaluator, Vector members, ExpBase exp) {
}

/**
* Evaluates <code>exp</code> (if defined) over <code>members</code> to
* generate a <code>SetWrapper</code>, which holds a <code>Vector</code> of
* <code>Double</code> values plus meta information (error and null counts),
* unlike <code>evaluateMembers</code>, which only produces values.
*/
static SetWrapper evaluateSet(Evaluator evaluator, Vector members, ExpBase exp) {
Expand Down

0 comments on commit a6bf9da

Please sign in to comment.