MONDRIAN: Support for Grouping Sets query for Distinct Count measure.…

… New property to allow fail fast in case a MDX query element is not prefixed with a [Dimension] [git-p4: depot-paths = "//open/mondrian/": change = 10480]
pentaho · Jan 29, 2008 · 2706aba · 2706aba
1 parent e6ad25a
commit 2706aba
Show file tree

Hide file tree

Showing 10 changed files with 861 additions and 72 deletions.
diff --git a/doc/configuration.html b/doc/configuration.html
@@ -385,6 +385,58 @@ <h3>1.1 Property list<a name="Property_list">&nbsp;</a></h3>
           result should be NULL when denominator is NULL or zero.
       </td>
     </tr>
+    <tr>
+        <td>
+            <code>
+                <a href="api/mondrian/olap/MondrianProperties.html#NeedDimensionPrefix">mondrian.olap.elements.NeedDimensionPrefix</a></code>
+        </td>
+        <td>boolean</td>
+        <td>false</td>
+        <td>
+            <p>Property determines if elements of dimension (levels, hierarchies,
+                members) need to be prefixed with dimension name in MDX query.
+                For example when the property is true, the following queries
+                will error out. The same queries will work when this property
+                is set to false.
+            <blockquote>
+            <p>
+            <ul>
+                <li><code>select {[M]} on 0 from sales</code></li>
+                <li><code>select {[USA]} on 0 from sales</code></li>
+                <li><code>select {[USA].[CA].[Santa Monica]} on 0 from sales</code></li>
+            </ul>
+            </blockquote>
+            <p>
+            When the property is set to true, any query where elements are
+            prefixed with dimension name as below will work
+            <blockquote>
+            <p>
+            <ul>
+                <li><code>select {[Gender].[F]} on 0 from sales</code></li>
+                <li><code>select {[Customers].[Santa Monica]} on 0 from sales</code></li>
+            </ul>
+            </blockquote>
+            <p>
+            Please note that this property does not govern the behaviour whereby
+            <blockquote>
+            <p>
+            <ul>
+                <li><code>[Gender].[M]</code></li>
+            </ul>
+            </blockquote>
+            <p>
+            is resolved into the fully qualified
+            <blockquote>
+            <p>
+            <ul>
+                <li><code>[Gender].[All Gender].[M]</code></li>
+            </ul>
+            </blockquote>
+            <p>In a scenario where the schema is very large and dimensions have
+            a large number of members, an MDX query that has an invalid member
+            in it will cause mondrian to go through all the dimensions, levels,
+            hierarchies, members and properties trying to resolve the element
+            name. This behaviour consumes considerable time and resources on the
+            server. Setting this property to true will make such a query fail
+            fast in scenarios where that is desirable.
+        </td>
+    </tr>
     <tr>
       <td>
         <code>

diff --git a/mondrian.properties b/mondrian.properties
@@ -406,5 +406,35 @@ mondrian.rolap.iterationLimit=0
 #
 #mondrian.olap.agg.IgnoreMeasureForNonJoiningDimension=false
 
+###############################################################################
+# Property determines if elements of dimension (levels, hierarchies, members)
+# need to be prefixed with dimension name in MDX query.
+#
+# For example when the property is true, the following queries
+# will error out. The same queries will work when this property
+# is set to false.
+#     * select {[M]} on 0 from sales
+#     * select {[USA]} on 0 from sales
+#     * select {[USA].[CA].[Santa Monica]}  on 0 from sales
+#
+# When the property is set to true, any query where elements are
+# prefixed with dimension name as below will work
+#     * select {[Gender].[F]} on 0 from sales
+#     * select {[Customers].[Santa Monica]} on 0 from sales
+#
+# Please note that this property does not govern the behaviour where in
+#     * [Gender].[M]
+# is resolved into a fully qualified
+#     * [Gender].[All Gender].[M]
+#
+# In a scenario where the schema is very large and dimensions have a large
+# number of members, an MDX query that has an invalid member in it will cause
+# mondrian to go through all the dimensions, levels, hierarchies, members
+# and properties trying to resolve the element name. This behaviour consumes
+# considerable time and resources on the server. Setting this property to
+# true will make such a query fail fast in scenarios where that is desirable.
+#
+#mondrian.olap.elements.NeedDimensionPrefix=false
+
 
 # End mondrian.properties
diff --git a/src/main/mondrian/olap/CubeBase.java b/src/main/mondrian/olap/CubeBase.java
@@ -106,7 +106,8 @@ public OlapElement lookupChild(
         SchemaReader schemaReader, Id.Segment s, MatchType matchType)
     {
         Dimension mdxDimension = (Dimension)lookupDimension(s);
-        if (mdxDimension != null) {
+        if (mdxDimension != null ||
+            MondrianProperties.instance().NeedDimensionPrefix.get()) {
             return mdxDimension;
         }
 

diff --git a/src/main/mondrian/olap/MondrianProperties.java b/src/main/mondrian/olap/MondrianProperties.java
@@ -1098,6 +1098,53 @@ public Property getPropertyDefinition(String path) {
                     this,
                     "mondrian.olap.agg.IgnoreMeasureForNonJoiningDimension",
                     false);
+
+    /**
+     * Property determines if elements of dimension (levels, hierarchies, members)
+     * need to be prefixed with dimension name in MDX query.
+     * For example when the property is true, the following queries
+     * will error out. The same queries will work when this property
+     * is set to false.
+     * <blockquote>
+     * <p>
+     * select {[M]} on 0 from sales
+     * <p>
+     * select {[USA]} on 0 from sales
+     * <p>
+     * select {[USA].[CA].[Santa Monica]}  on 0 from sales
+     * </blockquote>
+     * <p>
+     * When the property is set to true, any query where elements are
+     * prefixed with dimension name as below will work
+     * <blockquote>
+     * <p>
+     * select {[Gender].[F]} on 0 from sales
+     * <p>
+     * select {[Customers].[Santa Monica]} on 0 from sales
+     * </blockquote>
+     * <p>
+     * Please note that this property does not govern the behaviour where in
+     * <blockquote>
+     * <p>
+     * [Gender].[M]
+     * </blockquote>
+     * <p>
+     * is resolved into a fully qualified
+     * <blockquote>
+     * <p>
+     * [Gender].[All Gender].[M]
+     * </blockquote>
+     * <p>
+     * In a scenario where the schema is very large and dimensions have a large
+     * number of members, an MDX query that has an invalid member in it will
+     * cause mondrian to go through all the dimensions, levels, hierarchies,
+     * members and properties trying to resolve the element name. This behaviour
+     * consumes considerable time and resources on the server. Setting this
+     * property to true will make such a query fail fast in scenarios where
+     * that is desirable.
+     */
+    public transient final BooleanProperty NeedDimensionPrefix =
+            new BooleanProperty(
+                    this, "mondrian.olap.elements.NeedDimensionPrefix", false);
 }
 
 // End MondrianProperties.java
diff --git a/src/main/mondrian/olap/fun/AggregateFunDef.java b/src/main/mondrian/olap/fun/AggregateFunDef.java
@@ -9,15 +9,18 @@
 */
 package mondrian.olap.fun;
 
-import mondrian.olap.*;
-import mondrian.calc.*;
+import mondrian.calc.Calc;
+import mondrian.calc.ExpCompiler;
+import mondrian.calc.ListCalc;
+import mondrian.calc.impl.AbstractDoubleCalc;
 import mondrian.calc.impl.GenericCalc;
 import mondrian.calc.impl.ValueCalc;
-import mondrian.calc.impl.AbstractDoubleCalc;
 import mondrian.mdx.ResolvedFunCall;
+import mondrian.olap.*;
 import mondrian.rolap.RolapAggregator;
-
+import java.util.ArrayList;
 import java.util.List;
+import java.util.Iterator;
 
 /**
  * Definition of the <code>AGGREGATE</code> MDX function.
@@ -71,15 +74,15 @@ public double evaluateDouble(Evaluator evaluator) {
                     null,
                     "Don't know how to rollup aggregator '" + aggregator + "'");
             }
-            final List list = evaluateCurrentList(listCalc, evaluator);
+            List list = evaluateCurrentList(listCalc, evaluator);
             if (aggregator == RolapAggregator.DistinctCount) {
                 // If the list is empty, it means the current context
                 // contains no qualifying cells. The result set is empty.
                 if (list.size() == 0) {
                     return DoubleNull;
                 }
-                
-                // TODO: Optimize the list
+
+                // Optimize the list
                 // E.g.
                 // List consists of:
                 //  (Gender.[All Gender], [Product].[All Products]),
@@ -90,6 +93,10 @@ public double evaluateDouble(Evaluator evaluator) {
                 //
                 // Similar optimization can also be done for list of members.
 
+                if (list.get(0) instanceof Member) {
+                    list = makeTupleList(list);
+                }
+                list = removeOverlappingTupleEntries(list);
                 checkIfAggregationSizeIsTooLarge(list);
 
                 // Can't aggregate distinct-count values in the same way
@@ -108,12 +115,11 @@ public double evaluateDouble(Evaluator evaluator) {
         }
 
         /**
-         * In case of distinct count totals, the Sql generated would have at
-         * least, as many where conditions as the size of the list.
-         * Incase of a large list, the SQL generation would take too much time
-         * and memory. Also the generated SQL would be too large to execute.
+         * In case of distinct count aggregation if a tuple which is a super
+         * set of other tuples in the set exists then the child tuples can be
+         * ignored.
          *
-         * <p>TODO: Optimize the list
+         * <p>
          * E.g.
          * List consists of:
          *  (Gender.[All Gender], [Product].[All Products]),
@@ -122,10 +128,80 @@ public double evaluateDouble(Evaluator evaluator) {
          * Can be optimized to:
          *  (Gender.[All Gender], [Product].[All Products])
          *
-         * <p>Similar optimization can also be done for list of members.
-         *
          * @param list
          */
+
+        public static List<Member[]> removeOverlappingTupleEntries(
+            List<Member[]> list)
+        {
+            // Tuples are examined in input order. "retained" never holds two
+            // tuples where one covers, or duplicates, the other.
+            final List<Member[]> retained = new ArrayList<Member[]>();
+            for (Member[] candidate : list) {
+                boolean covered = false;
+                final Iterator<Member[]> kept = retained.iterator();
+                while (kept.hasNext()) {
+                    final Member[] existing = kept.next();
+                    if (isSuperSet(candidate, existing)) {
+                        // The candidate subsumes a tuple retained earlier, so
+                        // the earlier tuple is dropped.
+                        kept.remove();
+                    } else if (isSuperSet(existing, candidate) ||
+                        isEqual(candidate, existing)) {
+                        // An equal or wider tuple is already retained; the
+                        // candidate adds nothing.
+                        covered = true;
+                        break;
+                    }
+                }
+                if (!covered) {
+                    retained.add(candidate);
+                }
+            }
+            return retained;
+        }
+
+        /**
+         * Returns whether the two tuples hold members with the same unique
+         * names, compared position by position over the length of tuple1.
+         *
+         * @param tuple1 first tuple; its length drives the comparison
+         * @param tuple2 second tuple
+         * @return true if no position has differing unique names
+         */
+        private static boolean isEqual(Member[] tuple1, Member[] tuple2) {
+            int position = 0;
+            while (position < tuple1.length) {
+                final String name1 = tuple1[position].getUniqueName();
+                final String name2 = tuple2[position].getUniqueName();
+                if (!name1.equals(name2)) {
+                    return false;
+                }
+                position++;
+            }
+            return true;
+        }
+
+        /**
+         * Converts a list of members into a list of tuples by wrapping each
+         * member in its own single-element array.
+         *
+         * @param list members to wrap
+         * @return list of one-member tuples, in the same order as the input
+         */
+        public static List<Member[]> makeTupleList(List<Member> list) {
+            final List<Member[]> tuples =
+                new ArrayList<Member[]>(list.size());
+            final Iterator<Member> members = list.iterator();
+            while (members.hasNext()) {
+                final Member[] singleton = {members.next()};
+                tuples.add(singleton);
+            }
+            return tuples;
+        }
+
+        /**
+         * Returns whether tuple1 is a superset of tuple2: every member of
+         * tuple2 must be a child of, or equal to, the member at the same
+         * position in tuple1, and at least one member of tuple1 must sit at a
+         * smaller level depth than its counterpart in tuple2.
+         *
+         * @param tuple1 candidate superset tuple
+         * @param tuple2 candidate subset tuple
+         * @return true if tuple1 covers tuple2 and is not merely equal to it
+         */
+        public static boolean isSuperSet(Member[] tuple1, Member[] tuple2) {
+            boolean strictlyAbove = false;
+            for (int pos = 0; pos < tuple1.length; pos++) {
+                final Member ancestor = tuple1[pos];
+                final Member descendant = tuple2[pos];
+                if (!descendant.isChildOrEqualTo(ancestor)) {
+                    return false;
+                }
+                final int ancestorDepth = ancestor.getLevel().getDepth();
+                final int descendantDepth = descendant.getLevel().getDepth();
+                strictlyAbove |= ancestorDepth < descendantDepth;
+            }
+            return strictlyAbove;
+        }
+
         private void checkIfAggregationSizeIsTooLarge(List list) {
             if (list.size() > MondrianProperties.instance().MaxConstraints.get()) {
                 throw newEvalException(

diff --git a/src/main/mondrian/rolap/FastBatchingCellReader.java b/src/main/mondrian/rolap/FastBatchingCellReader.java
@@ -657,19 +657,22 @@ private List<RolapStar.Measure> getDistinctSqlMeasures(
          * <p>This is possible if:
          * <li>columns list is super set of other batch's constraint columns;
          *     and
-         * <li>both the batch does not have distinct count measure in it; and
          * <li>both have same Fact Table; and
          * <li>matching columns of this and other batch has the same value; and
          * <li>non matching columns of this batch have ALL VALUES
          * </ul>
          */
         boolean canBatch(Batch other) {
             return hasOverlappingBitKeys(other) &&
-                !hasDistinctCountMeasure() &&
-                !other.hasDistinctCountMeasure() &&
-                haveSameStarAndAggregation(other) &&
-                haveSameValuesForOverlappingColumnsOrHasAllChildrenForOthers(
-                    other);
+                (hasSameCompoundPredicate(other) ||
+                haveSameValuesForOverlappingColumnsOrHasAllChildrenForOthers(other))
+                && hasSameMeasureList(other)
+                && haveSameStarAndAggregation(other);
+        }
+
+        /**
+         * Returns whether this batch and the other one have measure lists of
+         * the same size, with every measure of the other batch also present
+         * in this batch's list.
+         */
+        private boolean hasSameMeasureList(Batch other) {
+            if (measuresList.size() != other.measuresList.size()) {
+                return false;
+            }
+            return measuresList.containsAll(other.measuresList);
+        }
 
         boolean hasOverlappingBitKeys(Batch other) {
@@ -681,19 +684,53 @@ boolean hasDistinctCountMeasure() {
             return getDistinctMeasureCount(measuresList) > 0;
         }
 
+        /**
+         * Returns whether this batch and the other one are constrained by
+         * equal compound predicates. Two batches that both have no predicate
+         * are considered the same.
+         */
+        boolean hasSameCompoundPredicate(Batch other) {
+            final StarPredicate mine = compoundPredicate();
+            final StarPredicate theirs = other.compoundPredicate();
+            if (mine == null || theirs == null) {
+                // Matches only when both sides lack a predicate.
+                return mine == theirs;
+            }
+            return mine.equalConstraint(theirs);
+        }
+
+        /**
+         * Combines all constraints of this batch into a single predicate: the
+         * predicates within each value set are OR-ed together, the per-set
+         * results are AND-ed, and the compound predicates of the batch key
+         * are AND-ed on top.
+         *
+         * @return the combined predicate, or null if there was nothing to
+         *     combine
+         */
+        private StarPredicate compoundPredicate() {
+            StarPredicate combined = null;
+            for (Set<StarColumnPredicate> valueSet : valueSets) {
+                StarPredicate anyOf = null;
+                for (StarColumnPredicate columnPredicate : valueSet) {
+                    anyOf = (anyOf == null)
+                        ? columnPredicate
+                        : anyOf.or(columnPredicate);
+                }
+                combined = (combined == null)
+                    ? anyOf
+                    : combined.and(anyOf);
+            }
+            for (StarPredicate keyPredicate :
+                batchKey.getCompoundPredicateList())
+            {
+                combined = (combined == null)
+                    ? keyPredicate
+                    : combined.and(keyPredicate);
+            }
+            return combined;
+        }
+
         boolean haveSameStarAndAggregation(Batch other) {
             boolean rollup[] = {false};
             boolean otherRollup[] = {false};
-            boolean hasSameCompoundPredicates =
-                batchKey.hasSameCompoundPredicate(other.batchKey);
 
             boolean hasSameAggregation = getAgg(rollup) == other.getAgg(otherRollup);
             boolean hasSameRollupOption = rollup[0] == otherRollup[0];
 
             boolean hasSameStar = getStar().equals(other.getStar());
-            return 
-                hasSameCompoundPredicates && hasSameStar && 
-                hasSameAggregation && hasSameRollupOption;
+            return hasSameStar && hasSameAggregation && hasSameRollupOption;
         }
 
         /**