Skip to content
This repository has been archived by the owner on Mar 30, 2021. It is now read-only.

Commit

Permalink
Logical Optimizer: Pull up VCols in to Agg
Browse files Browse the repository at this point in the history
  • Loading branch information
jpullokkaran committed Sep 23, 2016
1 parent 04a780c commit 2d73cd2
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ object DruidLogicalOptimizer {

// Optimizer batches, each given as a (batch name, rule strategy, rule) triple.
// Every batch here is scheduled with SparkShim.fixedPoint(100).
// NOTE(review): the two "Push GB through Project, Join" entries look like the
// removed/added pair of the diff view (the second adds the trailing comma needed
// before the new PullVColsIntoAgg entry) -- confirm against the actual file.
val batches: Seq[(String, SparkShim.RuleStrategy, Rule[LogicalPlan])] = Seq(
("Rewrite Sum(Literal) as Count(1)*Literal", SparkShim.fixedPoint(100), SumOfLiteralRewrite),
("Push GB through Project, Join", SparkShim.fixedPoint(100), PushGB)
("Push GB through Project, Join", SparkShim.fixedPoint(100), PushGB),
("Pull true VC up in to Agg", SparkShim.fixedPoint(100), PullVColsIntoAgg)
)

def apply(conf : SQLConf) : Optimizer = {
Expand Down Expand Up @@ -303,4 +304,31 @@ object SumOfLiteralRewrite extends Rule[LogicalPlan] with PredicateHelper {
}
}

}

/**
 * Logical-plan rule that rewrites an `Aggregate` sitting directly on top of a
 * `Project` by delegating to `ExprUtil.translateAggBelowProject`.
 *
 * The rewrite itself lives in the nested [[Pull]] extractor; `apply` merely
 * replaces every node on which the extractor succeeds.
 */
object PullVColsIntoAgg extends Rule[LogicalPlan] with PredicateHelper {

  /** Replaces each node matched by [[Pull]] with the plan the extractor produces. */
  override def apply(plan: LogicalPlan): LogicalPlan = plan.transform {
    case Pull(rewritten) => rewritten
  }

  /**
   * Extractor yielding the rewritten plan for an `Aggregate` over a `Project`.
   *
   * It is only attempted when
   *  - every expression in the project list is deterministic, and
   *  - at least one of them is an `Alias` whose aliased expression has more
   *    than one child.
   *
   * Any other plan shape, or a `None` from `ExprUtil.translateAggBelowProject`,
   * results in `None`.
   */
  object Pull {
    def unapply(op: LogicalPlan): Option[LogicalPlan] = op match {
      case Aggregate(groupExprs, aggExprs, proj @ Project(projList, _))
        if projList.forall(_.deterministic) &&
          projList.exists {
            case Alias(aliased, _) => aliased.children.size > 1
            case _ => false
          } =>
        ExprUtil.translateAggBelowProject(groupExprs, aggExprs, None, proj).map { te =>
          // Elements 1 and 2 of the result become the grouping and aggregate
          // expressions of the new Aggregate; element 4 is the plan placed below it.
          val newGroupExprs = te._1
          val newAggExprs = te._2
          val below = te._4
          val newChildOp = below match {
            case Project(_, _) => below
            case _ =>
              // Keep a Project directly under the Aggregate: project exactly the
              // attributes referenced by the new grouping/aggregate expressions.
              Project((newGroupExprs ++ newAggExprs).flatMap(_.references.toSeq), below)
          }
          Aggregate(newGroupExprs, newAggExprs, newChildOp)
        }
      case _ => None
    }
  }

}
18 changes: 18 additions & 0 deletions src/test/scala/org/sparklinedata/druid/client/CodeGenTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -903,5 +903,23 @@ class CodeGenTest extends BaseTest with BeforeAndAfterAll with Logging {
|order by x, y, z
""".stripMargin
,0,true,true)

// Group-by over a subquery whose projection aliases Substring(...) expressions
// built only from columns and literals.
// NOTE(review): the `1` argument is presumably the expected number of Druid
// queries in the plan (i.e. the aggregate is expected to be pushed down) and the
// two booleans presumably toggle plan/result display -- confirm against BaseTest.test.
test("subquery1",
"""
select x, sum(z) as z from
( select
Substring(o_orderstatus, 1, 2) x, Substring(l_shipdate, 1, 2) as y, c_acctbal as z
from orderLineItemPartSupplier) r1 group by x, y
""".stripMargin
, 1, true, true)

// Same shape as "subquery1", except that the first projected expression uses rand().
// NOTE(review): rand() presumably makes the projection non-deterministic, so the
// rewrite is expected not to apply; the `0` argument is presumably the expected
// number of Druid queries -- confirm against BaseTest.test.
test("subquery2",
"""
select x, sum(z) as z from
( select
Substring(o_orderstatus, 1, rand()) x, Substring(l_shipdate, 1, 2) as y, c_acctbal as z
from orderLineItemPartSupplier) r1 group by x, y
""".stripMargin
, 0, true, true)
}

0 comments on commit 2d73cd2

Please sign in to comment.