Skip to content

Commit

Permalink
plpgsql: implement tail-call optimization for PLpgSQL routines
Browse files Browse the repository at this point in the history
This patch implements tail-call optimization for the nested routine
execution that is used to handle PLpgSQL control flow. PLpgSQL sub-routines
are always tail calls because they are built as "continuation" functions,
so we can always use the optimization for PLpgSQL. Tail-call optimization
is only possible if the plan is not distributed (although we may not
currently distribute such plans anyway).

The optimization is performed by setting a `RoutineSender` field (of type
`DeferredRoutineSender`) on the planner's eval context before planning and
running a nested routine. This `RoutineSender` allows a routine in tail-call
position to send the information needed to evaluate itself to its
parent, and then return NULL. Once the parent routine receives the
result, it checks whether it received a deferred nested routine through
the `RoutineSender`, and if so, evaluates it to obtain the actual result.

Given a simple looping function like the following:
```
CREATE FUNCTION f(n INT) RETURNS INT AS $$
  DECLARE
    i INT := 0;
  BEGIN
    LOOP
      IF i >= n THEN
        EXIT;
      END IF;
      i := i + 1;
    END LOOP;
    RETURN i;
  END
$$ LANGUAGE PLpgSQL;
```
This optimization reduces the runtime on my machine for `n=100000` from
more than 20 minutes to about 2 seconds.

Informs cockroachdb#105254

Release note: None
  • Loading branch information
DrewKimball authored and THardy98 committed Jul 21, 2023
1 parent 191e2cd commit 86b514d
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 8 deletions.
17 changes: 14 additions & 3 deletions pkg/sql/apply_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
Expand Down Expand Up @@ -254,7 +255,9 @@ func (a *applyJoinNode) runNextRightSideIteration(params runParams, leftRow tree
}
plan := p.(*planComponents)
rowResultWriter := NewRowResultWriter(&a.run.rightRows)
if err := runPlanInsidePlan(ctx, params, plan, rowResultWriter); err != nil {
if err := runPlanInsidePlan(
ctx, params, plan, rowResultWriter, nil, /* deferredRoutineSender */
); err != nil {
return err
}
a.run.rightRowsIterator = newRowContainerIterator(ctx, a.run.rightRows)
Expand All @@ -264,7 +267,11 @@ func (a *applyJoinNode) runNextRightSideIteration(params runParams, leftRow tree
// runPlanInsidePlan is used to run a plan and gather the results in the
// resultWriter, as part of the execution of an "outer" plan.
func runPlanInsidePlan(
ctx context.Context, params runParams, plan *planComponents, resultWriter rowResultWriter,
ctx context.Context,
params runParams,
plan *planComponents,
resultWriter rowResultWriter,
deferredRoutineSender eval.DeferredRoutineSender,
) error {
defer plan.close(ctx)
execCfg := params.ExecCfg()
Expand All @@ -285,9 +292,13 @@ func runPlanInsidePlan(
// we make sure to unset pausablePortal field on the planner.
plannerCopy.pausablePortal = nil
evalCtxFactory := func() *extendedEvalContext {
evalCtx := params.p.ExtendedEvalContextCopy()
plannerCopy.extendedEvalCtx = *params.p.ExtendedEvalContextCopy()
evalCtx := &plannerCopy.extendedEvalCtx
evalCtx.Planner = &plannerCopy
evalCtx.StreamManagerFactory = &plannerCopy
if deferredRoutineSender != nil {
evalCtx.RoutineSender = deferredRoutineSender
}
return evalCtx
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ func (b *Builder) buildExistsSubquery(
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
false, /* tailCall */
),
tree.DBoolFalse,
}, types.Bool), nil
Expand Down Expand Up @@ -815,6 +816,7 @@ func (b *Builder) buildSubquery(
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
false, /* tailCall */
), nil
}

Expand Down Expand Up @@ -869,6 +871,7 @@ func (b *Builder) buildSubquery(
true, /* calledOnNullInput */
false, /* multiColOutput */
false, /* generator */
false, /* tailCall */
), nil
}

Expand Down Expand Up @@ -964,6 +967,7 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
udf.Def.CalledOnNullInput,
udf.Def.MultiColDataSource,
udf.Def.SetReturning,
udf.TailCall,
), nil
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/opt/ops/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,11 @@ define UDFCall {
define UDFCallPrivate {
# Def points to the UDF SQL body.
Def UDFDefinition

# TailCall indicates whether the UDF is in tail-call position, meaning that
# it is nested in a parent routine which will not perform any additional
# processing once this call is evaluated.
#
# It is set to true for calls to PLpgSQL continuation routines and is left
# false (the zero value) when not specified.
TailCall bool
}

# KVOptions is a set of KVOptionItems that specify arbitrary keys and values
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/optbuilder/plpgsql.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ func (b *plpgsqlBuilder) callContinuation(con *continuation, s *scope) *scope {
for _, param := range b.params {
addArg(tree.Name(param.Name), param.Typ)
}
call := b.ob.factory.ConstructUDFCall(args, &memo.UDFCallPrivate{Def: con.def})
// PLpgSQL continuation routines are always in tail-call position.
call := b.ob.factory.ConstructUDFCall(args, &memo.UDFCallPrivate{Def: con.def, TailCall: true})

returnColName := scopeColName("").WithMetadataName(con.def.Name)
returnScope := s.push()
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/recursive_cte.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ func (n *recursiveCTENode) Next(params runParams) (bool, error) {
opName := "recursive-cte-iteration-" + strconv.Itoa(n.iterationCount)
ctx, sp := tracing.ChildSpan(params.ctx, opName)
defer sp.Finish()
if err := runPlanInsidePlan(ctx, params, newPlan.(*planComponents), rowResultWriter(n)); err != nil {
if err := runPlanInsidePlan(
ctx, params, newPlan.(*planComponents), rowResultWriter(n), nil, /* deferredRoutineSender */
); err != nil {
return false, err
}

Expand Down
53 changes: 50 additions & 3 deletions pkg/sql/routine.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
)
Expand All @@ -44,6 +45,18 @@ func (p *planner) EvalRoutineExpr(
return expr.CachedResult, nil
}

if expr.TailCall && !expr.Generator && p.EvalContext().RoutineSender != nil {
// This is a nested routine in tail-call position.
if !p.curPlan.flags.IsDistributed() && tailCallOptimizationEnabled {
// Tail-call optimizations are enabled. Send the information needed to
// evaluate this routine to the parent routine, then return. It is safe to
// return NULL here because the parent is guaranteed not to perform any
// processing on the result of the child.
p.EvalContext().RoutineSender.SendDeferredRoutine(expr, args)
return tree.DNull, nil
}
}

var g routineGenerator
g.init(p, expr, args)
defer g.Close(ctx)
Expand Down Expand Up @@ -98,9 +111,16 @@ type routineGenerator struct {
rch rowContainerHelper
rci *rowContainerIterator
currVals tree.Datums
// deferredRoutine encapsulates the information needed to execute a nested
// routine that has deferred its execution.
deferredRoutine struct {
expr *tree.RoutineExpr
args tree.Datums
}
}

var _ eval.ValueGenerator = &routineGenerator{}
var _ eval.DeferredRoutineSender = &routineGenerator{}

// init initializes a routineGenerator.
func (g *routineGenerator) init(p *planner, expr *tree.RoutineExpr, args tree.Datums) {
Expand All @@ -117,11 +137,28 @@ func (g *routineGenerator) ResolvedType() *types.T {
}

// Start is part of the ValueGenerator interface. It runs the routine and then
// keeps running any nested routine that deferred itself from tail-call
// position, stopping when a run finishes without a deferral or with an error.
func (g *routineGenerator) Start(ctx context.Context, txn *kv.Txn) (err error) {
	for {
		if err = g.startInternal(ctx, txn); err != nil {
			return err
		}
		next := g.deferredRoutine
		if next.expr == nil {
			// No nested routine deferred its execution, so there is no tail call
			// left to run.
			return nil
		}
		// The deferred routine was in tail-call position, so evaluating it
		// yields the result of this routine as well. Capture the planner
		// before Close, then re-initialize the generator for the deferred
		// routine and loop.
		planner := g.p
		g.Close(ctx)
		g.init(planner, next.expr, next.args)
	}
}

// startInternal implements logic for a single execution of a routine.
// TODO(mgartner): We can cache results for future invocations of the routine by
// creating a new iterator over an existing row container helper if the routine
// is cache-able (i.e., there are no arguments to the routine and stepping is
// disabled).
func (g *routineGenerator) Start(ctx context.Context, txn *kv.Txn) (err error) {
func (g *routineGenerator) startInternal(ctx context.Context, txn *kv.Txn) (err error) {
rt := g.expr.ResolvedType()
var retTypes []*types.T
if g.expr.MultiColOutput {
Expand Down Expand Up @@ -179,7 +216,7 @@ func (g *routineGenerator) Start(ctx context.Context, txn *kv.Txn) (err error) {
}

// Run the plan.
err = runPlanInsidePlan(ctx, g.p.RunParams(ctx), plan.(*planComponents), w)
err = runPlanInsidePlan(ctx, g.p.RunParams(ctx), plan.(*planComponents), w, g)
if err != nil {
return err
}
Expand Down Expand Up @@ -213,9 +250,19 @@ func (g *routineGenerator) Values() (tree.Datums, error) {
func (g *routineGenerator) Close(ctx context.Context) {
// The iterator is only closed when one is present.
if g.rci != nil {
g.rci.Close()
g.rci = nil
}
g.rch.Close(ctx)
// Reset every field, including any recorded deferred routine, to its zero
// value. Start relies on this when it calls Close followed by init to run a
// deferred tail-call routine on the same generator.
*g = routineGenerator{}
}

// tailCallOptimizationEnabled gates the deferral of nested routines that are
// in tail-call position: EvalRoutineExpr only defers such a routine to its
// parent when this is true. The default passed here is true.
// NOTE(review): presumably the value is varied in metamorphic test builds so
// that both paths get exercised — confirm against
// util.ConstantWithMetamorphicTestBool.
var tailCallOptimizationEnabled = util.ConstantWithMetamorphicTestBool(
"tail-call-optimization-enabled",
true,
)

// SendDeferredRoutine is part of the eval.DeferredRoutineSender interface. It
// records the given routine and its arguments on the generator so that they
// can be evaluated later rather than by the nested call itself. A later call
// overwrites whatever was recorded before.
func (g *routineGenerator) SendDeferredRoutine(routine *tree.RoutineExpr, args tree.Datums) {
	deferred := &g.deferredRoutine
	deferred.expr, deferred.args = routine, args
}

// droppingResultWriter drops all rows that are added to it. It only tracks
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/sem/eval/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ type Context struct {
// JobsProfiler is the interface for builtins to extract job specific
// execution details that may have been aggregated during a job's lifetime.
JobsProfiler JobsProfiler

// RoutineSender allows nested routines in tail-call position to defer their
// execution until control returns to the parent routine. It is only valid
// during local execution. It may be unset.
RoutineSender DeferredRoutineSender
}

// JobsProfiler is the interface used to fetch job specific execution details
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/sem/eval/deps.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,15 @@ type ClientNoticeSender interface {
BufferClientNotice(ctx context.Context, notice pgnotice.Notice)
}

// DeferredRoutineSender allows a nested routine to send the information needed
// for its own evaluation to a parent routine. This is used to defer execution
// for tail-call optimization. It can only be used during local execution.
//
// A sender is made available to nested routines through Context.RoutineSender,
// which may be unset.
type DeferredRoutineSender interface {
// SendDeferredRoutine sends a local nested routine and its arguments to its
// parent routine. expr is the routine whose evaluation is being deferred and
// args are the argument datums it should be evaluated with.
SendDeferredRoutine(expr *tree.RoutineExpr, args tree.Datums)
}

// PrivilegedAccessor gives access to certain queries that would otherwise
// require someone with RootUser access to query a given data source.
// It is defined independently to prevent a circular dependency on sql, tree and sqlbase.
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/sem/tree/routine.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ type RoutineExpr struct {

// Generator is true if the function may output a set of rows.
Generator bool

// TailCall is true if the routine is in a tail-call position in a parent
// routine. This means that once execution reaches this routine, the parent
// routine will return the result of evaluating this routine with no further
// changes. For routines in a tail-call position we implement an optimization
// to avoid nesting execution. This is necessary for performant PLpgSQL loops.
TailCall bool
}

// NewTypedRoutineExpr returns a new RoutineExpr that is well-typed.
Expand All @@ -123,6 +130,7 @@ func NewTypedRoutineExpr(
calledOnNullInput bool,
multiColOutput bool,
generator bool,
tailCall bool,
) *RoutineExpr {
return &RoutineExpr{
Args: args,
Expand All @@ -133,6 +141,7 @@ func NewTypedRoutineExpr(
CalledOnNullInput: calledOnNullInput,
MultiColOutput: multiColOutput,
Generator: generator,
TailCall: tailCall,
}
}

Expand Down

0 comments on commit 86b514d

Please sign in to comment.