Skip to content

Commit

Permalink
[JIT] Optimize multiple loads to a COPY
Browse files Browse the repository at this point in the history
In the tiling process, it is possible that a single LOAD node that is
referenced by multiple nodes will be evaluated twice, because the
tiling process assigns a tile with an implicit load. The COPY node is
opaque to tiling and semantically equivalent, so it can be used to
'force' a value into a register if it has multiple referents.

We can probably improve on that if we can prove that we
will (probably) spill that value so that reifying it via COPY is a net
loss.

This uncovered an ordering bug in the sp_p6obind_o and sp_p6obind_s
templates, in that the ^write_barrier macro will evaluate its second
argument conditionally; if that second argument is then used in the
consequent block it might not be available. This is a bug in the
template and the only way we got away with it before now is that the
use in ^write_barrier folds the entire (load) subtree into a (nz)
node, thereby avoiding to evaluate the (load) node itself into a
separate value. Adding a (discard) which also orders the load resolves
the issue for now, but I'm thinking about more comprehensive solution.
  • Loading branch information
bdw committed Nov 7, 2017
1 parent ebf87d5 commit 373ab72
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 41 deletions.
6 changes: 4 additions & 2 deletions src/jit/core_templates.expr
Expand Up @@ -104,11 +104,13 @@
(template: sp_p6obind_n (store (add (^p6obody $0) $1) $2 int_sz))

(template: sp_p6obind_o
(dov (^write_barrier $0 $2)
(dov (discard $2)
(^write_barrier $0 $2)
(store (add (^p6obody $0) $1) $2 ptr_sz)))

(template: sp_p6obind_s
(dov (^write_barrier $0 $2)
(dov (discard $2)
(^write_barrier $0 $2)
(store (add (^p6obody $0) $1) $2 ptr_sz)))

(template: sp_p6ogetvt_o
Expand Down
96 changes: 57 additions & 39 deletions src/jit/optimize.c
@@ -1,7 +1,7 @@
#include "moar.h"

#if defined(MVM_JIT_DEBUG) && MVM_JIT_DEBUG == MVM_JIT_DEBUG_OPTIMIZE
#define _DEBUG(fmt, ...) do { fprintf(stderr, fmt "%s", __VA_ARGS__, "\n"); } while(0)
#if defined(MVM_JIT_DEBUG) && (MVM_JIT_DEBUG & MVM_JIT_DEBUG_OPTIMIZER) != 0
#define _DEBUG(fmt, ...) do { MVM_jit_log(tc, fmt "%s", __VA_ARGS__, "\n"); } while(0)
#else
#define _DEBUG(fmt, ...) do {} while(0)
#endif
Expand All @@ -21,6 +21,7 @@ struct Optimizer {
MVM_VECTOR_DECL(struct NodeRef, refs);
MVM_VECTOR_DECL(struct NodeInfo, info);
MVM_VECTOR_DECL(MVMint32, replacements);
MVMint32 replacement_cnt;
};

static void optimize_preorder(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
Expand All @@ -29,15 +30,61 @@ static void optimize_preorder(MVMThreadContext *tc, MVMJitTreeTraverser *travers

}

static void replace_node(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
MVMJitExprTree *tree, MVMint32 node, MVMint32 replacement) {
MVMint32 *c;
struct Optimizer *o = traverser->data;
/* double pointer iteration to update all children */
_DEBUG("Replaced node %d with %d", node, replacement);

MVM_VECTOR_ENSURE_SIZE(traverser->visits, replacement);
MVM_VECTOR_ENSURE_SIZE(o->info, replacement);
MVM_VECTOR_ENSURE_SIZE(o->replacements, replacement);

for (c = &o->info[node].refs; *c > 0; c = &o->refs[*c].next) {
tree->nodes[o->refs[*c].ptr] = replacement;
}

/* append existing to list */
if (o->info[replacement].refs > 0) {
*c = o->info[replacement].refs;
}
o->info[replacement].refs = o->info[node].refs;
o->info[replacement].ref_cnt += o->info[node].ref_cnt;

o->replacements[node] = replacement;
o->replacement_cnt++;

/* NB - we only need this for the op_info which is looked up everywhere, and
* we only need that for the nargs < 0 check, which I want to get rid of and
* replace with a bitmap - and that implies that I might be able to make the
* info structure transient. */
MVM_VECTOR_ENSURE_SIZE(tree->info, replacement);
tree->info[replacement].op_info = MVM_jit_expr_op_info(tc, tree->nodes[replacement]);
}

static void optimize_child(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
MVMJitExprTree *tree, MVMint32 node, MVMint32 child) {
/* add reference from parent to child, replace child with reference if possible */
MVMint32 first_child = tree->info[node].op_info->nchild < 0 ? node + 2 : node + 1;
MVMint32 child_node = tree->nodes[first_child+child];
struct Optimizer *o = traverser->data;

/* double referenced LOAD replacement */
if (tree->nodes[child_node] == MVM_JIT_LOAD &&
o->info[child_node].ref_cnt > 1) {
_DEBUG("optimizing multiple (ref_cnt=%d) LOAD (%d) to COPY", o->info[child_node].ref_cnt, child_node);
MVMint32 replacement = MVM_jit_expr_apply_template_adhoc(tc, tree, "..", MVM_JIT_COPY, child_node);
replace_node(tc, traverser, tree, child_node, replacement);
}


if (o->replacements[child_node] > 0) {
tree->nodes[first_child+child] = o->replacements[child_node];
_DEBUG("Parent node %d assigning replacement node (%d -> %d)",
node, child_node, o->replacements[child_node]);
child_node = o->replacements[child_node];
tree->nodes[first_child+child] = child_node;
o->replacement_cnt++;
}

/* add this parent node as a reference */
Expand All @@ -52,66 +99,36 @@ static void optimize_child(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
o->info[child_node].refs = r;
o->info[child_node].ref_cnt++;
}
}

/* TODO - figure out a more general way to do it (it is kind of template-like, I think */
MVMint32 wrap_copy(MVMThreadContext *tc, struct Optimizer *o, MVMJitExprTree *tree, MVMint32 node) {
MVMint32 top = tree->nodes_num;
MVMJitExprNode nodes[] = { MVM_JIT_COPY, node };
MVM_VECTOR_APPEND(tree->nodes, nodes, 2);
MVM_VECTOR_ENSURE_SIZE(o->info, top);
return top;
}




static void optimize_postorder(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
MVMJitExprTree *tree, MVMint32 node) {
/* after postorder, we will not revisit the node, so it's time to replace it */
struct Optimizer *o = traverser->data;
MVMint32 replacement = -1;

switch(tree->nodes[node]) {
case MVM_JIT_LOAD:
if (o->info[node].ref_cnt > 1) {
_DEBUG("Replacing a double-referenced load with a copy\n");
replacement = wrap_copy(tc, o, tree, node);
}
break;
case MVM_JIT_IDX:
{
MVMint32 base = tree->nodes[node+1];
MVMint32 idx = tree->nodes[node+2];
MVMint32 scale = tree->nodes[node+3];
if (tree->nodes[idx] == MVM_JIT_CONST) {
MVMint32 ofs = tree->nodes[idx+1] * scale;
MVMJitExprNode addr[] = { MVM_JIT_ADDR, base, ofs };
_DEBUG("Const idx (node=%d, base=%d, idx=%d, scale=%d, ofs=%d)\n", node, base, idx, scale, ofs);
_DEBUG("Const idx (node=%d, base=%d, idx=%d, scale=%d, ofs=%d)", node, base, idx, scale, ofs);
/* insert addr (base, $ofs) */
replacement = tree->nodes_num;
MVM_VECTOR_APPEND(tree->nodes, addr, 3);
replacement = MVM_jit_expr_apply_template_adhoc(tc, tree, "...", MVM_JIT_ADDR, base, ofs);
}
break;
}
}

if (replacement > 0) {
/* double pointer iteration to update all children */
MVMint32 *c;
_DEBUG("Replaced node %d with %d\n", node, replacement);

MVM_VECTOR_ENSURE_SIZE(traverser->visits, replacement);
MVM_VECTOR_ENSURE_SIZE(o->info, replacement);
MVM_VECTOR_ENSURE_SIZE(o->replacements, replacement);

for (c = &o->info[node].refs; *c > 0; c = &o->refs[*c].next) {
tree->nodes[o->refs[*c].ptr] = replacement;
}

/* append existing to list */
if (o->info[replacement].refs > 0) {
*c = o->info[replacement].refs;
}
o->info[replacement].refs = o->info[node].refs;
o->info[replacement].ref_cnt += o->info[node].ref_cnt;
replace_node(tc, traverser, tree, node, replacement);
}
}

Expand All @@ -125,6 +142,7 @@ void MVM_jit_expr_tree_optimize(MVMThreadContext *tc, MVMJitGraph *jg, MVMJitExp
MVM_VECTOR_INIT(o.refs, tree->nodes_num);
MVM_VECTOR_INIT(o.info, tree->nodes_num * 2);
MVM_VECTOR_INIT(o.replacements, tree->nodes_num * 2);
o.replacement_cnt = 0;

/* Weasly trick: we increment the o->refs_num by one, so that we never
* allocate zero, so that a zero *refs is the same as 'nothing', so that we
Expand Down

0 comments on commit 373ab72

Please sign in to comment.