Skip to content

Commit

Permalink
[JIT] Optimize multiple loads to a COPY
Browse files Browse the repository at this point in the history
In the tiling process, it is possible that a single LOAD node that is
referenced by multiple nodes will be evaluated twice, because the
tiling process assigns a tile with an implicit load. The COPY node is
opaque to tiling and semantically equivalent, so it can be used to
'force' a value into a register if it has multiple referents.

We can probably improve on that if we can prove that we
will (probably) spill that value so that reifying it via COPY is a net
loss.

This uncovered an ordering bug in the sp_p6obind_o and sp_p6obind_s
templates, in that the ^write_barrier macro will evaluate its second
argument conditionally; if that second argument is then used in the
consequent block it might not be available. This is a bug in the
template and the only way we got away with it before now is that the
use in ^write_barrier folds the entire (load) subtree into a (nz)
node, thereby avoiding to evaluate the (load) node itself into a
separate value. Adding a (discard) which also orders the load resolves
the issue for now, but I'm thinking about more comprehensive solution.
  • Loading branch information
bdw committed Aug 5, 2018
1 parent 8ece93e commit e072183
Showing 1 changed file with 48 additions and 33 deletions.
81 changes: 48 additions & 33 deletions src/jit/optimize.c
@@ -1,7 +1,7 @@
#include "moar.h"

#if defined(MVM_JIT_DEBUG) && MVM_JIT_DEBUG == MVM_JIT_DEBUG_OPTIMIZE
#define _DEBUG(fmt, ...) do { fprintf(stderr, fmt "%s", __VA_ARGS__, "\n"); } while(0)
#if defined(MVM_JIT_DEBUG) && (MVM_JIT_DEBUG & MVM_JIT_DEBUG_OPTIMIZER) != 0
#define _DEBUG(fmt, ...) do { MVM_jit_log(tc, fmt "%s", __VA_ARGS__, "\n"); } while(0)
#else
#define _DEBUG(fmt, ...) do {} while(0)
#endif
Expand All @@ -21,6 +21,7 @@ struct Optimizer {
MVM_VECTOR_DECL(struct NodeRef, refs);
MVM_VECTOR_DECL(struct NodeInfo, info);
MVM_VECTOR_DECL(MVMint32, replacements);
MVMint32 replacement_cnt;
};

static void optimize_preorder(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
Expand All @@ -29,15 +30,54 @@ static void optimize_preorder(MVMThreadContext *tc, MVMJitTreeTraverser *travers

}

static void replace_node(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
MVMJitExprTree *tree, MVMint32 node, MVMint32 replacement) {
MVMint32 *c;
struct Optimizer *o = traverser->data;
/* double pointer iteration to update all children */
_DEBUG("Replaced node %d with %d", node, replacement);

MVM_VECTOR_ENSURE_SIZE(traverser->visits, replacement);
MVM_VECTOR_ENSURE_SIZE(o->info, replacement);
MVM_VECTOR_ENSURE_SIZE(o->replacements, replacement);

for (c = &o->info[node].refs; *c > 0; c = &o->refs[*c].next) {
tree->nodes[o->refs[*c].ptr] = replacement;
}

/* append existing to list */
if (o->info[replacement].refs > 0) {
*c = o->info[replacement].refs;
}
o->info[replacement].refs = o->info[node].refs;
o->info[replacement].ref_cnt += o->info[node].ref_cnt;

o->replacements[node] = replacement;
o->replacement_cnt++;
}

static void optimize_child(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
MVMJitExprTree *tree, MVMint32 node, MVMint32 child) {
/* add reference from parent to child, replace child with reference if possible */
MVMint32 first_child = MVM_JIT_EXPR_FIRST_CHILD(tree, node);
MVMint32 child_node = tree->nodes[first_child+child];
struct Optimizer *o = traverser->data;

/* double referenced LOAD replacement */
if (tree->nodes[child_node] == MVM_JIT_LOAD &&
o->info[child_node].ref_cnt > 1) {
MVMint32 replacement = MVM_jit_expr_apply_template_adhoc(tc, tree, "ns.", MVM_JIT_COPY, 1, child_node);
_DEBUG("optimizing multiple (ref_cnt=%d) LOAD (%d) to COPY", o->info[child_node].ref_cnt, child_node);
replace_node(tc, traverser, tree, child_node, replacement);
}


if (o->replacements[child_node] > 0) {
tree->nodes[first_child+child] = o->replacements[child_node];
_DEBUG("Parent node %d assigning replacement node (%d -> %d)",
node, child_node, o->replacements[child_node]);
child_node = o->replacements[child_node];
tree->nodes[first_child+child] = child_node;
o->replacement_cnt++;
}

/* add this parent node as a reference */
Expand All @@ -52,28 +92,19 @@ static void optimize_child(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
o->info[child_node].refs = r;
o->info[child_node].ref_cnt++;
}
}

/* TODO - figure out a more general way to do it (it is kind of template-like, I think */
MVMint32 wrap_copy(MVMThreadContext *tc, struct Optimizer *o, MVMJitExprTree *tree, MVMint32 node) {
MVMint32 root = MVM_jit_expr_apply_template_adhoc(tc, tree, "ns.", MVM_JIT_COPY, 1, node);
MVM_VECTOR_ENSURE_SIZE(o->info, root);
return root;
}




static void optimize_postorder(MVMThreadContext *tc, MVMJitTreeTraverser *traverser,
MVMJitExprTree *tree, MVMint32 node) {
/* after postorder, we will not revisit the node, so it's time to replace it */
struct Optimizer *o = traverser->data;
MVMint32 replacement = -1;

switch(tree->nodes[node]) {
case MVM_JIT_LOAD:
if (o->info[node].ref_cnt > 1) {
_DEBUG("Replacing a double-referenced load with a copy\n");
replacement = wrap_copy(tc, o, tree, node);
}
break;
case MVM_JIT_IDX:
{
MVMint32 *links = MVM_JIT_EXPR_LINKS(tree, node);
Expand All @@ -91,24 +122,7 @@ static void optimize_postorder(MVMThreadContext *tc, MVMJitTreeTraverser *traver
}

if (replacement > 0) {
/* double pointer iteration to update all children */
MVMint32 *c;
_DEBUG("Replaced node %d with %d\n", node, replacement);

MVM_VECTOR_ENSURE_SIZE(traverser->visits, replacement);
MVM_VECTOR_ENSURE_SIZE(o->info, replacement);
MVM_VECTOR_ENSURE_SIZE(o->replacements, replacement);

for (c = &o->info[node].refs; *c > 0; c = &o->refs[*c].next) {
tree->nodes[o->refs[*c].ptr] = replacement;
}

/* append existing to list */
if (o->info[replacement].refs > 0) {
*c = o->info[replacement].refs;
}
o->info[replacement].refs = o->info[node].refs;
o->info[replacement].ref_cnt += o->info[node].ref_cnt;
replace_node(tc, traverser, tree, node, replacement);
}
}

Expand All @@ -122,6 +136,7 @@ void MVM_jit_expr_tree_optimize(MVMThreadContext *tc, MVMJitGraph *jg, MVMJitExp
MVM_VECTOR_INIT(o.refs, tree->nodes_num);
MVM_VECTOR_INIT(o.info, tree->nodes_num * 2);
MVM_VECTOR_INIT(o.replacements, tree->nodes_num * 2);
o.replacement_cnt = 0;

/* Weasly trick: we increment the o->refs_num by one, so that we never
* allocate zero, so that a zero *refs is the same as 'nothing', so that we
Expand Down

0 comments on commit e072183

Please sign in to comment.