Skip to content
This repository has been archived by the owner on Jun 20, 2019. It is now read-only.

Bug 228: First pass at reducing the number of temporaries created. #219

Merged
merged 1 commit into from
Jun 10, 2016

Conversation

ibuclaw
Copy link
Member

@ibuclaw ibuclaw commented Jun 9, 2016

Improves upon #217 by removing the get_compound_value pass and moving the generation directly into the expression builders.

The compiler now only wraps cleanups in a try/finally if the result has side effects.

Also omproves d_build_call by only creating temporaries if more than argument needs saving.

@jpf91 - This should be enough to fix the regression in 4.8 branch. But there may be new regressions created as a result, let's find out... 😄

Improves upon D-Programming-GDC#217 by removing the get_compound_value pass and
moving the generation directly into the expression builders.

The compiler now only wraps cleanups in a try/finally if the result
has side effects.

Also omproves d_build_call by only creating temporaries if more than
argument needs saving.
@ibuclaw
Copy link
Member Author

ibuclaw commented Jun 10, 2016

Passes.

@ibuclaw ibuclaw merged commit d8538bc into D-Programming-GDC:master Jun 10, 2016
@ibuclaw ibuclaw deleted the bug228a branch June 10, 2016 17:39
@jpf91
Copy link
Contributor

jpf91 commented Jun 10, 2016

This fixed the GCC-4.8 regression, great 👍

The DMD frontend merge introduced a new dtor related test case failure in GCC-4.9 / sdtor.d:4005
Interestingly GCC-4.8 works fine. I'll have a look at this tomorrow :-)

@jpf91
Copy link
Contributor

jpf91 commented Jun 11, 2016

void testFunc()
{
    foo(len >= 2 ? null : (len == 1 ?        makeS(1).get() : null)       ); 
}

Full example: https://paste.gnome.org/phxxr3cot with -O
with https://github.com/jpf91/GDC/tree/gdc-4.9

generates:

  struct S __tmpfordtor58;
  bool __cond59;
  bool __cond60;

  try
    {
      SAVE_EXPR <foo (this, (__cond60 = this->len > 1;, (signed int) __cond60;) == 0 ? (__cond59 = this->len == 1;, (signed int) __cond59;) != 0 ? __tmpfordtor58 = makeS (this, 1) [return slot optimization];, get (&__tmpfordtor58, 0B); : 0B : 0B, 0B)>;
    }
  finally
    {
      if (!((signed int) __cond60 != 0))
        {
          if ((signed int) __cond59 != 0)
            {
              __dtor (&__tmpfordtor58);
            }
        }
    }, SAVE_EXPR <foo (this, (__cond60 = this->len > 1;, (signed int) __cond60;) == 0 ? (__cond59 = this->len == 1;, (signed int) __cond59;) != 0 ? __tmpfordtor58 = makeS (this, 1) [return slot optimization];, get (&__tmpfordtor58, 0B); : 0B : 0B, 0B)>;;

which looks fine. Unfortunately gcc-4.9 is 'clever' and optimizes the two ifs into if (__cond59 > __cond60):

  if (__cond59_1 > __cond60_13)
    goto <bb 12>;
  else
    goto <bb 14>;

Unfortunately, if len >= 2 then __cond59_1 is never initialized.... (jg .L27)

    .type   _D5sdtor4mainFZ8testFuncMFZv, @function
_D5sdtor4mainFZ8testFuncMFZv:
.LFB9:
    .loc 1 49 0
    .cfi_startproc
    .cfi_personality 0x3,__gdc_personality_v0
    .cfi_lsda 0x3,.LLSDA9
.LVL52:
    pushq   %r12
    .cfi_def_cfa_offset 16
    .cfi_offset 12, -16
    pushq   %rbp
    .cfi_def_cfa_offset 24
    .cfi_offset 6, -24
    pushq   %rbx
    .cfi_def_cfa_offset 32
    .cfi_offset 3, -32
    subq    $80, %rsp
    .cfi_def_cfa_offset 112
    movq    %rdi, %rbx
    .loc 1 51 0
    movl    24(%rdi), %eax
    cmpl    $1, %eax
.LVL53:
    setg    %bpl
.LVL54:
    jg  .L27
.LVL55:
    movl    $0, %r12d
    cmpl    $1, %eax
    jne .L27
    .loc 1 51 0 is_stmt 0 discriminator 1
    movl    $1, %edx
    movq    %rdi, %rsi
    leaq    48(%rsp), %rdi
.LVL56:
.LEHB2:
    call    _D5sdtor4mainFZ5makeSMFiZS5sdtor4mainFZ1S
.LVL57:
.LBB11:
.LBB12:
    .loc 1 12 0 is_stmt 1
    movzbl  48(%rsp), %eax
    addl    $48, %eax
    movb    %al, 78(%rsp)
    movq    $2, 32(%rsp)
    movq    $.LC1, 40(%rsp)
    movq    $1, 16(%rsp)
    leaq    78(%rsp), %rax
    movq    %rax, 24(%rsp)
    movq    $4, (%rsp)
    movq    $.LC5, 8(%rsp)
    movl    $3, %esi
    movq    %rsp, %rdx
    movl    $_D12TypeInfo_Aya6__initZ, %edi
    call    _d_arraycatnTX
.LVL58:
    movq    %rdx, %rcx
    movq    56(%rsp), %rdx
    leaq    8(%rdx), %rsi
    movq    %rax, %rdx
    movl    $_D12TypeInfo_Aya6__initZ, %edi
    call    _d_arrayappendT
.LEHE2:
.LVL59:
.LBE12:
.LBE11:
    .loc 1 51 0
    movl    $1, %r12d
.LVL60:
.L27:
    movl    $0, %edx
    movl    $0, %esi
    movq    %rbx, %rdi
.LEHB3:
    call    _D5sdtor4mainFZ3fooMFPvPvZv
.LEHE3:
.LVL61:
    jmp .L37
.L35:
    movq    %rax, %rbx
.LVL62:
    cmpb    %bpl, %r12b
    jbe .L31
    jmp .L33
.LVL63:
.L37:
    .loc 1 51 0 is_stmt 0 discriminator 7
    cmpb    %bpl, %r12b
    jbe .L26
.LVL64:
.LBB13:
.LBB14:
    .loc 1 18 0 is_stmt 1
    movzbl  48(%rsp), %eax
    addl    $48, %eax
    movb    %al, 79(%rsp)
    movq    $2, 32(%rsp)
    movq    $.LC1, 40(%rsp)
    movq    $1, 16(%rsp)
    leaq    79(%rsp), %rax
    movq    %rax, 24(%rsp)
    movq    $5, (%rsp)
    movq    $.LC2, 8(%rsp)
    movl    $3, %esi
    movq    %rsp, %rdx
    movl    $_D12TypeInfo_Aya6__initZ, %edi
.LEHB4:
    call    _d_arraycatnTX
.LVL65:
    movq    %rdx, %rcx
    movq    56(%rsp), %rsi
    addq    $8, %rsi
    movq    %rax, %rdx
    movl    $_D12TypeInfo_Aya6__initZ, %edi
    call    _d_arrayappendT
.LVL66:
    jmp .L26
.LVL67:
.L36:
    movq    %rax, %rbx
.LVL68:
.L33:
.LBE14:
.LBE13:
    .loc 1 51 0 discriminator 13
    leaq    48(%rsp), %rdi
    call    _D5sdtor4mainFZ1S6__dtorMFZv
.LVL69:
.L31:
    movq    %rbx, %rdi
    call    _Unwind_Resume
.LEHE4:
.LVL70:
.L26:
    .loc 1 52 0
    addq    $80, %rsp
    .cfi_def_cfa_offset 32
    popq    %rbx
    .cfi_def_cfa_offset 24
.LVL71:
    popq    %rbp
    .cfi_def_cfa_offset 16
.LVL72:
    popq    %r12
    .cfi_def_cfa_offset 8
.LVL73:
    ret
    .cfi_endproc
.LFE9:
    .section    .gcc_except_table
.LLSDA9:
    .byte   0xff
    .byte   0xff
    .byte   0x1
    .uleb128 .LLSDACSE9-.LLSDACSB9
.LLSDACSB9:
    .uleb128 .LEHB2-.LFB9
    .uleb128 .LEHE2-.LEHB2
    .uleb128 .L36-.LFB9
    .uleb128 0
    .uleb128 .LEHB3-.LFB9
    .uleb128 .LEHE3-.LEHB3
    .uleb128 .L35-.LFB9
    .uleb128 0
    .uleb128 .LEHB4-.LFB9
    .uleb128 .LEHE4-.LEHB4
    .uleb128 0
    .uleb128 0

Any idea what could be the problem in this case? Or is this even a GCC bug?

Something that's also suspicious: the movl $1, %r12d (setting __cond59) is done after the call _D5sdtor4mainFZ5makeSMFiZS5sdtor4mainFZ1S (calling makeS). Is that correct?

@ibuclaw
Copy link
Member Author

ibuclaw commented Jun 12, 2016

@jpf91 - Does it help if we didn't convert boolean to signed integers?

diff --git a/gcc/d/d-convert.cc b/gcc/d/d-convert.cc
index 938f8e7..34cbfc1 100644
--- a/gcc/d/d-convert.cc
+++ b/gcc/d/d-convert.cc
@@ -78,7 +78,7 @@ d_build_truthvalue_op (tree_code code, tree op0, tree op1)
        op1 = convert (result_type, op1);
     }

-  return build2 (code, bool_type_node, op0, op1);
+  return fold_build2 (code, bool_type_node, op0, op1);
 }


@@ -249,13 +249,8 @@ d_truthvalue_conversion (tree expr)
       if (promoted_type)
        expr = convert (promoted_type, expr);

-      // Without this, the backend tries to load a float reg with and integer
-      // value with fails (on i386 and rs6000, at least).
-      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (expr)))
-       return d_build_truthvalue_op (NE_EXPR, expr,
-                                     convert (TREE_TYPE (expr), integer_zero_node));
-
-      return d_build_truthvalue_op (NE_EXPR, expr, integer_zero_node);
+      return d_build_truthvalue_op (NE_EXPR, expr,
+                                   build_zero_cst (TREE_TYPE (expr)));
     }
 }

Generates:

  struct S __tmpfordtor58;
  bool __cond59;
  bool __cond60;

  try
    {
      SAVE_EXPR <foo (this, NON_LVALUE_EXPR <__cond60 = this->len > 1;, __cond60> ? 0B : NON_LVALUE_EXPR <__cond59 = this->len == 1;, __cond59> ? __tmpfordtor58 = makeS (this, 1) [return slot optimization];, get (&__tmpfordtor58, 0B); : 0B, 0B)>;
    }
  finally
    {
      if (!NON_LVALUE_EXPR <__cond60>)
        {
          if (NON_LVALUE_EXPR <__cond59>)
            {
              __dtor (&__tmpfordtor58);
            }
        }
    }, SAVE_EXPR <foo (this, NON_LVALUE_EXPR <__cond60 = this->len > 1;, __cond60> ? 0B : NON_LVALUE_EXPR <__cond59 = this->len == 1;, __cond59> ? __tmpfordtor58 = makeS (this, 1) [return slot optimization];, get (&__tmpfordtor58, 0B); : 0B, 0B)>;;

@jpf91
Copy link
Contributor

jpf91 commented Jun 12, 2016

Does it help if we didn't convert boolean to signed integers?

Unfortunately no. But I now managed to reproduce the problem in C++, so it's definitely a GCC (4.9.3 only) bug:
https://paste.gnome.org/pqwxq3fek

/opt/gdc/bin/g++ sdtor.cpp && ./a.out
foo

/opt/gdc/bin/g++ sdtor.cpp -O && ./a.out
foo
fail

Any idea what to search for in the GCC bugzilla? And what shall we do for GCC-4.9 now? Ignore the error / disable the test case with a warning?

@ibuclaw
Copy link
Member Author

ibuclaw commented Jun 12, 2016

And this is as a result of the 2.068 update, and not cleanups I've done since?

@jpf91
Copy link
Contributor

jpf91 commented Jun 12, 2016

The test was introduced in the 2.068 update and doesn't really compile without the new frontend. The reduced test case works fine before the 2.068 merge and fails after the merge. But the frontend produced completely different code:

before:

{
  struct S __slS42;

  SAVE_EXPR <len <= 1 ? len == 1 ? try
    {
      SAVE_EXPR <&(__slS42 = {};, __slS42)>;, SAVE_EXPR <get (SAVE_EXPR <&(__slS42 = {};, __slS42)>)>;;
    }
  finally
    {
      __dtor (&__slS42);
    }, SAVE_EXPR <get (SAVE_EXPR <&(__slS42 = {};, __slS42)>)>; : 0B : 0B>;, foo (SAVE_EXPR <len <= 1 ? len == 1 ? try
    {
      SAVE_EXPR <&(__slS42 = {};, __slS42)>;, SAVE_EXPR <get (SAVE_EXPR <&(__slS42 = {};, __slS42)>)>;;
    }
  finally
    {
      __dtor (&__slS42);
    }, SAVE_EXPR <get (SAVE_EXPR <&(__slS42 = {};, __slS42)>)>; : 0B : 0B>);;
}

after:

{
  struct S __slS50;
  bool __cond51;
  bool __cond52;

  try
    {
      SAVE_EXPR <(__cond52 = len > 1;, (signed int) __cond52;) == 0 ? (__cond51 = len == 1;, (signed int) __cond51;) != 0 ? SAVE_EXPR <&(__slS50 = {};, __slS50)>;, get (SAVE_EXPR <&(__slS50 = {};, __slS50)>); : 0B : 0B>;, SAVE_EXPR <foo (SAVE_EXPR <(__cond52 = len > 1;, (signed int) __cond52;) == 0 ? (__cond51 = len == 1;, (signed int) __cond51;) != 0 ? SAVE_EXPR <&(__slS50 = {};, __slS50)>;, get (SAVE_EXPR <&(__slS50 = {};, __slS50)>); : 0B : 0B>)>;;
    }
  finally
    {
      if (!((signed int) __cond52 != 0))
        {
          if ((signed int) __cond51 != 0)
            {
              __dtor (&__slS50);
            }
        }
    }
}

ibuclaw added a commit to ibuclaw/GDC that referenced this pull request Jun 15, 2016
Improves upon D-Programming-GDC#219 by refactoring the checks for COMPOUND_EXPR into
a routine that recursively iterates all expressions, splitting the
result from the rest of the expression.

There are a few more cases that are caught too, including component
references, and when generating constructors for array and struct
literals, both when using new() and without.

The compiler also now extracts the temporary object from constructor
expressions in build_expr_dtor(), which elides a temporary of the
call result being made.

Finally, add_stmt() both now recursively adds COMPOUND_EXPR
expressions as separate statements, and checks for side effects in
all statements to be written.  This removes a lot of noise from
common rewrites where ((e1, e2), e3) is generated, but only the
first and last expression actually do anything.
ibuclaw added a commit to ibuclaw/GDC that referenced this pull request Jun 15, 2016
Improves upon D-Programming-GDC#219 by refactoring the checks for COMPOUND_EXPR into
a routine that recursively iterates all expressions, splitting the
result from the rest of the expression.

There are a few more cases that are caught too, including component
references, and when generating constructors for array and struct
literals, both when using new() and without.

The compiler also now extracts the temporary object from constructor
expressions in build_expr_dtor(), which elides a temporary of the
call result being made.

Finally, add_stmt() both now recursively adds COMPOUND_EXPR
expressions as separate statements, and checks for side effects in
all statements to be written.  This removes a lot of noise from
common rewrites where ((e1, e2), e3) is generated, but only the
first and last expression actually do anything.
ibuclaw added a commit to ibuclaw/GDC that referenced this pull request Jun 15, 2016
Improves upon D-Programming-GDC#219 by refactoring the checks for COMPOUND_EXPR into
a routine that recursively iterates all expressions, splitting the
result from the rest of the expression.

There are a few more cases that are caught too, including component
references, and when generating constructors for array and struct
literals, both when using new() and without.

The compiler also now extracts the temporary object from constructor
expressions in build_expr_dtor(), which elides a temporary of the
call result being made.

Finally, add_stmt() both now recursively adds COMPOUND_EXPR
expressions as separate statements, and checks for side effects in
all statements to be written.  This removes a lot of noise from
common rewrites where ((e1, e2), e3) is generated, but only the
first and last expression actually do anything.
@ibuclaw ibuclaw mentioned this pull request Jul 3, 2016
5 tasks
@ibuclaw
Copy link
Member Author

ibuclaw commented Jul 4, 2016

@ibuclaw
Copy link
Member Author

ibuclaw commented Jul 4, 2016

Also go the first good commit, but I don't think it's of any use. Raised a bug against gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71762

@ibuclaw
Copy link
Member Author

ibuclaw commented Jul 5, 2016

@jpf91 - found an upstream patch that fixes the bug in gcc-4.9 - see bug report.

@jpf91
Copy link
Contributor

jpf91 commented Jul 5, 2016

found an upstream patch that fixes the bug in gcc-4.9 - see bug report.

Great! What does that mean for the GDC port? We could

  • disable the test case
  • add the backport patch to setup-gcc.sh & add patching to the CI script
  • ask users to apply the patch manually & add patching to the CI script

BTW: It sounds as if the fix only works for boolean values. This is fine for GDC as we now always use booleans in that compiler-generated code. But IIRC the GCC bug is also reproducible using integer types.

BTW2: As we now have 0 unresolved tests, shall we extend the semaphore CI script to check unresolved cases as well?

if grep -q FAIL gcc/testsuite/gdc*/gdc.sum; then false; fi

@ibuclaw
Copy link
Member Author

ibuclaw commented Jul 5, 2016

Great! What does that mean for the GDC port? We could
disable the test case
add the backport patch to setup-gcc.sh & add patching to the CI script
ask users to apply the patch manually & add patching to the CI script

Well, it seems that I have the GCC maintainers attention for the moment. If they fix it, then there's nothing for us to do.

BTW: It sounds as if the fix only works for boolean values. This is fine for GDC as we now always use booleans in that compiler-generated code. But IIRC the GCC bug is also reproducible using integer types.

I can't reproduce using integers.

BTW2: As we now have 0 unresolved tests, shall we extend the semaphore CI script to check unresolved cases as well?

Yes.

@jpf91
Copy link
Contributor

jpf91 commented Jul 6, 2016

I can't reproduce using integers.

You're right. The bug was also present when we still converted these boolean values to integers in the comparison, but the variables were always typed as bools. So that was a mix-up on my part ;-)

BTW2: As we now have 0 unresolved tests, shall we extend the semaphore CI script to check >>unresolved cases as well?

Yes.

done.

@ibuclaw
Copy link
Member Author

ibuclaw commented Jul 6, 2016

They've actually made a test case reproducible in later gcc versions too.

@ibuclaw
Copy link
Member Author

ibuclaw commented Sep 16, 2016

FYI https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71762#c5

If they're closing the 4.9 branch, that's probably a sign that we should too.

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants