Commit 754d258

[CGP] Update MemIntrinsic alignment if possible
Previously, the MemIntrinsic alignment update only ran when shouldAlignPointerArgs() returned true, which is currently the case only for ARM targets. Updating the argument alignment attributes of memcpy/memset intrinsics when the underlying object has larger alignment can be beneficial even when CGP did not increase that alignment itself (as the test changes show), so invert the loop and the if condition.

Differential Revision: https://reviews.llvm.org/D134281
1 parent 89a86ed commit 754d258
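
In outline, the inversion looks like this (a simplified sketch of the control flow in CodeGenPrepare::optimizeCallInst, with the loop and update bodies elided; the full hunk is below):

// Before: the intrinsic update was nested inside the target guard,
// so it only ran when shouldAlignPointerArgs() returned true (ARM).
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
  for (auto &Arg : CI->args()) {
    // ... raise alignment of underlying allocas/globals ...
  }
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
    // ... raise the intrinsic's dest/source align attributes ...
  }
}

// After: the intrinsic update runs for every target, picking up
// whatever alignment is already known for the underlying objects.
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
  for (auto &Arg : CI->args()) {
    // ... raise alignment of underlying allocas/globals ...
  }
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
  // ... raise the intrinsic's dest/source align attributes ...
}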

5 files changed: 74 additions, 69 deletions


llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 13 additions & 13 deletions
@@ -2252,19 +2252,19 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
           DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
         GV->setAlignment(PrefAlign);
     }
-    // If this is a memcpy (or similar) then we may be able to improve the
-    // alignment
-    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
-      Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
-      MaybeAlign MIDestAlign = MI->getDestAlign();
-      if (!MIDestAlign || DestAlign > *MIDestAlign)
-        MI->setDestAlignment(DestAlign);
-      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
-        MaybeAlign MTISrcAlign = MTI->getSourceAlign();
-        Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
-        if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
-          MTI->setSourceAlignment(SrcAlign);
-      }
+  }
+  // If this is a memcpy (or similar) then we may be able to improve the
+  // alignment.
+  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+    Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
+    MaybeAlign MIDestAlign = MI->getDestAlign();
+    if (!MIDestAlign || DestAlign > *MIDestAlign)
+      MI->setDestAlignment(DestAlign);
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+      MaybeAlign MTISrcAlign = MTI->getSourceAlign();
+      Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
+      if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
+        MTI->setSourceAlignment(SrcAlign);
     }
   }
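
Taken on its own, the relocated logic is equivalent to the following standalone helper (a compilable sketch against the LLVM C++ API; the helper name is illustrative, and the in-tree pass inlines this directly in optimizeCallInst rather than calling a helper):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"        // MemIntrinsic, MemTransferInst
#include "llvm/Transforms/Utils/Local.h"  // getKnownAlignment()

using namespace llvm;

// Raise a mem intrinsic's align attributes to whatever alignment
// getKnownAlignment() can prove for the underlying objects. Attributes
// are only ever increased, never decreased.
static void upgradeMemIntrinsicAlignment(MemIntrinsic *MI,
                                         const DataLayout &DL) {
  Align DestAlign = getKnownAlignment(MI->getDest(), DL);
  MaybeAlign MIDestAlign = MI->getDestAlign();
  if (!MIDestAlign || DestAlign > *MIDestAlign)
    MI->setDestAlignment(DestAlign);
  // memcpy/memmove also carry a source alignment; plain memset does not.
  if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    MaybeAlign MTISrcAlign = MTI->getSourceAlign();
    Align SrcAlign = getKnownAlignment(MTI->getSource(), DL);
    if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
      MTI->setSourceAlignment(SrcAlign);
  }
}

This also explains the shape of the test updates below: destinations like ptr null or ptr undef let getKnownAlignment() prove very large alignment, so the tests now take a real pointer argument to keep exercising the intended alignment paths.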

llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll

Lines changed: 6 additions & 6 deletions
@@ -19,16 +19,16 @@ define protected amdgpu_kernel void @test(i8 addrspace(1)* nocapture %ptr.coerce
 ; GCN-LABEL: test:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    ds_read_u8 v1, v0 offset:1
+; GCN-NEXT:    v_mov_b32_e32 v1, 2
+; GCN-NEXT:    ds_write_b8 v0, v1
 ; GCN-NEXT:    ds_read_u8 v2, v0 offset:2
+; GCN-NEXT:    ds_read_u16 v3, v0
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GCN-NEXT:    v_mov_b32_e32 v3, 2
-; GCN-NEXT:    ds_write_b8 v0, v3
-; GCN-NEXT:    ds_write_b8 v0, v3 offset:4
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    ds_write_b8 v0, v1 offset:5
 ; GCN-NEXT:    ds_write_b8 v0, v2 offset:6
-; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    ds_write_b16 v0, v3 offset:4
+; GCN-NEXT:    v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3]
 ; GCN-NEXT:    global_store_byte v0, v1, s[0:1]
 ; GCN-NEXT:    s_endpgm
 ; CHECK-LABEL: @test(

llvm/test/CodeGen/X86/mcu-abi.ll

Lines changed: 6 additions & 5 deletions
@@ -64,13 +64,14 @@ entry:
 define void @ret_large_struct(ptr noalias nocapture sret(%struct.st12_t) %agg.result, ptr byval(%struct.st12_t) nocapture readonly align 4 %r) #0 {
 ; CHECK-LABEL: ret_large_struct:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movl $48, %ecx
-; CHECK-NEXT:    calll memcpy
-; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    movl $12, %ecx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    rep;movsl (%esi), %es:(%edi)
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    retl
 entry:
   call void @llvm.memcpy.p0.p0.i32(ptr %agg.result, ptr %r, i32 48, i1 false)

llvm/test/CodeGen/X86/memset-2.ll

Lines changed: 10 additions & 8 deletions
@@ -1,31 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=yonah < %s | FileCheck %s
 
-define fastcc void @t1() nounwind {
+define fastcc void @t1(ptr nocapture %s) nounwind {
 ; CHECK-LABEL: t1:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    subl $16, %esp
 ; CHECK-NEXT:    pushl $188
 ; CHECK-NEXT:    pushl $0
-; CHECK-NEXT:    pushl $0
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    calll _memset
 ; CHECK-NEXT:    addl $16, %esp
 ; CHECK-NEXT:    ud2
 entry:
-  call void @llvm.memset.p0.i32(ptr null, i8 0, i32 188, i1 false)
+  call void @llvm.memset.p0.i32(ptr %s, i8 0, i32 188, i1 false)
   unreachable
 }
 
-define fastcc void @t2(i8 signext %c) nounwind {
+define fastcc void @t2(ptr nocapture %s, i8 signext %c) nounwind {
 ; CHECK-LABEL: t2:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    subl $12, %esp
-; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl $76, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    pushl $76
+; CHECK-NEXT:    pushl %edx
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    calll _memset
+; CHECK-NEXT:    addl $16, %esp
 ; CHECK-NEXT:    ud2
 entry:
-  call void @llvm.memset.p0.i32(ptr undef, i8 %c, i32 76, i1 false)
+  call void @llvm.memset.p0.i32(ptr %s, i8 %c, i32 76, i1 false)
   unreachable
 }

llvm/test/CodeGen/X86/memset64-on-x86-32.ll

Lines changed: 39 additions & 37 deletions
@@ -3,55 +3,57 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=SLOW_32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=SLOW_64
 
-define void @bork() nounwind {
+define void @bork(ptr nocapture align 4 %dst) nounwind {
 ; FAST-LABEL: bork:
 ; FAST:       # %bb.0:
+; FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; FAST-NEXT:    xorps %xmm0, %xmm0
-; FAST-NEXT:    movups %xmm0, 64
-; FAST-NEXT:    movups %xmm0, 48
-; FAST-NEXT:    movups %xmm0, 32
-; FAST-NEXT:    movups %xmm0, 16
-; FAST-NEXT:    movups %xmm0, 0
+; FAST-NEXT:    movups %xmm0, 64(%eax)
+; FAST-NEXT:    movups %xmm0, 48(%eax)
+; FAST-NEXT:    movups %xmm0, 32(%eax)
+; FAST-NEXT:    movups %xmm0, 16(%eax)
+; FAST-NEXT:    movups %xmm0, (%eax)
 ; FAST-NEXT:    retl
 ;
 ; SLOW_32-LABEL: bork:
 ; SLOW_32:       # %bb.0:
-; SLOW_32-NEXT:    movl $0, 4
-; SLOW_32-NEXT:    movl $0, 0
-; SLOW_32-NEXT:    movl $0, 12
-; SLOW_32-NEXT:    movl $0, 8
-; SLOW_32-NEXT:    movl $0, 20
-; SLOW_32-NEXT:    movl $0, 16
-; SLOW_32-NEXT:    movl $0, 28
-; SLOW_32-NEXT:    movl $0, 24
-; SLOW_32-NEXT:    movl $0, 36
-; SLOW_32-NEXT:    movl $0, 32
-; SLOW_32-NEXT:    movl $0, 44
-; SLOW_32-NEXT:    movl $0, 40
-; SLOW_32-NEXT:    movl $0, 52
-; SLOW_32-NEXT:    movl $0, 48
-; SLOW_32-NEXT:    movl $0, 60
-; SLOW_32-NEXT:    movl $0, 56
-; SLOW_32-NEXT:    movl $0, 68
-; SLOW_32-NEXT:    movl $0, 64
-; SLOW_32-NEXT:    movl $0, 76
-; SLOW_32-NEXT:    movl $0, 72
+; SLOW_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SLOW_32-NEXT:    movl $0, 4(%eax)
+; SLOW_32-NEXT:    movl $0, (%eax)
+; SLOW_32-NEXT:    movl $0, 12(%eax)
+; SLOW_32-NEXT:    movl $0, 8(%eax)
+; SLOW_32-NEXT:    movl $0, 20(%eax)
+; SLOW_32-NEXT:    movl $0, 16(%eax)
+; SLOW_32-NEXT:    movl $0, 28(%eax)
+; SLOW_32-NEXT:    movl $0, 24(%eax)
+; SLOW_32-NEXT:    movl $0, 36(%eax)
+; SLOW_32-NEXT:    movl $0, 32(%eax)
+; SLOW_32-NEXT:    movl $0, 44(%eax)
+; SLOW_32-NEXT:    movl $0, 40(%eax)
+; SLOW_32-NEXT:    movl $0, 52(%eax)
+; SLOW_32-NEXT:    movl $0, 48(%eax)
+; SLOW_32-NEXT:    movl $0, 60(%eax)
+; SLOW_32-NEXT:    movl $0, 56(%eax)
+; SLOW_32-NEXT:    movl $0, 68(%eax)
+; SLOW_32-NEXT:    movl $0, 64(%eax)
+; SLOW_32-NEXT:    movl $0, 76(%eax)
+; SLOW_32-NEXT:    movl $0, 72(%eax)
 ; SLOW_32-NEXT:    retl
 ;
 ; SLOW_64-LABEL: bork:
 ; SLOW_64:       # %bb.0:
-; SLOW_64-NEXT:    movq $0, 72
-; SLOW_64-NEXT:    movq $0, 64
-; SLOW_64-NEXT:    movq $0, 56
-; SLOW_64-NEXT:    movq $0, 48
-; SLOW_64-NEXT:    movq $0, 40
-; SLOW_64-NEXT:    movq $0, 32
-; SLOW_64-NEXT:    movq $0, 24
-; SLOW_64-NEXT:    movq $0, 16
-; SLOW_64-NEXT:    movq $0, 8
-; SLOW_64-NEXT:    movq $0, 0
+; SLOW_64-NEXT:    movq $0, 72(%rdi)
+; SLOW_64-NEXT:    movq $0, 64(%rdi)
+; SLOW_64-NEXT:    movq $0, 56(%rdi)
+; SLOW_64-NEXT:    movq $0, 48(%rdi)
+; SLOW_64-NEXT:    movq $0, 40(%rdi)
+; SLOW_64-NEXT:    movq $0, 32(%rdi)
+; SLOW_64-NEXT:    movq $0, 24(%rdi)
+; SLOW_64-NEXT:    movq $0, 16(%rdi)
+; SLOW_64-NEXT:    movq $0, 8(%rdi)
+; SLOW_64-NEXT:    movq $0, (%rdi)
 ; SLOW_64-NEXT:    retq
-  call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 80, i1 false)
+  call void @llvm.memset.p0.i64(ptr align 4 %dst, i8 0, i64 80, i1 false)
   ret void
 }
