Skip to content

Commit

Permalink
[CHERI-{MIPS,RISC-V}] Allow inlining of copies without capabilities
Browse files Browse the repository at this point in the history
Now that TargetLowering::getOptimalMemOpType() has access to a
PreserveCheriTags value, we can inline copies of size >= sizeof(cap) if they
are known not to require tag-preserving behaviour. This means that for a
struct foo { long a; long b; } we can now inline a copy-assignment,
whereas previously we assumed that it might contain capabilities and
always called memcpy.
  • Loading branch information
arichardson committed Oct 5, 2022
1 parent 5f2cd5a commit f8a0d53
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 142 deletions.
5 changes: 4 additions & 1 deletion llvm/lib/Target/Mips/MipsISelLowering.cpp
Expand Up @@ -5668,7 +5668,10 @@ EVT MipsTargetLowering::getOptimalMemOpType(
// memcpy/memmove call (by returning MVT::isVoid), since it could still
// contain a capability if sufficiently aligned at runtime. Zeroing
// memsets can fall back on non-capability loads/stores.
return MVT::isVoid;
// Note: We can still inline the memcpy if the frontend has marked the
// copy as not requiring tag-preserving behaviour.
if (Op.PreserveTags != PreserveCheriTags::Unnecessary)
return MVT::isVoid;
}
if (Subtarget.isGP64bit() && Op.isAligned(Align(8)))
return MVT::i64;
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -9295,7 +9295,10 @@ EVT RISCVTargetLowering::getOptimalMemOpType(
// memcpy/memmove call (by returning MVT::isVoid), since it could still
// contain a capability if sufficiently aligned at runtime. Zeroing
// memsets can fall back on non-capability loads/stores.
return MVT::isVoid;
// Note: We can still inline the memcpy if the frontend has marked the
// copy as not requiring tag-preserving behaviour.
if (Op.PreserveTags != PreserveCheriTags::Unnecessary)
return MVT::isVoid;
}
}
}
Expand Down
Expand Up @@ -44,7 +44,6 @@ entry:
}

; We should be able to inline the memcpy/memmove call if the intrinsic has no_preserve_cheri_tags:
; TODO: we should be able to elide this memcpy call
define void @memcpy_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
Expand Down
Expand Up @@ -98,21 +98,14 @@ entry:
}

; We should be able to inline the memcpy/memmove call if the intrinsic has no_preserve_cheri_tags:
; TODO: we should be able to elide this memcpy call
define void @memcpy_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
; CHECK-LABEL: memcpy_no_preserve:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cincoffset $c11, $c11, -16
; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-NEXT: cgetpccincoffset $c1, $1
; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1)
; CHECK-NEXT: cjalr $c12, $c17
; CHECK-NEXT: daddiu $4, $zero, 16
; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-NEXT: cld $1, $zero, 0($c4)
; CHECK-NEXT: csd $1, $zero, 0($c3)
; CHECK-NEXT: cld $1, $zero, 8($c4)
; CHECK-NEXT: cjr $c17
; CHECK-NEXT: cincoffset $c11, $c11, 16
; CHECK-NEXT: csd $1, $zero, 8($c3)
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
%b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)*
Expand All @@ -123,17 +116,11 @@ entry:
define void @memmove_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
; CHECK-LABEL: memmove_no_preserve:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cincoffset $c11, $c11, -16
; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-NEXT: cgetpccincoffset $c1, $1
; CHECK-NEXT: clcbi $c12, %capcall20(memmove)($c1)
; CHECK-NEXT: cjalr $c12, $c17
; CHECK-NEXT: daddiu $4, $zero, 16
; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-NEXT: cld $1, $zero, 8($c4)
; CHECK-NEXT: cld $2, $zero, 0($c4)
; CHECK-NEXT: csd $1, $zero, 8($c3)
; CHECK-NEXT: cjr $c17
; CHECK-NEXT: cincoffset $c11, $c11, 16
; CHECK-NEXT: csd $2, $zero, 0($c3)
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
%b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)*
Expand Down
45 changes: 21 additions & 24 deletions llvm/test/CodeGen/CHERI-Generic/MIPS/strcpy-to-memcpy-no-tags.ll
Expand Up @@ -20,18 +20,17 @@ declare i8 addrspace(200)* @stpncpy(i8 addrspace(200)*, i8 addrspace(200)*, i64)
define void @test_strcpy_to_memcpy(i8 addrspace(200)* align 8 %dst) addrspace(200) nounwind {
; CHECK-ASM-LABEL: test_strcpy_to_memcpy:
; CHECK-ASM: # %bb.0: # %entry
; CHECK-ASM-NEXT: cincoffset $c11, $c11, -16
; CHECK-ASM-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-ASM-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-ASM-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-ASM-NEXT: cgetpccincoffset $c1, $1
; CHECK-ASM-NEXT: clcbi $c4, %captab20(.Lstr)($c1)
; CHECK-ASM-NEXT: clcbi $c12, %capcall20(memcpy)($c1)
; CHECK-ASM-NEXT: cjalr $c12, $c17
; CHECK-ASM-NEXT: daddiu $4, $zero, 17
; CHECK-ASM-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-ASM-NEXT: clcbi $c1, %captab20(.Lstr)($c1)
; CHECK-ASM-NEXT: cld $1, $zero, 0($c1)
; CHECK-ASM-NEXT: clb $2, $zero, 16($c1)
; CHECK-ASM-NEXT: cld $3, $zero, 8($c1)
; CHECK-ASM-NEXT: csd $1, $zero, 0($c3)
; CHECK-ASM-NEXT: csb $2, $zero, 16($c3)
; CHECK-ASM-NEXT: cjr $c17
; CHECK-ASM-NEXT: cincoffset $c11, $c11, 16
; CHECK-ASM-NEXT: csd $3, $zero, 8($c3)
; CHECK-IR-LABEL: define {{[^@]+}}@test_strcpy_to_memcpy
; CHECK-IR-SAME: (i8 addrspace(200)* align 8 [[DST:%.*]]) addrspace(200) #[[ATTR0:[0-9]+]] {
; CHECK-IR-NEXT: entry:
Expand All @@ -46,18 +45,17 @@ entry:
define void @test_stpcpy_to_memcpy(i8 addrspace(200)* align 8 %dst) addrspace(200) nounwind {
; CHECK-ASM-LABEL: test_stpcpy_to_memcpy:
; CHECK-ASM: # %bb.0: # %entry
; CHECK-ASM-NEXT: cincoffset $c11, $c11, -16
; CHECK-ASM-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-ASM-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-ASM-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-ASM-NEXT: cgetpccincoffset $c1, $1
; CHECK-ASM-NEXT: clcbi $c4, %captab20(.Lstr)($c1)
; CHECK-ASM-NEXT: clcbi $c12, %capcall20(memcpy)($c1)
; CHECK-ASM-NEXT: cjalr $c12, $c17
; CHECK-ASM-NEXT: daddiu $4, $zero, 17
; CHECK-ASM-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-ASM-NEXT: clcbi $c1, %captab20(.Lstr)($c1)
; CHECK-ASM-NEXT: cld $1, $zero, 0($c1)
; CHECK-ASM-NEXT: clb $2, $zero, 16($c1)
; CHECK-ASM-NEXT: cld $3, $zero, 8($c1)
; CHECK-ASM-NEXT: csd $1, $zero, 0($c3)
; CHECK-ASM-NEXT: csb $2, $zero, 16($c3)
; CHECK-ASM-NEXT: cjr $c17
; CHECK-ASM-NEXT: cincoffset $c11, $c11, 16
; CHECK-ASM-NEXT: csd $3, $zero, 8($c3)
; CHECK-IR-LABEL: define {{[^@]+}}@test_stpcpy_to_memcpy
; CHECK-IR-SAME: (i8 addrspace(200)* align 8 [[DST:%.*]]) addrspace(200) #[[ATTR0]] {
; CHECK-IR-NEXT: entry:
Expand Down Expand Up @@ -109,18 +107,17 @@ entry:
define void @test_strncpy_to_memcpy(i8 addrspace(200)* align 8 %dst) addrspace(200) nounwind {
; CHECK-ASM-LABEL: test_strncpy_to_memcpy:
; CHECK-ASM: # %bb.0: # %entry
; CHECK-ASM-NEXT: cincoffset $c11, $c11, -16
; CHECK-ASM-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-ASM-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-ASM-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-ASM-NEXT: cgetpccincoffset $c1, $1
; CHECK-ASM-NEXT: clcbi $c4, %captab20(.Lstr)($c1)
; CHECK-ASM-NEXT: clcbi $c12, %capcall20(memcpy)($c1)
; CHECK-ASM-NEXT: cjalr $c12, $c17
; CHECK-ASM-NEXT: daddiu $4, $zero, 17
; CHECK-ASM-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-ASM-NEXT: clcbi $c1, %captab20(.Lstr)($c1)
; CHECK-ASM-NEXT: cld $1, $zero, 0($c1)
; CHECK-ASM-NEXT: clb $2, $zero, 16($c1)
; CHECK-ASM-NEXT: cld $3, $zero, 8($c1)
; CHECK-ASM-NEXT: csd $1, $zero, 0($c3)
; CHECK-ASM-NEXT: csb $2, $zero, 16($c3)
; CHECK-ASM-NEXT: cjr $c17
; CHECK-ASM-NEXT: cincoffset $c11, $c11, 16
; CHECK-ASM-NEXT: csd $3, $zero, 8($c3)
; CHECK-IR-LABEL: define {{[^@]+}}@test_strncpy_to_memcpy
; CHECK-IR-SAME: (i8 addrspace(200)* align 8 [[DST:%.*]]) addrspace(200) #[[ATTR0]] {
; CHECK-IR-NEXT: entry:
Expand Down
Expand Up @@ -86,21 +86,17 @@ entry:
}

; We should be able to inline the memcpy/memmove call if the intrinsic has no_preserve_cheri_tags:
; TODO: we should be able to elide this memcpy call
define void @memcpy_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
; CHECK-LABEL: memcpy_no_preserve:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: csc cra, 8(csp)
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca4, %captab_pcrel_hi(memcpy)
; CHECK-NEXT: clc ca4, %pcrel_lo(.LBB4_1)(ca4)
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: mv a3, zero
; CHECK-NEXT: cjalr ca4
; CHECK-NEXT: clc cra, 8(csp)
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: clw a2, 12(ca1)
; CHECK-NEXT: csw a2, 12(ca0)
; CHECK-NEXT: clw a2, 8(ca1)
; CHECK-NEXT: csw a2, 8(ca0)
; CHECK-NEXT: clw a2, 4(ca1)
; CHECK-NEXT: csw a2, 4(ca0)
; CHECK-NEXT: clw a1, 0(ca1)
; CHECK-NEXT: csw a1, 0(ca0)
; CHECK-NEXT: cret
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
Expand All @@ -112,17 +108,14 @@ entry:
define void @memmove_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
; CHECK-LABEL: memmove_no_preserve:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: csc cra, 8(csp)
; CHECK-NEXT: .LBB5_1: # %entry
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca4, %captab_pcrel_hi(memmove)
; CHECK-NEXT: clc ca4, %pcrel_lo(.LBB5_1)(ca4)
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: mv a3, zero
; CHECK-NEXT: cjalr ca4
; CHECK-NEXT: clc cra, 8(csp)
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: clw a2, 12(ca1)
; CHECK-NEXT: clw a3, 8(ca1)
; CHECK-NEXT: clw a4, 4(ca1)
; CHECK-NEXT: clw a1, 0(ca1)
; CHECK-NEXT: csw a2, 12(ca0)
; CHECK-NEXT: csw a3, 8(ca0)
; CHECK-NEXT: csw a4, 4(ca0)
; CHECK-NEXT: csw a1, 0(ca0)
; CHECK-NEXT: cret
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
Expand Down
46 changes: 26 additions & 20 deletions llvm/test/CodeGen/CHERI-Generic/RISCV32/strcpy-to-memcpy-no-tags.ll
Expand Up @@ -20,16 +20,19 @@ declare i8 addrspace(200)* @stpncpy(i8 addrspace(200)*, i8 addrspace(200)*, i64)
define void @test_strcpy_to_memcpy(i8 addrspace(200)* align 4 %dst) addrspace(200) nounwind {
; CHECK-ASM-LABEL: test_strcpy_to_memcpy:
; CHECK-ASM: # %bb.0: # %entry
; CHECK-ASM-NEXT: cincoffset csp, csp, -16
; CHECK-ASM-NEXT: csc cra, 8(csp) # 8-byte Folded Spill
; CHECK-ASM-NEXT: .LBB0_1: # %entry
; CHECK-ASM-NEXT: # Label of block must be emitted
; CHECK-ASM-NEXT: auipcc ca1, %captab_pcrel_hi(.Lstr)
; CHECK-ASM-NEXT: clc ca1, %pcrel_lo(.LBB0_1)(ca1)
; CHECK-ASM-NEXT: addi a2, zero, 17
; CHECK-ASM-NEXT: ccall memcpy
; CHECK-ASM-NEXT: clc cra, 8(csp) # 8-byte Folded Reload
; CHECK-ASM-NEXT: cincoffset csp, csp, 16
; CHECK-ASM-NEXT: csb zero, 16(ca0)
; CHECK-ASM-NEXT: lui a1, 472870
; CHECK-ASM-NEXT: addi a1, a1, 360
; CHECK-ASM-NEXT: csw a1, 12(ca0)
; CHECK-ASM-NEXT: lui a1, 406019
; CHECK-ASM-NEXT: addi a1, a1, 1585
; CHECK-ASM-NEXT: csw a1, 8(ca0)
; CHECK-ASM-NEXT: lui a1, 133015
; CHECK-ASM-NEXT: addi a1, a1, -908
; CHECK-ASM-NEXT: csw a1, 4(ca0)
; CHECK-ASM-NEXT: lui a1, 407064
; CHECK-ASM-NEXT: addi a1, a1, -1947
; CHECK-ASM-NEXT: csw a1, 0(ca0)
; CHECK-ASM-NEXT: cret
; CHECK-IR-LABEL: define {{[^@]+}}@test_strcpy_to_memcpy
; CHECK-IR-SAME: (i8 addrspace(200)* align 4 [[DST:%.*]]) addrspace(200) #[[ATTR0:[0-9]+]] {
Expand All @@ -45,16 +48,19 @@ entry:
define void @test_stpcpy_to_memcpy(i8 addrspace(200)* align 4 %dst) addrspace(200) nounwind {
; CHECK-ASM-LABEL: test_stpcpy_to_memcpy:
; CHECK-ASM: # %bb.0: # %entry
; CHECK-ASM-NEXT: cincoffset csp, csp, -16
; CHECK-ASM-NEXT: csc cra, 8(csp) # 8-byte Folded Spill
; CHECK-ASM-NEXT: .LBB1_1: # %entry
; CHECK-ASM-NEXT: # Label of block must be emitted
; CHECK-ASM-NEXT: auipcc ca1, %captab_pcrel_hi(.Lstr)
; CHECK-ASM-NEXT: clc ca1, %pcrel_lo(.LBB1_1)(ca1)
; CHECK-ASM-NEXT: addi a2, zero, 17
; CHECK-ASM-NEXT: ccall memcpy
; CHECK-ASM-NEXT: clc cra, 8(csp) # 8-byte Folded Reload
; CHECK-ASM-NEXT: cincoffset csp, csp, 16
; CHECK-ASM-NEXT: csb zero, 16(ca0)
; CHECK-ASM-NEXT: lui a1, 472870
; CHECK-ASM-NEXT: addi a1, a1, 360
; CHECK-ASM-NEXT: csw a1, 12(ca0)
; CHECK-ASM-NEXT: lui a1, 406019
; CHECK-ASM-NEXT: addi a1, a1, 1585
; CHECK-ASM-NEXT: csw a1, 8(ca0)
; CHECK-ASM-NEXT: lui a1, 133015
; CHECK-ASM-NEXT: addi a1, a1, -908
; CHECK-ASM-NEXT: csw a1, 4(ca0)
; CHECK-ASM-NEXT: lui a1, 407064
; CHECK-ASM-NEXT: addi a1, a1, -1947
; CHECK-ASM-NEXT: csw a1, 0(ca0)
; CHECK-ASM-NEXT: cret
; CHECK-IR-LABEL: define {{[^@]+}}@test_stpcpy_to_memcpy
; CHECK-IR-SAME: (i8 addrspace(200)* align 4 [[DST:%.*]]) addrspace(200) #[[ATTR0]] {
Expand Down
Expand Up @@ -82,20 +82,13 @@ entry:
}

; We should be able to inline the memcpy/memmove call if the intrinsic has no_preserve_cheri_tags:
; TODO: we should be able to elide this memcpy call
define void @memcpy_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
; CHECK-LABEL: memcpy_no_preserve:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: csc cra, 0(csp)
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca3, %captab_pcrel_hi(memcpy)
; CHECK-NEXT: clc ca3, %pcrel_lo(.LBB4_1)(ca3)
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: cjalr ca3
; CHECK-NEXT: clc cra, 0(csp)
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: cld a2, 8(ca1)
; CHECK-NEXT: csd a2, 8(ca0)
; CHECK-NEXT: cld a1, 0(ca1)
; CHECK-NEXT: csd a1, 0(ca0)
; CHECK-NEXT: cret
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
Expand All @@ -107,16 +100,10 @@ entry:
define void @memmove_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
; CHECK-LABEL: memmove_no_preserve:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: csc cra, 0(csp)
; CHECK-NEXT: .LBB5_1: # %entry
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca3, %captab_pcrel_hi(memmove)
; CHECK-NEXT: clc ca3, %pcrel_lo(.LBB5_1)(ca3)
; CHECK-NEXT: addi a2, zero, 16
; CHECK-NEXT: cjalr ca3
; CHECK-NEXT: clc cra, 0(csp)
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: cld a2, 8(ca1)
; CHECK-NEXT: cld a1, 0(ca1)
; CHECK-NEXT: csd a2, 8(ca0)
; CHECK-NEXT: csd a1, 0(ca0)
; CHECK-NEXT: cret
entry:
%a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
Expand Down

0 comments on commit f8a0d53

Please sign in to comment.