| @@ -0,0 +1,291 @@ | ||
| /* Copyright (c) 2012-2013, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ | ||
|
|
||
| /* | ||
| * Copyright (c) 2015 ARM Ltd | ||
| * All rights reserved. | ||
| * | ||
| * Redistribution and use in source and binary forms, with or without | ||
| * modification, are permitted provided that the following conditions | ||
| * are met: | ||
| * 1. Redistributions of source code must retain the above copyright | ||
| * notice, this list of conditions and the following disclaimer. | ||
| * 2. Redistributions in binary form must reproduce the above copyright | ||
| * notice, this list of conditions and the following disclaimer in the | ||
| * documentation and/or other materials provided with the distribution. | ||
| * 3. The name of the company may not be used to endorse or promote | ||
| * products derived from this software without specific prior written | ||
| * permission. | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | ||
| * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64, unaligned accesses. | ||
| * | ||
| */ | ||
| #include <private/bionic_asm.h> | ||
|
|
||
| #define dstin x0 | ||
| #define src x1 | ||
| #define count x2 | ||
| #define dst x3 | ||
| #define srcend x4 | ||
| #define dstend x5 | ||
| #define A_l x6 | ||
| #define A_lw w6 | ||
| #define A_h x7 | ||
| #define A_hw w7 | ||
| #define B_l x8 | ||
| #define B_h x9 | ||
| #define C_l x10 | ||
| #define C_h x11 | ||
| #define D_l x12 | ||
| #define D_h x13 | ||
| #define E_l src | ||
| #define E_h count | ||
| #define F_l srcend | ||
| #define F_h dst | ||
| #define tmp1 x9 | ||
|
|
||
| #define L(l) .L ## l | ||
|
|
||
| .macro def_fn f p2align=0 | ||
| .text | ||
| .p2align \p2align | ||
| .global \f | ||
| .type \f, %function | ||
| \f: | ||
| .endm | ||
| ENTRY(__memcpy_chk) | ||
| cmp x2, x3 | ||
| b.hi __memcpy_chk_fail | ||
|
|
||
| // Fall through to memcpy... | ||
| END(__memcpy_chk) | ||
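A minimal C model of the FORTIFY contract checked here, assuming the caller passes the destination's compile-time size in x3 (function name ours; the real failure path is `__memcpy_chk_fail` below):

```c
#include <stdlib.h>
#include <string.h>

/* dst_len comes from __builtin_object_size(dst, 0) at the call site. */
static void *memcpy_chk_model(void *dst, const void *src,
                              size_t count, size_t dst_len) {
    if (count > dst_len)   /* the cmp x2, x3 / b.hi pair above */
        abort();           /* real code: __fortify_chk_fail(error_string) */
    return memcpy(dst, src, count);
}
```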
| ENTRY(memcpy) | ||
| /* Copies are split into four cases: small copies of up to 16 bytes; | ||
| medium copies of 17..96 bytes, which are fully unrolled; large copies | ||
| of more than 96 bytes, which align the destination and use an unrolled | ||
| loop processing 64 bytes per iteration; and huge copies of 64KiB or | ||
| more, which additionally prefetch ahead of the loop. | ||
| Small and medium copies read all data before writing, allowing any | ||
| kind of overlap, and memmove tailcalls memcpy for these cases as | ||
| well as non-overlapping copies. | ||
| */ | ||
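As a reading aid, the three compares below select one of those four paths; a sketch of the selection in C (enum and function names are ours):

```c
#include <stddef.h>

enum path { SMALL, MEDIUM, LARGE, HUGE_COPY };

/* Mirrors the cmp/b.ge, cmp/b.hi, cmp/b.hs dispatch below. */
static enum path classify(size_t count) {
    if (count >= 65536) return HUGE_COPY; /* L(copy_huge): adds prefetch */
    if (count > 96)     return LARGE;     /* L(copy_long)                */
    if (count >= 16)    return MEDIUM;    /* L(copy_medium)              */
    return SMALL;                         /* falls into L(copy16)        */
}
```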
|
|
||
| //def_fn memcpy p2align=6 | ||
| add srcend, src, count | ||
| add dstend, dstin, count | ||
| cmp count, 65536 | ||
| b.ge L(copy_huge) | ||
| cmp count, 96 | ||
| b.hi L(copy_long) | ||
| cmp count, 16 | ||
| b.hs L(copy_medium) | ||
|
|
||
| /* Small copies: 0..16 bytes. */ | ||
| L(copy16): | ||
| tbz count, 3, 1f | ||
| ldr A_l, [src] | ||
| ldr A_h, [srcend, -8] | ||
| str A_l, [dstin] | ||
| str A_h, [dstend, -8] | ||
| ret | ||
| 1: | ||
| tbz count, 2, 1f | ||
| ldr A_lw, [src] | ||
| ldr A_hw, [srcend, -4] | ||
| str A_lw, [dstin] | ||
| str A_hw, [dstend, -4] | ||
| ret | ||
| .p2align 4 | ||
| 1: | ||
| cbz count, 2f | ||
| ldrb A_lw, [src] | ||
| tbz count, 1, 1f | ||
| ldrh A_hw, [srcend, -2] | ||
| strh A_hw, [dstend, -2] | ||
| 1: strb A_lw, [dstin] | ||
| 2: ret | ||
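The 8..16 byte path above relies on overlapping accesses from both ends of the buffer; a hedged C equivalent (helper name ours):

```c
#include <stdint.h>
#include <string.h>

/* For 8 <= count <= 16 the two 8-byte windows may overlap in the
   middle; since all loads happen before any store, that is harmless. */
static void copy_8_to_16(unsigned char *dst, const unsigned char *src,
                         size_t count) {
    uint64_t a, b;
    memcpy(&a, src, 8);               /* ldr A_l, [src]        */
    memcpy(&b, src + count - 8, 8);   /* ldr A_h, [srcend, -8] */
    memcpy(dst, &a, 8);               /* str A_l, [dstin]      */
    memcpy(dst + count - 8, &b, 8);   /* str A_h, [dstend, -8] */
}
```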
|
|
||
| .p2align 4 | ||
| /* Medium copies: 17..96 bytes. */ | ||
| L(copy_medium): | ||
| ldp A_l, A_h, [src] | ||
| tbnz count, 6, L(copy96) | ||
| ldp D_l, D_h, [srcend, -16] | ||
| tbz count, 5, 1f | ||
| ldp B_l, B_h, [src, 16] | ||
| ldp C_l, C_h, [srcend, -32] | ||
| stp B_l, B_h, [dstin, 16] | ||
| stp C_l, C_h, [dstend, -32] | ||
| 1: | ||
| stp A_l, A_h, [dstin] | ||
| stp D_l, D_h, [dstend, -16] | ||
| ret | ||
|
|
||
| .p2align 4 | ||
| /* Copy 64..96 bytes. Copy 64 bytes from the start and | ||
| 32 bytes from the end. */ | ||
| L(copy96): | ||
| ldp B_l, B_h, [src, 16] | ||
| ldp C_l, C_h, [src, 32] | ||
| ldp D_l, D_h, [src, 48] | ||
| ldp E_l, E_h, [srcend, -32] | ||
| ldp F_l, F_h, [srcend, -16] | ||
| stp A_l, A_h, [dstin] | ||
| stp B_l, B_h, [dstin, 16] | ||
| stp C_l, C_h, [dstin, 32] | ||
| stp D_l, D_h, [dstin, 48] | ||
| stp E_l, E_h, [dstend, -32] | ||
| stp F_l, F_h, [dstend, -16] | ||
| ret | ||
|
|
||
| /* Align DST to 16 byte alignment so that we don't cross cache line | ||
| boundaries on both loads and stores. There are at least 96 bytes | ||
| to copy, so copy 16 bytes unaligned and then align. The loop | ||
| copies 64 bytes per iteration and prefetches one iteration ahead. */ | ||
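A sketch of that setup in C, under the same assumptions (at least 96 bytes remain; helper name ours):

```c
#include <stdint.h>
#include <string.h>

/* Setup sketch for L(copy_long): one unaligned 16-byte store covers the
   head, then dst is rounded down so the loop's stores are 16-aligned. */
static void align_setup(unsigned char **dst, const unsigned char **src,
                        size_t *count) {
    size_t skew = (uintptr_t)*dst & 15;   /* tmp1 = dstin & 15 */
    memcpy(*dst, *src, 16);               /* head bytes, unaligned */
    *dst -= skew;                         /* bic dst, dstin, 15 */
    *src -= skew;                         /* keep src paired with dst */
    *count += skew;                       /* "count is now 16 too large" */
}
```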
|
|
||
| .p2align 4 | ||
| L(copy_long): | ||
| and tmp1, dstin, 15 | ||
| bic dst, dstin, 15 | ||
| ldp D_l, D_h, [src] | ||
| sub src, src, tmp1 | ||
| add count, count, tmp1 /* Count is now 16 too large. */ | ||
| ldp A_l, A_h, [src, 16] | ||
| stp D_l, D_h, [dstin] | ||
| ldp B_l, B_h, [src, 32] | ||
| ldp C_l, C_h, [src, 48] | ||
| ldp D_l, D_h, [src, 64]! | ||
| subs count, count, 128 + 16 /* Test and readjust count. */ | ||
| b.ls 2f | ||
| 1: | ||
| stp A_l, A_h, [dst, 16] | ||
| ldp A_l, A_h, [src, 16] | ||
| stp B_l, B_h, [dst, 32] | ||
| ldp B_l, B_h, [src, 32] | ||
| stp C_l, C_h, [dst, 48] | ||
| ldp C_l, C_h, [src, 48] | ||
| stp D_l, D_h, [dst, 64]! | ||
| ldp D_l, D_h, [src, 64]! | ||
| subs count, count, 64 | ||
| b.hi 1b | ||
|
|
||
| /* Write the last full set of 64 bytes. The remainder is at most 64 | ||
| bytes, so it is safe to always copy 64 bytes from the end even if | ||
| there is just 1 byte left. */ | ||
| 2: | ||
| ldp E_l, E_h, [srcend, -64] | ||
| stp A_l, A_h, [dst, 16] | ||
| ldp A_l, A_h, [srcend, -48] | ||
| stp B_l, B_h, [dst, 32] | ||
| ldp B_l, B_h, [srcend, -32] | ||
| stp C_l, C_h, [dst, 48] | ||
| ldp C_l, C_h, [srcend, -16] | ||
| stp D_l, D_h, [dst, 64] | ||
| stp E_l, E_h, [dstend, -64] | ||
| stp A_l, A_h, [dstend, -48] | ||
| stp B_l, B_h, [dstend, -32] | ||
| stp C_l, C_h, [dstend, -16] | ||
| ret | ||
|
|
||
| .p2align 4 | ||
| L(copy_huge): | ||
| and tmp1, dstin, 15 | ||
| bic dst, dstin, 15 | ||
| ldp D_l, D_h, [src] | ||
| sub src, src, tmp1 | ||
| add count, count, tmp1 /* Count is now 16 too large. */ | ||
| ldp A_l, A_h, [src, 16] | ||
| stp D_l, D_h, [dstin] | ||
| ldp B_l, B_h, [src, 32] | ||
| ldp C_l, C_h, [src, 48] | ||
| ldp D_l, D_h, [src, 64]! | ||
| subs count, count, 128 + 16 /* Test and readjust count. */ | ||
| b.ls 2f | ||
| 1: | ||
| prfm pldl1keep, [src, #(64*48)] | ||
| stp A_l, A_h, [dst, 16] | ||
| ldp A_l, A_h, [src, 16] | ||
| stp B_l, B_h, [dst, 32] | ||
| ldp B_l, B_h, [src, 32] | ||
| stp C_l, C_h, [dst, 48] | ||
| ldp C_l, C_h, [src, 48] | ||
| stp D_l, D_h, [dst, 64]! | ||
| ldp D_l, D_h, [src, 64]! | ||
| subs count, count, 64 | ||
| b.hi 1b | ||
|
|
||
| /* Write the last full set of 64 bytes. The remainder is at most 64 | ||
| bytes, so it is safe to always copy 64 bytes from the end even if | ||
| there is just 1 byte left. */ | ||
| 2: | ||
| ldp E_l, E_h, [srcend, -64] | ||
| stp A_l, A_h, [dst, 16] | ||
| ldp A_l, A_h, [srcend, -48] | ||
| stp B_l, B_h, [dst, 32] | ||
| ldp B_l, B_h, [srcend, -32] | ||
| stp C_l, C_h, [dst, 48] | ||
| ldp C_l, C_h, [srcend, -16] | ||
| stp D_l, D_h, [dst, 64] | ||
| stp E_l, E_h, [dstend, -64] | ||
| stp A_l, A_h, [dstend, -48] | ||
| stp B_l, B_h, [dstend, -32] | ||
| stp C_l, C_h, [dstend, -16] | ||
| ret | ||
| .size memcpy, . - memcpy | ||
|
|
||
| END(memcpy) | ||
| ENTRY_PRIVATE(__memcpy_chk_fail) | ||
| // Preserve for accurate backtrace. | ||
| stp x29, x30, [sp, -16]! | ||
| .cfi_def_cfa_offset 16 | ||
| .cfi_rel_offset x29, 0 | ||
| .cfi_rel_offset x30, 8 | ||
|
|
||
| adrp x0, error_string | ||
| add x0, x0, :lo12:error_string | ||
| //ldr x1, error_code | ||
| bl __fortify_chk_fail | ||
| //error_code: | ||
| //.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW | ||
| END(__memcpy_chk_fail) | ||
|
|
||
| .data | ||
| .align 2 | ||
| error_string: | ||
| .string "memcpy: prevented write past end of buffer" |
| @@ -0,0 +1,179 @@ | ||
| /* Copyright (c) 2012, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| * Unaligned accesses | ||
| * | ||
| */ | ||
|
|
||
| #define dstin x0 | ||
| #define src x1 | ||
| #define count x2 | ||
| #define tmp1 x3 | ||
| #define tmp1w w3 | ||
| #define tmp2 x4 | ||
| #define tmp2w w4 | ||
| #define tmp3 x5 | ||
| #define tmp3w w5 | ||
| #define dst x6 | ||
|
|
||
| #define A_l x7 | ||
| #define A_h x8 | ||
| #define B_l x9 | ||
| #define B_h x10 | ||
| #define C_l x11 | ||
| #define C_h x12 | ||
| #define D_l x13 | ||
| #define D_h x14 | ||
|
|
||
| mov dst, dstin | ||
| cmp count, #64 | ||
| b.ge .Lcpy_not_short | ||
| cmp count, #15 | ||
| b.le .Ltail15tiny | ||
|
|
||
| /* Deal with small copies quickly by dropping straight into the | ||
| * exit block. */ | ||
| .Ltail63: | ||
| /* Copy up to 48 bytes of data. At this point we only need the | ||
| * bottom 6 bits of count to be accurate. */ | ||
| ands tmp1, count, #0x30 | ||
| b.eq .Ltail15 | ||
| add dst, dst, tmp1 | ||
| add src, src, tmp1 | ||
| cmp tmp1w, #0x20 | ||
| b.eq 1f | ||
| b.lt 2f | ||
| ldp A_l, A_h, [src, #-48] | ||
| stp A_l, A_h, [dst, #-48] | ||
| 1: | ||
| ldp A_l, A_h, [src, #-32] | ||
| stp A_l, A_h, [dst, #-32] | ||
| 2: | ||
| ldp A_l, A_h, [src, #-16] | ||
| stp A_l, A_h, [dst, #-16] | ||
|
|
||
| .Ltail15: | ||
| ands count, count, #15 | ||
| b.eq 1f | ||
| add src, src, count | ||
| ldp A_l, A_h, [src, #-16] | ||
| add dst, dst, count | ||
| stp A_l, A_h, [dst, #-16] | ||
| 1: | ||
| ret | ||
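A C model of the .Ltail63/.Ltail15 pair (name ours): a fallthrough switch for the 16-byte chunks, then one final, possibly overlapping, 16-byte copy. Like the assembly, it assumes at least 16 bytes have already been handled before this path is reached.

```c
#include <string.h>

static void tail63(unsigned char *dst, const unsigned char *src, size_t count) {
    size_t chunk = count & 0x30;                   /* ands tmp1, count, #0x30 */
    dst += chunk; src += chunk;
    switch (chunk) {
    case 0x30: memcpy(dst - 48, src - 48, 16);     /* fall through */
    case 0x20: memcpy(dst - 32, src - 32, 16);     /* fall through */
    case 0x10: memcpy(dst - 16, src - 16, 16);
    }
    count &= 15;                                   /* .Ltail15 */
    if (count != 0)   /* overlaps earlier stores, which is safe here */
        memcpy(dst + count - 16, src + count - 16, 16);
}
```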
|
|
||
| .Ltail15tiny: | ||
| /* Copy up to 15 bytes of data. Does not assume additional data | ||
| being copied. */ | ||
| tbz count, #3, 1f | ||
| ldr tmp1, [src], #8 | ||
| str tmp1, [dst], #8 | ||
| 1: | ||
| tbz count, #2, 1f | ||
| ldr tmp1w, [src], #4 | ||
| str tmp1w, [dst], #4 | ||
| 1: | ||
| tbz count, #1, 1f | ||
| ldrh tmp1w, [src], #2 | ||
| strh tmp1w, [dst], #2 | ||
| 1: | ||
| tbz count, #0, 1f | ||
| ldrb tmp1w, [src] | ||
| strb tmp1w, [dst] | ||
| 1: | ||
| ret | ||
|
|
||
| .Lcpy_not_short: | ||
| /* We don't much care about the alignment of DST, but we want SRC | ||
| * to be 128-bit (16 byte) aligned so that we don't cross cache line | ||
| * boundaries on both loads and stores. */ | ||
| neg tmp2, src | ||
| ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ | ||
| b.eq 2f | ||
| sub count, count, tmp2 | ||
| /* Copy more data than needed; it's faster than jumping | ||
| * around copying sub-Quadword quantities. We know that | ||
| * it can't overrun. */ | ||
| ldp A_l, A_h, [src] | ||
| add src, src, tmp2 | ||
| stp A_l, A_h, [dst] | ||
| add dst, dst, tmp2 | ||
| /* There may be less than 63 bytes to go now. */ | ||
| cmp count, #63 | ||
| b.le .Ltail63 | ||
| 2: | ||
| subs count, count, #128 | ||
| b.ge .Lcpy_body_large | ||
| /* Less than 128 bytes to copy, so handle 64 here and then jump | ||
| * to the tail. */ | ||
| ldp A_l, A_h, [src] | ||
| ldp B_l, B_h, [src, #16] | ||
| ldp C_l, C_h, [src, #32] | ||
| ldp D_l, D_h, [src, #48] | ||
| stp A_l, A_h, [dst] | ||
| stp B_l, B_h, [dst, #16] | ||
| stp C_l, C_h, [dst, #32] | ||
| stp D_l, D_h, [dst, #48] | ||
| tst count, #0x3f | ||
| add src, src, #64 | ||
| add dst, dst, #64 | ||
| b.ne .Ltail63 | ||
| ret | ||
|
|
||
| /* Critical loop. Start at a new cache line boundary. Assuming | ||
| * 64 bytes per line this ensures the entire loop is in one line. */ | ||
| .p2align 6 | ||
| .Lcpy_body_large: | ||
| /* There are at least 128 bytes to copy. */ | ||
| ldp A_l, A_h, [src, #0] | ||
| sub dst, dst, #16 /* Pre-bias. */ | ||
| ldp B_l, B_h, [src, #16] | ||
| ldp C_l, C_h, [src, #32] | ||
| ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ | ||
| 1: | ||
| stp A_l, A_h, [dst, #16] | ||
| ldp A_l, A_h, [src, #16] | ||
| stp B_l, B_h, [dst, #32] | ||
| ldp B_l, B_h, [src, #32] | ||
| stp C_l, C_h, [dst, #48] | ||
| ldp C_l, C_h, [src, #48] | ||
| stp D_l, D_h, [dst, #64]! | ||
| ldp D_l, D_h, [src, #64]! | ||
| subs count, count, #64 | ||
| b.ge 1b | ||
| stp A_l, A_h, [dst, #16] | ||
| stp B_l, B_h, [dst, #32] | ||
| stp C_l, C_h, [dst, #48] | ||
| stp D_l, D_h, [dst, #64] | ||
| add src, src, #16 | ||
| add dst, dst, #64 + 16 | ||
| tst count, #0x3f | ||
| b.ne .Ltail63 | ||
| ret |
| @@ -0,0 +1,341 @@ | ||
| /* Copyright (c) 2014, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| * Unaligned accesses | ||
| * wchar_t is 4 bytes | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| /* Parameters and result. */ | ||
| #ifdef BCOPY | ||
| #define origdstin x1 | ||
| #define origsrc x0 | ||
| #endif | ||
| #define dstin x0 | ||
| #define src x1 | ||
| #define count x2 | ||
| #define tmp1 x3 | ||
| #define tmp1w w3 | ||
| #define tmp2 x4 | ||
| #define tmp2w w4 | ||
| #define tmp3 x5 | ||
| #define tmp3w w5 | ||
| #define dst x6 | ||
|
|
||
| #define A_l x7 | ||
| #define A_h x8 | ||
| #define B_l x9 | ||
| #define B_h x10 | ||
| #define C_l x11 | ||
| #define C_h x12 | ||
| #define D_l x13 | ||
| #define D_h x14 | ||
|
|
||
| #ifdef BCOPY | ||
| ENTRY(bcopy) | ||
| /* bcopy takes (src, dst, n): swap the first two arguments so the | ||
| code below, and any tail call to memcpy, sees (dst, src, n). */ | ||
| mov tmp1, origsrc | ||
| mov origsrc, origdstin | ||
| mov origdstin, tmp1 | ||
| #elif defined(WMEMMOVE) | ||
| ENTRY(wmemmove) | ||
| lsl count, count, #2 | ||
| #else | ||
| ENTRY(memmove) | ||
| #endif | ||
| cmp dstin, src | ||
| b.lo .Ldownwards | ||
| add tmp1, src, count | ||
| cmp dstin, tmp1 | ||
| b.hs memcpy /* No overlap. */ | ||
|
|
||
| /* Upwards move with potential overlap. | ||
| * Need to move from the tail backwards. SRC and DST point one | ||
| * byte beyond the remaining data to move. */ | ||
| add dst, dstin, count | ||
| add src, src, count | ||
| cmp count, #64 | ||
| b.ge .Lmov_not_short_up | ||
|
|
||
| /* Deal with small moves quickly by dropping straight into the | ||
| * exit block. */ | ||
| .Ltail63up: | ||
| /* Move up to 48 bytes of data. At this point we only need the | ||
| * bottom 6 bits of count to be accurate. */ | ||
| ands tmp1, count, #0x30 | ||
| b.eq .Ltail15up | ||
| sub dst, dst, tmp1 | ||
| sub src, src, tmp1 | ||
| cmp tmp1w, #0x20 | ||
| b.eq 1f | ||
| b.lt 2f | ||
| ldp A_l, A_h, [src, #32] | ||
| stp A_l, A_h, [dst, #32] | ||
| 1: | ||
| ldp A_l, A_h, [src, #16] | ||
| stp A_l, A_h, [dst, #16] | ||
| 2: | ||
| ldp A_l, A_h, [src] | ||
| stp A_l, A_h, [dst] | ||
| .Ltail15up: | ||
| /* Move up to 15 bytes of data. Does not assume additional data | ||
| * being moved. */ | ||
| tbz count, #3, 1f | ||
| ldr tmp1, [src, #-8]! | ||
| str tmp1, [dst, #-8]! | ||
| 1: | ||
| tbz count, #2, 1f | ||
| ldr tmp1w, [src, #-4]! | ||
| str tmp1w, [dst, #-4]! | ||
| 1: | ||
| tbz count, #1, 1f | ||
| ldrh tmp1w, [src, #-2]! | ||
| strh tmp1w, [dst, #-2]! | ||
| 1: | ||
| tbz count, #0, 1f | ||
| ldrb tmp1w, [src, #-1] | ||
| strb tmp1w, [dst, #-1] | ||
| 1: | ||
| ret | ||
|
|
||
| .Lmov_not_short_up: | ||
| /* We don't much care about the alignment of DST, but we want SRC | ||
| * to be 128-bit (16 byte) aligned so that we don't cross cache line | ||
| * boundaries on both loads and stores. */ | ||
| ands tmp2, src, #15 /* Bytes to reach alignment. */ | ||
| b.eq 2f | ||
| sub count, count, tmp2 | ||
| /* Move enough data to reach alignment; unlike memcpy, we have to | ||
| * be aware of the overlap, which means we can't move data twice. */ | ||
| tbz tmp2, #3, 1f | ||
| ldr tmp1, [src, #-8]! | ||
| str tmp1, [dst, #-8]! | ||
| 1: | ||
| tbz tmp2, #2, 1f | ||
| ldr tmp1w, [src, #-4]! | ||
| str tmp1w, [dst, #-4]! | ||
| 1: | ||
| tbz tmp2, #1, 1f | ||
| ldrh tmp1w, [src, #-2]! | ||
| strh tmp1w, [dst, #-2]! | ||
| 1: | ||
| tbz tmp2, #0, 1f | ||
| ldrb tmp1w, [src, #-1]! | ||
| strb tmp1w, [dst, #-1]! | ||
| 1: | ||
|
|
||
| /* There may be less than 63 bytes to go now. */ | ||
| cmp count, #63 | ||
| b.le .Ltail63up | ||
| 2: | ||
| subs count, count, #128 | ||
| b.ge .Lmov_body_large_up | ||
| /* Less than 128 bytes to move, so handle 64 here and then jump | ||
| * to the tail. */ | ||
| ldp A_l, A_h, [src, #-64]! | ||
| ldp B_l, B_h, [src, #16] | ||
| ldp C_l, C_h, [src, #32] | ||
| ldp D_l, D_h, [src, #48] | ||
| stp A_l, A_h, [dst, #-64]! | ||
| stp B_l, B_h, [dst, #16] | ||
| stp C_l, C_h, [dst, #32] | ||
| stp D_l, D_h, [dst, #48] | ||
| tst count, #0x3f | ||
| b.ne .Ltail63up | ||
| ret | ||
|
|
||
| /* Critical loop. Start at a new Icache line boundary. Assuming | ||
| * 64 bytes per line this ensures the entire loop is in one line. */ | ||
| .p2align 6 | ||
| .Lmov_body_large_up: | ||
| /* There are at least 128 bytes to move. */ | ||
| ldp A_l, A_h, [src, #-16] | ||
| ldp B_l, B_h, [src, #-32] | ||
| ldp C_l, C_h, [src, #-48] | ||
| ldp D_l, D_h, [src, #-64]! | ||
| 1: | ||
| stp A_l, A_h, [dst, #-16] | ||
| ldp A_l, A_h, [src, #-16] | ||
| stp B_l, B_h, [dst, #-32] | ||
| ldp B_l, B_h, [src, #-32] | ||
| stp C_l, C_h, [dst, #-48] | ||
| ldp C_l, C_h, [src, #-48] | ||
| stp D_l, D_h, [dst, #-64]! | ||
| ldp D_l, D_h, [src, #-64]! | ||
| subs count, count, #64 | ||
| b.ge 1b | ||
| stp A_l, A_h, [dst, #-16] | ||
| stp B_l, B_h, [dst, #-32] | ||
| stp C_l, C_h, [dst, #-48] | ||
| stp D_l, D_h, [dst, #-64]! | ||
| tst count, #0x3f | ||
| b.ne .Ltail63up | ||
| ret | ||
|
|
||
|
|
||
| .Ldownwards: | ||
| /* For a downwards move we can safely use memcpy provided that | ||
| * DST is more than 16 bytes away from SRC. */ | ||
| sub tmp1, src, #16 | ||
| cmp dstin, tmp1 | ||
| b.ls memcpy /* May overlap, but not critically. */ | ||
|
|
||
| mov dst, dstin /* Preserve DSTIN for return value. */ | ||
| cmp count, #64 | ||
| b.ge .Lmov_not_short_down | ||
|
|
||
| /* Deal with small moves quickly by dropping straight into the | ||
| * exit block. */ | ||
| .Ltail63down: | ||
| /* Move up to 48 bytes of data. At this point we only need the | ||
| * bottom 6 bits of count to be accurate. */ | ||
| ands tmp1, count, #0x30 | ||
| b.eq .Ltail15down | ||
| add dst, dst, tmp1 | ||
| add src, src, tmp1 | ||
| cmp tmp1w, #0x20 | ||
| b.eq 1f | ||
| b.lt 2f | ||
| ldp A_l, A_h, [src, #-48] | ||
| stp A_l, A_h, [dst, #-48] | ||
| 1: | ||
| ldp A_l, A_h, [src, #-32] | ||
| stp A_l, A_h, [dst, #-32] | ||
| 2: | ||
| ldp A_l, A_h, [src, #-16] | ||
| stp A_l, A_h, [dst, #-16] | ||
| .Ltail15down: | ||
| /* Move up to 15 bytes of data. Does not assume additional data | ||
| being moved. */ | ||
| tbz count, #3, 1f | ||
| ldr tmp1, [src], #8 | ||
| str tmp1, [dst], #8 | ||
| 1: | ||
| tbz count, #2, 1f | ||
| ldr tmp1w, [src], #4 | ||
| str tmp1w, [dst], #4 | ||
| 1: | ||
| tbz count, #1, 1f | ||
| ldrh tmp1w, [src], #2 | ||
| strh tmp1w, [dst], #2 | ||
| 1: | ||
| tbz count, #0, 1f | ||
| ldrb tmp1w, [src] | ||
| strb tmp1w, [dst] | ||
| 1: | ||
| ret | ||
|
|
||
| .Lmov_not_short_down: | ||
| /* We don't much care about the alignment of DST, but we want SRC | ||
| * to be 128-bit (16 byte) aligned so that we don't cross cache line | ||
| * boundaries on both loads and stores. */ | ||
| neg tmp2, src | ||
| ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ | ||
| b.eq 2f | ||
| sub count, count, tmp2 | ||
| /* Move enough data to reach alignment; unlike memcpy, we have to | ||
| * be aware of the overlap, which means we can't move data twice. */ | ||
| tbz tmp2, #3, 1f | ||
| ldr tmp1, [src], #8 | ||
| str tmp1, [dst], #8 | ||
| 1: | ||
| tbz tmp2, #2, 1f | ||
| ldr tmp1w, [src], #4 | ||
| str tmp1w, [dst], #4 | ||
| 1: | ||
| tbz tmp2, #1, 1f | ||
| ldrh tmp1w, [src], #2 | ||
| strh tmp1w, [dst], #2 | ||
| 1: | ||
| tbz tmp2, #0, 1f | ||
| ldrb tmp1w, [src], #1 | ||
| strb tmp1w, [dst], #1 | ||
| 1: | ||
|
|
||
| /* There may be less than 63 bytes to go now. */ | ||
| cmp count, #63 | ||
| b.le .Ltail63down | ||
| 2: | ||
| subs count, count, #128 | ||
| b.ge .Lmov_body_large_down | ||
| /* Less than 128 bytes to move, so handle 64 here and then jump | ||
| * to the tail. */ | ||
| ldp A_l, A_h, [src] | ||
| ldp B_l, B_h, [src, #16] | ||
| ldp C_l, C_h, [src, #32] | ||
| ldp D_l, D_h, [src, #48] | ||
| stp A_l, A_h, [dst] | ||
| stp B_l, B_h, [dst, #16] | ||
| stp C_l, C_h, [dst, #32] | ||
| stp D_l, D_h, [dst, #48] | ||
| tst count, #0x3f | ||
| add src, src, #64 | ||
| add dst, dst, #64 | ||
| b.ne .Ltail63down | ||
| ret | ||
|
|
||
| /* Critical loop. Start at a new cache line boundary. Assuming | ||
| * 64 bytes per line this ensures the entire loop is in one line. */ | ||
| .p2align 6 | ||
| .Lmov_body_large_down: | ||
| /* There are at least 128 bytes to move. */ | ||
| ldp A_l, A_h, [src, #0] | ||
| sub dst, dst, #16 /* Pre-bias. */ | ||
| ldp B_l, B_h, [src, #16] | ||
| ldp C_l, C_h, [src, #32] | ||
| ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ | ||
| 1: | ||
| stp A_l, A_h, [dst, #16] | ||
| ldp A_l, A_h, [src, #16] | ||
| stp B_l, B_h, [dst, #32] | ||
| ldp B_l, B_h, [src, #32] | ||
| stp C_l, C_h, [dst, #48] | ||
| ldp C_l, C_h, [src, #48] | ||
| stp D_l, D_h, [dst, #64]! | ||
| ldp D_l, D_h, [src, #64]! | ||
| subs count, count, #64 | ||
| b.ge 1b | ||
| stp A_l, A_h, [dst, #16] | ||
| stp B_l, B_h, [dst, #32] | ||
| stp C_l, C_h, [dst, #48] | ||
| stp D_l, D_h, [dst, #64] | ||
| add src, src, #16 | ||
| add dst, dst, #64 + 16 | ||
| tst count, #0x3f | ||
| b.ne .Ltail63down | ||
| ret | ||
| #ifdef BCOPY | ||
| END(bcopy) | ||
| #elif defined(WMEMMOVE) | ||
| END(wmemmove) | ||
| #else | ||
| END(memmove) | ||
| #endif |
| @@ -0,0 +1,247 @@ | ||
| /* Copyright (c) 2012, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| * Unaligned accesses | ||
| * | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| /* By default we assume that the DC instruction can be used to zero | ||
| data blocks more efficiently. In some circumstances this might be | ||
| unsafe, for example in an asymmetric multiprocessor environment with | ||
| different DC clear lengths (neither the upper nor lower lengths are | ||
| safe to use). | ||
| If code may be run in a virtualized environment, then define | ||
| MAYBE_VIRT. This will cause the code to cache the system register | ||
| values rather than re-reading them each call. */ | ||
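A hedged C sketch of the probe and the clear loop (names ours; AArch64-only inline asm):

```c
#include <stdint.h>
#include <string.h>

/* Decode dczid_el0: bit 4 (DZP) prohibits DC ZVA; the low 4 bits give
   the block size as 4 << BS bytes (commonly 64). */
static size_t dc_zva_block_size(void) {
    uint64_t dczid;
    __asm__("mrs %0, dczid_el0" : "=r"(dczid));
    if (dczid & (1u << 4)) return 0;
    return (size_t)4 << (dczid & 0xf);
}

/* Zero [p, p+n), assuming p is already block-aligned and line != 0. */
static void zero_by_lines(char *p, size_t n, size_t line) {
    for (; n >= line; p += line, n -= line)
        __asm__ volatile("dc zva, %0" : : "r"(p) : "memory");
    memset(p, 0, n);   /* remainder, handled by .Ltail* in the assembly */
}
```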
|
|
||
| #define dstin x0 | ||
| #ifdef BZERO | ||
| #define count x1 | ||
| #else | ||
| #define count x2 | ||
| #endif | ||
| #define val w1 | ||
| #define tmp1 x3 | ||
| #define tmp1w w3 | ||
| #define tmp2 x4 | ||
| #define tmp2w w4 | ||
| #define zva_len_x x5 | ||
| #define zva_len w5 | ||
| #define zva_bits_x x6 | ||
|
|
||
| #define A_l x7 | ||
| #define A_lw w7 | ||
| #define dst x8 | ||
| #define tmp3w w9 | ||
|
|
||
| #ifdef BZERO | ||
| ENTRY(bzero) | ||
| #else | ||
| ENTRY(memset) | ||
| #endif | ||
|
|
||
| mov dst, dstin /* Preserve return value. */ | ||
| #ifdef BZERO | ||
| b .Lzero_mem | ||
| #endif | ||
| ands A_lw, val, #255 | ||
| b.eq .Lzero_mem | ||
| orr A_lw, A_lw, A_lw, lsl #8 | ||
| orr A_lw, A_lw, A_lw, lsl #16 | ||
| orr A_l, A_l, A_l, lsl #32 | ||
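The three orr steps above splat the fill byte across all eight byte lanes; equivalently in C:

```c
#include <stdint.h>

/* 0xAB -> 0xABABABABABABABAB, matching the orr/lsl ladder above. */
static uint64_t splat8(uint8_t v) {
    return v * 0x0101010101010101ull;
}
```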
| .Ltail_maybe_long: | ||
| cmp count, #64 | ||
| b.ge .Lnot_short | ||
| .Ltail_maybe_tiny: | ||
| cmp count, #15 | ||
| b.le .Ltail15tiny | ||
| .Ltail63: | ||
| ands tmp1, count, #0x30 | ||
| b.eq .Ltail15 | ||
| add dst, dst, tmp1 | ||
| cmp tmp1w, #0x20 | ||
| b.eq 1f | ||
| b.lt 2f | ||
| stp A_l, A_l, [dst, #-48] | ||
| 1: | ||
| stp A_l, A_l, [dst, #-32] | ||
| 2: | ||
| stp A_l, A_l, [dst, #-16] | ||
|
|
||
| .Ltail15: | ||
| and count, count, #15 | ||
| add dst, dst, count | ||
| stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ | ||
| ret | ||
|
|
||
| .Ltail15tiny: | ||
| /* Set up to 15 bytes. Does not assume earlier memory | ||
| being set. */ | ||
| tbz count, #3, 1f | ||
| str A_l, [dst], #8 | ||
| 1: | ||
| tbz count, #2, 1f | ||
| str A_lw, [dst], #4 | ||
| 1: | ||
| tbz count, #1, 1f | ||
| strh A_lw, [dst], #2 | ||
| 1: | ||
| tbz count, #0, 1f | ||
| strb A_lw, [dst] | ||
| 1: | ||
| ret | ||
|
|
||
| /* Critical loop. Start at a new cache line boundary. Assuming | ||
| * 64 bytes per line, this ensures the entire loop is in one line. */ | ||
| .p2align 6 | ||
| .Lnot_short: | ||
| neg tmp2, dst | ||
| ands tmp2, tmp2, #15 | ||
| b.eq 2f | ||
| /* Bring DST to 128-bit (16-byte) alignment. We know that there's | ||
| * more than that to set, so we simply store 16 bytes and advance by | ||
| * the amount required to reach alignment. */ | ||
| sub count, count, tmp2 | ||
| stp A_l, A_l, [dst] | ||
| add dst, dst, tmp2 | ||
| /* There may be less than 63 bytes to go now. */ | ||
| cmp count, #63 | ||
| b.le .Ltail63 | ||
| 2: | ||
| sub dst, dst, #16 /* Pre-bias. */ | ||
| sub count, count, #64 | ||
| 1: | ||
| stp A_l, A_l, [dst, #16] | ||
| stp A_l, A_l, [dst, #32] | ||
| stp A_l, A_l, [dst, #48] | ||
| stp A_l, A_l, [dst, #64]! | ||
| subs count, count, #64 | ||
| b.ge 1b | ||
| tst count, #0x3f | ||
| add dst, dst, #16 | ||
| b.ne .Ltail63 | ||
| ret | ||
|
|
||
| /* For zeroing memory, check to see if we can use the ZVA feature to | ||
| * zero entire 'cache' lines. */ | ||
| .Lzero_mem: | ||
| mov A_l, #0 | ||
| cmp count, #63 | ||
| b.le .Ltail_maybe_tiny | ||
| neg tmp2, dst | ||
| ands tmp2, tmp2, #15 | ||
| b.eq 1f | ||
| sub count, count, tmp2 | ||
| stp A_l, A_l, [dst] | ||
| add dst, dst, tmp2 | ||
| cmp count, #63 | ||
| b.le .Ltail63 | ||
| 1: | ||
| /* For zeroing small amounts of memory, it's not worth setting up | ||
| * the line-clear code. */ | ||
| cmp count, #128 | ||
| b.lt .Lnot_short | ||
| #ifdef MAYBE_VIRT | ||
| /* For efficiency when virtualized, we cache the ZVA capability. */ | ||
| adrp tmp2, .Lcache_clear | ||
| ldr zva_len, [tmp2, #:lo12:.Lcache_clear] | ||
| tbnz zva_len, #31, .Lnot_short | ||
| cbnz zva_len, .Lzero_by_line | ||
| mrs tmp1, dczid_el0 | ||
| tbz tmp1, #4, 1f | ||
| /* ZVA not available. Remember this for next time. */ | ||
| mov zva_len, #~0 | ||
| str zva_len, [tmp2, #:lo12:.Lcache_clear] | ||
| b .Lnot_short | ||
| 1: | ||
| mov tmp3w, #4 | ||
| and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ | ||
| lsl zva_len, tmp3w, zva_len | ||
| str zva_len, [tmp2, #:lo12:.Lcache_clear] | ||
| #else | ||
| mrs tmp1, dczid_el0 | ||
| tbnz tmp1, #4, .Lnot_short | ||
| mov tmp3w, #4 | ||
| and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ | ||
| lsl zva_len, tmp3w, zva_len | ||
| #endif | ||
|
|
||
| .Lzero_by_line: | ||
| /* Compute how far we need to go to become suitably aligned. We're | ||
| * already at quad-word alignment. */ | ||
| cmp count, zva_len_x | ||
| b.lt .Lnot_short /* Not enough to reach alignment. */ | ||
| sub zva_bits_x, zva_len_x, #1 | ||
| neg tmp2, dst | ||
| ands tmp2, tmp2, zva_bits_x | ||
| b.eq 1f /* Already aligned. */ | ||
| /* Not aligned, check that there's enough to zero after alignment. */ | ||
| sub tmp1, count, tmp2 | ||
| cmp tmp1, #64 | ||
| ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ | ||
| b.lt .Lnot_short | ||
| /* We know that there's at least 64 bytes to zero and that it's safe | ||
| * to overrun by 64 bytes. */ | ||
| mov count, tmp1 | ||
| 2: | ||
| stp A_l, A_l, [dst] | ||
| stp A_l, A_l, [dst, #16] | ||
| stp A_l, A_l, [dst, #32] | ||
| subs tmp2, tmp2, #64 | ||
| stp A_l, A_l, [dst, #48] | ||
| add dst, dst, #64 | ||
| b.ge 2b | ||
| /* We've overrun a bit, so adjust dst downwards. */ | ||
| add dst, dst, tmp2 | ||
| 1: | ||
| sub count, count, zva_len_x | ||
| 3: | ||
| dc zva, dst | ||
| add dst, dst, zva_len_x | ||
| subs count, count, zva_len_x | ||
| b.ge 3b | ||
| ands count, count, zva_bits_x | ||
| b.ne .Ltail_maybe_long | ||
| ret | ||
| #ifdef BZERO | ||
| END(bzero) | ||
| #else | ||
| END(memset) | ||
| #endif | ||
|
|
||
| #ifdef MAYBE_VIRT | ||
| .bss | ||
| .p2align 2 | ||
| .Lcache_clear: | ||
| .space 4 | ||
| #endif |
| @@ -0,0 +1,29 @@ | ||
| /* | ||
| * Copyright (C) 2014 The Android Open Source Project | ||
| * All rights reserved. | ||
| * | ||
| * Redistribution and use in source and binary forms, with or without | ||
| * modification, are permitted provided that the following conditions | ||
| * are met: | ||
| * * Redistributions of source code must retain the above copyright | ||
| * notice, this list of conditions and the following disclaimer. | ||
| * * Redistributions in binary form must reproduce the above copyright | ||
| * notice, this list of conditions and the following disclaimer in | ||
| * the documentation and/or other materials provided with the | ||
| * distribution. | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | ||
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | ||
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | ||
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | ||
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| * SUCH DAMAGE. | ||
| */ | ||
| #define STPCPY | ||
| #include "string_copy.S" |
| @@ -0,0 +1,154 @@ | ||
| /* | ||
| strchr - find a character in a string | ||
| Copyright (c) 2014, ARM Limited | ||
| All rights Reserved. | ||
| Copyright (c) 2014, Linaro Ltd. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the company nor the names of its contributors | ||
| may be used to endorse or promote products derived from this | ||
| software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| * NEON available. | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| /* Arguments and results. */ | ||
| #define srcin x0 | ||
| #define chrin w1 | ||
|
|
||
| #define result x0 | ||
|
|
||
| #define src x2 | ||
| #define tmp1 x3 | ||
| #define wtmp2 w4 | ||
| #define tmp3 x5 | ||
|
|
||
| #define vrepchr v0 | ||
| #define vdata1 v1 | ||
| #define vdata2 v2 | ||
| #define vhas_nul1 v3 | ||
| #define vhas_nul2 v4 | ||
| #define vhas_chr1 v5 | ||
| #define vhas_chr2 v6 | ||
| #define vrepmask_0 v7 | ||
| #define vrepmask_c v16 | ||
| #define vend1 v17 | ||
| #define vend2 v18 | ||
|
|
||
| /* Core algorithm. | ||
| For each 32-byte hunk we calculate a 64-bit syndrome value, with | ||
| two bits per byte (LSB is always in bits 0 and 1, for both big | ||
| and little-endian systems). For each tuple, bit 0 is set iff | ||
| the relevant byte matched the requested character; bit 1 is set | ||
| iff the relevant byte matched the NUL end of string (we trigger | ||
| off bit 0 for the special case of looking for NUL). Since the bits | ||
| in the syndrome reflect exactly the order in which things occur | ||
| in the original string, a count_trailing_zeros() operation will | ||
| identify exactly which byte caused the termination, and why. */ | ||
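A scalar C model of that syndrome (name ours; the NEON code computes the same 64-bit value per 32-byte hunk, and the alignment/masking handling is omitted here):

```c
#include <stdint.h>

static const char *strchr_model(const char *s, int c) {
    for (;;) {
        uint64_t syn = 0;
        for (int i = 0; i < 32; i++) {           /* one 32-byte hunk */
            unsigned char b = (unsigned char)s[i];
            /* bit 0: matched c; bit 1: hit the NUL terminator. */
            syn |= (uint64_t)((b == (unsigned char)c) | ((b == 0) << 1))
                   << (2 * i);
            if (b == 0) break;    /* model only: don't scan past NUL */
        }
        if (syn) {
            int tz = __builtin_ctzll(syn);
            /* Even tz: the character matched; odd tz: NUL came first. */
            return (tz & 1) ? 0 : s + tz / 2;
        }
        s += 32;
    }
}
```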
|
|
||
| /* Locals and temporaries. */ | ||
|
|
||
| ENTRY(strchr) | ||
| /* Magic constant 0x40100401 to allow us to identify which lane | ||
| matches the requested byte. Magic constant 0x80200802 used | ||
| similarly for NUL termination. */ | ||
| mov wtmp2, #0x0401 | ||
| movk wtmp2, #0x4010, lsl #16 | ||
| dup vrepchr.16b, chrin | ||
| bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ | ||
| dup vrepmask_c.4s, wtmp2 | ||
| ands tmp1, srcin, #31 | ||
| add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ | ||
| b.eq .Lloop | ||
|
|
||
| /* Input string is not 32-byte aligned. Rather than forcing | ||
| the padding bytes to a safe value, we calculate the syndrome | ||
| for all the bytes, but then mask off those bits of the | ||
| syndrome that are related to the padding. */ | ||
| ld1 {vdata1.16b, vdata2.16b}, [src], #32 | ||
| neg tmp1, tmp1 | ||
| cmeq vhas_nul1.16b, vdata1.16b, #0 | ||
| cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b | ||
| cmeq vhas_nul2.16b, vdata2.16b, #0 | ||
| cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b | ||
| and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b | ||
| and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b | ||
| and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b | ||
| and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b | ||
| orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b | ||
| orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b | ||
| lsl tmp1, tmp1, #1 | ||
| addp vend1.16b, vend1.16b, vend2.16b // 256->128 | ||
| mov tmp3, #~0 | ||
| addp vend1.16b, vend1.16b, vend2.16b // 128->64 | ||
| lsr tmp1, tmp3, tmp1 | ||
|
|
||
| mov tmp3, vend1.d[0] | ||
| bic tmp1, tmp3, tmp1 // Mask padding bits. | ||
| cbnz tmp1, .Ltail | ||
|
|
||
| .Lloop: | ||
| ld1 {vdata1.16b, vdata2.16b}, [src], #32 | ||
| cmeq vhas_nul1.16b, vdata1.16b, #0 | ||
| cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b | ||
| cmeq vhas_nul2.16b, vdata2.16b, #0 | ||
| cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b | ||
| /* Use a fast check for the termination condition. */ | ||
| orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b | ||
| orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b | ||
| orr vend1.16b, vend1.16b, vend2.16b | ||
| addp vend1.2d, vend1.2d, vend1.2d | ||
| mov tmp1, vend1.d[0] | ||
| cbz tmp1, .Lloop | ||
|
|
||
| /* Termination condition found. Now need to establish exactly why | ||
| we terminated. */ | ||
| and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b | ||
| and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b | ||
| and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b | ||
| and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b | ||
| orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b | ||
| orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b | ||
| addp vend1.16b, vend1.16b, vend2.16b // 256->128 | ||
| addp vend1.16b, vend1.16b, vend2.16b // 128->64 | ||
|
|
||
| mov tmp1, vend1.d[0] | ||
| .Ltail: | ||
| /* Count the trailing zeros, by bit reversing... */ | ||
| rbit tmp1, tmp1 | ||
| /* Re-bias source. */ | ||
| sub src, src, #32 | ||
| clz tmp1, tmp1 /* And counting the leading zeros. */ | ||
| /* Tmp1 is even if the target character was found first. Otherwise | ||
| we found the end of the string first and we weren't looking for NUL. */ | ||
| tst tmp1, #1 | ||
| add result, src, tmp1, lsr #1 | ||
| csel result, result, xzr, eq | ||
| ret | ||
| END(strchr) |
| @@ -0,0 +1,162 @@ | ||
| /* Copyright (c) 2012, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| #define REP8_01 0x0101010101010101 | ||
| #define REP8_7f 0x7f7f7f7f7f7f7f7f | ||
| #define REP8_80 0x8080808080808080 | ||
|
|
||
| /* Parameters and result. */ | ||
| #define src1 x0 | ||
| #define src2 x1 | ||
| #define result x0 | ||
|
|
||
| /* Internal variables. */ | ||
| #define data1 x2 | ||
| #define data1w w2 | ||
| #define data2 x3 | ||
| #define data2w w3 | ||
| #define has_nul x4 | ||
| #define diff x5 | ||
| #define syndrome x6 | ||
| #define tmp1 x7 | ||
| #define tmp2 x8 | ||
| #define tmp3 x9 | ||
| #define zeroones x10 | ||
| #define pos x11 | ||
|
|
||
| /* Start of performance-critical section -- one 64B cache line. */ | ||
| ENTRY(strcmp) | ||
| eor tmp1, src1, src2 | ||
| mov zeroones, #REP8_01 | ||
| tst tmp1, #7 | ||
| b.ne .Lmisaligned8 | ||
| ands tmp1, src1, #7 | ||
| b.ne .Lmutual_align | ||
| /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 | ||
| (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and | ||
| can be done in parallel across the entire word. */ | ||
| .Lloop_aligned: | ||
| ldr data1, [src1], #8 | ||
| ldr data2, [src2], #8 | ||
| .Lstart_realigned: | ||
| sub tmp1, data1, zeroones | ||
| orr tmp2, data1, #REP8_7f | ||
| eor diff, data1, data2 /* Non-zero if differences found. */ | ||
| bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ | ||
| orr syndrome, diff, has_nul | ||
| cbz syndrome, .Lloop_aligned | ||
| /* End of performance-critical section -- one 64B cache line. */ | ||
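The sub/orr/bic trio is the classic word-at-a-time zero-byte test; in C:

```c
#include <stdint.h>

#define REP8_01 0x0101010101010101ull
#define REP8_7f 0x7f7f7f7f7f7f7f7full

/* Non-zero iff some byte of x is zero: (x - 1) per byte borrows only
   out of zero bytes once the high bits are masked off. */
static inline uint64_t has_zero_byte(uint64_t x) {
    return (x - REP8_01) & ~(x | REP8_7f);
}
```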
|
|
||
| #ifndef __AARCH64EB__ | ||
| rev syndrome, syndrome | ||
| rev data1, data1 | ||
| /* The MS-non-zero bit of the syndrome marks either the first bit | ||
| that is different, or the top bit of the first zero byte. | ||
| Shifting left now will bring the critical information into the | ||
| top bits. */ | ||
| clz pos, syndrome | ||
| rev data2, data2 | ||
| lsl data1, data1, pos | ||
| lsl data2, data2, pos | ||
| /* But we need to zero-extend (char is unsigned) the value and then | ||
| perform a signed 32-bit subtraction. */ | ||
| lsr data1, data1, #56 | ||
| sub result, data1, data2, lsr #56 | ||
| ret | ||
| #else | ||
| /* For big-endian we cannot use the trick with the syndrome value | ||
| as carry-propagation can corrupt the upper bits if the trailing | ||
| bytes in the string contain 0x01. */ | ||
| /* However, if there is no NUL byte in the dword, we can generate | ||
| the result directly. We can't just subtract the bytes as the | ||
| MSB might be significant. */ | ||
| cbnz has_nul, 1f | ||
| cmp data1, data2 | ||
| cset result, ne | ||
| cneg result, result, lo | ||
| ret | ||
| 1: | ||
| /* Re-compute the NUL-byte detection, using a byte-reversed value. */ | ||
| rev tmp3, data1 | ||
| sub tmp1, tmp3, zeroones | ||
| orr tmp2, tmp3, #REP8_7f | ||
| bic has_nul, tmp1, tmp2 | ||
| rev has_nul, has_nul | ||
| orr syndrome, diff, has_nul | ||
| clz pos, syndrome | ||
| /* The MS-non-zero bit of the syndrome marks either the first bit | ||
| that is different, or the top bit of the first zero byte. | ||
| Shifting left now will bring the critical information into the | ||
| top bits. */ | ||
| lsl data1, data1, pos | ||
| lsl data2, data2, pos | ||
| /* But we need to zero-extend (char is unsigned) the value and then | ||
| perform a signed 32-bit subtraction. */ | ||
| lsr data1, data1, #56 | ||
| sub result, data1, data2, lsr #56 | ||
| ret | ||
| #endif | ||
|
|
||
| .Lmutual_align: | ||
| /* Sources are mutually aligned, but are not currently at an | ||
| alignment boundary. Round down the addresses and then mask off | ||
| the bytes that precede the start point. */ | ||
| bic src1, src1, #7 | ||
| bic src2, src2, #7 | ||
| lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ | ||
| ldr data1, [src1], #8 | ||
| neg tmp1, tmp1 /* Bits to alignment -64. */ | ||
| ldr data2, [src2], #8 | ||
| mov tmp2, #~0 | ||
| #ifdef __AARCH64EB__ | ||
| /* Big-endian. Early bytes are at MSB. */ | ||
| lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ | ||
| #else | ||
| /* Little-endian. Early bytes are at LSB. */ | ||
| lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ | ||
| #endif | ||
| orr data1, data1, tmp2 | ||
| orr data2, data2, tmp2 | ||
| b .Lstart_realigned | ||
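A little-endian C model of that masking (helper name ours): the skew bytes that precede the real string start become 0xff in both words, so they compare equal and can never look like NUL.

```c
#include <stdint.h>

/* skew is the shared misalignment, 1..7 bytes. */
static uint64_t mask_before_start(uint64_t data, unsigned skew) {
    return data | (~0ull >> (64 - 8 * skew));
}
```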
|
|
||
| .Lmisaligned8: | ||
| /* We can do better than this. */ | ||
| ldrb data1w, [src1], #1 | ||
| ldrb data2w, [src2], #1 | ||
| cmp data1w, #1 | ||
| ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ | ||
| b.eq .Lmisaligned8 | ||
| sub result, data1, data2 | ||
| ret | ||
| END(strcmp) |
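For reference, the cmp #1 / ccmp pair in .Lmisaligned8 encodes the loop condition "data1w != 0 && data1w == data2w"; a byte-at-a-time C model:

```c
/* Compare until a mismatch or the end of the first string. */
static int strcmp_bytewise(const unsigned char *s1, const unsigned char *s2) {
    unsigned c1, c2;
    do { c1 = *s1++; c2 = *s2++; } while (c1 != 0 && c1 == c2);
    return (int)c1 - (int)c2;
}
```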
| @@ -0,0 +1,29 @@ | ||
| /* | ||
| * Copyright (C) 2014 The Android Open Source Project | ||
| * All rights reserved. | ||
| * | ||
| * Redistribution and use in source and binary forms, with or without | ||
| * modification, are permitted provided that the following conditions | ||
| * are met: | ||
| * * Redistributions of source code must retain the above copyright | ||
| * notice, this list of conditions and the following disclaimer. | ||
| * * Redistributions in binary form must reproduce the above copyright | ||
| * notice, this list of conditions and the following disclaimer in | ||
| * the documentation and/or other materials provided with the | ||
| * distribution. | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | ||
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | ||
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | ||
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | ||
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| * SUCH DAMAGE. | ||
| */ | ||
| #define STRCPY | ||
| #include "string_copy.S" |
| @@ -0,0 +1,245 @@ | ||
| /* | ||
| * Copyright (C) 2014 The Android Open Source Project | ||
| * All rights reserved. | ||
| * | ||
| * Redistribution and use in source and binary forms, with or without | ||
| * modification, are permitted provided that the following conditions | ||
| * are met: | ||
| * * Redistributions of source code must retain the above copyright | ||
| * notice, this list of conditions and the following disclaimer. | ||
| * * Redistributions in binary form must reproduce the above copyright | ||
| * notice, this list of conditions and the following disclaimer in | ||
| * the documentation and/or other materials provided with the | ||
| * distribution. | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | ||
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | ||
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | ||
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | ||
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| * SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* | ||
| Copyright (c) 2014, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| */ | ||
|
|
||
| #if !defined(STPCPY) && !defined(STRCPY) | ||
| #error "Either STPCPY or STRCPY must be defined." | ||
| #endif | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| /* Arguments and results. */ | ||
| #if defined(STPCPY) | ||
| #define dst x0 | ||
| #elif defined(STRCPY) | ||
| #define dstin x0 | ||
| #endif | ||
| #define src x1 | ||
|
|
||
| /* Locals and temporaries. */ | ||
| #if defined(STRCPY) | ||
| #define dst x2 | ||
| #endif | ||
| #define data1 x3 | ||
| #define data1_w w3 | ||
| #define data2 x4 | ||
| #define data2_w w4 | ||
| #define has_nul1 x5 | ||
| #define has_nul1_w w5 | ||
| #define has_nul2 x6 | ||
| #define tmp1 x7 | ||
| #define tmp2 x8 | ||
| #define tmp3 x9 | ||
| #define tmp4 x10 | ||
| #define zeroones x11 | ||
| #define zeroones_w w11 | ||
| #define pos x12 | ||
|
|
||
| #define REP8_01 0x0101010101010101 | ||
| #define REP8_7f 0x7f7f7f7f7f7f7f7f | ||
| #define REP8_80 0x8080808080808080 | ||
|
|
||
| #if defined(STPCPY) | ||
| ENTRY(stpcpy) | ||
| #elif defined(STRCPY) | ||
| ENTRY(strcpy) | ||
| #endif | ||
| mov zeroones, #REP8_01 | ||
| #if defined(STRCPY) | ||
| mov dst, dstin | ||
| #endif | ||
| ands tmp1, src, #15 | ||
| b.ne .Lmisaligned | ||
| // NUL detection works on the principle that (X - 1) & (~X) & 0x80 | ||
| // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and | ||
| // can be done in parallel across the entire word. | ||
| // The inner loop deals with two Dwords at a time. This has a | ||
| // slightly higher start-up cost, but we should win quite quickly, | ||
| // especially on cores with a high number of issue slots per | ||
| // cycle, as we get much better parallelism out of the operations. | ||
| .Lloop: | ||
| ldp data1, data2, [src], #16 | ||
| sub tmp1, data1, zeroones | ||
| orr tmp2, data1, #REP8_7f | ||
| bic has_nul1, tmp1, tmp2 | ||
| cbnz has_nul1, .Lnul_in_data1 | ||
| sub tmp3, data2, zeroones | ||
| orr tmp4, data2, #REP8_7f | ||
| bic has_nul2, tmp3, tmp4 | ||
| cbnz has_nul2, .Lnul_in_data2 | ||
| // No NUL in either register, copy it in a single instruction. | ||
| stp data1, data2, [dst], #16 | ||
| b .Lloop | ||
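Both variants share this copy loop and differ only in the returned pointer; a semantic C model (name ours):

```c
#include <string.h>

static char *stpcpy_model(char *dst, const char *src) {
    size_t n = strlen(src);
    memcpy(dst, src, n + 1);
    return dst + n;   /* STPCPY: points at the '\0'; STRCPY returns dst */
}
```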
|
|
||
| .Lnul_in_data1: | ||
| rev has_nul1, has_nul1 | ||
| clz pos, has_nul1 | ||
| add tmp1, pos, #0x8 | ||
|
|
||
| tbz tmp1, #6, 1f | ||
| #if defined(STPCPY) | ||
| str data1, [dst], #7 | ||
| #elif defined(STRCPY) | ||
| str data1, [dst] | ||
| #endif | ||
| ret | ||
| 1: | ||
| tbz tmp1, #5, 1f | ||
| str data1_w, [dst], #4 | ||
| lsr data1, data1, #32 | ||
| 1: | ||
| tbz tmp1, #4, 1f | ||
| strh data1_w, [dst], #2 | ||
| lsr data1, data1, #16 | ||
| 1: | ||
| tbz tmp1, #3, 1f | ||
| strb data1_w, [dst] | ||
| #if defined(STPCPY) | ||
| ret | ||
| #endif | ||
| 1: | ||
| #if defined(STPCPY) | ||
| // Back up one so that dst points to the '\0' string terminator. | ||
| sub dst, dst, #1 | ||
| #endif | ||
| ret | ||
|
|
||
| .Lnul_in_data2: | ||
| str data1, [dst], #8 | ||
| rev has_nul2, has_nul2 | ||
| clz pos, has_nul2 | ||
| add tmp1, pos, #0x8 | ||
|
|
||
| tbz tmp1, #6, 1f | ||
| #if defined(STPCPY) | ||
| str data2, [dst], #7 | ||
| #elif defined(STRCPY) | ||
| str data2, [dst] | ||
| #endif | ||
| ret | ||
| 1: | ||
| tbz tmp1, #5, 1f | ||
| str data2_w, [dst], #4 | ||
| lsr data2, data2, #32 | ||
| 1: | ||
| tbz tmp1, #4, 1f | ||
| strh data2_w, [dst], #2 | ||
| lsr data2, data2, #16 | ||
| 1: | ||
| tbz tmp1, #3, 1f | ||
| strb data2_w, [dst] | ||
| #if defined(STPCPY) | ||
| ret | ||
| #endif | ||
| 1: | ||
| #if defined(STPCPY) | ||
| // Back up one so that dst points to the '\0' string terminator. | ||
| sub dst, dst, #1 | ||
| #endif | ||
| ret | ||
|
|
||
| .Lmisaligned: | ||
| tbz src, #0, 1f | ||
| ldrb data1_w, [src], #1 | ||
| strb data1_w, [dst], #1 | ||
| cbnz data1_w, 1f | ||
| #if defined(STPCPY) | ||
| // Back up one so that dst points to the '\0' string terminator. | ||
| sub dst, dst, #1 | ||
| #endif | ||
| ret | ||
| 1: | ||
| tbz src, #1, 1f | ||
| ldrb data1_w, [src], #1 | ||
| strb data1_w, [dst], #1 | ||
| cbz data1_w, .Ldone | ||
| ldrb data2_w, [src], #1 | ||
| strb data2_w, [dst], #1 | ||
| cbnz data2_w, 1f | ||
| .Ldone: | ||
| #if defined(STPCPY) | ||
| // Back up one so that dst points to the '\0' string terminator. | ||
| sub dst, dst, #1 | ||
| #endif | ||
| ret | ||
| 1: | ||
| tbz src, #2, 1f | ||
| ldr data1_w, [src], #4 | ||
| // Check for a zero. | ||
| sub has_nul1_w, data1_w, zeroones_w | ||
| bic has_nul1_w, has_nul1_w, data1_w | ||
| ands has_nul1_w, has_nul1_w, #0x80808080 | ||
| b.ne .Lnul_in_data1 | ||
| str data1_w, [dst], #4 | ||
| 1: | ||
| tbz src, #3, .Lloop | ||
| ldr data1, [src], #8 | ||
| // Check for a zero. | ||
| sub tmp1, data1, zeroones | ||
| orr tmp2, data1, #REP8_7f | ||
| bics has_nul1, tmp1, tmp2 | ||
| b.ne .Lnul_in_data1 | ||
| str data1, [dst], #8 | ||
| b .Lloop | ||
| #if defined(STPCPY) | ||
| END(stpcpy) | ||
| #elif defined(STRCPY) | ||
| END(strcpy) | ||
| #endif |
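
The inner loop above leans on the zero-byte trick its comment describes: `(X - 1) & ~X & 0x80..80` is non-zero exactly when some byte of `X` is zero, and the `orr`/`bic` pair evaluates the equivalent `(X - 1) & ~(X | 0x7f..7f)` form with immediates the instructions can encode. A minimal, self-contained C sketch of the same predicate (illustrative, not part of bionic):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_80 0x8080808080808080ULL

/* Non-zero iff some byte of x is zero: only a zero byte can start a
 * borrow in (x - REP8_01), and ~x masks off bytes whose top bit was
 * already set, so a marker can only appear at or above the first zero. */
static uint64_t has_zero_byte(uint64_t x) {
    return (x - REP8_01) & ~x & REP8_80;
}

int main(void) {
    uint64_t a, b;
    memcpy(&a, "abc\0defg", 8);   /* NUL in byte 3 */
    memcpy(&b, "abcdefgh", 8);    /* no NUL */
    printf("%d %d\n", has_zero_byte(a) != 0, has_zero_byte(b) != 0);  /* 1 0 */
    return 0;
}
```
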
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| /* Copyright (c) 2014, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| /* Arguments and results. */ | ||
| #define srcin x0 | ||
| #define len x0 | ||
|
|
||
| /* Locals and temporaries. */ | ||
| #define src x1 | ||
| #define data1 x2 | ||
| #define data2 x3 | ||
| #define data2a x4 | ||
| #define has_nul1 x5 | ||
| #define has_nul2 x6 | ||
| #define tmp1 x7 | ||
| #define tmp2 x8 | ||
| #define tmp3 x9 | ||
| #define tmp4 x10 | ||
| #define zeroones x11 | ||
| #define pos x12 | ||
|
|
||
| #define REP8_01 0x0101010101010101 | ||
| #define REP8_7f 0x7f7f7f7f7f7f7f7f | ||
| #define REP8_80 0x8080808080808080 | ||
|
|
||
| /* Start of critical section -- keep to one 64-byte cache line. */ | ||
| ENTRY(strlen) | ||
| mov zeroones, #REP8_01 | ||
| bic src, srcin, #15 | ||
| ands tmp1, srcin, #15 | ||
| b.ne .Lmisaligned | ||
| /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 | ||
| (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and | ||
| can be done in parallel across the entire word. */ | ||
| /* The inner loop deals with two Dwords at a time. This has a | ||
| slightly higher start-up cost, but we should win quite quickly, | ||
| especially on cores with a high number of issue slots per | ||
| cycle, as we get much better parallelism out of the operations. */ | ||
| .Lloop: | ||
| ldp data1, data2, [src], #16 | ||
| .Lrealigned: | ||
| sub tmp1, data1, zeroones | ||
| orr tmp2, data1, #REP8_7f | ||
| sub tmp3, data2, zeroones | ||
| orr tmp4, data2, #REP8_7f | ||
| bic has_nul1, tmp1, tmp2 | ||
| bics has_nul2, tmp3, tmp4 | ||
| ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ | ||
| b.eq .Lloop | ||
| /* End of critical section -- keep to one 64-byte cache line. */ | ||
|
|
||
| sub len, src, srcin | ||
| cbz has_nul1, .Lnul_in_data2 | ||
| #ifdef __AARCH64EB__ | ||
| mov data2, data1 | ||
| #endif | ||
| sub len, len, #8 | ||
| mov has_nul2, has_nul1 | ||
| .Lnul_in_data2: | ||
| #ifdef __AARCH64EB__ | ||
| /* For big-endian, carry propagation (if the final byte in the | ||
| string is 0x01) means we cannot use has_nul directly. The | ||
| easiest way to get the correct byte is to byte-swap the data | ||
| and calculate the syndrome a second time. */ | ||
| rev data2, data2 | ||
| sub tmp1, data2, zeroones | ||
| orr tmp2, data2, #REP8_7f | ||
| bic has_nul2, tmp1, tmp2 | ||
| #endif | ||
| sub len, len, #8 | ||
| rev has_nul2, has_nul2 | ||
| clz pos, has_nul2 | ||
| add len, len, pos, lsr #3 /* Bits to bytes. */ | ||
| ret | ||
|
|
||
| .Lmisaligned: | ||
| cmp tmp1, #8 | ||
| neg tmp1, tmp1 | ||
| ldp data1, data2, [src], #16 | ||
| lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ | ||
| mov tmp2, #~0 | ||
| #ifdef __AARCH64EB__ | ||
| /* Big-endian. Early bytes are at MSB. */ | ||
| lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ | ||
| #else | ||
| /* Little-endian. Early bytes are at LSB. */ | ||
| lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ | ||
| #endif | ||
| orr data1, data1, tmp2 | ||
| orr data2a, data2, tmp2 | ||
| csinv data1, data1, xzr, le | ||
| csel data2, data2, data2a, le | ||
| b .Lrealigned | ||
|
|
||
| END(strlen) |
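
Once the syndrome is known to be non-zero, the `rev`/`clz`/`lsr #3` sequence above converts it into a byte count: byte-reversing makes the first (lowest-addressed) marker the most significant, and counting leading zero bits then gives bits before the NUL. A little-endian C sketch using GCC/Clang builtins (an assumption; this is not bionic code):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_80 0x8080808080808080ULL

/* Index of the first zero byte in x, assuming there is one. On little-endian
 * no marker can appear below the first zero byte, so the most significant
 * set bit of the byte-reversed syndrome pinpoints it (the big-endian path
 * in the assembly has to recompute the syndrome after a rev instead). */
static unsigned first_nul_index(uint64_t x) {
    uint64_t syndrome = (x - REP8_01) & ~x & REP8_80;
    return (unsigned)(__builtin_clzll(__builtin_bswap64(syndrome)) >> 3);
}

int main(void) {
    uint64_t w;
    memcpy(&w, "abcde\0gh", 8);
    printf("%u\n", first_nul_index(w));   /* prints 5 */
    return 0;
}
```
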
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,217 @@ | ||
| /* Copyright (c) 2014, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| #define REP8_01 0x0101010101010101 | ||
| #define REP8_7f 0x7f7f7f7f7f7f7f7f | ||
| #define REP8_80 0x8080808080808080 | ||
|
|
||
| /* Parameters and result. */ | ||
| #define src1 x0 | ||
| #define src2 x1 | ||
| #define limit x2 | ||
| #define result x0 | ||
|
|
||
| /* Internal variables. */ | ||
| #define data1 x3 | ||
| #define data1w w3 | ||
| #define data2 x4 | ||
| #define data2w w4 | ||
| #define has_nul x5 | ||
| #define diff x6 | ||
| #define syndrome x7 | ||
| #define tmp1 x8 | ||
| #define tmp2 x9 | ||
| #define tmp3 x10 | ||
| #define zeroones x11 | ||
| #define pos x12 | ||
| #define limit_wd x13 | ||
| #define mask x14 | ||
| #define endloop x15 | ||
|
|
||
| .text | ||
| .p2align 6 | ||
| .rep 7 | ||
| nop /* Pad so that the loop below fits a cache line. */ | ||
| .endr | ||
| ENTRY(strncmp) | ||
| cbz limit, .Lret0 | ||
| eor tmp1, src1, src2 | ||
| mov zeroones, #REP8_01 | ||
| tst tmp1, #7 | ||
| b.ne .Lmisaligned8 | ||
| ands tmp1, src1, #7 | ||
| b.ne .Lmutual_align | ||
| /* Calculate the number of full and partial words -1. */ | ||
| sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ | ||
| lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ | ||
|
|
||
| /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 | ||
| (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and | ||
| can be done in parallel across the entire word. */ | ||
| /* Start of performance-critical section -- one 64B cache line. */ | ||
| .Lloop_aligned: | ||
| ldr data1, [src1], #8 | ||
| ldr data2, [src2], #8 | ||
| .Lstart_realigned: | ||
| subs limit_wd, limit_wd, #1 | ||
| sub tmp1, data1, zeroones | ||
| orr tmp2, data1, #REP8_7f | ||
| eor diff, data1, data2 /* Non-zero if differences found. */ | ||
| csinv endloop, diff, xzr, pl /* Last Dword or differences. */ | ||
| bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ | ||
| ccmp endloop, #0, #0, eq | ||
| b.eq .Lloop_aligned | ||
| /* End of performance-critical section -- one 64B cache line. */ | ||
|
|
||
| /* Not reached the limit, must have found the end or a diff. */ | ||
| tbz limit_wd, #63, .Lnot_limit | ||
|
|
||
| /* Limit % 8 == 0 => all bytes significant. */ | ||
| ands limit, limit, #7 | ||
| b.eq .Lnot_limit | ||
|
|
||
| lsl limit, limit, #3 /* Bytes -> bits. */ | ||
| mov mask, #~0 | ||
| #ifdef __AARCH64EB__ | ||
| lsr mask, mask, limit | ||
| #else | ||
| lsl mask, mask, limit | ||
| #endif | ||
| bic data1, data1, mask | ||
| bic data2, data2, mask | ||
|
|
||
| /* Make sure that the NUL byte is marked in the syndrome. */ | ||
| orr has_nul, has_nul, mask | ||
|
|
||
| .Lnot_limit: | ||
| orr syndrome, diff, has_nul | ||
|
|
||
| #ifndef __AARCH64EB__ | ||
| rev syndrome, syndrome | ||
| rev data1, data1 | ||
| /* The MS-non-zero bit of the syndrome marks either the first bit | ||
| that is different, or the top bit of the first zero byte. | ||
| Shifting left now will bring the critical information into the | ||
| top bits. */ | ||
| clz pos, syndrome | ||
| rev data2, data2 | ||
| lsl data1, data1, pos | ||
| lsl data2, data2, pos | ||
| /* But we need to zero-extend (char is unsigned) the value and then | ||
| perform a signed 32-bit subtraction. */ | ||
| lsr data1, data1, #56 | ||
| sub result, data1, data2, lsr #56 | ||
| ret | ||
| #else | ||
| /* For big-endian we cannot use the trick with the syndrome value | ||
| as carry-propagation can corrupt the upper bits if the trailing | ||
| bytes in the string contain 0x01. */ | ||
| /* However, if there is no NUL byte in the dword, we can generate | ||
| the result directly. We can't just subtract the bytes as the | ||
| MSB might be significant. */ | ||
| cbnz has_nul, 1f | ||
| cmp data1, data2 | ||
| cset result, ne | ||
| cneg result, result, lo | ||
| ret | ||
| 1: | ||
| /* Re-compute the NUL-byte detection, using a byte-reversed value. */ | ||
| rev tmp3, data1 | ||
| sub tmp1, tmp3, zeroones | ||
| orr tmp2, tmp3, #REP8_7f | ||
| bic has_nul, tmp1, tmp2 | ||
| rev has_nul, has_nul | ||
| orr syndrome, diff, has_nul | ||
| clz pos, syndrome | ||
| /* The MS-non-zero bit of the syndrome marks either the first bit | ||
| that is different, or the top bit of the first zero byte. | ||
| Shifting left now will bring the critical information into the | ||
| top bits. */ | ||
| lsl data1, data1, pos | ||
| lsl data2, data2, pos | ||
| /* But we need to zero-extend (char is unsigned) the value and then | ||
| perform a signed 32-bit subtraction. */ | ||
| lsr data1, data1, #56 | ||
| sub result, data1, data2, lsr #56 | ||
| ret | ||
| #endif | ||
|
|
||
| .Lmutual_align: | ||
| /* Sources are mutually aligned, but are not currently at an | ||
| alignment boundary. Round down the addresses and then mask off | ||
| the bytes that precede the start point. | ||
| We also need to adjust the limit calculations, but without | ||
| overflowing if the limit is near ULONG_MAX. */ | ||
| bic src1, src1, #7 | ||
| bic src2, src2, #7 | ||
| ldr data1, [src1], #8 | ||
| neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ | ||
| ldr data2, [src2], #8 | ||
| mov tmp2, #~0 | ||
| sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ | ||
| #ifdef __AARCH64EB__ | ||
| /* Big-endian. Early bytes are at MSB. */ | ||
| lsl tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */ | ||
| #else | ||
| /* Little-endian. Early bytes are at LSB. */ | ||
| lsr tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */ | ||
| #endif | ||
| and tmp3, limit_wd, #7 | ||
| lsr limit_wd, limit_wd, #3 | ||
| /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ | ||
| add limit, limit, tmp1 | ||
| add tmp3, tmp3, tmp1 | ||
| orr data1, data1, tmp2 | ||
| orr data2, data2, tmp2 | ||
| add limit_wd, limit_wd, tmp3, lsr #3 | ||
| b .Lstart_realigned | ||
|
|
||
| .Lret0: | ||
| mov result, #0 | ||
| ret | ||
|
|
||
| .p2align 6 | ||
| .Lmisaligned8: | ||
| sub limit, limit, #1 | ||
| 1: | ||
| /* Perhaps we can do better than this. */ | ||
| ldrb data1w, [src1], #1 | ||
| ldrb data2w, [src2], #1 | ||
| subs limit, limit, #1 | ||
| ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */ | ||
| ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ | ||
| b.eq 1b | ||
| sub result, data1, data2 | ||
| ret | ||
| END(strncmp) |
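
The little-endian tail above folds "bytes differ" and "NUL in data1" into a single syndrome, then derives the result from the first interesting byte. A C sketch of that tail (little-endian, GCC/Clang builtins assumed; the real code shifts by the raw clz count instead of rounding it to a byte boundary, which works because all higher bytes are equal):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_80 0x8080808080808080ULL

/* Compare two 8-byte chunks: the syndrome marks the first position where
 * the chunks differ or where data1 holds a NUL; the verdict is the
 * difference of the zero-extended bytes there. Assumes a non-zero syndrome,
 * i.e. the caller already knows this chunk decides the comparison. */
static int compare_chunks(uint64_t data1, uint64_t data2) {
    uint64_t has_nul = (data1 - REP8_01) & ~data1 & REP8_80;
    uint64_t syndrome = (data1 ^ data2) | has_nul;
    unsigned pos = __builtin_clzll(__builtin_bswap64(syndrome)) & ~7u;
    uint64_t b1 = (__builtin_bswap64(data1) << pos) >> 56;
    uint64_t b2 = (__builtin_bswap64(data2) << pos) >> 56;
    return (int)b1 - (int)b2;
}

int main(void) {
    uint64_t a, b;
    memcpy(&a, "abcdEfgh", 8);
    memcpy(&b, "abcdefgh", 8);
    printf("%d\n", compare_chunks(a, b) < 0);   /* 'E' < 'e': prints 1 */
    return 0;
}
```
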
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,174 @@ | ||
| /* Copyright (c) 2014, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| /* Assumptions: | ||
| * | ||
| * ARMv8-a, AArch64 | ||
| */ | ||
|
|
||
| #include <private/bionic_asm.h> | ||
|
|
||
| /* Arguments and results. */ | ||
| #define srcin x0 | ||
| #define len x0 | ||
| #define limit x1 | ||
|
|
||
| /* Locals and temporaries. */ | ||
| #define src x2 | ||
| #define data1 x3 | ||
| #define data2 x4 | ||
| #define data2a x5 | ||
| #define has_nul1 x6 | ||
| #define has_nul2 x7 | ||
| #define tmp1 x8 | ||
| #define tmp2 x9 | ||
| #define tmp3 x10 | ||
| #define tmp4 x11 | ||
| #define zeroones x12 | ||
| #define pos x13 | ||
| #define limit_wd x14 | ||
|
|
||
| #define REP8_01 0x0101010101010101 | ||
| #define REP8_7f 0x7f7f7f7f7f7f7f7f | ||
| #define REP8_80 0x8080808080808080 | ||
|
|
||
| .text | ||
| .p2align 6 | ||
| .Lstart: | ||
| /* Pre-pad to ensure the critical loop begins on an icache line. */ | ||
| .rep 7 | ||
| nop | ||
| .endr | ||
| /* Put this code here to avoid wasting more space with pre-padding. */ | ||
| .Lhit_limit: | ||
| mov len, limit | ||
| ret | ||
|
|
||
| ENTRY(strnlen) | ||
| cbz limit, .Lhit_limit | ||
| mov zeroones, #REP8_01 | ||
| bic src, srcin, #15 | ||
| ands tmp1, srcin, #15 | ||
| b.ne .Lmisaligned | ||
| /* Calculate the number of full and partial words -1. */ | ||
| sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */ | ||
| lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */ | ||
|
|
||
| /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 | ||
| (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and | ||
| can be done in parallel across the entire word. */ | ||
| /* The inner loop deals with two Dwords at a time. This has a | ||
| slightly higher start-up cost, but we should win quite quickly, | ||
| especially on cores with a high number of issue slots per | ||
| cycle, as we get much better parallelism out of the operations. */ | ||
|
|
||
| /* Start of critical section -- keep to one 64-byte cache line. */ | ||
| .Lloop: | ||
| ldp data1, data2, [src], #16 | ||
| .Lrealigned: | ||
| sub tmp1, data1, zeroones | ||
| orr tmp2, data1, #REP8_7f | ||
| sub tmp3, data2, zeroones | ||
| orr tmp4, data2, #REP8_7f | ||
| bic has_nul1, tmp1, tmp2 | ||
| bic has_nul2, tmp3, tmp4 | ||
| subs limit_wd, limit_wd, #1 | ||
| orr tmp1, has_nul1, has_nul2 | ||
| ccmp tmp1, #0, #0, pl /* NZCV = 0000 */ | ||
| b.eq .Lloop | ||
| /* End of critical section -- keep to one 64-byte cache line. */ | ||
|
|
||
| orr tmp1, has_nul1, has_nul2 | ||
| cbz tmp1, .Lhit_limit /* No null in final Qword. */ | ||
|
|
||
| /* We know there's a null in the final Qword. The easiest thing | ||
| to do now is work out the length of the string and return | ||
| MIN (len, limit). */ | ||
|
|
||
| sub len, src, srcin | ||
| cbz has_nul1, .Lnul_in_data2 | ||
| #ifdef __AARCH64EB__ | ||
| mov data2, data1 | ||
| #endif | ||
| sub len, len, #8 | ||
| mov has_nul2, has_nul1 | ||
| .Lnul_in_data2: | ||
| #ifdef __AARCH64EB__ | ||
| /* For big-endian, carry propagation (if the final byte in the | ||
| string is 0x01) means we cannot use has_nul directly. The | ||
| easiest way to get the correct byte is to byte-swap the data | ||
| and calculate the syndrome a second time. */ | ||
| rev data2, data2 | ||
| sub tmp1, data2, zeroones | ||
| orr tmp2, data2, #REP8_7f | ||
| bic has_nul2, tmp1, tmp2 | ||
| #endif | ||
| sub len, len, #8 | ||
| rev has_nul2, has_nul2 | ||
| clz pos, has_nul2 | ||
| add len, len, pos, lsr #3 /* Bits to bytes. */ | ||
| cmp len, limit | ||
| csel len, len, limit, ls /* Return the lower value. */ | ||
| ret | ||
|
|
||
| .Lmisaligned: | ||
| /* Deal with a partial first word. | ||
| We're doing two things in parallel here: | ||
| 1) Calculate the number of words (but avoiding overflow if | ||
| limit is near ULONG_MAX) - to do this we need to work out | ||
| limit + tmp1 - 1 as a 65-bit value before shifting it; | ||
| 2) Load and mask the initial data words - we force the bytes | ||
| before the ones we are interested in to 0xff - this ensures | ||
| early bytes will not hit any zero detection. */ | ||
| sub limit_wd, limit, #1 | ||
| neg tmp4, tmp1 | ||
| cmp tmp1, #8 | ||
|
|
||
| and tmp3, limit_wd, #15 | ||
| lsr limit_wd, limit_wd, #4 | ||
| mov tmp2, #~0 | ||
|
|
||
| ldp data1, data2, [src], #16 | ||
| lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */ | ||
| add tmp3, tmp3, tmp1 | ||
|
|
||
| #ifdef __AARCH64EB__ | ||
| /* Big-endian. Early bytes are at MSB. */ | ||
| lsl tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */ | ||
| #else | ||
| /* Little-endian. Early bytes are at LSB. */ | ||
| lsr tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */ | ||
| #endif | ||
| add limit_wd, limit_wd, tmp3, lsr #4 | ||
|
|
||
| orr data1, data1, tmp2 | ||
| orr data2a, data2, tmp2 | ||
|
|
||
| csinv data1, data1, xzr, le | ||
| csel data2, data2, data2a, le | ||
| b .Lrealigned | ||
| END(strnlen) |
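
The misaligned entry above rounds src down to a 16-byte boundary and then "poisons" the bytes that precede the real start so the zero check cannot fire on them. A little-endian C sketch of the masking step, shown for a single 8-byte word (the assembly handles a 16-byte pair, building the same mask with mov/lsr and applying it with orr/csinv/csel):

```c
#include <stdint.h>
#include <stdio.h>

/* Force the first `misalign` bytes of an aligned load to 0xff so they can
 * never look like NUL. Valid for misalign in 1..7; a misalignment of zero
 * takes the aligned path and never reaches this code. */
static uint64_t poison_early_bytes(uint64_t word, unsigned misalign) {
    uint64_t mask = ~0ULL >> (64 - 8 * misalign);  /* low bytes = early bytes */
    return word | mask;
}

int main(void) {
    /* Pretend the aligned load picked up four stray zero bytes first. */
    uint64_t w = 0x6867666500000000ULL;            /* "\0\0\0\0efgh" in memory */
    printf("%016llx\n", (unsigned long long)poison_early_bytes(w, 4));
    /* prints 68676665ffffffff */
    return 0;
}
```
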
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| /* Copyright (c) 2014, Linaro Limited | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| * Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| * Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| * Neither the name of the Linaro nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| */ | ||
|
|
||
| #define WMEMMOVE | ||
| #include "memmove.S" | ||
| #undef WMEMMOVE |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| libc_bionic_src_files_arm64 += \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/memchr.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/memcmp.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/memcpy.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/memmove.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/memset.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/stpcpy.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/strchr.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/strcmp.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/strcpy.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/strlen.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/strncmp.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/strnlen.S \ | ||
| arch-arm64/cortex-a72.cortex-a53/bionic/wmemmove.S |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,256 @@ | ||
| /* | ||
| ** Copyright 2014, The Android Open Source Project | ||
| ** | ||
| ** Licensed under the Apache License, Version 2.0 (the "License"); | ||
| ** you may not use this file except in compliance with the License. | ||
| ** You may obtain a copy of the License at | ||
| ** | ||
| ** http://www.apache.org/licenses/LICENSE-2.0 | ||
| ** | ||
| ** Unless required by applicable law or agreed to in writing, software | ||
| ** distributed under the License is distributed on an "AS IS" BASIS, | ||
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| ** See the License for the specific language governing permissions and | ||
| ** limitations under the License. | ||
| */ | ||
|
|
||
| #include <ctype.h> | ||
| #include <pthread.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_ | ||
| #include <sys/_system_properties.h> | ||
|
|
||
| #include <android/log.h> | ||
|
|
||
|
|
||
| static pthread_mutex_t lock_loggable = PTHREAD_MUTEX_INITIALIZER; | ||
|
|
||
| static int lock() | ||
| { | ||
| /* | ||
| * If we trigger a signal handler in the middle of locked activity and the | ||
| * signal handler logs a message, we could get into a deadlock state. | ||
| */ | ||
| /* | ||
| * Any contention, and we can turn around and use the non-cached method | ||
| * in less time than the system call associated with a mutex to deal with | ||
| * the contention. | ||
| */ | ||
| return pthread_mutex_trylock(&lock_loggable); | ||
| } | ||
|
|
||
| static void unlock() | ||
| { | ||
| pthread_mutex_unlock(&lock_loggable); | ||
| } | ||
|
|
||
| struct cache { | ||
| const prop_info *pinfo; | ||
| uint32_t serial; | ||
| unsigned char c; | ||
| }; | ||
|
|
||
| static int check_cache(struct cache *cache) | ||
| { | ||
| return cache->pinfo | ||
| && __system_property_serial(cache->pinfo) != cache->serial; | ||
| } | ||
|
|
||
| #define BOOLEAN_TRUE 0xFF | ||
| #define BOOLEAN_FALSE 0xFE | ||
|
|
||
| static void refresh_cache(struct cache *cache, const char *key) | ||
| { | ||
| char buf[PROP_VALUE_MAX]; | ||
|
|
||
| if (!cache->pinfo) { | ||
| cache->pinfo = __system_property_find(key); | ||
| if (!cache->pinfo) { | ||
| return; | ||
| } | ||
| } | ||
| cache->serial = __system_property_serial(cache->pinfo); | ||
| __system_property_read(cache->pinfo, 0, buf); | ||
| switch(buf[0]) { | ||
| case 't': case 'T': | ||
| cache->c = strcasecmp(buf + 1, "rue") ? buf[0] : BOOLEAN_TRUE; | ||
| break; | ||
| case 'f': case 'F': | ||
| cache->c = strcasecmp(buf + 1, "alse") ? buf[0] : BOOLEAN_FALSE; | ||
| break; | ||
| default: | ||
| cache->c = buf[0]; | ||
| } | ||
| } | ||
|
|
||
| static int __android_log_level(const char *tag, int default_prio) | ||
| { | ||
| /* sizeof() is used on this array below */ | ||
| static const char log_namespace[] = "persist.log.tag."; | ||
| static const size_t base_offset = 8; /* skip "persist." */ | ||
| /* calculate the size of our key temporary buffer */ | ||
| const size_t taglen = (tag && *tag) ? strlen(tag) : 0; | ||
| /* sizeof(log_namespace) = strlen(log_namespace) + 1 */ | ||
| char key[sizeof(log_namespace) + taglen]; /* may be > PROPERTY_KEY_MAX */ | ||
| char *kp; | ||
| size_t i; | ||
| char c = 0; | ||
| /* | ||
| * Single layer cache of four properties. Priorities are: | ||
| * log.tag.<tag> | ||
| * persist.log.tag.<tag> | ||
| * log.tag | ||
| * persist.log.tag | ||
| * A missing tag matches all tags and becomes the | ||
| * system-wide default. We do not support ro.log.tag*. | ||
| */ | ||
| static char last_tag[PROP_NAME_MAX]; | ||
| static uint32_t global_serial; | ||
| /* Some compilers erroneously report an uninitialized use when !not_locked. */ | ||
| uint32_t current_global_serial = 0; | ||
| static struct cache tag_cache[2]; | ||
| static struct cache global_cache[2]; | ||
| int change_detected; | ||
| int global_change_detected; | ||
| int not_locked; | ||
|
|
||
| strcpy(key, log_namespace); | ||
|
|
||
| global_change_detected = change_detected = not_locked = lock(); | ||
|
|
||
| if (!not_locked) { | ||
| /* | ||
| * Check all known serial numbers for changes. | ||
| */ | ||
| for (i = 0; i < (sizeof(tag_cache) / sizeof(tag_cache[0])); ++i) { | ||
| if (check_cache(&tag_cache[i])) { | ||
| change_detected = 1; | ||
| } | ||
| } | ||
| for (i = 0; i < (sizeof(global_cache) / sizeof(global_cache[0])); ++i) { | ||
| if (check_cache(&global_cache[i])) { | ||
| global_change_detected = 1; | ||
| } | ||
| } | ||
|
|
||
| current_global_serial = __system_property_area_serial(); | ||
| if (current_global_serial != global_serial) { | ||
| change_detected = 1; | ||
| global_change_detected = 1; | ||
| } | ||
| } | ||
|
|
||
| if (taglen) { | ||
| int local_change_detected = change_detected; | ||
| if (!not_locked) { | ||
| if (!last_tag[0] | ||
| || (last_tag[0] != tag[0]) | ||
| || strncmp(last_tag + 1, tag + 1, sizeof(last_tag) - 1)) { | ||
| /* invalidate log.tag.<tag> cache */ | ||
| for (i = 0; i < (sizeof(tag_cache) / sizeof(tag_cache[0])); ++i) { | ||
| tag_cache[i].pinfo = NULL; | ||
| tag_cache[i].c = '\0'; | ||
| } | ||
| last_tag[0] = '\0'; | ||
| local_change_detected = 1; | ||
| } | ||
| if (!last_tag[0]) { | ||
| strncpy(last_tag, tag, sizeof(last_tag)); | ||
| } | ||
| } | ||
| strcpy(key + sizeof(log_namespace) - 1, tag); | ||
|
|
||
| /* kp = key; */ | ||
| kp = key + base_offset; | ||
| for (i = 0; i < (sizeof(tag_cache) / sizeof(tag_cache[0])); ++i) { | ||
| struct cache *cache = &tag_cache[i]; | ||
| struct cache temp_cache; | ||
|
|
||
| if (not_locked) { | ||
| temp_cache.pinfo = NULL; | ||
| temp_cache.c = '\0'; | ||
| cache = &temp_cache; | ||
| } | ||
| if (local_change_detected) { | ||
| refresh_cache(cache, kp); | ||
| } | ||
|
|
||
| if (cache->c) { | ||
| c = cache->c; | ||
| break; | ||
| } | ||
| kp = key; | ||
| /* kp = key + base_offset; */ | ||
| } | ||
| } | ||
|
|
||
| switch (toupper(c)) { /* if invalid, resort to global */ | ||
| case 'V': | ||
| case 'D': | ||
| case 'I': | ||
| case 'W': | ||
| case 'E': | ||
| case 'F': /* Not officially supported */ | ||
| case 'A': | ||
| case 'S': | ||
| case BOOLEAN_FALSE: /* Not officially supported */ | ||
| break; | ||
| default: | ||
| /* clear '.' after log.tag */ | ||
| key[sizeof(log_namespace) - 2] = '\0'; | ||
|
|
||
| /* kp = key; */ | ||
| kp = key + base_offset; | ||
| for (i = 0; i < (sizeof(global_cache) / sizeof(global_cache[0])); ++i) { | ||
| struct cache *cache = &global_cache[i]; | ||
| struct cache temp_cache; | ||
|
|
||
| if (not_locked) { | ||
| temp_cache = *cache; | ||
| if (temp_cache.pinfo != cache->pinfo) { /* check atomic */ | ||
| temp_cache.pinfo = NULL; | ||
| temp_cache.c = '\0'; | ||
| } | ||
| cache = &temp_cache; | ||
| } | ||
| if (global_change_detected) { | ||
| refresh_cache(cache, kp); | ||
| } | ||
|
|
||
| if (cache->c) { | ||
| c = cache->c; | ||
| break; | ||
| } | ||
| kp = key; | ||
| /* kp = key + base_offset; */ | ||
| } | ||
| break; | ||
| } | ||
|
|
||
| if (!not_locked) { | ||
| global_serial = current_global_serial; | ||
| unlock(); | ||
| } | ||
|
|
||
| switch (toupper(c)) { | ||
| case 'V': return ANDROID_LOG_VERBOSE; | ||
| case 'D': return ANDROID_LOG_DEBUG; | ||
| case 'I': return ANDROID_LOG_INFO; | ||
| case 'W': return ANDROID_LOG_WARN; | ||
| case 'E': return ANDROID_LOG_ERROR; | ||
| case 'F': /* FALLTHRU */ /* Not officially supported */ | ||
| case 'A': return ANDROID_LOG_FATAL; | ||
| case BOOLEAN_FALSE: /* FALLTHRU */ /* Not officially supported */ | ||
| case 'S': return -1; /* ANDROID_LOG_SUPPRESS */ | ||
| } | ||
| return default_prio; | ||
| } | ||
|
|
||
|
|
||
|
|
||
| int __android_log_is_loggable(int prio, const char *tag, int def) | ||
| { | ||
| int logLevel = __android_log_level(tag, def); | ||
| return logLevel >= 0 && prio >= logLevel; | ||
| } |
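
A hedged usage sketch for the function above: callers gate expensive log formatting on __android_log_is_loggable, so a suppressed priority costs only a property-cache lookup. The tag and payload here are illustrative:

```c
#include <android/log.h>

extern int __android_log_is_loggable(int prio, const char* tag, int def);

/* With ANDROID_LOG_INFO as the default, DEBUG messages stay off until
 * log.tag.MyTag (or one of the other properties listed above) enables them. */
void debug_dump(int value) {
    if (__android_log_is_loggable(ANDROID_LOG_DEBUG, "MyTag", ANDROID_LOG_INFO)) {
        __android_log_print(ANDROID_LOG_DEBUG, "MyTag", "value=%d", value);
    }
}
```
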
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| /* | ||
| * Copyright (C) 2015 MediaTek Inc. | ||
| * Modification based on code covered by the mentioned copyright | ||
| * and/or permission notice(s). | ||
| */ | ||
| /* | ||
| * Copyright (C) 2008 The Android Open Source Project | ||
| * All rights reserved. | ||
| * | ||
| * Redistribution and use in source and binary forms, with or without | ||
| * modification, are permitted provided that the following conditions | ||
| * are met: | ||
| * * Redistributions of source code must retain the above copyright | ||
| * notice, this list of conditions and the following disclaimer. | ||
| * * Redistributions in binary form must reproduce the above copyright | ||
| * notice, this list of conditions and the following disclaimer in | ||
| * the documentation and/or other materials provided with the | ||
| * distribution. | ||
| * | ||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | ||
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | ||
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | ||
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | ||
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| * SUCH DAMAGE. | ||
| */ | ||
|
|
||
| #include <errno.h> | ||
| #include <stdint.h> | ||
| #include <sys/mman.h> | ||
| #include <unistd.h> | ||
|
|
||
| #include "private/bionic_macros.h" | ||
| #include "private/ErrnoRestorer.h" | ||
|
|
||
| extern "C" void* _mmap_arm64_internal(void*, size_t, int, int, int, off64_t); | ||
|
|
||
| static bool kernel_has_MADV_MERGEABLE = true; | ||
|
|
||
| void* mmap64(void* addr, size_t size, int prot, int flags, int fd, off64_t offset) { | ||
| if (offset < 0) { | ||
| errno = EINVAL; | ||
| return MAP_FAILED; | ||
| } | ||
|
|
||
| // Prevent allocations large enough for `end - start` to overflow. | ||
| size_t rounded = BIONIC_ALIGN(size, PAGE_SIZE); | ||
| if (rounded < size || rounded > PTRDIFF_MAX) { | ||
| errno = ENOMEM; | ||
| return MAP_FAILED; | ||
| } | ||
|
|
||
| bool is_private_anonymous = | ||
| (flags & (MAP_PRIVATE | MAP_ANONYMOUS)) == (MAP_PRIVATE | MAP_ANONYMOUS); | ||
| bool is_stack_or_grows_down = (flags & (MAP_STACK | MAP_GROWSDOWN)) != 0; | ||
|
|
||
| void* result = _mmap_arm64_internal(addr, size, prot, flags, fd, offset); | ||
|
|
||
| if (result != MAP_FAILED && kernel_has_MADV_MERGEABLE && | ||
| is_private_anonymous && !is_stack_or_grows_down) { | ||
| ErrnoRestorer errno_restorer; | ||
| int rc = madvise(result, size, MADV_MERGEABLE); | ||
| if (rc == -1 && errno == EINVAL) { | ||
| kernel_has_MADV_MERGEABLE = false; | ||
| } | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| void* mmap(void* addr, size_t size, int prot, int flags, int fd, off_t offset) { | ||
| return mmap64(addr, size, prot, flags, fd, static_cast<off64_t>(offset)); | ||
| } |
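
The `rounded < size` test above is what catches size values so large that aligning them up to a page boundary wraps past zero. A small stand-alone C illustration (align_up mirrors what BIONIC_ALIGN does for power-of-two alignments):

```c
#include <stdint.h>
#include <stdio.h>

/* Round v up to a multiple of a (a must be a power of two). */
static size_t align_up(size_t v, size_t a) {
    return (v + a - 1) & ~(a - 1);
}

int main(void) {
    size_t huge = SIZE_MAX - 100;              /* near the top of size_t */
    size_t rounded = align_up(huge, 4096);     /* v + a - 1 wraps around */
    printf("wrapped: %d\n", rounded < huge);   /* prints 1 */
    return 0;
}
```
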