4
4
#include <stddef.h>
5
5
6
6
typedef unsigned (* my_crc32_t )(unsigned , const void * , size_t );
7
+ unsigned crc32_aarch64 (unsigned , const void * , size_t );
7
8
8
9
#ifdef HAVE_ARMV8_CRC
9
10
10
- #ifdef _WIN32
11
- #include <windows.h>
11
+ # ifdef HAVE_ARMV8_CRYPTO
12
+ static unsigned crc32c_aarch64_pmull (unsigned , const void * , size_t );
13
+ # endif
14
+
15
+ # ifdef _WIN32
16
+ # include <windows.h>
17
+ # ifdef __clang__
18
+ # include <arm_acle.h>
19
+ # include <arm_neon.h>
20
+ # endif
12
21
int crc32_aarch64_available (void )
13
22
{
14
23
return IsProcessorFeaturePresent (PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE );
15
24
}
16
25
17
- const char * crc32c_aarch64_available (void )
26
+ unsigned crc32c_aarch64 (unsigned , const void * , size_t );
27
+
28
+ my_crc32_t crc32c_aarch64_available (void )
18
29
{
19
30
if (crc32_aarch64_available () == 0 )
20
31
return NULL ;
21
32
/* TODO: pmull seems to be supported, but the pmull code path does not compile */
22
- return "Using ARMv8 crc32 instructions" ;
33
+ return crc32c_aarch64 ;
23
34
}
24
- #endif /* _WIN32 */
25
35
26
- #ifdef HAVE_ARMV8_CRYPTO
27
- static unsigned crc32c_aarch64_pmull (unsigned , const void * , size_t );
28
- # endif
29
-
30
- # ifdef __APPLE__
31
- # include <sys/sysctl.h>
36
+ # else /* _WIN32 */
37
+ # ifdef __APPLE__
38
+ # include <sys/sysctl.h>
32
39
33
40
int crc32_aarch64_available (void )
34
41
{
@@ -41,34 +48,34 @@ int crc32_aarch64_available(void)
41
48
42
49
my_crc32_t crc32c_aarch64_available (void )
43
50
{
44
- # ifdef HAVE_ARMV8_CRYPTO
51
+ # ifdef HAVE_ARMV8_CRYPTO
45
52
if (crc32_aarch64_available ())
46
53
return crc32c_aarch64_pmull ;
47
- # endif
54
+ # endif
48
55
return NULL ;
49
56
}
50
57
51
- # else /* __APPLE__ */
52
- # include <sys/auxv.h>
53
- # ifdef __FreeBSD__
58
+ # else /* __APPLE__ */
59
+ # include <sys/auxv.h>
60
+ # ifdef __FreeBSD__
54
61
/*
  Replacement for getauxval() on FreeBSD, implemented with elf_aux_info().

  @param key  auxiliary vector entry to query (callers pass AT_HWCAP)

  @return the value stored for key, or 0 when elf_aux_info() returns
          a nonzero (error) status
*/
static unsigned long getauxval(unsigned int key)
{
  unsigned long val= 0;

  /*
    The "!= 0" must compare elf_aux_info()'s return value. It used to sit
    inside the argument list, which passed a buffer length of 1
    ((int) sizeof(val) != 0) instead of sizeof(val) and tested the raw
    return code.
  */
  if (elf_aux_info(key, (void *) &val, (int) sizeof(val)) != 0)
    return 0ul;

  return val;
}
61
- # else
62
- # include <asm/hwcap.h>
63
- # endif
68
+ # else
69
+ # include <asm/hwcap.h>
70
+ # endif
64
71
65
- # ifndef HWCAP_CRC32
66
- # define HWCAP_CRC32 (1 << 7)
67
- # endif
72
+ # ifndef HWCAP_CRC32
73
+ # define HWCAP_CRC32 (1 << 7)
74
+ # endif
68
75
69
- # ifndef HWCAP_PMULL
70
- # define HWCAP_PMULL (1 << 4)
71
- # endif
76
+ # ifndef HWCAP_PMULL
77
+ # define HWCAP_PMULL (1 << 4)
78
+ # endif
72
79
73
80
/* ARM made crc32 default from ARMv8.1 but optional in ARMv8A
74
81
* Runtime check API.
@@ -78,24 +85,25 @@ int crc32_aarch64_available(void)
78
85
unsigned long auxv = getauxval (AT_HWCAP );
79
86
return (auxv & HWCAP_CRC32 ) != 0 ;
80
87
}
81
- # endif /* __APPLE__ */
88
+ # endif /* __APPLE__ */
82
89
83
- # ifndef __APPLE__
90
+ # ifndef __APPLE__
84
91
static unsigned crc32c_aarch64 (unsigned , const void * , size_t );
85
92
86
93
my_crc32_t crc32c_aarch64_available (void )
87
94
{
88
95
unsigned long auxv = getauxval (AT_HWCAP );
89
96
if (!(auxv & HWCAP_CRC32 ))
90
97
return NULL ;
91
- # ifdef HAVE_ARMV8_CRYPTO
98
+ # ifdef HAVE_ARMV8_CRYPTO
92
99
/* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). */
93
100
if (auxv & HWCAP_PMULL )
94
101
return crc32c_aarch64_pmull ;
95
- # endif
102
+ # endif
96
103
return crc32c_aarch64 ;
97
104
}
98
- # endif /* __APPLE__ */
105
+ # endif /* __APPLE__ */
106
+ # endif /* _WIN32 */
99
107
100
108
const char * crc32c_aarch64_impl (my_crc32_t c )
101
109
{
@@ -370,7 +378,7 @@ static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
370
378
/* There are multiple approaches to calculate crc.
371
379
Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes
372
380
Approach-2: Process 8 bytes and remaining workload using 1 bytes
373
- Apporach -3: Process 64 bytes at once by issuing 8 crc call and remaining
381
+ Approach -3: Process 64 bytes at once by issuing 8 crc call and remaining
374
382
using 8/1 combination.
375
383
376
384
Based on micro-benchmark testing we found that Approach-2 works best especially
0 commit comments