@@ -29,7 +29,9 @@ my_crc32_t crc32c_aarch64_available(void)
 {
   if (crc32_aarch64_available() == 0)
     return NULL;
-  /* TODO : pmull seems supported, but does not compile*/
+
+  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+    return crc32c_aarch64_pmull;
   return crc32c_aarch64;
 }
 
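Note: a minimal standalone sketch of the feature probe the dispatcher above relies on, assuming only <windows.h> (main() and the messages are illustrative, not part of the patch). IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) reports whether the ARMv8 crypto extension, which includes PMULL, is implemented, so the dispatcher can safely return the pmull variant instead of the plain hardware-CRC one.

    #include <windows.h>
    #include <stdio.h>

    int main(void)
    {
      /* Same check as crc32c_aarch64_available() above: probe for the
         ARMv8 crypto extension before selecting the PMULL code path. */
      if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
        puts("ARMv8 crypto (PMULL) available: crc32c_aarch64_pmull usable");
      else
        puts("no crypto extension: fall back to crc32c_aarch64");
      return 0;
    }
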
@@ -181,23 +183,40 @@ asm(".arch_extension crypto");
     CRC32C3X8(buffer, ((ITR) * 7 + 6)) \
   } while(0)
 
+#if defined _MSC_VER && !defined __clang__
+#define PREF4X64L1(buffer, offset, itr)\
+  __prefetch(buffer + (offset) + ((itr) + 0)*64);\
+  __prefetch(buffer + (offset) + ((itr) + 1)*64);\
+  __prefetch(buffer + (offset) + ((itr) + 2)*64);\
+  __prefetch(buffer + (offset) + ((itr) + 3)*64);
+#else
 #define PREF4X64L1(buffer, PREF_OFFSET, ITR) \
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
+#endif
 
 #define PREF1KL1(buffer, PREF_OFFSET) \
   PREF4X64L1(buffer,(PREF_OFFSET), 0) \
   PREF4X64L1(buffer,(PREF_OFFSET), 4) \
   PREF4X64L1(buffer,(PREF_OFFSET), 8) \
   PREF4X64L1(buffer,(PREF_OFFSET), 12)
 
+#if defined _MSC_VER && !defined __clang__
+#define MY_PLDL2KEEP 2 /* PLDL2KEEP is 2 in ARMv8 */
+#define PREF4X64L2(buffer, offset, itr)\
+  __prefetch2(buffer + (offset) + ((itr) + 0) * 64, MY_PLDL2KEEP);\
+  __prefetch2(buffer + (offset) + ((itr) + 1) * 64, MY_PLDL2KEEP);\
+  __prefetch2(buffer + (offset) + ((itr) + 2) * 64, MY_PLDL2KEEP);\
+  __prefetch2(buffer + (offset) + ((itr) + 3) * 64, MY_PLDL2KEEP);
+#else
 #define PREF4X64L2(buffer, PREF_OFFSET, ITR) \
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
+#endif
 
 #define PREF1KL2(buffer, PREF_OFFSET) \
   PREF4X64L2(buffer,(PREF_OFFSET), 0) \
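Note: MY_PLDL2KEEP follows PRFM's prfop immediate layout: prefetch type in bits 4:3 (PLD = 0b00), target cache level in bits 2:1 (L1 = 0b00, L2 = 0b01), and retention policy in bit 0 (KEEP = 0), so PLDL1KEEP encodes as 0 and PLDL2KEEP as 2. A small sketch of the two MSVC intrinsics used above, assuming MSVC's ARM64 <intrin.h>; warm_lines() is a hypothetical helper, and the instructions named in the comments are the documented behavior, not verified here:

    #include <intrin.h>

    static void warm_lines(const char *p)
    {
      __prefetch(p);          /* PRFM PLDL1KEEP, [p]: pull line into L1      */
      __prefetch2(p + 64, 2); /* PRFM PLDL2KEEP, [p+64]: keep in L2 (prfop 2) */
    }
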
@@ -240,6 +259,16 @@ static unsigned crc32c_aarch64(unsigned crc, const void *buf, size_t len)
 #endif
 
 #ifdef HAVE_ARMV8_CRYPTO
+
+static inline uint64_t poly_mul(uint64_t a, uint64_t b)
+{
+#if defined _MSC_VER && !defined __clang__
+  return vgetq_lane_u64(vreinterpretq_u64_p128(neon_pmull_64(vcreate_p64(a), vcreate_p64(b))), 0);
+#else
+  return (uint64_t) vmull_p64(a, b);
+#endif
+}
+
 static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
 {
   int64_t length = (int64_t) len;
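Note: poly_mul() is carry-less multiplication in GF(2)[x]: partial products are XORed rather than added, and only the low 64 bits of the 128-bit PMULL result are kept (lane 0). A portable reference sketch for sanity-checking it; poly_mul_ref() is a hypothetical test helper, not part of the patch:

    #include <stdint.h>

    /* Schoolbook carry-less multiply, low 64 bits of the product.
       Example: poly_mul_ref(3, 3) == 5, since (x+1)(x+1) = x^2 + 1
       when coefficients are reduced mod 2 (no carry into the x term). */
    static uint64_t poly_mul_ref(uint64_t a, uint64_t b)
    {
      uint64_t r = 0;
      for (int i = 0; i < 64; i++)
        if ((b >> i) & 1)
          r ^= a << i;  /* XOR-accumulate shifted copies: addition without carries */
      return r;
    }
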
@@ -286,8 +315,8 @@ static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
    * crc1 multiply by K2
    * crc0 multiply by K1
    */
-  t1 = (uint64_t) vmull_p64(crc1, k2);
-  t0 = (uint64_t) vmull_p64(crc0, k1);
+  t1 = poly_mul(crc1, k2);
+  t0 = poly_mul(crc0, k1);
   crc = __crc32cd(crc2, *(const uint64_t *) buffer);
   crc1 = __crc32cd(0, t1);
   crc ^= crc1;
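
Note on why the multiply-then-CRC pattern folds lanes: k1 and k2 are precomputed constants of the form x^n mod P(x) for the CRC-32C polynomial, so poly_mul() advances a lane's partial CRC past the n bits processed by the other lanes, and __crc32cd(0, t) reduces the 64-bit product back to a 32-bit remainder before it is XORed in. A sketch of the pattern under those assumptions; fold_lane() and k_n are illustrative names, reusing poly_mul() from this patch and GCC/clang's <arm_acle.h> for __crc32cd:

    #include <stdint.h>
    #include <arm_acle.h>  /* __crc32cd, needs -march=armv8-a+crc */

    static uint32_t fold_lane(uint32_t crc, uint32_t lane_crc, uint64_t k_n)
    {
      uint64_t t = poly_mul(lane_crc, k_n); /* shift lane by n bits: multiply by x^n mod P */
      return crc ^ __crc32cd(0, t);         /* reduce mod P, then fold into running CRC   */
    }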