Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
security/putty: fix ARM NEON AES-GCM code
Cherry-pick a test patch from the upstream maintainer: - this uses fallback code for compilers that do not provide vaddq_p128 (e.g. clang 10 on FreeBSD 12.3 AMD64/aarch64) - and uses vaddq_p128 on systems that provide it, for instance FreeBSD 13-STABLE AMD64/aarch64 with clang 14.0.5. Obtained from: Simon Tatham MFH: 2022Q4
- Loading branch information
Showing
3 changed files
with
66 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
commit 2222cd104dc5bd424fe025b98c133c91195cf9f3 | ||
Author: Simon Tatham <anakin@pobox.com> | ||
Date: Wed Oct 12 12:54:36 2022 +0100 | ||
|
||
AES-GCM NEON: cope with missing vaddq_p128. | ||
|
||
In some compilers (I'm told clang 10, in particular), the NEON | ||
intrinsic vaddq_p128 is missing, even though its input type poly128_t | ||
is provided. | ||
|
||
vaddq_p128 is just an XOR of two vector registers, so that's easy to | ||
work around by casting to a more mundane type and back. Added a | ||
configure-time test for that intrinsic, and a workaround to be used in | ||
its absence. | ||
|
||
diff --git a/cmake/cmake.h.in b/cmake/cmake.h.in | ||
index 91d52d78..5ad32515 100644 | ||
--- ./cmake/cmake.h.in | ||
+++ b/cmake/cmake.h.in | ||
@@ -54,6 +54,7 @@ | ||
#cmakedefine01 HAVE_CLMUL | ||
#cmakedefine01 HAVE_NEON_CRYPTO | ||
#cmakedefine01 HAVE_NEON_PMULL | ||
+#cmakedefine01 HAVE_NEON_VADDQ_P128 | ||
#cmakedefine01 HAVE_NEON_SHA512 | ||
#cmakedefine01 HAVE_NEON_SHA512_INTRINSICS | ||
#cmakedefine01 USE_ARM64_NEON_H | ||
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt | ||
index ff04efb5..4b0aa907 100644 | ||
--- ./crypto/CMakeLists.txt | ||
+++ b/crypto/CMakeLists.txt | ||
@@ -195,6 +195,14 @@ if(neon) | ||
int main(void) { r = vmull_p64(a, b); r = vmull_high_p64(u, v); }" | ||
ADD_SOURCES_IF_SUCCESSFUL aesgcm-neon.c) | ||
|
||
+ test_compile_with_flags(HAVE_NEON_VADDQ_P128 | ||
+ GNU_FLAGS -march=armv8-a+crypto | ||
+ MSVC_FLAGS -D_ARM_USE_NEW_NEON_INTRINSICS | ||
+ TEST_SOURCE " | ||
+ #include <${neon_header}> | ||
+ volatile poly128_t r; | ||
+ int main(void) { r = vaddq_p128(r, r); }") | ||
+ | ||
# The 'sha3' architecture extension, despite the name, includes | ||
# support for SHA-512 (from the SHA-2 standard) as well as SHA-3 | ||
# proper. | ||
diff --git a/crypto/aesgcm-neon.c b/crypto/aesgcm-neon.c | ||
index dd7b83cc..64bc8349 100644 | ||
--- ./crypto/aesgcm-neon.c | ||
+++ b/crypto/aesgcm-neon.c | ||
@@ -87,6 +87,14 @@ static inline void store_p128_be(void *p, poly128_t v) | ||
vst1q_u8(p, vrev64q_u8(vreinterpretq_u8_p128(swapped))); | ||
} | ||
|
||
+#if !HAVE_NEON_VADDQ_P128 | ||
+static inline poly128_t vaddq_p128(poly128_t a, poly128_t b) | ||
+{ | ||
+ return vreinterpretq_p128_u32(veorq_u32( | ||
+ vreinterpretq_u32_p128(a), vreinterpretq_u32_p128(b))); | ||
+} | ||
+#endif | ||
+ | ||
/* | ||
* Key setup is just like in aesgcm-ref-poly.c. There's no point using | ||
* vector registers to accelerate this, because it happens rarely. |