From 7aba4b52715f9cb17a90303bab55bd59471d65ae Mon Sep 17 00:00:00 2001 From: Artyom Pavlov Date: Thu, 21 Sep 2023 16:10:53 +0300 Subject: [PATCH] Add asm!-based backend for LoongArch64 targets (#504) Based on code from https://github.com/RustCrypto/asm-hashes/pull/66 --- .github/workflows/sha1.yml | 12 ++ Cargo.lock | 2 +- sha1/CHANGELOG.md | 6 + sha1/Cargo.toml | 5 +- sha1/src/compress.rs | 3 + sha1/src/compress/loongarch64_asm.rs | 255 +++++++++++++++++++++++++++ 6 files changed, 281 insertions(+), 2 deletions(-) create mode 100644 sha1/src/compress/loongarch64_asm.rs diff --git a/.github/workflows/sha1.yml b/.github/workflows/sha1.yml index 4c65146f8..e9ae90cbc 100644 --- a/.github/workflows/sha1.yml +++ b/.github/workflows/sha1.yml @@ -132,6 +132,18 @@ jobs: - uses: msys2/setup-msys2@v2 - run: cargo test --target ${{ matrix.target }} + # Build-only test of the LoongArch64 assembly backend + loongarch64_asm: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: RustCrypto/actions/cargo-cache@master + - uses: dtolnay/rust-toolchain@master + with: + toolchain: 1.72 + targets: loongarch64-unknown-linux-gnu + - run: cargo build --target loongarch64-unknown-linux-gnu --features loongarch64_asm + # Cross-compiled tests cross: strategy: diff --git a/Cargo.lock b/Cargo.lock index 918b6ea9f..6f8aa6fa7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,7 +197,7 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.5" +version = "0.10.6" dependencies = [ "cfg-if", "cpufeatures", diff --git a/sha1/CHANGELOG.md b/sha1/CHANGELOG.md index 78ffc6006..429e67727 100644 --- a/sha1/CHANGELOG.md +++ b/sha1/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## 0.10.6 (2023-09-21) +### Added +- `asm!`-based backend for LoongArch64 targets gated behind `loongarch64_asm` feature [#504] + +[#504]: https://github.com/RustCrypto/hashes/pull/504 + ## 0.10.5 (2022-09-16) ### Added - Feature-gated OID support ([#405]) diff --git a/sha1/Cargo.toml b/sha1/Cargo.toml index 4877331b1..3f4b8d3ff 100644 --- a/sha1/Cargo.toml +++ b/sha1/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sha1" -version = "0.10.5" +version = "0.10.6" description = "SHA-1 hash function" authors = ["RustCrypto Developers"] license = "MIT OR Apache-2.0" @@ -28,6 +28,9 @@ default = ["std"] std = ["digest/std"] oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57 asm = ["sha1-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates +# Use assembly backend for LoongArch64 targets +# WARNING: Bumps MSRV to 1.72. This feature SHOULD NOT be enabled by library crates +loongarch64_asm = [] compress = [] # Expose compress function force-soft = [] # Force software implementation diff --git a/sha1/src/compress.rs b/sha1/src/compress.rs index da4a10a98..6f7e40c41 100644 --- a/sha1/src/compress.rs +++ b/sha1/src/compress.rs @@ -9,6 +9,9 @@ cfg_if::cfg_if! { mod soft; mod aarch64; use aarch64::compress as compress_inner; + } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] { + mod loongarch64_asm; + use loongarch64_asm::compress as compress_inner; } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { #[cfg(not(feature = "asm"))] mod soft; diff --git a/sha1/src/compress/loongarch64_asm.rs b/sha1/src/compress/loongarch64_asm.rs new file mode 100644 index 000000000..facef1b19 --- /dev/null +++ b/sha1/src/compress/loongarch64_asm.rs @@ -0,0 +1,255 @@ +//! LoongArch64 assembly backend for the SHA-1 compression function + +use core::arch::asm; + +const K: [u32; 4] = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6]; // Round constants: K[0]..K[3] are handed to the asm in $a4..$a7, one per group of 20 rounds + +macro_rules! c { // Concatenates a run of string expressions (written without separators) into one asm string + ($($l:expr)*) => { + concat!($($l ,)*) + }; +} + +macro_rules! 
round0a { + ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => { // Rounds 0..=15: message word $i is read directly from the input block at $a1 + c!( + "ld.w $t5, $a1, (" $i " * 4);" + "revb.2h $t5, $t5;" + "rotri.w $t5, $t5, 16;" // byte swap inside each halfword, then swap the halfwords: full 32-bit byte reversal + "add.w " $e ", " $e ", $t5;" + "st.w $t5, $sp, (" $i " * 4);" // keep the word in the 16-entry stack buffer for later schedule expansion + "xor $t5, " $c "," $d ";" + "and $t5, $t5, " $b ";" + "xor $t5, $t5, " $d ";" // $t5 = ((c ^ d) & b) ^ d + roundtail!($a, $b, $e, $i, "$a4") + ) + }; +} + +macro_rules! schedule { // Message schedule step: w[i & 15] = rotate-left-1(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]); the new word is also added to $e + ($i:literal, $e:literal) => { + c!( + "ld.w $t5, $sp, (((" $i " - 3) & 0xF) * 4);" + "ld.w $t6, $sp, (((" $i " - 8) & 0xF) * 4);" + "ld.w $t7, $sp, (((" $i " - 14) & 0xF) * 4);" + "ld.w $t8, $sp, (((" $i " - 16) & 0xF) * 4);" + "xor $t5, $t5, $t6;" + "xor $t5, $t5, $t7;" + "xor $t5, $t5, $t8;" + "rotri.w $t5, $t5, 31;" // rotate right by 31 == rotate left by 1 + "add.w " $e "," $e ", $t5;" + "st.w $t5, $sp, ((" $i " & 0xF) * 4);" // overwrite the oldest slot of the ring buffer + ) + }; +} + +macro_rules! round0b { // Same round function as round0a, but the message word comes from schedule! + ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => { + c!( + schedule!($i, $e) + "xor $t5," $c "," $d ";" + "and $t5, $t5," $b ";" + "xor $t5, $t5," $d ";" // $t5 = ((c ^ d) & b) ^ d + roundtail!($a, $b, $e, $i, "$a4") + ) + }; +} + +macro_rules! round1 { // Parity round: $t5 = b ^ c ^ d, constant taken from $a5 + ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => { + c!( + schedule!($i, $e) + "xor $t5," $b "," $c ";" + "xor $t5, $t5," $d ";" + roundtail!($a, $b, $e, $i, "$a5") + ) + }; +} + +macro_rules! round2 { // Majority round: $t5 = ((c | d) & b) | (c & d), constant taken from $a6; $t7 is used as a temporary + ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => { + c!( + schedule!($i, $e) + "or $t5," $c "," $d ";" + "and $t5, $t5, " $b ";" + "and $t7," $c "," $d ";" + "or $t5, $t5, $t7;" + roundtail!($a, $b, $e, $i, "$a6") + ) + }; +} + +macro_rules! round3 { // Parity round again (b ^ c ^ d), but with the constant taken from $a7 + ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => { + c!( + schedule!($i, $e) + "xor $t5," $b "," $c ";" + "xor $t5, $t5," $d ";" + roundtail!($a, $b, $e, $i, "$a7") + ) + }; +} + +macro_rules! 
roundtail { // Shared end of every round: b = ror(b, 2); e += $t5 (round function) + $k + ror(a, 27); $i is accepted but not used here + ($a:literal, $b:literal, $e:literal, $i:literal, $k:literal) => { + c!( + "rotri.w " $b "," $b ", 2;" + "add.w " $e "," $e ", $t5;" + "add.w " $e "," $e "," $k ";" + "rotri.w $t5," $a ", 27;" + "add.w " $e "," $e ", $t5;" + ) + }; +} + +pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) { // Folds each 64-byte block of `blocks`, in order, into the five-word `state` + if blocks.is_empty() { // the asm loop tests its counter only after processing a block, so an empty slice must be filtered out here + return; + } + + unsafe { // SAFETY: the asm reads 64 bytes per block for `blocks.len()` (non-zero) blocks from `blocks.as_ptr()`, touches only the five words behind `state`, and restores $sp before leaving. NOTE(review): `ld.w` on the byte-aligned block pointer assumes the target supports unaligned word loads - confirm + asm!( + // Allocate 64 bytes of scratch stack space: a 16-word ring buffer for the message schedule + "addi.d $sp, $sp, -64;", + + // Load the five state words into the working registers $t0..$t4 + "ld.w $t0, $a0, 0", + "ld.w $t1, $a0, 4", + "ld.w $t2, $a0, 8", + "ld.w $t3, $a0, 12", + "ld.w $t4, $a0, 16", + + "42:", // top of the per-block loop + + round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 0), + round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 1), + round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 2), + round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 3), + round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 4), + round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 5), + round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 6), + round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 7), + round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 8), + round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 9), + round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 10), + round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 11), + round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 12), + round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 13), + round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 14), + round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 15), + round0b!("$t4", "$t0", "$t1", "$t2", "$t3", 16), + round0b!("$t3", "$t4", "$t0", "$t1", "$t2", 17), + round0b!("$t2", "$t3", "$t4", "$t0", "$t1", 18), + round0b!("$t1", "$t2", "$t3", "$t4", "$t0", 19), + round1!("$t0", "$t1", "$t2", "$t3", "$t4", 20), + round1!("$t4", "$t0", "$t1", "$t2", "$t3", 21), + round1!("$t3", "$t4", "$t0", "$t1", "$t2", 22), + round1!("$t2", "$t3", "$t4", "$t0", "$t1", 23), + round1!("$t1", "$t2", "$t3", "$t4", "$t0", 24), + round1!("$t0", "$t1", "$t2", "$t3", "$t4", 25), + round1!("$t4", "$t0", "$t1", "$t2", "$t3", 26), + round1!("$t3", "$t4", "$t0", "$t1", "$t2", 27), + 
round1!("$t2", "$t3", "$t4", "$t0", "$t1", 28), + round1!("$t1", "$t2", "$t3", "$t4", "$t0", 29), + round1!("$t0", "$t1", "$t2", "$t3", "$t4", 30), + round1!("$t4", "$t0", "$t1", "$t2", "$t3", 31), + round1!("$t3", "$t4", "$t0", "$t1", "$t2", 32), + round1!("$t2", "$t3", "$t4", "$t0", "$t1", 33), + round1!("$t1", "$t2", "$t3", "$t4", "$t0", 34), + round1!("$t0", "$t1", "$t2", "$t3", "$t4", 35), + round1!("$t4", "$t0", "$t1", "$t2", "$t3", 36), + round1!("$t3", "$t4", "$t0", "$t1", "$t2", 37), + round1!("$t2", "$t3", "$t4", "$t0", "$t1", 38), + round1!("$t1", "$t2", "$t3", "$t4", "$t0", 39), + round2!("$t0", "$t1", "$t2", "$t3", "$t4", 40), + round2!("$t4", "$t0", "$t1", "$t2", "$t3", 41), + round2!("$t3", "$t4", "$t0", "$t1", "$t2", 42), + round2!("$t2", "$t3", "$t4", "$t0", "$t1", 43), + round2!("$t1", "$t2", "$t3", "$t4", "$t0", 44), + round2!("$t0", "$t1", "$t2", "$t3", "$t4", 45), + round2!("$t4", "$t0", "$t1", "$t2", "$t3", 46), + round2!("$t3", "$t4", "$t0", "$t1", "$t2", 47), + round2!("$t2", "$t3", "$t4", "$t0", "$t1", 48), + round2!("$t1", "$t2", "$t3", "$t4", "$t0", 49), + round2!("$t0", "$t1", "$t2", "$t3", "$t4", 50), + round2!("$t4", "$t0", "$t1", "$t2", "$t3", 51), + round2!("$t3", "$t4", "$t0", "$t1", "$t2", 52), + round2!("$t2", "$t3", "$t4", "$t0", "$t1", 53), + round2!("$t1", "$t2", "$t3", "$t4", "$t0", 54), + round2!("$t0", "$t1", "$t2", "$t3", "$t4", 55), + round2!("$t4", "$t0", "$t1", "$t2", "$t3", 56), + round2!("$t3", "$t4", "$t0", "$t1", "$t2", 57), + round2!("$t2", "$t3", "$t4", "$t0", "$t1", 58), + round2!("$t1", "$t2", "$t3", "$t4", "$t0", 59), + round3!("$t0", "$t1", "$t2", "$t3", "$t4", 60), + round3!("$t4", "$t0", "$t1", "$t2", "$t3", 61), + round3!("$t3", "$t4", "$t0", "$t1", "$t2", 62), + round3!("$t2", "$t3", "$t4", "$t0", "$t1", 63), + round3!("$t1", "$t2", "$t3", "$t4", "$t0", 64), + round3!("$t0", "$t1", "$t2", "$t3", "$t4", 65), + round3!("$t4", "$t0", "$t1", "$t2", "$t3", 66), + round3!("$t3", "$t4", "$t0", "$t1", "$t2", 67), + 
round3!("$t2", "$t3", "$t4", "$t0", "$t1", 68), + round3!("$t1", "$t2", "$t3", "$t4", "$t0", 69), + round3!("$t0", "$t1", "$t2", "$t3", "$t4", 70), + round3!("$t4", "$t0", "$t1", "$t2", "$t3", 71), + round3!("$t3", "$t4", "$t0", "$t1", "$t2", 72), + round3!("$t2", "$t3", "$t4", "$t0", "$t1", 73), + round3!("$t1", "$t2", "$t3", "$t4", "$t0", 74), + round3!("$t0", "$t1", "$t2", "$t3", "$t4", 75), + round3!("$t4", "$t0", "$t1", "$t2", "$t3", 76), + round3!("$t3", "$t4", "$t0", "$t1", "$t2", 77), + round3!("$t2", "$t3", "$t4", "$t0", "$t1", 78), + round3!("$t1", "$t2", "$t3", "$t4", "$t0", 79), + + // Add the state still held in memory to the working registers ($t5..$t8 serve as scratch) + "ld.w $t5, $a0, 0", // a + "ld.w $t6, $a0, 4", // b + "ld.w $t7, $a0, 8", // c + "ld.w $t8, $a0, 12", // d + "add.w $t0, $t0, $t5", + "ld.w $t5, $a0, 16", // e + "add.w $t1, $t1, $t6", + "add.w $t2, $t2, $t7", + "add.w $t3, $t3, $t8", + "add.w $t4, $t4, $t5", + + // Store the updated state back through $a0 + "st.w $t0, $a0, 0", + "st.w $t1, $a0, 4", + "st.w $t2, $a0, 8", + "st.w $t3, $a0, 12", + "st.w $t4, $a0, 16", + + // Advance to the next 64-byte block and loop while blocks remain + "addi.d $a1, $a1, 64", + "addi.d $a2, $a2, -1", + "bnez $a2, 42b", + + // Release the scratch space, restoring the original $sp + "addi.d $sp, $sp, 64", + + in("$a0") state, + inout("$a1") blocks.as_ptr() => _, + inout("$a2") blocks.len() => _, + + in("$a4") K[0], + in("$a5") K[1], + in("$a6") K[2], + in("$a7") K[3], + + // Clobbers: working registers $t0..$t4 and temporaries $t5..$t8 + out("$t0") _, + out("$t1") _, + out("$t2") _, + out("$t3") _, + out("$t4") _, + out("$t5") _, + out("$t6") _, + out("$t7") _, + out("$t8") _, + + options(preserves_flags), + ); + } +}