From 47bb22dc4fac547b3370a2f9585a3e0ea58bcf3b Mon Sep 17 00:00:00 2001 From: Zhiyong Fang Date: Tue, 10 Sep 2024 21:15:18 -0500 Subject: [PATCH] gf 127 attempt --- arith/src/extension_field.rs | 4 +- arith/src/extension_field/gf2_127/avx.rs | 118 +++++++++------------- arith/src/extension_field/gf2_127/neon.rs | 1 + arith/src/tests.rs | 1 + arith/src/tests/gf2_127.rs | 31 ++++++ 5 files changed, 85 insertions(+), 70 deletions(-) create mode 100644 arith/src/tests/gf2_127.rs diff --git a/arith/src/extension_field.rs b/arith/src/extension_field.rs index e0f3547b..834b0813 100644 --- a/arith/src/extension_field.rs +++ b/arith/src/extension_field.rs @@ -1,12 +1,12 @@ mod fr_ext; -// mod gf2_127; +mod gf2_127; mod gf2_128; mod gf2_128x8; mod m31_ext; mod m31_ext3x16; use crate::{Field, FieldSerde}; -// pub use gf2_127::*; +pub use gf2_127::*; pub use gf2_128::*; pub use gf2_128x8::GF2_128x8; #[cfg(target_arch = "x86_64")] diff --git a/arith/src/extension_field/gf2_127/avx.rs b/arith/src/extension_field/gf2_127/avx.rs index 75480457..9ba0bb77 100644 --- a/arith/src/extension_field/gf2_127/avx.rs +++ b/arith/src/extension_field/gf2_127/avx.rs @@ -79,7 +79,7 @@ impl Field for AVX512GF2_127 { #[inline(always)] fn one() -> Self { AVX512GF2_127 { - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, + v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, } } @@ -181,7 +181,7 @@ impl ExtensionField for AVX512GF2_127 { res } - /// + /// #[inline] fn mul_by_x(&self) -> Self { unsafe { @@ -189,11 +189,12 @@ impl ExtensionField for AVX512GF2_127 { let shifted = _mm_slli_epi64(self.v, 1); // Get the most significant bit and move it - let msb = _mm_srli_epi64(self.v, 63); - let msb_moved = _mm_slli_si128(msb, 8); + let msb = _mm_srli_epi64(self.v, 63); + let msb_moved = _mm_slli_si128(msb, 8); // Combine the shifted value with the moved msb let shifted_consolidated = _mm_or_si128(shifted, msb_moved); + let x0to126 = _mm_and_si128(shifted_consolidated, X0TO126_MASK); // Create the reduction value (0b11) and the comparison value (1) let reduction = { @@ -201,15 +202,13 @@ impl ExtensionField for AVX512GF2_127 { let one = _mm_set_epi64x(0, 1); // Check if the MSB was 1 and create a mask - let mask = _mm_cmpeq_epi64( - _mm_srli_si128(_mm_srli_epi64(shifted, 63), 8), - one); + let mask = _mm_cmpeq_epi64(_mm_srli_si128(_mm_srli_epi64(shifted, 63), 8), one); _mm_and_si128(mask, multiplier) }; // Apply the reduction conditionally - let res = _mm_xor_si128(shifted_consolidated, reduction); + let res = _mm_xor_si128(x0to126, reduction); Self { v: res } } @@ -225,28 +224,28 @@ impl From for AVX512GF2_127 { } } -const X0TO126_MASK: __m128i = unsafe { transmute::<[u8; 16], __m128i>( - [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F])}; -const X127_MASK: __m128i = unsafe { transmute::<[u8; 16], __m128i>( - [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80])}; -const X127_REMINDER: __m128i = unsafe { transmute::<[u8; 16], __m128i>( - [0b11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80])}; - - -#[inline(always)] -unsafe fn mm_bitshift_left(x: __m128i) -> __m128i -{ - let mut carry = _mm_bslli_si128(x, 8); - carry = _mm_srli_epi64(carry, 64 - count); - let x = _mm_slli_epi64(x, count); - _mm_or_si128(x, carry) -} - +// WARNING: The following assumes little endian storage +const X0TO126_MASK: __m128i = unsafe { + transmute::<[u8; 16], __m128i>([ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x7F, + ]) +}; +const X127_MASK: __m128i = unsafe { + transmute::<[u8; 16], __m128i>([ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, + ]) +}; +const X127_REMINDER: __m128i = unsafe { + transmute::<[u8; 16], __m128i>([ + 0b11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, + ]) +}; #[inline] unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i { - let xmm_mask = _mm_setr_epi32((0xFFffffff_u32) as i32, 0x0, 0x0, 0x0); - // a = a0|a1, b = b0|b1 let mut tmp3 = _mm_clmulepi64_si128(a, b, 0x00); // tmp3 = a0 * b0 @@ -266,51 +265,34 @@ unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i { tmp5 = _mm_slli_si128(tmp4, 8); // tmp5 = e0 | 00 tmp4 = _mm_srli_si128(tmp4, 8); // tmp4 = 00 | e1 tmp3 = _mm_xor_si128(tmp3, tmp5); // the lower 128 bits, deg 0 - 127 - tmp6 = _mm_xor_si128(tmp6, tmp4); // the higher 128 bits, deg 128 - 252, the 124 least signicicant bits are non-zero + tmp6 = _mm_xor_si128(tmp6, tmp4); // the higher 128 bits, deg 128 - 252(255), only the 124 least signicicant bits are non-zero // x^0 - x^126 let x0to126 = _mm_and_si128(tmp3, X0TO126_MASK); - // x^127 - tmp4 = _mm_and_si128(tmp3, X127_MASK); - tmp4 = _mm_cmpeq_epi64(tmp4, X127_MASK); - tmp4 = _mm_srli_si128(tmp4, 15); - let x127 = _mm_and_si128(tmp4, X127_REMINDER); - - // x^128 - x^252 - let x128to252 = - _mm_and_si128( - mm_bitshift_left::<2>(tmp6), - mm_bitshift_left::<1>(tmp6), - ); - - _mm_and_si128(_mm_and_si128(x0to126, x127), x128to252) - - // let mut tmp7 = _mm_srli_epi32(tmp6, 31); - // let mut tmp8 = _mm_srli_epi32(tmp6, 30); - // let tmp9 = _mm_srli_epi32(tmp6, 25); - - // tmp7 = _mm_xor_si128(tmp7, tmp8); - // tmp7 = _mm_xor_si128(tmp7, tmp9); - - // tmp8 = _mm_shuffle_epi32(tmp7, 147); - // tmp7 = _mm_and_si128(xmm_mask, tmp8); - // tmp8 = _mm_andnot_si128(xmm_mask, tmp8); - - // tmp3 = _mm_xor_si128(tmp3, tmp8); - // tmp6 = _mm_xor_si128(tmp6, tmp7); - - // let tmp10 = _mm_slli_epi32(tmp6, 1); - // tmp3 = _mm_xor_si128(tmp3, tmp10); - - // let tmp11 = _mm_slli_epi32(tmp6, 2); - // tmp3 = _mm_xor_si128(tmp3, tmp11); - - // let tmp12 = _mm_slli_epi32(tmp6, 7); - // tmp3 = _mm_xor_si128(tmp3, tmp12); - - // _mm_xor_si128(tmp3, tmp6) - + // x^127 + tmp4 = _mm_and_si128(tmp3, X127_MASK); + tmp4 = _mm_cmpeq_epi8(tmp4, X127_MASK); + tmp4 = _mm_srli_si128(tmp4, 15); + let x127 = _mm_and_si128(tmp4, X127_REMINDER); + + // x^128 - x^252 + // shift left tmp6 by 1 bit + tmp3 = _mm_slli_si128(tmp6, 8); + tmp3 = _mm_srli_epi64(tmp3, 64 - 1); + tmp4 = _mm_slli_epi64(tmp6, 1); + tmp3 = _mm_or_si128(tmp3, tmp4); + + // shift left tmp6 by 2 bits + tmp4 = _mm_slli_si128(tmp6, 8); + tmp4 = _mm_srli_epi64(tmp4, 64 - 2); + tmp5 = _mm_slli_epi64(tmp6, 2); + tmp4 = _mm_or_si128(tmp4, tmp5); + + let x128to252 = _mm_xor_si128(tmp3, tmp4); + + tmp3 = _mm_xor_si128(x0to126, x127); + _mm_xor_si128(tmp3, x128to252) } impl Default for AVX512GF2_127 { diff --git a/arith/src/extension_field/gf2_127/neon.rs b/arith/src/extension_field/gf2_127/neon.rs index e69de29b..8b137891 100644 --- a/arith/src/extension_field/gf2_127/neon.rs +++ b/arith/src/extension_field/gf2_127/neon.rs @@ -0,0 +1 @@ + diff --git a/arith/src/tests.rs b/arith/src/tests.rs index b0a3b844..a4445dca 100644 --- a/arith/src/tests.rs +++ b/arith/src/tests.rs @@ -2,6 +2,7 @@ mod bn254; mod extension_field; mod field; mod gf2; +mod gf2_127; mod gf2_128; mod gf2_128x8; mod m31; diff --git a/arith/src/tests/gf2_127.rs b/arith/src/tests/gf2_127.rs new file mode 100644 index 00000000..7c385a3d --- /dev/null +++ b/arith/src/tests/gf2_127.rs @@ -0,0 +1,31 @@ +use ark_std::test_rng; +use std::io::Cursor; + +use crate::{FieldSerde, GF2_127}; + +use super::{ + extension_field::random_extension_field_tests, + field::{random_field_tests, random_inversion_tests}, + simd_field::random_simd_field_tests, +}; + +#[test] +fn test_field() { + random_field_tests::("GF2_127".to_string()); + random_extension_field_tests::("GF2_127".to_string()); + + let mut rng = test_rng(); + random_inversion_tests::(&mut rng, "GF2_127".to_string()); +} + +#[test] +fn test_custom_serde_vectorize_gf2() { + let a = GF2_127::from(0); + let mut buffer = vec![]; + assert!(a.serialize_into(&mut buffer).is_ok()); + let mut cursor = Cursor::new(buffer); + let b = GF2_127::deserialize_from(&mut cursor); + assert!(b.is_ok()); + let b = b.unwrap(); + assert_eq!(a, b); +}