diff --git a/src/wide/f32x4_t.rs b/src/wide/f32x4_t.rs
index 6dc44e8..81dfd90 100644
--- a/src/wide/f32x4_t.rs
+++ b/src/wide/f32x4_t.rs
@@ -14,6 +14,25 @@ cfg_if::cfg_if! {
         #[derive(Default, Clone, Copy, PartialEq, Debug)]
         #[repr(C, align(16))]
         pub struct f32x4(m128);
+    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+        use core::arch::wasm32::*;
+
+        // repr(transparent) allows for directly passing the v128 on the WASM stack.
+        #[derive(Clone, Copy, Debug)]
+        #[repr(transparent)]
+        pub struct f32x4(v128);
+
+        impl Default for f32x4 {
+            fn default() -> Self {
+                Self::splat(0.0)
+            }
+        }
+
+        impl PartialEq for f32x4 {
+            fn eq(&self, other: &Self) -> bool {
+                u32x4_all_true(f32x4_eq(self.0, other.0))
+            }
+        }
     } else {
         #[derive(Default, Clone, Copy, PartialEq, Debug)]
         #[repr(C, align(16))]
@@ -33,6 +52,8 @@ impl f32x4 {
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "simd", target_feature = "sse"))] {
                 Self(max_m128(self.0, rhs.0))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_max(self.0, rhs.0))
             } else {
                 Self([
                     self.0[0].max(rhs.0[0]),
@@ -48,6 +69,8 @@ impl f32x4 {
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "simd", target_feature = "sse"))] {
                 Self(min_m128(self.0, rhs.0))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_min(self.0, rhs.0))
             } else {
                 Self([
                     self.0[0].min(rhs.0[0]),
@@ -79,6 +102,8 @@ impl core::ops::Add for f32x4 {
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "simd", target_feature = "sse"))] {
                 Self(add_m128(self.0, rhs.0))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_add(self.0, rhs.0))
             } else {
                 Self([
                     self.0[0] + rhs.0[0],
@@ -104,6 +129,8 @@ impl core::ops::Sub for f32x4 {
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "simd", target_feature = "sse"))] {
                 Self(sub_m128(self.0, rhs.0))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_sub(self.0, rhs.0))
             } else {
                 Self([
                     self.0[0] - rhs.0[0],
@@ -123,6 +150,8 @@ impl core::ops::Mul for f32x4 {
         cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse"))] {
                 Self(mul_m128(self.0, rhs.0))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_mul(self.0, rhs.0))
             } else {
                 Self([
                     self.0[0] * rhs.0[0],
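A note on the branch above: `repr(transparent)` lets the compiler pass the wrapped `v128` as a single WASM value rather than spilling it through linear memory, and the hand-written `PartialEq` preserves scalar `f32` NaN semantics. A minimal sketch of that comparison, illustrative only and compiled solely for wasm32 with `+simd128`:

```rust
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod nan_eq_sketch {
    use core::arch::wasm32::*;

    // f32x4_eq yields an all-ones lane only when both inputs are equal and
    // neither is NaN; u32x4_all_true then collapses the lane mask to a bool.
    pub fn lanes_equal(a: v128, b: v128) -> bool {
        u32x4_all_true(f32x4_eq(a, b))
    }

    pub fn demo() {
        let one = f32x4_splat(1.0);
        let nan = f32x4_splat(f32::NAN);
        assert!(lanes_equal(one, one));
        assert!(!lanes_equal(nan, nan)); // NaN != NaN, matching scalar f32
    }
}
```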
diff --git a/src/wide/f32x8_t.rs b/src/wide/f32x8_t.rs
index 1561f38..b6287bf 100644
--- a/src/wide/f32x8_t.rs
+++ b/src/wide/f32x8_t.rs
@@ -25,6 +25,25 @@ cfg_if::cfg_if! {
         #[derive(Default, Clone, Copy, PartialEq, Debug)]
         #[repr(C, align(32))]
         pub struct f32x8(m128, m128);
+    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+        use core::arch::wasm32::*;
+
+        #[derive(Clone, Copy, Debug)]
+        #[repr(C, align(32))]
+        pub struct f32x8(v128, v128);
+
+        impl Default for f32x8 {
+            fn default() -> Self {
+                Self::splat(0.0)
+            }
+        }
+
+        impl PartialEq for f32x8 {
+            fn eq(&self, other: &Self) -> bool {
+                u32x4_all_true(f32x4_eq(self.0, other.0)) &
+                u32x4_all_true(f32x4_eq(self.1, other.1))
+            }
+        }
     } else {
         #[derive(Default, Clone, Copy, PartialEq, Debug)]
         #[repr(C, align(32))]
@@ -41,8 +60,14 @@ impl f32x8 {
     }
 
     pub fn floor(self) -> Self {
-        let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
-        roundtrip - roundtrip.cmp_gt(self).blend(f32x8::splat(1.0), f32x8::default())
+        cfg_if::cfg_if! {
+            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_floor(self.0), f32x4_floor(self.1))
+            } else {
+                let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
+                roundtrip - roundtrip.cmp_gt(self).blend(f32x8::splat(1.0), f32x8::default())
+            }
+        }
     }
 
     pub fn fract(self) -> Self {
@@ -67,6 +92,8 @@ impl f32x8 {
                 Self(cmp_op_mask_m256!(self.0, EqualOrdered, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_eq_mask_m128(self.0, rhs.0), cmp_eq_mask_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_eq(self.0, rhs.0), f32x4_eq(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, eq, rhs, f32::from_bits(u32::MAX), 0.0))
             }
@@ -79,6 +106,8 @@ impl f32x8 {
                 Self(cmp_op_mask_m256!(self.0, GreaterEqualOrdered, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_ge_mask_m128(self.0, rhs.0), cmp_ge_mask_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_ge(self.0, rhs.0), f32x4_ge(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, ge, rhs, f32::from_bits(u32::MAX), 0.0))
             }
@@ -91,6 +120,8 @@ impl f32x8 {
                 Self(cmp_op_mask_m256!(self.0, GreaterThanOrdered, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_gt_mask_m128(self.0, rhs.0), cmp_gt_mask_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_gt(self.0, rhs.0), f32x4_gt(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, gt, rhs, f32::from_bits(u32::MAX), 0.0))
             }
@@ -103,6 +134,8 @@ impl f32x8 {
                 Self(cmp_op_mask_m256!(self.0, NotEqualOrdered, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_neq_mask_m128(self.0, rhs.0), cmp_neq_mask_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_ne(self.0, rhs.0), f32x4_ne(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, ne, rhs, f32::from_bits(u32::MAX), 0.0))
             }
@@ -115,6 +148,8 @@ impl f32x8 {
                 Self(cmp_op_mask_m256!(self.0, LessEqualOrdered, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_le_mask_m128(self.0, rhs.0), cmp_le_mask_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_le(self.0, rhs.0), f32x4_le(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, le, rhs, f32::from_bits(u32::MAX), 0.0))
             }
@@ -127,6 +162,8 @@ impl f32x8 {
                 Self(cmp_op_mask_m256!(self.0, LessThanOrdered, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_lt_mask_m128(self.0, rhs.0), cmp_lt_mask_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_lt(self.0, rhs.0), f32x4_lt(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, lt, rhs, f32::from_bits(u32::MAX), 0.0))
             }
@@ -139,6 +176,8 @@ impl f32x8 {
                 Self(blend_varying_m256(f.0, t.0, self.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                 Self(blend_varying_m128(f.0, t.0, self.0), blend_varying_m128(f.1, t.1, self.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_bitselect(t.0, f.0, self.0), v128_bitselect(t.1, f.1, self.1))
             } else {
                 super::generic_bit_blend(self, t, f)
             }
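The `blend` branch above relies on `v128_bitselect`'s argument order: bits come from the first operand where the mask is set and from the second elsewhere, so a comparison mask selects the `t` lanes. A small sketch of the same pattern, illustrative only:

```rust
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod bitselect_sketch {
    use core::arch::wasm32::*;

    // Keeps lanes of `t` where `mask` lanes are all-ones and lanes of `f`
    // where they are all-zeros -- exactly how blend() uses it above.
    pub fn blend(mask: v128, t: v128, f: v128) -> v128 {
        v128_bitselect(t, f, mask)
    }

    pub fn demo() {
        let a = f32x4_splat(2.0);
        let b = f32x4_splat(1.0);
        let max = blend(f32x4_gt(a, b), a, b); // per-lane max via a mask
        assert!(u32x4_all_true(f32x4_eq(max, a)));
    }
}
```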
@@ -146,8 +185,14 @@ impl f32x8 {
     }
 
     pub fn abs(self) -> Self {
-        let non_sign_bits = f32x8::splat(f32::from_bits(i32::MAX as u32));
-        self & non_sign_bits
+        cfg_if::cfg_if! {
+            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_abs(self.0), f32x4_abs(self.1))
+            } else {
+                let non_sign_bits = f32x8::splat(f32::from_bits(i32::MAX as u32));
+                self & non_sign_bits
+            }
+        }
     }
 
     pub fn max(self, rhs: Self) -> Self {
@@ -156,6 +201,8 @@ impl f32x8 {
                 Self(max_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(max_m128(self.0, rhs.0), max_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_max(self.0, rhs.0), f32x4_max(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, max, rhs))
             }
@@ -168,6 +215,8 @@ impl f32x8 {
                 Self(min_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(min_m128(self.0, rhs.0), min_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_min(self.0, rhs.0), f32x4_min(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, min, rhs))
             }
@@ -188,6 +237,8 @@ impl f32x8 {
                 Self(round_m256!(self.0, Nearest))
             } else if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                 Self(round_m128!(self.0, Nearest), round_m128!(self.1, Nearest))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_nearest(self.0), f32x4_nearest(self.1))
             } else {
                 let to_int = f32x8::splat(1.0 / f32::EPSILON);
                 let u: u32x8 = cast(self);
@@ -225,6 +276,9 @@ impl f32x8 {
                     convert_to_i32_m128i_from_m128(self.0),
                     convert_to_i32_m128i_from_m128(self.1),
                 )
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                let rounded = self.round();
+                i32x8(i32x4_trunc_sat_f32x4(rounded.0), i32x4_trunc_sat_f32x4(rounded.1))
             } else {
                 let rounded: [f32; 8] = cast(self.round());
                 let rounded_ints: i32x8 = cast([
@@ -251,6 +305,11 @@ impl f32x8 {
                 cast(convert_truncate_to_i32_m256i_from_m256(self.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 i32x8(truncate_m128_to_m128i(self.0), truncate_m128_to_m128i(self.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                cast(Self(
+                    i32x4_trunc_sat_f32x4(self.0),
+                    i32x4_trunc_sat_f32x4(self.1),
+                ))
             } else {
                 let n: [f32; 8] = cast(self);
                 let ints: i32x8 = cast([
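`i32x4_trunc_sat_f32x4`, used in the two conversions above, saturates rather than wrapping: out-of-range lanes clamp to `i32::MIN`/`i32::MAX` and NaN lanes become 0, unlike the x86 `cvttps2dq`-based paths, which yield `i32::MIN` for every invalid input. A sketch of the semantics, illustrative only:

```rust
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod trunc_sat_sketch {
    use core::arch::wasm32::*;

    pub fn demo() {
        let v = f32x4(3.7, -3.7, 1.0e30, f32::NAN);
        let t = i32x4_trunc_sat_f32x4(v);
        assert_eq!(i32x4_extract_lane::<0>(t), 3);        // truncates toward zero
        assert_eq!(i32x4_extract_lane::<1>(t), -3);
        assert_eq!(i32x4_extract_lane::<2>(t), i32::MAX); // saturates
        assert_eq!(i32x4_extract_lane::<3>(t), 0);        // NaN -> 0
    }
}
```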
@@ -274,6 +333,12 @@ impl f32x8 {
                 Self(reciprocal_m256(self.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(reciprocal_m128(self.0), reciprocal_m128(self.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                let one = f32x4_splat(1.0);
+                Self(
+                    f32x4_div(one, self.0),
+                    f32x4_div(one, self.1),
+                )
             } else {
                 Self::from([
                     1.0 / self.0[0],
@@ -295,6 +360,12 @@ impl f32x8 {
                 Self(reciprocal_sqrt_m256(self.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(reciprocal_sqrt_m128(self.0), reciprocal_sqrt_m128(self.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                let one = f32x4_splat(1.0);
+                Self(
+                    f32x4_div(one, f32x4_sqrt(self.0)),
+                    f32x4_div(one, f32x4_sqrt(self.1)),
+                )
             } else {
                 Self::from([
                     1.0 / self.0[0].sqrt(),
@@ -316,6 +387,8 @@ impl f32x8 {
                 Self(sqrt_m256(self.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(sqrt_m128(self.0), sqrt_m128(self.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_sqrt(self.0), f32x4_sqrt(self.1))
             } else {
                 Self::from([
                     self.0[0].sqrt(),
@@ -353,6 +426,8 @@ impl core::ops::Add for f32x8 {
                 Self(add_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(add_m128(self.0, rhs.0), add_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_add(self.0, rhs.0), f32x4_add(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, add, rhs))
             }
@@ -375,6 +450,8 @@ impl core::ops::Sub for f32x8 {
                 Self(sub_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(sub_m128(self.0, rhs.0), sub_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_sub(self.0, rhs.0), f32x4_sub(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, sub, rhs))
             }
@@ -391,6 +468,8 @@ impl core::ops::Mul for f32x8 {
                 Self(mul_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(mul_m128(self.0, rhs.0), mul_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_mul(self.0, rhs.0), f32x4_mul(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, mul, rhs))
             }
@@ -413,6 +492,8 @@ impl core::ops::Div for f32x8 {
                 Self(div_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(div_m128(self.0, rhs.0), div_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(f32x4_div(self.0, rhs.0), f32x4_div(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, div, rhs))
             }
@@ -429,6 +510,8 @@ impl core::ops::BitAnd for f32x8 {
                 Self(bitand_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(bitand_m128(self.0, rhs.0), bitand_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_and(self.0, rhs.0), v128_and(self.1, rhs.1))
             } else {
                 Self([
                     f32::from_bits(self.0[0].to_bits() & rhs.0[0].to_bits()),
@@ -454,6 +537,8 @@ impl core::ops::BitOr for f32x8 {
                 Self(bitor_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(bitor_m128(self.0, rhs.0), bitor_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_or(self.0, rhs.0), v128_or(self.1, rhs.1))
             } else {
                 Self([
                     f32::from_bits(self.0[0].to_bits() | rhs.0[0].to_bits()),
@@ -479,6 +564,8 @@ impl core::ops::BitXor for f32x8 {
                 Self(bitxor_m256(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(bitxor_m128(self.0, rhs.0), bitxor_m128(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_xor(self.0, rhs.0), v128_xor(self.1, rhs.1))
             } else {
                 Self([
                     f32::from_bits(self.0[0].to_bits() ^ rhs.0[0].to_bits()),
@@ -512,17 +599,10 @@ impl core::ops::Not for f32x8 {
                 Self(self.0.not())
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(self.0.not(), self.1.not())
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_not(self.0), v128_not(self.1))
             } else {
-                Self::from([
-                    f32::from_bits(self.0[0].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[1].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[2].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[3].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[4].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[5].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[6].to_bits() ^ u32::MAX),
-                    f32::from_bits(self.0[7].to_bits() ^ u32::MAX),
-                ])
+                self ^ Self::splat(cast(u32::MAX))
             }
         }
     }
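Earlier in this file, the `recip`/`recip_sqrt` simd128 branches compute an exact `1.0 / x`: WASM SIMD has no reciprocal or rsqrt estimate instructions, whereas the `reciprocal_m128`/`reciprocal_m256` paths are hardware approximations with roughly 12 bits of precision. The wasm32 result is therefore more accurate, if potentially slower. A sketch of the equivalent standalone helpers, illustrative only:

```rust
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod recip_sketch {
    use core::arch::wasm32::*;

    // Exact reciprocal: WASM SIMD provides f32x4_div and f32x4_sqrt but no
    // estimate instructions, so full division is the natural fallback.
    pub fn recip(v: v128) -> v128 {
        f32x4_div(f32x4_splat(1.0), v)
    }

    pub fn recip_sqrt(v: v128) -> v128 {
        f32x4_div(f32x4_splat(1.0), f32x4_sqrt(v))
    }
}
```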
diff --git a/src/wide/i32x8_t.rs b/src/wide/i32x8_t.rs
index 60f6200..ce0033b 100644
--- a/src/wide/i32x8_t.rs
+++ b/src/wide/i32x8_t.rs
@@ -22,6 +22,24 @@ cfg_if::cfg_if! {
         #[derive(Default, Clone, Copy, PartialEq, Eq, Debug)]
         #[repr(C, align(32))]
         pub struct i32x8(pub m128i, pub m128i);
+    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+        use core::arch::wasm32::*;
+
+        #[derive(Clone, Copy, Debug)]
+        #[repr(C, align(32))]
+        pub struct i32x8(pub v128, pub v128);
+
+        impl Default for i32x8 {
+            fn default() -> Self {
+                Self::splat(0)
+            }
+        }
+
+        impl PartialEq for i32x8 {
+            fn eq(&self, other: &Self) -> bool {
+                !v128_any_true(v128_or(v128_xor(self.0, other.0), v128_xor(self.1, other.1)))
+            }
+        }
     } else {
         #[derive(Default, Clone, Copy, PartialEq, Eq, Debug)]
         #[repr(C, align(32))]
@@ -55,6 +73,8 @@ impl i32x8 {
                 Self(cmp_eq_mask_i32_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_eq_mask_i32_m128i(self.0, rhs.0), cmp_eq_mask_i32_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(i32x4_eq(self.0, rhs.0), i32x4_eq(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, eq, rhs, -1, 0))
             }
@@ -67,6 +87,8 @@ impl i32x8 {
                 Self(cmp_gt_mask_i32_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_gt_mask_i32_m128i(self.0, rhs.0), cmp_gt_mask_i32_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(i32x4_gt(self.0, rhs.0), i32x4_gt(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, gt, rhs, -1, 0))
             }
@@ -79,6 +101,8 @@ impl i32x8 {
                 Self(!cmp_gt_mask_i32_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(cmp_lt_mask_i32_m128i(self.0, rhs.0), cmp_lt_mask_i32_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(i32x4_lt(self.0, rhs.0), i32x4_lt(self.1, rhs.1))
             } else {
                 Self(impl_x8_cmp!(self, lt, rhs, -1, 0))
             }
@@ -94,6 +118,8 @@ impl i32x8 {
                     cast(convert_to_m128_from_i32_m128i(self.0)),
                     cast(convert_to_m128_from_i32_m128i(self.1)),
                 ))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                cast(Self(f32x4_convert_i32x4(self.0), f32x4_convert_i32x4(self.1)))
             } else {
                 let arr: [i32; 8] = cast(self);
                 cast([
@@ -140,6 +166,8 @@ impl core::ops::Add for i32x8 {
                 Self(add_i32_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(add_i32_m128i(self.0, rhs.0), add_i32_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(i32x4_add(self.0, rhs.0), i32x4_add(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, wrapping_add, rhs))
             }
@@ -156,6 +184,8 @@ impl core::ops::BitAnd for i32x8 {
                 Self(bitand_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(bitand_m128i(self.0, rhs.0), bitand_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_and(self.0, rhs.0), v128_and(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, bitand, rhs))
             }
@@ -172,6 +202,8 @@ impl core::ops::Mul for i32x8 {
                 Self(mul_i32_keep_low_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                 Self(mul_i32_keep_low_m128i(self.0, rhs.0), mul_i32_keep_low_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(i32x4_mul(self.0, rhs.0), i32x4_mul(self.1, rhs.1))
             } else {
                 struct Dummy([i32; 8]);
                 let arr1: [i32; 8] = cast(self);
@@ -191,6 +223,8 @@ impl core::ops::BitOr for i32x8 {
                 Self(bitor_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(bitor_m128i(self.0, rhs.0), bitor_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_or(self.0, rhs.0), v128_or(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, bitor, rhs))
             }
@@ -207,6 +241,8 @@ impl core::ops::BitXor for i32x8 {
                 Self(bitxor_m256i(self.0, rhs.0))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                 Self(bitxor_m128i(self.0, rhs.0), bitxor_m128i(self.1, rhs.1))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(v128_xor(self.0, rhs.0), v128_xor(self.1, rhs.1))
             } else {
                 Self(impl_x8_op!(self, bitxor, rhs))
             }
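The `PartialEq` impl for the wasm32 `i32x8` above (and for `u32x8` below) decides whole-vector equality in three vector ops: XOR of equal halves is all-zero, OR merges both halves, and a single `v128_any_true` produces the answer. A sketch, illustrative only:

```rust
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod vec_eq_sketch {
    use core::arch::wasm32::*;

    // a ^ b == 0 iff a == b bitwise; OR-ing both halves first lets one
    // v128_any_true answer for all eight lanes at once.
    pub fn eq_256(a: (v128, v128), b: (v128, v128)) -> bool {
        !v128_any_true(v128_or(v128_xor(a.0, b.0), v128_xor(a.1, b.1)))
    }
}
```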
"sse2"))] { Self(bitor_m128i(self.0, rhs.0), bitor_m128i(self.1, rhs.1)) + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(v128_or(self.0, rhs.0), v128_or(self.1, rhs.1)) } else { Self(impl_x8_op!(self, bitor, rhs)) } @@ -207,6 +241,8 @@ impl core::ops::BitXor for i32x8 { Self(bitxor_m256i(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(bitxor_m128i(self.0, rhs.0), bitxor_m128i(self.1, rhs.1)) + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(v128_xor(self.0, rhs.0), v128_xor(self.1, rhs.1)) } else { Self(impl_x8_op!(self, bitxor, rhs)) } diff --git a/src/wide/u32x8_t.rs b/src/wide/u32x8_t.rs index 0ee4e08..71ebc12 100644 --- a/src/wide/u32x8_t.rs +++ b/src/wide/u32x8_t.rs @@ -22,6 +22,24 @@ cfg_if::cfg_if! { #[derive(Default, Clone, Copy, PartialEq, Eq, Debug)] #[repr(C, align(32))] pub struct u32x8(m128i, m128i); + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + use core::arch::wasm32::*; + + #[derive(Clone, Copy, Debug)] + #[repr(C, align(32))] + pub struct u32x8(v128, v128); + + impl Default for u32x8 { + fn default() -> Self { + Self::splat(0) + } + } + + impl PartialEq for u32x8 { + fn eq(&self, other: &Self) -> bool { + !v128_any_true(v128_or(v128_xor(self.0, other.0), v128_xor(self.1, other.1))) + } + } } else { #[derive(Default, Clone, Copy, PartialEq, Eq, Debug)] #[repr(C, align(32))] @@ -51,6 +69,8 @@ impl u32x8 { Self(cmp_eq_mask_i32_m256i(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(cmp_eq_mask_i32_m128i(self.0, rhs.0), cmp_eq_mask_i32_m128i(self.1, rhs.1)) + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(u32x4_eq(self.0, rhs.0), u32x4_eq(self.1, rhs.1)) } else { Self(impl_x8_cmp!(self, eq, rhs, u32::MAX, 0)) } @@ -67,6 +87,8 @@ impl core::ops::Not for u32x8 { Self(self.0.not()) } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(self.0.not(), self.1.not()) + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(v128_not(self.0), v128_not(self.1)) } else { Self([ !self.0[0], @@ -92,6 +114,8 @@ impl core::ops::Add for u32x8 { Self(add_i32_m256i(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(add_i32_m128i(self.0, rhs.0), add_i32_m128i(self.1, rhs.1)) + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(u32x4_add(self.0, rhs.0), u32x4_add(self.1, rhs.1)) } else { Self(impl_x8_op!(self, wrapping_add, rhs)) } @@ -108,6 +132,8 @@ impl core::ops::BitAnd for u32x8 { Self(bitand_m256i(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(bitand_m128i(self.0, rhs.0), bitand_m128i(self.1, rhs.1)) + } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(v128_and(self.0, rhs.0), v128_and(self.1, rhs.1)) } else { Self(impl_x8_op!(self, bitand, rhs)) } @@ -119,15 +145,17 @@ impl core::ops::Shl for u32x8 { type Output = Self; fn shl(self, rhs: i32) -> Self::Output { - let u = rhs as u64; cfg_if::cfg_if! 
@@ -119,15 +145,17 @@ impl core::ops::Shl<i32> for u32x8 {
     type Output = Self;
 
     fn shl(self, rhs: i32) -> Self::Output {
-        let u = rhs as u64;
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
-                let shift = cast([u, 0]);
+                let shift = cast([rhs as u64, 0]);
                 Self(shl_all_u32_m256i(self.0, shift))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
-                let shift = cast([u, 0]);
+                let shift = cast([rhs as u64, 0]);
                 Self(shl_all_u32_m128i(self.0, shift), shl_all_u32_m128i(self.1, shift))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(u32x4_shl(self.0, rhs as _), u32x4_shl(self.1, rhs as _))
             } else {
+                let u = rhs as u64;
                 Self([
                     self.0[0] << u,
                     self.0[1] << u,
@@ -147,15 +175,17 @@ impl core::ops::Shr<i32> for u32x8 {
     type Output = Self;
 
     fn shr(self, rhs: i32) -> Self::Output {
-        let u = rhs as u64;
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
-                let shift = cast([u, 0]);
+                let shift = cast([rhs as u64, 0]);
                 Self(shr_all_u32_m256i(self.0, shift))
             } else if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
-                let shift = cast([u, 0]);
+                let shift = cast([rhs as u64, 0]);
                 Self(shr_all_u32_m128i(self.0, shift), shr_all_u32_m128i(self.1, shift))
+            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
+                Self(u32x4_shr(self.0, rhs as _), u32x4_shr(self.1, rhs as _))
             } else {
+                let u = rhs as u64;
                 Self([
                     self.0[0] >> u,
                     self.0[1] >> u,
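Since the wasm32 branches replace several scalar fallbacks (`floor`, `abs`, `round`), a lane-by-lane comparison against scalar `f32` is a cheap way to validate whichever branch gets compiled in. A hedged test sketch, not part of the patch; it assumes the crate's existing `From<[f32; 8]>` impl and bytemuck `cast`, both of which this patch already relies on:

```rust
#[cfg(test)]
mod wasm_parity_tests {
    use super::f32x8;
    use bytemuck::cast;

    #[test]
    fn floor_matches_scalar() {
        let input: [f32; 8] = [-2.5, -0.5, 0.0, 0.5, 1.5, 2.5, -1.0e6, 1.0e6];
        // Whichever cfg branch is active, floor() must agree with f32::floor.
        let got: [f32; 8] = cast(f32x8::from(input).floor());
        for (g, x) in got.iter().zip(input.iter()) {
            assert_eq!(*g, x.floor(), "floor mismatch for lane value {}", x);
        }
    }
}
```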