Skip to content

Commit

Permalink
Add unsigned_abs to i8,i16,i32,i64 to provide well defined behavior f…
Browse files Browse the repository at this point in the history
…or abs (#152)

* add unsigned version of abs for clearer code when handling corner cases

* fix typo

* fixed wasm typo

* fix neon cast

* add pub(crate) to unsigned types

* fixed i8x16

* x86 doesn't have abs i64

* typo

* fix 64 bit on intel

* fix abs

* remove trait accidentally added during merge

* replace map with explicit array after reading warning about bad debug perf
  • Loading branch information
mcroomp committed May 12, 2024
1 parent 4875c14 commit 445451e
Show file tree
Hide file tree
Showing 18 changed files with 318 additions and 18 deletions.
44 changes: 43 additions & 1 deletion src/i16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,10 +696,52 @@ impl i16x8 {
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
unsafe {Self { neon: vabsq_s16(self.neon) }}
} else {
self.is_negative().blend(self.neg(), self)
let arr: [i16; 8] = cast(self);
cast(
[
arr[0].wrapping_abs(),
arr[1].wrapping_abs(),
arr[2].wrapping_abs(),
arr[3].wrapping_abs(),
arr[4].wrapping_abs(),
arr[5].wrapping_abs(),
arr[6].wrapping_abs(),
arr[7].wrapping_abs(),
])
}
}
}

/// Lane-wise absolute value, returned as the unsigned vector type.
///
/// Because the result is unsigned, a lane holding `i16::MIN` yields `32768`
/// instead of wrapping back to a negative number.
#[inline]
#[must_use]
pub fn unsigned_abs(self) -> u16x8 {
  pick! {
    // `ssse3` implies `sse2`, so the dedicated-instruction branch has to be
    // tested first; otherwise the `sse2` branch always wins and this one is
    // never compiled.
    if #[cfg(target_feature="ssse3")] {
      u16x8 { sse: abs_i16_m128i(self.sse) }
    } else if #[cfg(target_feature="sse2")] {
      // Branch-free abs: shifting right by 15 smears the sign bit across the
      // lane, so `mask` is all ones for negative lanes and zero otherwise.
      // `(x + mask) ^ mask` then negates exactly the negative lanes.
      let mask = shr_imm_i16_m128i::<15>(self.sse);
      u16x8 { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
    } else if #[cfg(target_feature="simd128")] {
      u16x8 { simd: i16x8_abs(self.simd) }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64.
      // The signed result is reinterpreted bit-for-bit as unsigned lanes.
      unsafe {u16x8 { neon: vreinterpretq_u16_s16(vabsq_s16(self.neon)) }}
    } else {
      // Portable fallback: explicit per-lane array rather than `map`.
      let arr: [i16; 8] = cast(self);
      cast(
        [
          arr[0].unsigned_abs(),
          arr[1].unsigned_abs(),
          arr[2].unsigned_abs(),
          arr[3].unsigned_abs(),
          arr[4].unsigned_abs(),
          arr[5].unsigned_abs(),
          arr[6].unsigned_abs(),
          arr[7].unsigned_abs(),
        ])
    }
  }
}

#[inline]
#[must_use]
pub fn max(self, rhs: Self) -> Self {
Expand Down
22 changes: 22 additions & 0 deletions src/i32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,28 @@ impl i32x4 {
}
}

/// Absolute value of every lane, produced as a `u32x4`.
///
/// The result is unsigned, so a lane holding `i32::MIN` becomes `2^31`
/// rather than wrapping.
#[inline]
#[must_use]
pub fn unsigned_abs(self) -> u32x4 {
  pick! {
    if #[cfg(target_feature="ssse3")] {
      let sse = abs_i32_m128i(self.sse);
      u32x4 { sse }
    } else if #[cfg(target_feature="simd128")] {
      let simd = i32x4_abs(self.simd);
      u32x4 { simd }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64.
      // The signed abs result is reinterpreted bit-for-bit as unsigned lanes.
      let neon = unsafe { vreinterpretq_u32_s32(vabsq_s32(self.neon)) };
      u32x4 { neon }
    } else {
      // Portable fallback: lanes are spelled out one by one (no `map`).
      let [l0, l1, l2, l3]: [i32; 4] = cast(self);
      let lanes: [u32; 4] = [
        l0.unsigned_abs(),
        l1.unsigned_abs(),
        l2.unsigned_abs(),
        l3.unsigned_abs(),
      ];
      cast(lanes)
    }
  }
}

/// horizontal add of all the elements of the vector
#[inline]
#[must_use]
Expand Down
16 changes: 16 additions & 0 deletions src/i32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,22 @@ impl i32x8 {
}
}
}

/// Absolute value of every lane, produced as a `u32x8`.
///
/// With `avx2` the whole 256-bit register is handled at once; otherwise the
/// two 128-bit halves `a` and `b` are processed independently.
#[inline]
#[must_use]
pub fn unsigned_abs(self) -> u32x8 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      let avx2 = abs_i32_m256i(self.avx2);
      u32x8 { avx2 }
    } else {
      // Delegate to the half-width implementation for each half.
      let a = self.a.unsigned_abs();
      let b = self.b.unsigned_abs();
      u32x8 { a, b }
    }
  }
}

#[inline]
#[must_use]
pub fn max(self, rhs: Self) -> Self {
Expand Down
40 changes: 40 additions & 0 deletions src/i64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,46 @@ impl i64x2 {
}
}

/// Lane-wise absolute value, keeping the signed lane type.
///
/// The portable fallback uses `wrapping_abs`, so a lane holding `i64::MIN`
/// is returned unchanged on that path.
#[inline]
#[must_use]
pub fn abs(self) -> Self {
  pick! {
    // No x86 branch here: x86 has no builtin for this, so it takes the
    // portable fallback at the bottom.
    if #[cfg(target_feature="simd128")] {
      let simd = i64x2_abs(self.simd);
      Self { simd }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64.
      let neon = unsafe { vabsq_s64(self.neon) };
      Self { neon }
    } else {
      // Lanes are spelled out one by one (no `map`).
      let [lo, hi]: [i64; 2] = cast(self);
      let lanes: [i64; 2] = [lo.wrapping_abs(), hi.wrapping_abs()];
      cast(lanes)
    }
  }
}

/// Absolute value of every lane, produced as a `u64x2`.
///
/// The result is unsigned, so a lane holding `i64::MIN` becomes `2^63`
/// rather than wrapping.
#[inline]
#[must_use]
pub fn unsigned_abs(self) -> u64x2 {
  pick! {
    // No x86 branch here: x86 has no builtin for this, so it takes the
    // portable fallback at the bottom.
    if #[cfg(target_feature="simd128")] {
      let simd = i64x2_abs(self.simd);
      u64x2 { simd }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64.
      // The signed abs result is reinterpreted bit-for-bit as unsigned lanes.
      let neon = unsafe { vreinterpretq_u64_s64(vabsq_s64(self.neon)) };
      u64x2 { neon }
    } else {
      // Lanes are spelled out one by one (no `map`).
      let [lo, hi]: [i64; 2] = cast(self);
      let lanes: [u64; 2] = [lo.unsigned_abs(), hi.unsigned_abs()];
      cast(lanes)
    }
  }
}

#[inline]
#[must_use]
pub fn round_float(self) -> f64x2 {
Expand Down
46 changes: 46 additions & 0 deletions src/i64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,52 @@ impl i64x4 {
}
}

/// Lane-wise absolute value, keeping the signed lane type.
///
/// The `avx2` path goes through a plain array and `wrapping_abs`, so a lane
/// holding `i64::MIN` is returned unchanged there. Without `avx2` each
/// 128-bit half is handled by its own `abs`.
#[inline]
#[must_use]
pub fn abs(self) -> Self {
  pick! {
    if #[cfg(target_feature="avx2")] {
      // avx x86 doesn't have this builtin, so compute it lane by lane
      // (explicit array, no `map`).
      let [l0, l1, l2, l3]: [i64; 4] = cast(self);
      let lanes: [i64; 4] = [
        l0.wrapping_abs(),
        l1.wrapping_abs(),
        l2.wrapping_abs(),
        l3.wrapping_abs(),
      ];
      cast(lanes)
    } else {
      // Delegate to the half-width implementation for each half.
      let a = self.a.abs();
      let b = self.b.abs();
      Self { a, b }
    }
  }
}

/// Absolute value of every lane, produced as a `u64x4`.
///
/// The result is unsigned, so a lane holding `i64::MIN` becomes `2^63`
/// rather than wrapping.
#[inline]
#[must_use]
pub fn unsigned_abs(self) -> u64x4 {
  pick! {
    if #[cfg(target_feature="avx2")] {
      // avx x86 doesn't have this builtin, so compute it lane by lane
      // (explicit array, no `map`).
      let [l0, l1, l2, l3]: [i64; 4] = cast(self);
      let lanes: [u64; 4] = [
        l0.unsigned_abs(),
        l1.unsigned_abs(),
        l2.unsigned_abs(),
        l3.unsigned_abs(),
      ];
      cast(lanes)
    } else {
      // Delegate to the half-width implementation for each half.
      let a = self.a.unsigned_abs();
      let b = self.b.unsigned_abs();
      u64x4 { a, b }
    }
  }
}

#[inline]
#[must_use]
pub fn round_float(self) -> f64x4 {
Expand Down
36 changes: 36 additions & 0 deletions src/i8x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,42 @@ impl i8x16 {
}
}
}

/// Absolute value of every lane, produced as a `u8x16`.
///
/// The result is unsigned, so a lane holding `i8::MIN` becomes `128`
/// rather than wrapping.
#[inline]
#[must_use]
pub fn unsigned_abs(self) -> u8x16 {
  pick! {
    if #[cfg(target_feature="ssse3")] {
      let sse = abs_i8_m128i(self.sse);
      u8x16 { sse }
    } else if #[cfg(target_feature="simd128")] {
      let simd = i8x16_abs(self.simd);
      u8x16 { simd }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64.
      // The signed abs result is reinterpreted bit-for-bit as unsigned lanes.
      let neon = unsafe { vreinterpretq_u8_s8(vabsq_s8(self.neon)) };
      u8x16 { neon }
    } else {
      // Portable fallback: lanes are spelled out one by one (no `map`).
      let [
        l0, l1, l2, l3, l4, l5, l6, l7,
        l8, l9, l10, l11, l12, l13, l14, l15,
      ]: [i8; 16] = cast(self);
      let lanes: [u8; 16] = [
        l0.unsigned_abs(),
        l1.unsigned_abs(),
        l2.unsigned_abs(),
        l3.unsigned_abs(),
        l4.unsigned_abs(),
        l5.unsigned_abs(),
        l6.unsigned_abs(),
        l7.unsigned_abs(),
        l8.unsigned_abs(),
        l9.unsigned_abs(),
        l10.unsigned_abs(),
        l11.unsigned_abs(),
        l12.unsigned_abs(),
        l13.unsigned_abs(),
        l14.unsigned_abs(),
        l15.unsigned_abs(),
      ];
      cast(lanes)
    }
  }
}

#[inline]
#[must_use]
pub fn max(self, rhs: Self) -> Self {
Expand Down
8 changes: 4 additions & 4 deletions src/u16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ pick! {
if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(16))]
pub struct u16x8 { sse: m128i }
pub struct u16x8 { pub(crate) sse: m128i }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct u16x8 { simd: v128 }
pub struct u16x8 { pub(crate) simd: v128 }

impl Default for u16x8 {
fn default() -> Self {
Expand All @@ -29,7 +29,7 @@ pick! {
use core::arch::aarch64::*;
#[repr(C)]
#[derive(Copy, Clone)]
pub struct u16x8 { neon : uint16x8_t }
pub struct u16x8 { pub(crate) neon : uint16x8_t }

impl Default for u16x8 {
#[inline]
Expand All @@ -51,7 +51,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(16))]
pub struct u16x8 { arr: [u16;8] }
pub struct u16x8 { pub(crate) arr: [u16;8] }
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/u32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ pick! {
if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(16))]
pub struct u32x4 { sse: m128i }
pub struct u32x4 { pub(crate) sse: m128i }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct u32x4 { simd: v128 }
pub struct u32x4 { pub(crate) simd: v128 }

impl Default for u32x4 {
fn default() -> Self {
Expand All @@ -29,7 +29,7 @@ pick! {
use core::arch::aarch64::*;
#[repr(C)]
#[derive(Copy, Clone)]
pub struct u32x4 { neon : uint32x4_t }
pub struct u32x4 { pub(crate) neon : uint32x4_t }

impl Default for u32x4 {
#[inline]
Expand Down
4 changes: 2 additions & 2 deletions src/u32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ pick! {
if #[cfg(target_feature="avx2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct u32x8 { avx2: m256i }
pub struct u32x8 { pub(crate) avx2: m256i }
} else {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct u32x8 { a : u32x4, b : u32x4 }
pub struct u32x8 { pub(crate) a : u32x4, pub(crate) b : u32x4 }
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/u64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ pick! {
if #[cfg(target_feature="sse2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(16))]
pub struct u64x2 { sse: m128i }
pub struct u64x2 { pub(crate) sse: m128i }
} else if #[cfg(target_feature="simd128")] {
use core::arch::wasm32::*;

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct u64x2 { simd: v128 }
pub struct u64x2 { pub(crate) simd: v128 }

impl Default for u64x2 {
fn default() -> Self {
Expand All @@ -29,7 +29,7 @@ pick! {
use core::arch::aarch64::*;
#[repr(C)]
#[derive(Copy, Clone)]
pub struct u64x2 { neon : uint64x2_t }
pub struct u64x2 { pub(crate) neon : uint64x2_t }

impl Default for u64x2 {
#[inline]
Expand Down
4 changes: 2 additions & 2 deletions src/u64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ pick! {
if #[cfg(target_feature="avx2")] {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct u64x4 { avx2: m256i }
pub struct u64x4 { pub(crate) avx2: m256i }
} else {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct u64x4 { a : u64x2, b : u64x2 }
pub struct u64x4 { pub(crate) a : u64x2, pub(crate) b : u64x2 }
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/u8x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pick! {

#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct u8x16 { simd: v128 }
pub struct u8x16 { pub(crate) simd: v128 }

impl Default for u8x16 {
fn default() -> Self {
Expand All @@ -29,7 +29,7 @@ pick! {
use core::arch::aarch64::*;
#[repr(C)]
#[derive(Copy, Clone)]
pub struct u8x16 { neon : uint8x16_t }
pub struct u8x16 { pub(crate) neon : uint8x16_t }

impl Default for u8x16 {
#[inline]
Expand All @@ -51,7 +51,7 @@ pick! {
} else {
#[derive(Default, Clone, Copy, PartialEq, Eq)]
#[repr(C, align(16))]
pub struct u8x16 { arr: [u8;16] }
pub struct u8x16 { pub(crate) arr: [u8;16] }
}
}

Expand Down
Loading

0 comments on commit 445451e

Please sign in to comment.