Skip to content

Commit

Permalink
much more inline!
Browse files Browse the repository at this point in the history
  • Loading branch information
Lokathor committed Oct 7, 2022
1 parent 7d586f9 commit ae1be20
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 111 deletions.
23 changes: 14 additions & 9 deletions src/f32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,7 @@ impl f32x4 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn asin_acos(self) -> (Self, Self) {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand Down Expand Up @@ -845,6 +846,7 @@ impl f32x4 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn asin(self) -> Self {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand Down Expand Up @@ -915,6 +917,7 @@ impl f32x4 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn atan(self) -> Self {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand Down Expand Up @@ -953,6 +956,7 @@ impl f32x4 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn atan2(self, x: Self) -> Self {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand All @@ -974,9 +978,9 @@ impl f32x4 {
// check for special case: x and y are both +/- INF
let both_infinite = x.is_inf() & y.is_inf();
if both_infinite.any() {
let mone = -Self::ONE;
x2 = both_infinite.blend(x2 & mone, x2);
y2 = both_infinite.blend(y2 & mone, y2);
let minus_one = -Self::ONE;
x2 = both_infinite.blend(x2 & minus_one, x2);
y2 = both_infinite.blend(y2 & minus_one, y2);
}

// x = y = 0 will produce NAN. No problem, fixed below
Expand Down Expand Up @@ -1265,31 +1269,31 @@ impl f32x4 {
);
cast::<_, f32x4>(t2)
}

/// Flags the lanes whose exponent field is all zeros.
///
/// Each lane is reinterpreted as an `i32`, masked with `0x7F800000` (the
/// exponent bits of an `f32`) and compared against zero. A zero exponent
/// field is what distinguishes zero and subnormal values.
///
/// NOTE(review): the integer comparison result is converted with
/// `i32x4::round_float` rather than bit-cast back to `f32x4` — confirm that
/// every caller is happy with the lane values this produces.
#[inline]
fn is_zero_or_subnormal(self) -> Self {
  let exponent_mask = i32x4::splat(0x7F800000);
  let exponent_bits = cast::<_, i32x4>(self) & exponent_mask;
  let exponent_is_zero = exponent_bits.cmp_eq(i32x4::splat(0));
  i32x4::round_float(exponent_is_zero)
}

/// Returns a vector whose lanes all carry the bit pattern `0x7F800000`
/// (exponent bits all set, mantissa zero), i.e. positive infinity for `f32`.
#[inline]
fn infinity() -> Self {
  let bits = i32x4::splat(0x7F800000);
  cast::<_, f32x4>(bits)
}

/// Returns a vector whose lanes all carry the NaN bit pattern `0x7FC00101`.
///
/// The pattern is `0x7FC00000` OR-ed with the payload `0x101` masked by
/// `0x003FFFFF`. In Rust `&` binds tighter than `|`, so the parentheses
/// below only spell out the evaluation order the expression already had.
#[inline]
fn nan_log() -> Self {
  let bits = 0x7FC00000 | (0x101 & 0x003FFFFF);
  cast::<_, f32x4>(i32x4::splat(bits))
}

/// Returns a vector whose lanes all carry the NaN bit pattern `0x7FC00101`.
///
/// The pattern is `0x7FC00000` OR-ed with the payload `0x101` masked by
/// `0x003FFFFF`. In Rust `&` binds tighter than `|`, so the parentheses
/// below only spell out the evaluation order the expression already had.
///
/// NOTE(review): this payload is identical to the one used by `nan_log`.
/// The vector class library this code is based on appears to use distinct
/// payload codes for the log and pow error cases — confirm whether sharing
/// `0x101` here is intentional.
#[inline]
fn nan_pow() -> Self {
  let bits = 0x7FC00000 | (0x101 & 0x003FFFFF);
  cast::<_, f32x4>(i32x4::splat(bits))
}

/// Produces a per-lane mask derived from the sign bit of each lane.
///
/// Steps: reinterpret the lanes as `i32`, shift each right by 31, reinterpret
/// the result as `f32x4`, compare it with zero, and invert that comparison.
///
/// NOTE(review): presumably `>>` on `i32x4` is an arithmetic shift, so lanes
/// with the sign bit set come out as the "true" mask — confirm against the
/// `i32x4` shift implementation.
#[inline]
pub fn sign_bit(self) -> Self {
  let lanes = cast::<_, i32x4>(self);
  let shifted = lanes >> 31;
  let equals_zero = cast::<_, f32x4>(shifted).cmp_eq(f32x4::ZERO);
  !equals_zero
}

#[inline]
pub fn reduce_add(self) -> f32 {
let arr: [f32; 4] = cast(self);
arr.iter().sum()
Expand Down Expand Up @@ -1466,6 +1470,7 @@ impl f32x4 {
(self.is_nan() | y.is_nan()).blend(self + y, z)
}

/// Raises every lane to the same scalar power `y`.
///
/// The scalar is broadcast to all lanes with `f32x4::splat` and the work is
/// delegated to `pow_f32x4`.
#[inline]
pub fn powf(self, y: f32) -> Self {
  let exponent = f32x4::splat(y);
  Self::pow_f32x4(self, exponent)
}
Expand Down
24 changes: 14 additions & 10 deletions src/f32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,7 @@ impl f32x8 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn asin_acos(self) -> (Self, Self) {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand Down Expand Up @@ -1079,6 +1080,7 @@ impl f32x8 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn atan(self) -> Self {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand Down Expand Up @@ -1117,6 +1119,7 @@ impl f32x8 {
}

#[allow(non_upper_case_globals)]
#[inline]
pub fn atan2(self, x: Self) -> Self {
// Based on the Agner Fog "vector class library":
// https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
Expand All @@ -1138,9 +1141,9 @@ impl f32x8 {
// check for special case: x and y are both +/- INF
let both_infinite = x.is_inf() & y.is_inf();
if both_infinite.any() {
let mone = -Self::ONE;
x2 = both_infinite.blend(x2 & mone, x2);
y2 = both_infinite.blend(y2 & mone, y2);
let minus_one = -Self::ONE;
x2 = both_infinite.blend(x2 & minus_one, x2);
y2 = both_infinite.blend(y2 & minus_one, y2);
}

// x = y = 0 will produce NAN. No problem, fixed below
Expand Down Expand Up @@ -1462,31 +1465,31 @@ impl f32x8 {
);
cast::<_, f32x8>(t2)
}

/// Flags the lanes whose exponent field is all zeros.
///
/// Each lane is reinterpreted as an `i32`, masked with `0x7F800000` (the
/// exponent bits of an `f32`) and compared against zero. A zero exponent
/// field is what distinguishes zero and subnormal values.
///
/// NOTE(review): the integer comparison result is converted with
/// `i32x8::round_float` rather than bit-cast back to `f32x8` — confirm that
/// every caller is happy with the lane values this produces.
#[inline]
fn is_zero_or_subnormal(self) -> Self {
  let exponent_mask = i32x8::splat(0x7F800000);
  let exponent_bits = cast::<_, i32x8>(self) & exponent_mask;
  let exponent_is_zero = exponent_bits.cmp_eq(i32x8::splat(0));
  i32x8::round_float(exponent_is_zero)
}

/// Returns a vector whose lanes all carry the bit pattern `0x7F800000`
/// (exponent bits all set, mantissa zero), i.e. positive infinity for `f32`.
#[inline]
fn infinity() -> Self {
  let bits = i32x8::splat(0x7F800000);
  cast::<_, f32x8>(bits)
}

/// Returns a vector whose lanes all carry the NaN bit pattern `0x7FC00101`.
///
/// The pattern is `0x7FC00000` OR-ed with the payload `0x101` masked by
/// `0x003FFFFF`. In Rust `&` binds tighter than `|`, so the parentheses
/// below only spell out the evaluation order the expression already had.
#[inline]
fn nan_log() -> Self {
  let bits = 0x7FC00000 | (0x101 & 0x003FFFFF);
  cast::<_, f32x8>(i32x8::splat(bits))
}

/// Returns a vector whose lanes all carry the NaN bit pattern `0x7FC00101`.
///
/// The pattern is `0x7FC00000` OR-ed with the payload `0x101` masked by
/// `0x003FFFFF`. In Rust `&` binds tighter than `|`, so the parentheses
/// below only spell out the evaluation order the expression already had.
///
/// NOTE(review): this payload is identical to the one used by `nan_log`.
/// The vector class library this code is based on appears to use distinct
/// payload codes for the log and pow error cases — confirm whether sharing
/// `0x101` here is intentional.
#[inline]
fn nan_pow() -> Self {
  let bits = 0x7FC00000 | (0x101 & 0x003FFFFF);
  cast::<_, f32x8>(i32x8::splat(bits))
}

/// Produces a per-lane mask derived from the sign bit of each lane.
///
/// Steps: reinterpret the lanes as `i32`, shift each right by 31, reinterpret
/// the result as `f32x8`, compare it with zero, and invert that comparison.
///
/// NOTE(review): presumably `>>` on `i32x8` is an arithmetic shift, so lanes
/// with the sign bit set come out as the "true" mask — confirm against the
/// `i32x8` shift implementation.
#[inline]
pub fn sign_bit(self) -> Self {
  let lanes = cast::<_, i32x8>(self);
  let shifted = lanes >> 31;
  let equals_zero = cast::<_, f32x8>(shifted).cmp_eq(f32x8::ZERO);
  !equals_zero
}

#[inline]
pub fn reduce_add(self) -> f32 {
pick! {
// From https://stackoverflow.com/questions/13219146/how-to-sum-m256-horizontally
Expand Down Expand Up @@ -1681,7 +1684,7 @@ impl f32x8 {

(self.is_nan() | y.is_nan()).blend(self + y, z)
}

/// Raises every lane to the same scalar power `y`.
///
/// The scalar is broadcast to all lanes with `f32x8::splat` and the work is
/// delegated to `pow_f32x8`.
#[inline]
pub fn powf(self, y: f32) -> Self {
  let exponent = f32x8::splat(y);
  Self::pow_f32x8(self, exponent)
}
Expand All @@ -1699,6 +1702,7 @@ impl f32x8 {

impl Not for f32x8 {
type Output = Self;
#[inline]
fn not(self) -> Self {
pick! {
if #[cfg(target_feature="avx")] {
Expand Down

0 comments on commit ae1be20

Please sign in to comment.