Skip to content

Commit

Permalink
Implement f32-vtanh microkernels using a 9/6 rational polynomial approximation.
Browse files Browse the repository at this point in the history

The coefficients of the rational polynomial were computed using a variant of the Remez algorithm to minimize the relative error in the range `[-8, 8]`. The approximation has a maximum relative error of `5.36e-7` or `4.79e-7` (4.5 or 4 ULP), for non-FMA and FMA evaluation, respectively.

Evaluating the rational approximant is ~1.5x faster than the current implementations for `SSE2`, `AVX`, `FMA3`, and `AVX512`.

This change also fixes a minor issue in which the `AVX512SKX` microkernels were returning `1.0f` for `NaN` inputs, which was picked up by the newly added tests.

PiperOrigin-RevId: 630967750
  • Loading branch information
gonnet authored and xnnpack-bot committed May 6, 2024
1 parent 637f676 commit 204fd5a
Show file tree
Hide file tree
Showing 173 changed files with 29,199 additions and 5,010 deletions.
204 changes: 204 additions & 0 deletions bench/f32-vtanh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,26 @@ BENCHMARK_CAPTURE(f32_vtanh, scalar_expm1minus_rr1_p6h5ts_div_u4,
xnn_init_f32_tanh_scalar_expm1minus_rr1_p6h5_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// Benchmarks for the scalar `rational_9_6` f32 tanh microkernels added by this
// change, registered in u1, u2, u4 and u8 variants (the `uN` suffix presumably
// denotes the number of elements handled per loop iteration -- confirm against
// the kernel generator).
// These entries pass nullptr as the init_params argument, unlike the
// expm1minus entries around them, which pass an `xnn_init_f32_tanh_*` function.
BENCHMARK_CAPTURE(f32_vtanh, scalar_rational_9_6_u1,
xnn_f32_vtanh_ukernel__scalar_rational_9_6_u1,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, scalar_rational_9_6_u2,
xnn_f32_vtanh_ukernel__scalar_rational_9_6_u2,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, scalar_rational_9_6_u4,
xnn_f32_vtanh_ukernel__scalar_rational_9_6_u4,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, scalar_rational_9_6_u8,
xnn_f32_vtanh_ukernel__scalar_rational_9_6_u8,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma_expm1minus_rr1_lut8_p4h3ts_div_u1,
xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_lut8_p4h3ts_div_u1,
xnn_init_f32_tanh_scalar_expm1minus_rr1_lut8_p4h3_params)
Expand Down Expand Up @@ -207,6 +227,46 @@ BENCHMARK_CAPTURE(f32_vtanh, fma_expm1minus_rr1_p6h5ts_div_u4,
xnn_init_f32_tanh_sse_expm1minus_rr1_p6h5_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// Benchmarks for the SSE2 `rational_9_6` f32 tanh microkernels: four `div`
// variants followed by four `nr` variants, each at u4, u8, u12 and u16.
// (`div` / `nr` presumably select how the final quotient of the rational
// approximation is computed -- true division vs. a refined reciprocal --
// TODO confirm against the kernel templates.)
// Every entry shares xnn_init_f32_tanh_sse_rational_9_6_params, and no
// ISA-check argument is passed for these registrations.
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_div_u4,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_div_u4,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_div_u8,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_div_u8,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_div_u12,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_div_u12,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_div_u16,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_div_u16,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_nr_u4,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_nr_u4,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_nr_u8,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_nr_u8,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_nr_u12,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_nr_u12,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse2_rational_9_6_nr_u16,
xnn_f32_vtanh_ukernel__sse2_rational_9_6_nr_u16,
xnn_init_f32_tanh_sse_rational_9_6_params)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, sse41_expm1minus_rr1_lut8_p4h3ts_div_u4,
xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_u4,
xnn_init_f32_tanh_sse_expm1minus_rr1_lut8_p4h3_params,
Expand Down Expand Up @@ -615,6 +675,54 @@ BENCHMARK_CAPTURE(f32_vtanh, fma_expm1minus_rr1_p6h5ts_div_u4,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// Benchmarks for the AVX `rational_9_6` f32 tanh microkernels: four `div`
// variants followed by four `nr` variants, each at u8, u16, u24 and u32.
// Every entry uses xnn_init_f32_tanh_avx_rational_9_6_params and passes
// benchmark::utils::CheckAVX as the trailing ISA-check argument (presumably
// so the benchmark is skipped on CPUs without AVX -- confirm in
// bench/utils).
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_div_u8,
xnn_f32_vtanh_ukernel__avx_rational_9_6_div_u8,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_div_u16,
xnn_f32_vtanh_ukernel__avx_rational_9_6_div_u16,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_div_u24,
xnn_f32_vtanh_ukernel__avx_rational_9_6_div_u24,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_div_u32,
xnn_f32_vtanh_ukernel__avx_rational_9_6_div_u32,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_nr_u8,
xnn_f32_vtanh_ukernel__avx_rational_9_6_nr_u8,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_nr_u16,
xnn_f32_vtanh_ukernel__avx_rational_9_6_nr_u16,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_nr_u24,
xnn_f32_vtanh_ukernel__avx_rational_9_6_nr_u24,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx_rational_9_6_nr_u32,
xnn_f32_vtanh_ukernel__avx_rational_9_6_nr_u32,
xnn_init_f32_tanh_avx_rational_9_6_params,
benchmark::utils::CheckAVX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u8,
xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_u8,
xnn_init_f32_tanh_avx_expm1minus_rr1_lut4_p4h3_perm_params,
Expand Down Expand Up @@ -963,6 +1071,54 @@ BENCHMARK_CAPTURE(f32_vtanh, fma_expm1minus_rr1_p6h5ts_div_u4,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// Benchmarks for the FMA3 `rational_9_6` f32 tanh microkernels: four `div`
// variants followed by four `nr` variants, each at u8, u16, u24 and u32.
// Every entry uses the FMA3-specific xnn_init_f32_tanh_fma3_rational_9_6_params
// (distinct from the AVX init function) and passes
// benchmark::utils::CheckFMA3 as the trailing ISA-check argument.
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_div_u8,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_div_u8,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_div_u16,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_div_u16,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_div_u24,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_div_u24,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_div_u32,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_div_u32,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_nr_u8,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_nr_u8,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_nr_u16,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_nr_u16,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_nr_u24,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_nr_u24,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, fma3_rational_9_6_nr_u32,
xnn_f32_vtanh_ukernel__fma3_rational_9_6_nr_u32,
xnn_init_f32_tanh_fma3_rational_9_6_params,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u8,
xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u8,
xnn_init_f32_tanh_avx_expm1minus_rr1_lut4_p4h3_perm_params,
Expand Down Expand Up @@ -1983,6 +2139,54 @@ BENCHMARK_CAPTURE(f32_vtanh, fma_expm1minus_rr1_p6h5ts_div_u4,
benchmark::utils::CheckAVX512SKX)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// Benchmarks for the AVX512F `rational_9_6` f32 tanh microkernels: four `div`
// variants followed by four `nr` variants, each at u16, u32, u48 and u64.
// Every entry uses xnn_init_f32_tanh_avx512_rational_9_6_params and is gated
// on benchmark::utils::CheckAVX512F, whereas the preceding expm1minus entry in
// this file is gated on CheckAVX512SKX.
// NOTE(review): the cmake source list touched by the same change names
// avx512f rational-9-6 files with u8/u16/u24/u32 suffixes (plus four without a
// div/nr infix), while the kernels referenced here are u16/u32/u48/u64 --
// confirm that the CMake list and the generated file names agree.
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_div_u16,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_div_u16,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_div_u32,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_div_u32,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_div_u48,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_div_u48,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_div_u64,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_div_u64,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_nr_u16,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_nr_u16,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_nr_u32,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_nr_u32,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_nr_u48,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_nr_u48,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vtanh, avx512f_rational_9_6_nr_u64,
xnn_f32_vtanh_ukernel__avx512f_rational_9_6_nr_u64,
xnn_init_f32_tanh_avx512_rational_9_6_params,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Expand Down
40 changes: 40 additions & 0 deletions cmake/microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,14 @@ SET(ALL_AVX_MICROKERNEL_SRCS
src/f32-vtanh/gen/f32-vtanh-avx-expm1minus-rr1-p6h5ts-nr2-u64.c
src/f32-vtanh/gen/f32-vtanh-avx-expm1minus-rr1-p6h5ts-nr2-u72.c
src/f32-vtanh/gen/f32-vtanh-avx-expm1minus-rr1-p6h5ts-nr2-u80.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-div-u8.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-div-u16.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-div-u24.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-div-u32.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-nr-u8.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-nr-u16.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-nr-u24.c
src/f32-vtanh/gen/f32-vtanh-avx-rational-9-6-nr-u32.c
src/f32-vunary/gen/f32-vabs-avx-u8.c
src/f32-vunary/gen/f32-vabs-avx-u16.c
src/f32-vunary/gen/f32-vneg-avx-u8.c
Expand Down Expand Up @@ -1475,6 +1483,18 @@ SET(ALL_AVX512F_MICROKERNEL_SRCS
src/f32-vsqrt/gen/f32-vsqrt-avx512f-rsqrt-u16.c
src/f32-vsqrt/gen/f32-vsqrt-avx512f-rsqrt-u32.c
src/f32-vsqrt/gen/f32-vsqrt-avx512f-rsqrt-u48.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-div-u8.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-div-u16.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-div-u24.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-div-u32.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-nr-u8.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-nr-u16.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-nr-u24.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-nr-u32.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-u8.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-u16.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-u24.c
src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-6-u32.c
src/f32-vunary/gen/f32-vabs-avx512f-u16.c
src/f32-vunary/gen/f32-vabs-avx512f-u32.c
src/f32-vunary/gen/f32-vneg-avx512f-u16.c
Expand Down Expand Up @@ -2851,6 +2871,14 @@ SET(ALL_FMA3_MICROKERNEL_SRCS
src/f32-vtanh/gen/f32-vtanh-fma3-expm1minus-rr1-p6h5ts-nr1adj-u64.c
src/f32-vtanh/gen/f32-vtanh-fma3-expm1minus-rr1-p6h5ts-nr1adj-u72.c
src/f32-vtanh/gen/f32-vtanh-fma3-expm1minus-rr1-p6h5ts-nr1adj-u80.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-div-u8.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-div-u16.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-div-u24.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-div-u32.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-nr-u8.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-nr-u16.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-nr-u24.c
src/f32-vtanh/gen/f32-vtanh-fma3-rational-9-6-nr-u32.c
src/math/f32-sqrt-fma3-nr1fma1adj.c
src/math/f32-sqrt-fma3-nr1fma.c
src/math/f32-sqrt-fma3-nr2fma.c
Expand Down Expand Up @@ -5825,6 +5853,10 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS
src/f32-vtanh/gen/f32-vtanh-scalar-expm1minus-rr1-p6h5ts-div-u1.c
src/f32-vtanh/gen/f32-vtanh-scalar-expm1minus-rr1-p6h5ts-div-u2.c
src/f32-vtanh/gen/f32-vtanh-scalar-expm1minus-rr1-p6h5ts-div-u4.c
src/f32-vtanh/gen/f32-vtanh-scalar-rational-9-6-u1.c
src/f32-vtanh/gen/f32-vtanh-scalar-rational-9-6-u2.c
src/f32-vtanh/gen/f32-vtanh-scalar-rational-9-6-u4.c
src/f32-vtanh/gen/f32-vtanh-scalar-rational-9-6-u8.c
src/f32-vunary/gen/f32-vabs-scalar-u1.c
src/f32-vunary/gen/f32-vabs-scalar-u2.c
src/f32-vunary/gen/f32-vabs-scalar-u4.c
Expand Down Expand Up @@ -6815,6 +6847,14 @@ SET(ALL_SSE2_MICROKERNEL_SRCS
src/f32-vtanh/gen/f32-vtanh-sse2-expm1minus-rr1-p6h5ts-nr2-u8.c
src/f32-vtanh/gen/f32-vtanh-sse2-expm1minus-rr1-p6h5ts-nr2-u12.c
src/f32-vtanh/gen/f32-vtanh-sse2-expm1minus-rr1-p6h5ts-nr2-u16.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-div-u4.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-div-u8.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-div-u12.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-div-u16.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-nr-u4.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-nr-u8.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-nr-u12.c
src/f32-vtanh/gen/f32-vtanh-sse2-rational-9-6-nr-u16.c
src/math/f16-f32-cvt-sse2-int16.c
src/math/f16-f32-cvt-sse2-int32.c
src/math/f32-exp-sse2-rr2-lut64-p2.c
Expand Down

0 comments on commit 204fd5a

Please sign in to comment.