Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
324 changes: 251 additions & 73 deletions src/row/arch/neon/packed_rgb_float.rs

Large diffs are not rendered by default.

282 changes: 260 additions & 22 deletions src/row/arch/neon/tests/packed_rgb_float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ fn rgbf32_to_rgb_neon_matches_scalar_widths() {
let input = pseudo_random_rgbf32(w);
let mut out_scalar = std::vec![0u8; w * 3];
let mut out_neon = std::vec![0u8; w * 3];
scalar::rgbf32_to_rgb_row(&input, &mut out_scalar, w);
scalar::rgbf32_to_rgb_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf32_to_rgb_row(&input, &mut out_neon, w);
rgbf32_to_rgb_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -49,9 +49,9 @@ fn rgbf32_to_rgba_neon_matches_scalar_widths() {
let input = pseudo_random_rgbf32(w);
let mut out_scalar = std::vec![0u8; w * 4];
let mut out_neon = std::vec![0u8; w * 4];
scalar::rgbf32_to_rgba_row(&input, &mut out_scalar, w);
scalar::rgbf32_to_rgba_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf32_to_rgba_row(&input, &mut out_neon, w);
rgbf32_to_rgba_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -64,9 +64,9 @@ fn rgbf32_to_rgb_u16_neon_matches_scalar_widths() {
let input = pseudo_random_rgbf32(w);
let mut out_scalar = std::vec![0u16; w * 3];
let mut out_neon = std::vec![0u16; w * 3];
scalar::rgbf32_to_rgb_u16_row(&input, &mut out_scalar, w);
scalar::rgbf32_to_rgb_u16_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf32_to_rgb_u16_row(&input, &mut out_neon, w);
rgbf32_to_rgb_u16_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -79,9 +79,9 @@ fn rgbf32_to_rgba_u16_neon_matches_scalar_widths() {
let input = pseudo_random_rgbf32(w);
let mut out_scalar = std::vec![0u16; w * 4];
let mut out_neon = std::vec![0u16; w * 4];
scalar::rgbf32_to_rgba_u16_row(&input, &mut out_scalar, w);
scalar::rgbf32_to_rgba_u16_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf32_to_rgba_u16_row(&input, &mut out_neon, w);
rgbf32_to_rgba_u16_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -94,9 +94,9 @@ fn rgbf32_to_rgb_f32_neon_matches_scalar_widths() {
let input = pseudo_random_rgbf32(w);
let mut out_scalar = std::vec![0.0f32; w * 3];
let mut out_neon = std::vec![0.0f32; w * 3];
scalar::rgbf32_to_rgb_f32_row(&input, &mut out_scalar, w);
scalar::rgbf32_to_rgb_f32_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf32_to_rgb_f32_row(&input, &mut out_neon, w);
rgbf32_to_rgb_f32_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
// Lossless: output should equal input bit-exact.
Expand Down Expand Up @@ -131,9 +131,9 @@ fn neon_rgbf16_to_rgb_matches_scalar() {
let input = pseudo_random_rgbf16(w);
let mut out_scalar = std::vec![0u8; w * 3];
let mut out_neon = std::vec![0u8; w * 3];
scalar::rgbf16_to_rgb_row(&input, &mut out_scalar, w);
scalar::rgbf16_to_rgb_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf16_to_rgb_row(&input, &mut out_neon, w);
rgbf16_to_rgb_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -152,9 +152,9 @@ fn neon_rgbf16_to_rgba_matches_scalar() {
let input = pseudo_random_rgbf16(w);
let mut out_scalar = std::vec![0u8; w * 4];
let mut out_neon = std::vec![0u8; w * 4];
scalar::rgbf16_to_rgba_row(&input, &mut out_scalar, w);
scalar::rgbf16_to_rgba_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf16_to_rgba_row(&input, &mut out_neon, w);
rgbf16_to_rgba_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -173,9 +173,9 @@ fn neon_rgbf16_to_rgb_u16_matches_scalar() {
let input = pseudo_random_rgbf16(w);
let mut out_scalar = std::vec![0u16; w * 3];
let mut out_neon = std::vec![0u16; w * 3];
scalar::rgbf16_to_rgb_u16_row(&input, &mut out_scalar, w);
scalar::rgbf16_to_rgb_u16_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf16_to_rgb_u16_row(&input, &mut out_neon, w);
rgbf16_to_rgb_u16_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -194,9 +194,9 @@ fn neon_rgbf16_to_rgba_u16_matches_scalar() {
let input = pseudo_random_rgbf16(w);
let mut out_scalar = std::vec![0u16; w * 4];
let mut out_neon = std::vec![0u16; w * 4];
scalar::rgbf16_to_rgba_u16_row(&input, &mut out_scalar, w);
scalar::rgbf16_to_rgba_u16_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf16_to_rgba_u16_row(&input, &mut out_neon, w);
rgbf16_to_rgba_u16_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -215,9 +215,9 @@ fn neon_rgbf16_to_rgb_f32_matches_scalar() {
let input = pseudo_random_rgbf16(w);
let mut out_scalar = std::vec![0.0f32; w * 3];
let mut out_neon = std::vec![0.0f32; w * 3];
scalar::rgbf16_to_rgb_f32_row(&input, &mut out_scalar, w);
scalar::rgbf16_to_rgb_f32_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf16_to_rgb_f32_row(&input, &mut out_neon, w);
rgbf16_to_rgb_f32_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
}
Expand All @@ -233,12 +233,250 @@ fn neon_rgbf16_to_rgb_f16_matches_scalar() {
let input = pseudo_random_rgbf16(w);
let mut out_scalar = std::vec![half::f16::ZERO; w * 3];
let mut out_neon = std::vec![half::f16::ZERO; w * 3];
scalar::rgbf16_to_rgb_f16_row(&input, &mut out_scalar, w);
scalar::rgbf16_to_rgb_f16_row::<false>(&input, &mut out_scalar, w);
unsafe {
rgbf16_to_rgb_f16_row(&input, &mut out_neon, w);
rgbf16_to_rgb_f16_row::<false>(&input, &mut out_neon, w);
}
assert_eq!(out_scalar, out_neon, "width {w}");
// Lossless: output should equal input bit-exact.
assert_eq!(out_neon, input[..w * 3], "lossless width {w}");
}
}

// ---- BE parity tests — Rgbf32 -----------------------------------------------
//
// For each kernel: byte-swap the LE f32 inputs into a BE buffer, call the
// kernel with `BE=true`, and assert the output matches the LE run (`BE=false`).

/// Returns a copy of `le` in which every `f32` has had the four bytes of its
/// bit pattern reversed, i.e. the opposite-endian encoding of each element.
fn be_rgbf32(le: &[f32]) -> std::vec::Vec<f32> {
    let mut swapped = std::vec::Vec::with_capacity(le.len());
    for &value in le {
        // Work on the raw bits so the swap is exact for every pattern.
        let bits = value.to_bits().swap_bytes();
        swapped.push(f32::from_bits(bits));
    }
    swapped
}

/// Returns a copy of `le` in which every `half::f16` has had the two bytes of
/// its bit pattern exchanged, i.e. the opposite-endian encoding of each element.
fn be_rgbf16(le: &[half::f16]) -> std::vec::Vec<half::f16> {
    let mut swapped = std::vec::Vec::with_capacity(le.len());
    for &value in le {
        // Work on the raw bits so the swap is exact for every pattern.
        let bits = value.to_bits().swap_bytes();
        swapped.push(half::f16::from_bits(bits));
    }
    swapped
}

/// `rgbf32_to_rgb_row::<true>` fed byte-swapped input must produce the same
/// `u8` RGB output as `rgbf32_to_rgb_row::<false>` fed the original input.
#[test]
#[cfg_attr(miri, ignore = "NEON SIMD intrinsics unsupported by Miri")]
fn neon_rgbf32_to_rgb_be_matches_le() {
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf32(w);
        let swapped = be_rgbf32(&native);
        // Three output bytes per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![0u8; out_len];
        let mut from_be = std::vec![0u8; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably NEON availability plus buffers sized for `w` pixels.
        unsafe {
            rgbf32_to_rgb_row::<false>(&native, &mut from_le, w);
            rgbf32_to_rgb_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf32_to_rgb BE parity width {w}");
    }
}

/// `rgbf32_to_rgba_row::<true>` fed byte-swapped input must produce the same
/// `u8` RGBA output as `rgbf32_to_rgba_row::<false>` fed the original input.
#[test]
#[cfg_attr(miri, ignore = "NEON SIMD intrinsics unsupported by Miri")]
fn neon_rgbf32_to_rgba_be_matches_le() {
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf32(w);
        let swapped = be_rgbf32(&native);
        // Four output bytes per pixel.
        let out_len = w * 4;
        let mut from_le = std::vec![0u8; out_len];
        let mut from_be = std::vec![0u8; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably NEON availability plus buffers sized for `w` pixels.
        unsafe {
            rgbf32_to_rgba_row::<false>(&native, &mut from_le, w);
            rgbf32_to_rgba_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf32_to_rgba BE parity width {w}");
    }
}

/// `rgbf32_to_rgb_u16_row::<true>` fed byte-swapped input must produce the
/// same `u16` RGB output as `rgbf32_to_rgb_u16_row::<false>` fed the original.
#[test]
#[cfg_attr(miri, ignore = "NEON SIMD intrinsics unsupported by Miri")]
fn neon_rgbf32_to_rgb_u16_be_matches_le() {
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf32(w);
        let swapped = be_rgbf32(&native);
        // Three output samples per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![0u16; out_len];
        let mut from_be = std::vec![0u16; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably NEON availability plus buffers sized for `w` pixels.
        unsafe {
            rgbf32_to_rgb_u16_row::<false>(&native, &mut from_le, w);
            rgbf32_to_rgb_u16_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf32_to_rgb_u16 BE parity width {w}");
    }
}

/// `rgbf32_to_rgba_u16_row::<true>` fed byte-swapped input must produce the
/// same `u16` RGBA output as `rgbf32_to_rgba_u16_row::<false>` fed the original.
#[test]
#[cfg_attr(miri, ignore = "NEON SIMD intrinsics unsupported by Miri")]
fn neon_rgbf32_to_rgba_u16_be_matches_le() {
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf32(w);
        let swapped = be_rgbf32(&native);
        // Four output samples per pixel.
        let out_len = w * 4;
        let mut from_le = std::vec![0u16; out_len];
        let mut from_be = std::vec![0u16; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably NEON availability plus buffers sized for `w` pixels.
        unsafe {
            rgbf32_to_rgba_u16_row::<false>(&native, &mut from_le, w);
            rgbf32_to_rgba_u16_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(
            from_le, from_be,
            "NEON rgbf32_to_rgba_u16 BE parity width {w}"
        );
    }
}

/// `rgbf32_to_rgb_f32_row::<true>` fed byte-swapped input must produce the
/// same `f32` RGB output as `rgbf32_to_rgb_f32_row::<false>` fed the original.
#[test]
#[cfg_attr(miri, ignore = "NEON SIMD intrinsics unsupported by Miri")]
fn neon_rgbf32_to_rgb_f32_be_is_byteswap() {
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf32(w);
        let swapped = be_rgbf32(&native);
        // Three output floats per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![0.0f32; out_len];
        let mut from_be = std::vec![0.0f32; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably NEON availability plus buffers sized for `w` pixels.
        unsafe {
            rgbf32_to_rgb_f32_row::<false>(&native, &mut from_le, w);
            rgbf32_to_rgb_f32_row::<true>(&swapped, &mut from_be, w);
        }
        // The BE path is expected to undo the per-element byte swap, so its
        // host-native output should equal the LE run's output.
        assert_eq!(from_le, from_be, "NEON rgbf32_to_rgb_f32 BE parity width {w}");
    }
}

// ---- BE parity tests — Rgbf16 -----------------------------------------------

/// `rgbf16_to_rgb_row::<true>` fed byte-swapped input must produce the same
/// `u8` RGB output as `rgbf16_to_rgb_row::<false>` fed the original input.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn neon_rgbf16_to_rgb_be_matches_le() {
    // Nothing to check on CPUs that do not report the `fp16` feature.
    let has_fp16 = std::arch::is_aarch64_feature_detected!("fp16");
    if !has_fp16 {
        return;
    }
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf16(w);
        let swapped = be_rgbf16(&native);
        // Three output bytes per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![0u8; out_len];
        let mut from_be = std::vec![0u8; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably the `fp16` check above plus buffers sized for `w` pixels.
        unsafe {
            rgbf16_to_rgb_row::<false>(&native, &mut from_le, w);
            rgbf16_to_rgb_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf16_to_rgb BE parity width {w}");
    }
}

/// `rgbf16_to_rgba_row::<true>` fed byte-swapped input must produce the same
/// `u8` RGBA output as `rgbf16_to_rgba_row::<false>` fed the original input.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn neon_rgbf16_to_rgba_be_matches_le() {
    // Nothing to check on CPUs that do not report the `fp16` feature.
    let has_fp16 = std::arch::is_aarch64_feature_detected!("fp16");
    if !has_fp16 {
        return;
    }
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf16(w);
        let swapped = be_rgbf16(&native);
        // Four output bytes per pixel.
        let out_len = w * 4;
        let mut from_le = std::vec![0u8; out_len];
        let mut from_be = std::vec![0u8; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably the `fp16` check above plus buffers sized for `w` pixels.
        unsafe {
            rgbf16_to_rgba_row::<false>(&native, &mut from_le, w);
            rgbf16_to_rgba_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf16_to_rgba BE parity width {w}");
    }
}

/// `rgbf16_to_rgb_u16_row::<true>` fed byte-swapped input must produce the
/// same `u16` RGB output as `rgbf16_to_rgb_u16_row::<false>` fed the original.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn neon_rgbf16_to_rgb_u16_be_matches_le() {
    // Nothing to check on CPUs that do not report the `fp16` feature.
    let has_fp16 = std::arch::is_aarch64_feature_detected!("fp16");
    if !has_fp16 {
        return;
    }
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf16(w);
        let swapped = be_rgbf16(&native);
        // Three output samples per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![0u16; out_len];
        let mut from_be = std::vec![0u16; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably the `fp16` check above plus buffers sized for `w` pixels.
        unsafe {
            rgbf16_to_rgb_u16_row::<false>(&native, &mut from_le, w);
            rgbf16_to_rgb_u16_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf16_to_rgb_u16 BE parity width {w}");
    }
}

/// `rgbf16_to_rgba_u16_row::<true>` fed byte-swapped input must produce the
/// same `u16` RGBA output as `rgbf16_to_rgba_u16_row::<false>` fed the original.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn neon_rgbf16_to_rgba_u16_be_matches_le() {
    // Nothing to check on CPUs that do not report the `fp16` feature.
    let has_fp16 = std::arch::is_aarch64_feature_detected!("fp16");
    if !has_fp16 {
        return;
    }
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf16(w);
        let swapped = be_rgbf16(&native);
        // Four output samples per pixel.
        let out_len = w * 4;
        let mut from_le = std::vec![0u16; out_len];
        let mut from_be = std::vec![0u16; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably the `fp16` check above plus buffers sized for `w` pixels.
        unsafe {
            rgbf16_to_rgba_u16_row::<false>(&native, &mut from_le, w);
            rgbf16_to_rgba_u16_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(
            from_le, from_be,
            "NEON rgbf16_to_rgba_u16 BE parity width {w}"
        );
    }
}

/// `rgbf16_to_rgb_f32_row::<true>` fed byte-swapped input must produce the
/// same `f32` RGB output as `rgbf16_to_rgb_f32_row::<false>` fed the original.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn neon_rgbf16_to_rgb_f32_be_matches_le() {
    // Nothing to check on CPUs that do not report the `fp16` feature.
    let has_fp16 = std::arch::is_aarch64_feature_detected!("fp16");
    if !has_fp16 {
        return;
    }
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf16(w);
        let swapped = be_rgbf16(&native);
        // Three output floats per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![0.0f32; out_len];
        let mut from_be = std::vec![0.0f32; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably the `fp16` check above plus buffers sized for `w` pixels.
        unsafe {
            rgbf16_to_rgb_f32_row::<false>(&native, &mut from_le, w);
            rgbf16_to_rgb_f32_row::<true>(&swapped, &mut from_be, w);
        }
        assert_eq!(from_le, from_be, "NEON rgbf16_to_rgb_f32 BE parity width {w}");
    }
}

/// `rgbf16_to_rgb_f16_row::<true>` fed byte-swapped input must produce the
/// same `f16` RGB output as `rgbf16_to_rgb_f16_row::<false>` fed the original.
///
/// Unlike the other f16 parity tests, this one runs without an `fp16`
/// feature check.
#[test]
#[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
)]
fn neon_rgbf16_to_rgb_f16_be_is_byteswap() {
    // A mix of tiny, odd and large row widths.
    const WIDTHS: [usize; 7] = [1, 4, 7, 16, 33, 1920, 1921];
    for w in WIDTHS {
        let native = pseudo_random_rgbf16(w);
        let swapped = be_rgbf16(&native);
        // Three output halves per pixel.
        let out_len = w * 3;
        let mut from_le = std::vec![half::f16::ZERO; out_len];
        let mut from_be = std::vec![half::f16::ZERO; out_len];
        // NOTE(review): the kernels' safety contract is not visible here;
        // presumably NEON availability plus buffers sized for `w` pixels.
        unsafe {
            rgbf16_to_rgb_f16_row::<false>(&native, &mut from_le, w);
            rgbf16_to_rgb_f16_row::<true>(&swapped, &mut from_be, w);
        }
        // The BE path is expected to undo the per-element byte swap, so its
        // output should equal the LE run's output bit-for-bit.
        assert_eq!(from_le, from_be, "NEON rgbf16_to_rgb_f16 BE parity width {w}");
    }
}
Loading
Loading