Added full support of optimisation with help of SSE4.1 for convolution of `U8x3` images.
Cykooz · Oct 9, 2022 · 29f9f1f · 29f9f1f
1 parent e3b64bf
commit 29f9f1f
Show file tree

Hide file tree

Showing 11 changed files with 312 additions and 19 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,7 +12,7 @@
     - associated method `component_count_of_values` renamed into `count_of_component_values`.
   - All pixel types (`U8`, `U8x2`, ...) replaced by type aliases for new 
     generic structure `Pixel`. Use method `new()` to create 
-    instance of one pixel. 
+    instance of one pixel.
 - Added module `color` for working with colorspace and gamma:
   - Added mapper `SRGB_TO_RGB`. It is lazy static instance of `PixelComponentMapper` to 
     convert images from SRGB colorspace to linear RGB and back.
@@ -24,6 +24,7 @@
   components in whole image.
 - Added generic trait `IntoPixelComponent<Out: PixelComponent>`.
 - Added generic structure `Pixel` for create all types of pixels.
+- Added full support of optimisation with help of `SSE4.1` for convolution of `U8x3` images.
 
 ### Example application
 

diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@ Supported pixel formats and available optimisations:
 |:------:|:--------------------------------------------------------------|:-----------:|:-------:|:----:|
 |   U8   | One `u8` component per pixel (e.g. L)                         |      +      | partial |  +   |
 |  U8x2  | Two `u8` components per pixel (e.g. LA)                       |      +      |    +    |  +   |
-|  U8x3  | Three `u8` components per pixel (e.g. RGB)                    |      +      | partial |  +   |
+|  U8x3  | Three `u8` components per pixel (e.g. RGB)                    |      +      |    +    |  +   |
 |  U8x4  | Four `u8` components per pixel (e.g. RGBA, RGBx, CMYK)        |      +      |    +    |  +   |
 |  U16   | One `u16` components per pixel (e.g. L16)                     |      +      |    +    |  +   |
 | U16x2  | Two `u16` components per pixel (e.g. LA16)                    |      +      |    +    |  +   |

diff --git a/benches/bench_compare_l.rs b/benches/bench_compare_l.rs
@@ -1,4 +1,6 @@
 use std::num::NonZeroU32;
+use std::thread::sleep;
+use std::time::Duration;
 
 use glassbench::*;
 use image::imageops;
@@ -46,7 +48,7 @@ pub fn bench_downscale_l(bench: &mut Bench) {
             let filter = match alg_name {
                 "Nearest" => {
                     // resizer doesn't support "nearest" algorithm
-                    task.iter(|| {});
+                    task.iter(|| sleep(Duration::new(0, 1)));
                     return;
                 }
                 "Bilinear" => resize::Type::Triangle,

diff --git a/benches/bench_compare_l16.rs b/benches/bench_compare_l16.rs
@@ -1,4 +1,6 @@
 use std::num::NonZeroU32;
+use std::thread::sleep;
+use std::time::Duration;
 
 use glassbench::*;
 use image::imageops;
@@ -46,7 +48,7 @@ pub fn bench_downscale_l16(bench: &mut Bench) {
             let filter = match alg_name {
                 "Nearest" => {
                     // resizer doesn't support "nearest" algorithm
-                    task.iter(|| {});
+                    task.iter(|| sleep(Duration::new(0, 1)));
                     return;
                 }
                 "Bilinear" => resize::Type::Triangle,

diff --git a/benches/bench_compare_rgb.rs b/benches/bench_compare_rgb.rs
@@ -1,4 +1,6 @@
 use std::num::NonZeroU32;
+use std::thread::sleep;
+use std::time::Duration;
 
 use glassbench::*;
 use image::imageops;
@@ -45,7 +47,7 @@ pub fn bench_downscale_rgb(bench: &mut Bench) {
             let filter = match alg_name {
                 "Nearest" => {
                     // resizer doesn't support "nearest" algorithm
-                    task.iter(|| {});
+                    task.iter(|| sleep(Duration::new(0, 1)));
                     return;
                 }
                 "Bilinear" => resize::Type::Triangle,

diff --git a/benches/bench_compare_rgb16.rs b/benches/bench_compare_rgb16.rs
@@ -1,4 +1,6 @@
 use std::num::NonZeroU32;
+use std::thread::sleep;
+use std::time::Duration;
 
 use glassbench::*;
 use image::imageops;
@@ -46,7 +48,7 @@ pub fn bench_downscale_rgb16(bench: &mut Bench) {
             let filter = match alg_name {
                 "Nearest" => {
                     // resizer doesn't support "nearest" algorithm
-                    task.iter(|| {});
+                    task.iter(|| sleep(Duration::new(0, 1)));
                     return;
                 }
                 "Bilinear" => resize::Type::Triangle,

diff --git a/benches/bench_compare_rgba.rs b/benches/bench_compare_rgba.rs
@@ -1,4 +1,6 @@
 use std::num::NonZeroU32;
+use std::thread::sleep;
+use std::time::Duration;
 
 use glassbench::*;
 use resize::px::RGBA;
@@ -27,7 +29,7 @@ pub fn bench_downscale_rgba(bench: &mut Bench) {
             let filter = match alg_name {
                 "Nearest" => {
                     // resizer doesn't support "nearest" algorithm
-                    task.iter(|| {});
+                    task.iter(|| sleep(Duration::new(0, 1)));
                     return;
                 }
                 "Bilinear" => resize::Type::Triangle,

diff --git a/benches/bench_compare_rgba16.rs b/benches/bench_compare_rgba16.rs
@@ -3,6 +3,8 @@ use resize::px::RGBA;
 use resize::Pixel::RGBA16P;
 use rgb::FromSlice;
 use std::num::NonZeroU32;
+use std::thread::sleep;
+use std::time::Duration;
 
 use fast_image_resize::pixels::U16x4;
 use fast_image_resize::{CpuExtensions, FilterType, Image, MulDiv, ResizeAlg, Resizer};
@@ -27,7 +29,7 @@ pub fn bench_downscale_rgba16(bench: &mut Bench) {
             let filter = match alg_name {
                 "Nearest" => {
                     // resizer doesn't support "nearest" algorithm
-                    task.iter(|| {});
+                    task.iter(|| sleep(Duration::new(0, 1)));
                     return;
                 }
                 "Bilinear" => resize::Type::Triangle,

diff --git a/benches/utils/mod.rs b/benches/utils/mod.rs
@@ -25,10 +25,10 @@ pub fn print_md_table(bench: &Bench) {
                 res_map.insert(crate_name.clone(), Vec::new());
             }
             if let Some(values) = res_map.get_mut(&crate_name) {
-                let s_value = format!("{:.2}", value);
-                if s_value == "0.00" {
+                if value < 0.10 {
                     values.push("-".to_string());
                 } else {
+                    let s_value = format!("{:.2}", value);
                     values.push(s_value);
                 }
             }

diff --git a/src/convolution/u8x3/mod.rs b/src/convolution/u8x3/mod.rs
@@ -23,9 +23,7 @@ impl Convolution for U8x3 {
             #[cfg(target_arch = "x86_64")]
             CpuExtensions::Avx2 => avx2::horiz_convolution(src_image, dst_image, offset, coeffs),
             #[cfg(target_arch = "x86_64")]
-            CpuExtensions::Sse4_1 => unsafe {
-                sse4::horiz_convolution(src_image, dst_image, offset, coeffs)
-            },
+            CpuExtensions::Sse4_1 => sse4::horiz_convolution(src_image, dst_image, offset, coeffs),
             _ => native::horiz_convolution(src_image, dst_image, offset, coeffs),
         }
     }