Skip to content

Commit

Permalink
Added full support of optimisation with help of SSE4.1 for convolut…
Browse files Browse the repository at this point in the history
…ion of `U8x3` images.
  • Loading branch information
Cykooz committed Oct 9, 2022
1 parent e3b64bf commit 29f9f1f
Show file tree
Hide file tree
Showing 11 changed files with 312 additions and 19 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Expand Up @@ -12,7 +12,7 @@
- associated method `component_count_of_values` renamed into `count_of_component_values`.
- All pixel types (`U8`, `U8x2`, ...) replaced by type aliases for new
generic structure `Pixel`. Use method `new()` to create
instance of one pixel.
instance of one pixel.
- Added module `color` for working with colorspace and gamma:
- Added mapper `SRGB_TO_RGB`. It is a lazy static instance of `PixelComponentMapper` to
convert images from SRGB colorspace to linear RGB and back.
Expand All @@ -24,6 +24,7 @@
components in whole image.
- Added generic trait `IntoPixelComponent<Out: PixelComponent>`.
- Added generic structure `Pixel` for creating all types of pixels.
- Added full `SSE4.1` optimisation support for convolution of `U8x3` images.

### Example application

Expand Down
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -14,7 +14,7 @@ Supported pixel formats and available optimisations:
|:------:|:--------------------------------------------------------------|:-----------:|:-------:|:----:|
| U8 | One `u8` component per pixel (e.g. L) | + | partial | + |
| U8x2 | Two `u8` components per pixel (e.g. LA) | + | + | + |
| U8x3 | Three `u8` components per pixel (e.g. RGB) | + | partial | + |
| U8x3 | Three `u8` components per pixel (e.g. RGB) | + | + | + |
| U8x4 | Four `u8` components per pixel (e.g. RGBA, RGBx, CMYK) | + | + | + |
| U16 | One `u16` component per pixel (e.g. L16) | + | + | + |
| U16x2 | Two `u16` components per pixel (e.g. LA16) | + | + | + |
Expand Down
4 changes: 3 additions & 1 deletion benches/bench_compare_l.rs
@@ -1,4 +1,6 @@
use std::num::NonZeroU32;
use std::thread::sleep;
use std::time::Duration;

use glassbench::*;
use image::imageops;
Expand Down Expand Up @@ -46,7 +48,7 @@ pub fn bench_downscale_l(bench: &mut Bench) {
let filter = match alg_name {
"Nearest" => {
// resizer doesn't support "nearest" algorithm
task.iter(|| {});
task.iter(|| sleep(Duration::new(0, 1)));
return;
}
"Bilinear" => resize::Type::Triangle,
Expand Down
4 changes: 3 additions & 1 deletion benches/bench_compare_l16.rs
@@ -1,4 +1,6 @@
use std::num::NonZeroU32;
use std::thread::sleep;
use std::time::Duration;

use glassbench::*;
use image::imageops;
Expand Down Expand Up @@ -46,7 +48,7 @@ pub fn bench_downscale_l16(bench: &mut Bench) {
let filter = match alg_name {
"Nearest" => {
// resizer doesn't support "nearest" algorithm
task.iter(|| {});
task.iter(|| sleep(Duration::new(0, 1)));
return;
}
"Bilinear" => resize::Type::Triangle,
Expand Down
4 changes: 3 additions & 1 deletion benches/bench_compare_rgb.rs
@@ -1,4 +1,6 @@
use std::num::NonZeroU32;
use std::thread::sleep;
use std::time::Duration;

use glassbench::*;
use image::imageops;
Expand Down Expand Up @@ -45,7 +47,7 @@ pub fn bench_downscale_rgb(bench: &mut Bench) {
let filter = match alg_name {
"Nearest" => {
// resizer doesn't support "nearest" algorithm
task.iter(|| {});
task.iter(|| sleep(Duration::new(0, 1)));
return;
}
"Bilinear" => resize::Type::Triangle,
Expand Down
4 changes: 3 additions & 1 deletion benches/bench_compare_rgb16.rs
@@ -1,4 +1,6 @@
use std::num::NonZeroU32;
use std::thread::sleep;
use std::time::Duration;

use glassbench::*;
use image::imageops;
Expand Down Expand Up @@ -46,7 +48,7 @@ pub fn bench_downscale_rgb16(bench: &mut Bench) {
let filter = match alg_name {
"Nearest" => {
// resizer doesn't support "nearest" algorithm
task.iter(|| {});
task.iter(|| sleep(Duration::new(0, 1)));
return;
}
"Bilinear" => resize::Type::Triangle,
Expand Down
4 changes: 3 additions & 1 deletion benches/bench_compare_rgba.rs
@@ -1,4 +1,6 @@
use std::num::NonZeroU32;
use std::thread::sleep;
use std::time::Duration;

use glassbench::*;
use resize::px::RGBA;
Expand Down Expand Up @@ -27,7 +29,7 @@ pub fn bench_downscale_rgba(bench: &mut Bench) {
let filter = match alg_name {
"Nearest" => {
// resizer doesn't support "nearest" algorithm
task.iter(|| {});
task.iter(|| sleep(Duration::new(0, 1)));
return;
}
"Bilinear" => resize::Type::Triangle,
Expand Down
4 changes: 3 additions & 1 deletion benches/bench_compare_rgba16.rs
Expand Up @@ -3,6 +3,8 @@ use resize::px::RGBA;
use resize::Pixel::RGBA16P;
use rgb::FromSlice;
use std::num::NonZeroU32;
use std::thread::sleep;
use std::time::Duration;

use fast_image_resize::pixels::U16x4;
use fast_image_resize::{CpuExtensions, FilterType, Image, MulDiv, ResizeAlg, Resizer};
Expand All @@ -27,7 +29,7 @@ pub fn bench_downscale_rgba16(bench: &mut Bench) {
let filter = match alg_name {
"Nearest" => {
// resizer doesn't support "nearest" algorithm
task.iter(|| {});
task.iter(|| sleep(Duration::new(0, 1)));
return;
}
"Bilinear" => resize::Type::Triangle,
Expand Down
4 changes: 2 additions & 2 deletions benches/utils/mod.rs
Expand Up @@ -25,10 +25,10 @@ pub fn print_md_table(bench: &Bench) {
res_map.insert(crate_name.clone(), Vec::new());
}
if let Some(values) = res_map.get_mut(&crate_name) {
let s_value = format!("{:.2}", value);
if s_value == "0.00" {
if value < 0.10 {
values.push("-".to_string());
} else {
let s_value = format!("{:.2}", value);
values.push(s_value);
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/convolution/u8x3/mod.rs
Expand Up @@ -23,9 +23,7 @@ impl Convolution for U8x3 {
#[cfg(target_arch = "x86_64")]
CpuExtensions::Avx2 => avx2::horiz_convolution(src_image, dst_image, offset, coeffs),
#[cfg(target_arch = "x86_64")]
CpuExtensions::Sse4_1 => unsafe {
sse4::horiz_convolution(src_image, dst_image, offset, coeffs)
},
CpuExtensions::Sse4_1 => sse4::horiz_convolution(src_image, dst_image, offset, coeffs),
_ => native::horiz_convolution(src_image, dst_image, offset, coeffs),
}
}
Expand Down

0 comments on commit 29f9f1f

Please sign in to comment.