Skip to content

Commit

Permalink
Blur: Improve parallelization
Browse files: browse the repository at this point in the history
  • Loading branch information
ferdnyc committed Feb 5, 2020
1 parent 7868157 commit 0fc94cd
Showing 1 changed file with 85 additions and 50 deletions.
135 changes: 85 additions & 50 deletions src/effects/Blur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,21 @@ std::shared_ptr<Frame> Blur::GetFrame(std::shared_ptr<Frame> frame, int64_t fram
int w = frame_image->width();
int h = frame_image->height();

// Declare 2-column arrays for each color channel
// Declare a struct of arrays for each color channel
// Bundle of four byte-plane pointers, one per RGBA component.
// The struct itself only stores the pointers; it has no constructor or
// destructor, so whoever fills it in remains responsible for the buffers.
struct channels {
    unsigned char *red;    // red component plane
    unsigned char *green;  // green component plane
    unsigned char *blue;   // blue component plane
    unsigned char *alpha;  // alpha component plane
};

channels arrays_in {
channels arrays_a {
new unsigned char[w * h](),
new unsigned char[w * h](),
new unsigned char[w * h](),
new unsigned char[w * h]()
};
channels arrays_out {
channels arrays_b {
new unsigned char[w * h](),
new unsigned char[w * h](),
new unsigned char[w * h](),
Expand All @@ -105,75 +105,106 @@ std::shared_ptr<Frame> Blur::GetFrame(std::shared_ptr<Frame> frame, int64_t fram
for (int pixel = 0; pixel < w * h; ++pixel)
{
// Get the RGBA values from each pixel
arrays_in.red[pixel] = arrays_out.red[pixel] = pixels[pixel * 4];
arrays_in.green[pixel] = arrays_out.green[pixel] = pixels[pixel * 4 + 1];
arrays_in.blue[pixel] = arrays_out.blue[pixel] = pixels[pixel * 4 + 2];
arrays_in.alpha[pixel] = arrays_out.alpha[pixel] = pixels[pixel * 4 + 3];
arrays_a.red[pixel] = arrays_b.red[pixel] = pixels[pixel * 4];
arrays_a.green[pixel] = arrays_b.green[pixel] = pixels[pixel * 4 + 1];
arrays_a.blue[pixel] = arrays_b.blue[pixel] = pixels[pixel * 4 + 2];
arrays_a.alpha[pixel] = arrays_b.alpha[pixel] = pixels[pixel * 4 + 3];
}

// Initialize target struct pointers for boxBlur operations
channels *array_a = &arrays_in;
channels *array_b = &arrays_out;
channels *chans_a = &arrays_a;
channels *chans_b = &arrays_b;
channels *output = chans_a;

// Loop through each iteration
#pragma omp parallel for
for (int iteration = 0; iteration < iteration_value; ++iteration)
{
// HORIZONTAL BLUR (if any)
if (horizontal_radius_value > 0.0) {
// Apply horizontal blur to target RGBA channels
#pragma omp parallel
{
boxBlurH(array_a->red, array_b->red, w, h, horizontal_radius_value);
boxBlurH(array_a->green, array_b->green, w, h, horizontal_radius_value);
boxBlurH(array_a->blue, array_b->blue, w, h, horizontal_radius_value);
boxBlurH(array_a->alpha, array_b->alpha, w, h, horizontal_radius_value);
#pragma omp parallel num_threads(4) shared (chans_a, chans_b, output)
{
int id = omp_get_thread_num();
int flipped = 0;

unsigned char *in, *out;
switch(id) {
case 0: {
in = chans_a->red;
out = chans_b->red;
break;
}
case 1: {
in = chans_a->green;
out = chans_b->green;
break;
}
case 2: {
in = chans_a->blue;
out = chans_b->blue;
break;
}
case 3: {
in = chans_a->alpha;
out = chans_b->alpha;
break;
}
}

// Swap input and output arrays
channels *temp = array_a;
array_a = array_b;
array_b = temp;
}

// VERTICAL BLUR (if any)
if (vertical_radius_value > 0.0) {
// Apply vertical blur to target RGBA channels
#pragma omp parallel
{
boxBlurT(array_a->red, array_b->red, w, h, vertical_radius_value);
boxBlurT(array_a->green, array_b->green, w, h, vertical_radius_value);
boxBlurT(array_a->blue, array_b->blue, w, h, vertical_radius_value);
boxBlurT(array_a->alpha, array_b->alpha, w, h, vertical_radius_value);
// HORIZONTAL BLUR (if any)
if (horizontal_radius_value > 0.0) {
// Apply horizontal blur to target RGBA channels
boxBlurH(in, out, w, h, horizontal_radius_value);
auto *chan_temp = in;
in = out;
out = chan_temp;
// Record that we've flipped the arrays one time
++flipped;
}

// Swap input and output arrays
channels *temp = array_a;
array_a = array_b;
array_b = temp;
}
// VERTICAL BLUR (if any)
if (vertical_radius_value > 0.0) {
// Apply vertical blur to target RGBA channels
boxBlurT(in, out, w, h, vertical_radius_value);

// Record a second flip of the arrays
++flipped;
}

// Check whether we need to swap chans_a and chans_b
#pragma omp master
{
if (flipped == 1) {
// Swap input and output arrays
auto *temp = chans_a;
chans_a = chans_b;
chans_b = temp;
output = (output == chans_a ? chans_b : chans_a);
}
}
#pragma omp flush (chans_a, chans_b, output)
} // end parallel section
}

// Copy RGBA channels back to original image
#pragma omp parallel for
for (int pixel = 0; pixel < w * h; ++pixel)
{
// Combine channels
pixels[pixel * 4] = array_b->red[pixel];
pixels[pixel * 4 + 1] = array_b->green[pixel];
pixels[pixel * 4 + 2] = array_b->blue[pixel];
pixels[pixel * 4 + 3] = array_b->alpha[pixel];
pixels[pixel * 4] = output->red[pixel];
pixels[pixel * 4 + 1] = output->green[pixel];
pixels[pixel * 4 + 2] = output->blue[pixel];
pixels[pixel * 4 + 3] = output->alpha[pixel];
}

// Delete channel arrays
delete[] arrays_in.red;
delete[] arrays_in.green;
delete[] arrays_in.blue;
delete[] arrays_in.alpha;
delete[] arrays_a.red;
delete[] arrays_a.green;
delete[] arrays_a.blue;
delete[] arrays_a.alpha;

delete[] arrays_out.red;
delete[] arrays_out.green;
delete[] arrays_out.blue;
delete[] arrays_out.alpha;
delete[] arrays_b.red;
delete[] arrays_b.green;
delete[] arrays_b.blue;
delete[] arrays_b.alpha;

// return the modified frame
return frame;
Expand All @@ -182,6 +213,8 @@ std::shared_ptr<Frame> Blur::GetFrame(std::shared_ptr<Frame> frame, int64_t fram
// Credit: http://blog.ivank.net/fastest-gaussian-blur.html (MIT License)
void Blur::boxBlurH(unsigned char *scl, unsigned char *tcl, int w, int h, int r) {
float iarr = 1.0 / (r + r + 1);

#pragma omp parallel for shared (scl, tcl)
for (int i = 0; i < h; i++) {
int ti = i * w, li = ti, ri = ti + r;
int fv = scl[ti], lv = scl[ti + w - 1], val = (r + 1) * fv;
Expand All @@ -203,6 +236,8 @@ void Blur::boxBlurH(unsigned char *scl, unsigned char *tcl, int w, int h, int r)

void Blur::boxBlurT(unsigned char *scl, unsigned char *tcl, int w, int h, int r) {
float iarr = 1.0 / (r + r + 1);

#pragma omp parallel for shared (scl, tcl)
for (int i = 0; i < w; i++) {
int ti = i, li = ti, ri = ti + r * w;
int fv = scl[ti], lv = scl[ti + w * (h - 1)], val = (r + 1) * fv;
Expand Down

0 comments on commit 0fc94cd

Please sign in to comment.