Skip to content

Commit cb23fed

Browse files
committed
Add speed_vibrance_algorithm.cpp
1 parent 9e9d395 commit cb23fed

File tree

1 file changed

+149
-10
lines changed

1 file changed

+149
-10
lines changed

speed_vibrance_algorithm.cpp

Lines changed: 149 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <stdio.h>
2+
#include <omp.h>
23
#include <opencv2/opencv.hpp>
34

45
using namespace std;
@@ -61,7 +62,7 @@ void GetGrayIntegralImage_SSE(unsigned char *Src, int *Integral, int Width, int
6162
void BoxBlur(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Radius) {
6263
int *Integral = (int *)malloc((Width + 1) * (Height + 1) * sizeof(int));
6364
GetGrayIntegralImage(Src, Integral, Width, Height, Stride);
64-
#pragma parallel for num_threads(4)
65+
#pragma parallel for num_threads(4)
6566
for (int Y = 0; Y < Height; Y++) {
6667
int Y1 = max(Y - Radius, 0);
6768
int Y2 = min(Y + Radius + 1, Height - 1);
@@ -143,6 +144,39 @@ void VibranceAlgorithm_INT(unsigned char *Src, unsigned char *Dest, int Width, i
143144
}
144145
}
145146

147+
// Integer (fixed-point) vibrance adjustment, parallelised across image rows with OpenMP.
//
// Each pixel is read as three consecutive bytes in Blue, Green, Red order. For every
// pixel the channels that are not the maximum are moved relative to the maximum by an
// amount proportional to (Max - Avg), where Avg is the green-weighted mean (B + 2G + R) / 4.
//
// Src        - input pixels; row Y starts at Src + Y * Stride.
// Dest       - output pixels, same layout as Src; only the first Width * 3 bytes of
//              each row are written, any remaining bytes of the row are left untouched.
// Width      - pixels per row.
// Height     - number of rows.
// Stride     - distance in bytes between the starts of consecutive rows.
// Adjustment - vibrance amount; scaled by -1.28 and truncated to an integer factor.
void VibranceAlgorithm_INT_OpenMP(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Adjustment) {
    // Explicit truncating conversion, matching the SSE variants in this file.
    const int VibranceAdjustment = (int)(-1.28 * Adjustment);

    // Parallelise over rows rather than over the pixels of a single row: the thread
    // team is forked/joined once per call instead of once per row, and every thread
    // works on whole, independent scanlines. The row pointers are declared inside the
    // loop so each thread gets its own copies.
#pragma omp parallel for num_threads(4)
    for (int Y = 0; Y < Height; Y++) {
        const unsigned char *LinePS = Src + Y * Stride;
        unsigned char *LinePD = Dest + Y * Stride;

        for (int X = 0; X < Width; X++) {
            int Blue = LinePS[X * 3 + 0];
            int Green = LinePS[X * 3 + 1];
            int Red = LinePS[X * 3 + 2];

            // Green is counted twice so the sum can be divided by 4 with a shift.
            const int Avg = (Blue + Green + Green + Red) >> 2;

            int Max = (Blue > Green) ? Blue : Green;
            if (Red > Max) Max = Red;

            // Per-pixel strength; the >> 14 below scales the product back down.
            const int AmtVal = (Max - Avg) * VibranceAdjustment;

            // The maximum channel itself is never modified.
            if (Blue != Max) Blue += (((Max - Blue) * AmtVal) >> 14);
            if (Green != Max) Green += (((Max - Green) * AmtVal) >> 14);
            if (Red != Max) Red += (((Max - Red) * AmtVal) >> 14);

            // Clamp to the valid 8-bit range before storing.
            if (Red < 0) Red = 0;
            else if (Red > 255) Red = 255;
            if (Green < 0) Green = 0;
            else if (Green > 255) Green = 255;
            if (Blue < 0) Blue = 0;
            else if (Blue > 255) Blue = 255;

            LinePD[X * 3 + 0] = (unsigned char)Blue;
            LinePD[X * 3 + 1] = (unsigned char)Green;
            LinePD[X * 3 + 2] = (unsigned char)Red;
        }
    }
}
179+
146180
void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Adjustment) {
147181
int VibranceAdjustment = (int)(-1.28 * Adjustment);
148182
__m128i Adjustment128 = _mm_setr_epi16(VibranceAdjustment, VibranceAdjustment, VibranceAdjustment, VibranceAdjustment,
@@ -173,7 +207,7 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
173207
Red8 = _mm_or_si128(Red8, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15)));
174208

175209
Max8 = _mm_max_epu8(_mm_max_epu8(Blue8, Green8), Red8);
176-
210+
177211
BL16 = _mm_unpacklo_epi8(Blue8, Zero);
178212
BH16 = _mm_unpackhi_epi8(Blue8, Zero);
179213
GL16 = _mm_unpacklo_epi8(Green8, Zero);
@@ -195,7 +229,7 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
195229
BH16 = _mm_adds_epi16(BH16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxH16, BH16), 2), AmtVal));
196230
GH16 = _mm_adds_epi16(GH16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxH16, GH16), 2), AmtVal));
197231
RH16 = _mm_adds_epi16(RH16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxH16, RH16), 2), AmtVal));
198-
232+
199233
Blue8 = _mm_packus_epi16(BL16, BH16);
200234
Green8 = _mm_packus_epi16(GL16, GH16);
201235
Red8 = _mm_packus_epi16(RL16, RH16);
@@ -211,12 +245,117 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
211245
Dest3 = _mm_shuffle_epi8(Blue8, _mm_setr_epi8(-1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1));
212246
Dest3 = _mm_or_si128(Dest3, _mm_shuffle_epi8(Green8, _mm_setr_epi8(-1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1)));
213247
Dest3 = _mm_or_si128(Dest3, _mm_shuffle_epi8(Red8, _mm_setr_epi8(10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15)));
214-
248+
249+
_mm_storeu_si128((__m128i *)(LinePD + 0), Dest1);
250+
_mm_storeu_si128((__m128i *)(LinePD + 16), Dest2);
251+
_mm_storeu_si128((__m128i *)(LinePD + 32), Dest3);
252+
}
253+
for (; X < Width; X++) {
254+
int Blue, Green, Red, Max;
255+
Blue = LinePS[0], Green = LinePS[1], Red = LinePS[2];
256+
int Avg = (Blue + Green + Green + Red) >> 2;
257+
if (Blue > Green)
258+
Max = Blue;
259+
else
260+
Max = Green;
261+
if (Red > Max)
262+
Max = Red;
263+
int AmtVal = (Max - Avg) * VibranceAdjustment;
264+
if (Blue != Max) Blue += (((Max - Blue) * AmtVal) >> 14);
265+
if (Green != Max) Green += (((Max - Green) * AmtVal) >> 14);
266+
if (Red != Max) Red += (((Max - Red) * AmtVal) >> 14);
267+
if (Red < 0) Red = 0;
268+
else if (Red > 255) Red = 255;
269+
if (Green < 0) Green = 0;
270+
else if (Green > 255) Green = 255;
271+
if (Blue < 0) Blue = 0;
272+
else if (Blue > 255) Blue = 255;
273+
LinePD[0] = Blue;
274+
LinePD[1] = Green;
275+
LinePD[2] = Red;
276+
LinePS += 3;
277+
LinePD += 3;
278+
}
279+
}
280+
}
281+
282+
void VibranceAlgorithm_SSE_OpenMP(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Adjustment) {
283+
int VibranceAdjustment = (int)(-1.28 * Adjustment);
284+
__m128i Adjustment128 = _mm_setr_epi16(VibranceAdjustment, VibranceAdjustment, VibranceAdjustment, VibranceAdjustment,
285+
VibranceAdjustment, VibranceAdjustment, VibranceAdjustment, VibranceAdjustment);
286+
int X;
287+
for (int Y = 0; Y < Height; Y++) {
288+
unsigned char *LinePS = Src + Y * Stride;
289+
unsigned char *LinePD = Dest + Y * Stride;
290+
X = 0;
291+
__m128i Src1, Src2, Src3, Dest1, Dest2, Dest3, Blue8, Green8, Red8, Max8;
292+
__m128i BL16, BH16, GL16, GH16, RL16, RH16, MaxL16, MaxH16, AvgL16, AvgH16, AmtVal;
293+
__m128i Zero = _mm_setzero_si128();
294+
295+
for (X = 0; X < Width - 16; X += 16) {
296+
Src1 = _mm_loadu_si128((__m128i *)(LinePS + 0)); //B1,G1,R1,B2,G2,R2,B3,G3,R3,B4,G4,R4,B5,G5,R5,B6
297+
Src2 = _mm_loadu_si128((__m128i *)(LinePS + 16));//G6,R6,B7,G7,R7,B8,G8,R8,B9,G9,R9,B10,G10,R10,B11,G11
298+
Src3 = _mm_loadu_si128((__m128i *)(LinePS + 32));//R11,B12,G12,R12,B13,G13,R13,B14,G14,R14,B15,G15,R15,B16,G16,R16
299+
300+
Blue8 = _mm_shuffle_epi8(Src1, _mm_setr_epi8(0, 3, 6, 9, 12, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
301+
Blue8 = _mm_or_si128(Blue8, _mm_shuffle_epi8(Src2, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, 2, 5, 8, 11, 14, -1, -1, -1, -1, -1)));
302+
Blue8 = _mm_or_si128(Blue8, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 4, 7, 10, 13)));
303+
304+
Green8 = _mm_shuffle_epi8(Src1, _mm_setr_epi8(1, 4, 7, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
305+
Green8 = _mm_or_si128(Green8, _mm_shuffle_epi8(Src2, _mm_setr_epi8(-1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15, -1, -1, -1, -1, -1)));
306+
Green8 = _mm_or_si128(Green8, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, 5, 8, 11, 14)));
307+
308+
Red8 = _mm_shuffle_epi8(Src1, _mm_setr_epi8(2, 5, 8, 11, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
309+
Red8 = _mm_or_si128(Red8, _mm_shuffle_epi8(Src2, _mm_setr_epi8(-1, -1, -1, -1, -1, 1, 4, 7, 10, 13, -1, -1, -1, -1, -1, -1)));
310+
Red8 = _mm_or_si128(Red8, _mm_shuffle_epi8(Src3, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15)));
311+
312+
Max8 = _mm_max_epu8(_mm_max_epu8(Blue8, Green8), Red8);
313+
314+
BL16 = _mm_unpacklo_epi8(Blue8, Zero);
315+
BH16 = _mm_unpackhi_epi8(Blue8, Zero);
316+
GL16 = _mm_unpacklo_epi8(Green8, Zero);
317+
GH16 = _mm_unpackhi_epi8(Green8, Zero);
318+
RL16 = _mm_unpacklo_epi8(Red8, Zero);
319+
RH16 = _mm_unpackhi_epi8(Red8, Zero);
320+
MaxL16 = _mm_unpacklo_epi8(Max8, Zero);
321+
MaxH16 = _mm_unpackhi_epi8(Max8, Zero);
322+
323+
AvgL16 = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(BL16, RL16), _mm_slli_epi16(GL16, 1)), 2);
324+
AvgH16 = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(BH16, RH16), _mm_slli_epi16(GH16, 1)), 2);
325+
326+
AmtVal = _mm_mullo_epi16(_mm_sub_epi16(MaxL16, AvgL16), Adjustment128);
327+
BL16 = _mm_adds_epi16(BL16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxL16, BL16), 2), AmtVal));
328+
GL16 = _mm_adds_epi16(GL16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxL16, GL16), 2), AmtVal));
329+
RL16 = _mm_adds_epi16(RL16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxL16, RL16), 2), AmtVal));
330+
331+
AmtVal = _mm_mullo_epi16(_mm_sub_epi16(MaxH16, AvgH16), Adjustment128);
332+
BH16 = _mm_adds_epi16(BH16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxH16, BH16), 2), AmtVal));
333+
GH16 = _mm_adds_epi16(GH16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxH16, GH16), 2), AmtVal));
334+
RH16 = _mm_adds_epi16(RH16, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(MaxH16, RH16), 2), AmtVal));
335+
336+
Blue8 = _mm_packus_epi16(BL16, BH16);
337+
Green8 = _mm_packus_epi16(GL16, GH16);
338+
Red8 = _mm_packus_epi16(RL16, RH16);
339+
340+
Dest1 = _mm_shuffle_epi8(Blue8, _mm_setr_epi8(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5));
341+
Dest1 = _mm_or_si128(Dest1, _mm_shuffle_epi8(Green8, _mm_setr_epi8(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1)));
342+
Dest1 = _mm_or_si128(Dest1, _mm_shuffle_epi8(Red8, _mm_setr_epi8(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1)));
343+
344+
Dest2 = _mm_shuffle_epi8(Blue8, _mm_setr_epi8(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1));
345+
Dest2 = _mm_or_si128(Dest2, _mm_shuffle_epi8(Green8, _mm_setr_epi8(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10)));
346+
Dest2 = _mm_or_si128(Dest2, _mm_shuffle_epi8(Red8, _mm_setr_epi8(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1)));
347+
348+
Dest3 = _mm_shuffle_epi8(Blue8, _mm_setr_epi8(-1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1));
349+
Dest3 = _mm_or_si128(Dest3, _mm_shuffle_epi8(Green8, _mm_setr_epi8(-1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1)));
350+
Dest3 = _mm_or_si128(Dest3, _mm_shuffle_epi8(Red8, _mm_setr_epi8(10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15)));
351+
215352
_mm_storeu_si128((__m128i *)(LinePD + 0), Dest1);
216353
_mm_storeu_si128((__m128i *)(LinePD + 16), Dest2);
217354
_mm_storeu_si128((__m128i *)(LinePD + 32), Dest3);
355+
LinePS = LinePS + 48;
356+
LinePD = LinePD + 48;
218357
}
219-
for (; X < Width; X ++) {
358+
for (; X < Width; X++) {
220359
int Blue, Green, Red, Max;
221360
Blue = LinePS[0], Green = LinePS[1], Red = LinePS[2];
222361
int Avg = (Blue + Green + Green + Red) >> 2;
@@ -246,7 +385,7 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
246385
}
247386

248387
int main() {
249-
Mat src = imread("F:\\1.jpg");
388+
Mat src = imread("F:\\car.jpg");
250389
int Height = src.rows;
251390
int Width = src.cols;
252391
unsigned char *Src = src.data;
@@ -255,12 +394,12 @@ int main() {
255394
int Radius = 11;
256395
int Adjustment = 50;
257396
int64 st = cvGetTickCount();
258-
for (int i = 0; i <50; i++) {
259-
VibranceAlgorithm_SSE(Src, Dest, Width, Height, Stride, Adjustment);
397+
for (int i = 0; i <100; i++) {
398+
VibranceAlgorithm_SSE_OpenMP(Src, Dest, Width, Height, Stride, Adjustment);
260399
}
261-
double duration = (cv::getTickCount() - st) / cv::getTickFrequency() * 20;
400+
double duration = (cv::getTickCount() - st) / cv::getTickFrequency() * 10;
262401
printf("%.5f\n", duration);
263-
VibranceAlgorithm_SSE(Src, Dest, Width, Height, Stride, Adjustment);
402+
VibranceAlgorithm_SSE_OpenMP(Src, Dest, Width, Height, Stride, Adjustment);
264403
Mat dst(Height, Width, CV_8UC3, Dest);
265404
imshow("origin", src);
266405
imshow("result", dst);

0 commit comments

Comments
 (0)