11#include < stdio.h>
2+ #include < omp.h>
23#include < opencv2/opencv.hpp>
34
45using namespace std ;
@@ -61,7 +62,7 @@ void GetGrayIntegralImage_SSE(unsigned char *Src, int *Integral, int Width, int
6162void BoxBlur (unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Radius) {
6263 int *Integral = (int *)malloc ((Width + 1 ) * (Height + 1 ) * sizeof (int ));
6364 GetGrayIntegralImage (Src, Integral, Width, Height, Stride);
64- #pragma parallel for num_threads(4)
65+ #pragma parallel for num_threads(4)
6566 for (int Y = 0 ; Y < Height; Y++) {
6667 int Y1 = max (Y - Radius, 0 );
6768 int Y2 = min (Y + Radius + 1 , Height - 1 );
@@ -143,6 +144,39 @@ void VibranceAlgorithm_INT(unsigned char *Src, unsigned char *Dest, int Width, i
143144 }
144145}
145146
// Integer-only vibrance adjustment for an image stored as 3 bytes per pixel,
// with the rows distributed across OpenMP threads.
//
// Src / Dest   : input and output pixel buffers. Row Y starts at byte offset
//                Y * Stride in both buffers; each pixel is read as three
//                consecutive bytes named Blue, Green, Red here.
// Width/Height : image size in pixels.
// Stride       : bytes between the starts of consecutive rows. Bytes past
//                Width * 3 in a row are neither read nor written.
// Adjustment   : strength of the effect; multiplied by -1.28 and truncated to
//                an integer factor before use.
//
// Every output pixel depends only on the matching input pixel, so rows can be
// processed independently.
void VibranceAlgorithm_INT_OpenMP(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Adjustment) {
    // Explicit cast: same truncation as the implicit conversion, and consistent
    // with the SSE variants of this routine.
    const int VibranceAdjustment = static_cast<int>(-1.28 * Adjustment);

    // Parallelise over rows, not over the pixels of one row. With the pragma on
    // the inner loop a parallel region is entered and joined once per scanline,
    // and that overhead is paid Height times for very little work each time.
    // The row pointers are declared inside the loop body, so each thread gets
    // its own copies.
#pragma omp parallel for num_threads(4)
    for (int Y = 0; Y < Height; Y++) {
        const unsigned char *LinePS = Src + Y * Stride;
        unsigned char *LinePD = Dest + Y * Stride;
        for (int X = 0; X < Width; X++) {
            int Blue = LinePS[X * 3 + 0];
            int Green = LinePS[X * 3 + 1];
            int Red = LinePS[X * 3 + 2];

            // Average with Green counted twice: (B + 2G + R) / 4.
            const int Avg = (Blue + Green + Green + Red) >> 2;

            int Max = (Blue > Green) ? Blue : Green;
            if (Red > Max)
                Max = Red;

            // Per-pixel amount: larger when the strongest channel is far from
            // the average, zero for grey pixels.
            const int AmtVal = (Max - Avg) * VibranceAdjustment;

            // Shift the non-maximum channels relative to the maximum one.
            // >> 14 is the fixed-point scale applied to the product.
            if (Blue != Max) Blue += (((Max - Blue) * AmtVal) >> 14);
            if (Green != Max) Green += (((Max - Green) * AmtVal) >> 14);
            if (Red != Max) Red += (((Max - Red) * AmtVal) >> 14);

            // Clamp to the valid byte range before storing.
            if (Red < 0) Red = 0;
            else if (Red > 255) Red = 255;
            if (Green < 0) Green = 0;
            else if (Green > 255) Green = 255;
            if (Blue < 0) Blue = 0;
            else if (Blue > 255) Blue = 255;

            LinePD[X * 3 + 0] = static_cast<unsigned char>(Blue);
            LinePD[X * 3 + 1] = static_cast<unsigned char>(Green);
            LinePD[X * 3 + 2] = static_cast<unsigned char>(Red);
        }
    }
}
179+
146180void VibranceAlgorithm_SSE (unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Adjustment) {
147181 int VibranceAdjustment = (int )(-1.28 * Adjustment);
148182 __m128i Adjustment128 = _mm_setr_epi16 (VibranceAdjustment, VibranceAdjustment, VibranceAdjustment, VibranceAdjustment,
@@ -173,7 +207,7 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
173207 Red8 = _mm_or_si128 (Red8, _mm_shuffle_epi8 (Src3, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 0 , 3 , 6 , 9 , 12 , 15 )));
174208
175209 Max8 = _mm_max_epu8 (_mm_max_epu8 (Blue8, Green8), Red8);
176-
210+
177211 BL16 = _mm_unpacklo_epi8 (Blue8, Zero);
178212 BH16 = _mm_unpackhi_epi8 (Blue8, Zero);
179213 GL16 = _mm_unpacklo_epi8 (Green8, Zero);
@@ -195,7 +229,7 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
195229 BH16 = _mm_adds_epi16 (BH16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxH16, BH16), 2 ), AmtVal));
196230 GH16 = _mm_adds_epi16 (GH16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxH16, GH16), 2 ), AmtVal));
197231 RH16 = _mm_adds_epi16 (RH16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxH16, RH16), 2 ), AmtVal));
198-
232+
199233 Blue8 = _mm_packus_epi16 (BL16, BH16);
200234 Green8 = _mm_packus_epi16 (GL16, GH16);
201235 Red8 = _mm_packus_epi16 (RL16, RH16);
@@ -211,12 +245,117 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
211245 Dest3 = _mm_shuffle_epi8 (Blue8, _mm_setr_epi8 (-1 , 11 , -1 , -1 , 12 , -1 , -1 , 13 , -1 , -1 , 14 , -1 , -1 , 15 , -1 , -1 ));
212246 Dest3 = _mm_or_si128 (Dest3, _mm_shuffle_epi8 (Green8, _mm_setr_epi8 (-1 , -1 , 11 , -1 , -1 , 12 , -1 , -1 , 13 , -1 , -1 , 14 , -1 , -1 , 15 , -1 )));
213247 Dest3 = _mm_or_si128 (Dest3, _mm_shuffle_epi8 (Red8, _mm_setr_epi8 (10 , -1 , -1 , 11 , -1 , -1 , 12 , -1 , -1 , 13 , -1 , -1 , 14 , -1 , -1 , 15 )));
214-
248+
249+ _mm_storeu_si128 ((__m128i *)(LinePD + 0 ), Dest1);
250+ _mm_storeu_si128 ((__m128i *)(LinePD + 16 ), Dest2);
251+ _mm_storeu_si128 ((__m128i *)(LinePD + 32 ), Dest3);
252+ }
253+ for (; X < Width; X++) {
254+ int Blue, Green, Red, Max;
255+ Blue = LinePS[0 ], Green = LinePS[1 ], Red = LinePS[2 ];
256+ int Avg = (Blue + Green + Green + Red) >> 2 ;
257+ if (Blue > Green)
258+ Max = Blue;
259+ else
260+ Max = Green;
261+ if (Red > Max)
262+ Max = Red;
263+ int AmtVal = (Max - Avg) * VibranceAdjustment;
264+ if (Blue != Max) Blue += (((Max - Blue) * AmtVal) >> 14 );
265+ if (Green != Max) Green += (((Max - Green) * AmtVal) >> 14 );
266+ if (Red != Max) Red += (((Max - Red) * AmtVal) >> 14 );
267+ if (Red < 0 ) Red = 0 ;
268+ else if (Red > 255 ) Red = 255 ;
269+ if (Green < 0 ) Green = 0 ;
270+ else if (Green > 255 ) Green = 255 ;
271+ if (Blue < 0 ) Blue = 0 ;
272+ else if (Blue > 255 ) Blue = 255 ;
273+ LinePD[0 ] = Blue;
274+ LinePD[1 ] = Green;
275+ LinePD[2 ] = Red;
276+ LinePS += 3 ;
277+ LinePD += 3 ;
278+ }
279+ }
280+ }
281+
282+ void VibranceAlgorithm_SSE_OpenMP (unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Adjustment) {
283+ int VibranceAdjustment = (int )(-1.28 * Adjustment);
284+ __m128i Adjustment128 = _mm_setr_epi16 (VibranceAdjustment, VibranceAdjustment, VibranceAdjustment, VibranceAdjustment,
285+ VibranceAdjustment, VibranceAdjustment, VibranceAdjustment, VibranceAdjustment);
286+ int X;
287+ for (int Y = 0 ; Y < Height; Y++) {
288+ unsigned char *LinePS = Src + Y * Stride;
289+ unsigned char *LinePD = Dest + Y * Stride;
290+ X = 0 ;
291+ __m128i Src1, Src2, Src3, Dest1, Dest2, Dest3, Blue8, Green8, Red8, Max8;
292+ __m128i BL16, BH16, GL16, GH16, RL16, RH16, MaxL16, MaxH16, AvgL16, AvgH16, AmtVal;
293+ __m128i Zero = _mm_setzero_si128 ();
294+
295+ for (X = 0 ; X < Width - 16 ; X += 16 ) {
296+ Src1 = _mm_loadu_si128 ((__m128i *)(LinePS + 0 )); // B1,G1,R1,B2,G2,R2,B3,G3,R3,B4,G4,R4,B5,G5,R5,B6
297+ Src2 = _mm_loadu_si128 ((__m128i *)(LinePS + 16 ));// G6,R6,B7,G7,R7,B8,G8,R8,B9,G9,R9,B10,G10,R10,B11,G11
298+ Src3 = _mm_loadu_si128 ((__m128i *)(LinePS + 32 ));// R11,B12,G12,R12,B13,G13,R13,B14,G14,R14,B15,G15,R15,B16,G16,R16
299+
300+ Blue8 = _mm_shuffle_epi8 (Src1, _mm_setr_epi8 (0 , 3 , 6 , 9 , 12 , 15 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ));
301+ Blue8 = _mm_or_si128 (Blue8, _mm_shuffle_epi8 (Src2, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , 2 , 5 , 8 , 11 , 14 , -1 , -1 , -1 , -1 , -1 )));
302+ Blue8 = _mm_or_si128 (Blue8, _mm_shuffle_epi8 (Src3, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 1 , 4 , 7 , 10 , 13 )));
303+
304+ Green8 = _mm_shuffle_epi8 (Src1, _mm_setr_epi8 (1 , 4 , 7 , 10 , 13 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ));
305+ Green8 = _mm_or_si128 (Green8, _mm_shuffle_epi8 (Src2, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , 0 , 3 , 6 , 9 , 12 , 15 , -1 , -1 , -1 , -1 , -1 )));
306+ Green8 = _mm_or_si128 (Green8, _mm_shuffle_epi8 (Src3, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 2 , 5 , 8 , 11 , 14 )));
307+
308+ Red8 = _mm_shuffle_epi8 (Src1, _mm_setr_epi8 (2 , 5 , 8 , 11 , 14 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ));
309+ Red8 = _mm_or_si128 (Red8, _mm_shuffle_epi8 (Src2, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , 1 , 4 , 7 , 10 , 13 , -1 , -1 , -1 , -1 , -1 , -1 )));
310+ Red8 = _mm_or_si128 (Red8, _mm_shuffle_epi8 (Src3, _mm_setr_epi8 (-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 0 , 3 , 6 , 9 , 12 , 15 )));
311+
312+ Max8 = _mm_max_epu8 (_mm_max_epu8 (Blue8, Green8), Red8);
313+
314+ BL16 = _mm_unpacklo_epi8 (Blue8, Zero);
315+ BH16 = _mm_unpackhi_epi8 (Blue8, Zero);
316+ GL16 = _mm_unpacklo_epi8 (Green8, Zero);
317+ GH16 = _mm_unpackhi_epi8 (Green8, Zero);
318+ RL16 = _mm_unpacklo_epi8 (Red8, Zero);
319+ RH16 = _mm_unpackhi_epi8 (Red8, Zero);
320+ MaxL16 = _mm_unpacklo_epi8 (Max8, Zero);
321+ MaxH16 = _mm_unpackhi_epi8 (Max8, Zero);
322+
323+ AvgL16 = _mm_srli_epi16 (_mm_add_epi16 (_mm_add_epi16 (BL16, RL16), _mm_slli_epi16 (GL16, 1 )), 2 );
324+ AvgH16 = _mm_srli_epi16 (_mm_add_epi16 (_mm_add_epi16 (BH16, RH16), _mm_slli_epi16 (GH16, 1 )), 2 );
325+
326+ AmtVal = _mm_mullo_epi16 (_mm_sub_epi16 (MaxL16, AvgL16), Adjustment128);
327+ BL16 = _mm_adds_epi16 (BL16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxL16, BL16), 2 ), AmtVal));
328+ GL16 = _mm_adds_epi16 (GL16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxL16, GL16), 2 ), AmtVal));
329+ RL16 = _mm_adds_epi16 (RL16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxL16, RL16), 2 ), AmtVal));
330+
331+ AmtVal = _mm_mullo_epi16 (_mm_sub_epi16 (MaxH16, AvgH16), Adjustment128);
332+ BH16 = _mm_adds_epi16 (BH16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxH16, BH16), 2 ), AmtVal));
333+ GH16 = _mm_adds_epi16 (GH16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxH16, GH16), 2 ), AmtVal));
334+ RH16 = _mm_adds_epi16 (RH16, _mm_mulhi_epi16 (_mm_slli_epi16 (_mm_sub_epi16 (MaxH16, RH16), 2 ), AmtVal));
335+
336+ Blue8 = _mm_packus_epi16 (BL16, BH16);
337+ Green8 = _mm_packus_epi16 (GL16, GH16);
338+ Red8 = _mm_packus_epi16 (RL16, RH16);
339+
340+ Dest1 = _mm_shuffle_epi8 (Blue8, _mm_setr_epi8 (0 , -1 , -1 , 1 , -1 , -1 , 2 , -1 , -1 , 3 , -1 , -1 , 4 , -1 , -1 , 5 ));
341+ Dest1 = _mm_or_si128 (Dest1, _mm_shuffle_epi8 (Green8, _mm_setr_epi8 (-1 , 0 , -1 , -1 , 1 , -1 , -1 , 2 , -1 , -1 , 3 , -1 , -1 , 4 , -1 , -1 )));
342+ Dest1 = _mm_or_si128 (Dest1, _mm_shuffle_epi8 (Red8, _mm_setr_epi8 (-1 , -1 , 0 , -1 , -1 , 1 , -1 , -1 , 2 , -1 , -1 , 3 , -1 , -1 , 4 , -1 )));
343+
344+ Dest2 = _mm_shuffle_epi8 (Blue8, _mm_setr_epi8 (-1 , -1 , 6 , -1 , -1 , 7 , -1 , -1 , 8 , -1 , -1 , 9 , -1 , -1 , 10 , -1 ));
345+ Dest2 = _mm_or_si128 (Dest2, _mm_shuffle_epi8 (Green8, _mm_setr_epi8 (5 , -1 , -1 , 6 , -1 , -1 , 7 , -1 , -1 , 8 , -1 , -1 , 9 , -1 , -1 , 10 )));
346+ Dest2 = _mm_or_si128 (Dest2, _mm_shuffle_epi8 (Red8, _mm_setr_epi8 (-1 , 5 , -1 , -1 , 6 , -1 , -1 , 7 , -1 , -1 , 8 , -1 , -1 , 9 , -1 , -1 )));
347+
348+ Dest3 = _mm_shuffle_epi8 (Blue8, _mm_setr_epi8 (-1 , 11 , -1 , -1 , 12 , -1 , -1 , 13 , -1 , -1 , 14 , -1 , -1 , 15 , -1 , -1 ));
349+ Dest3 = _mm_or_si128 (Dest3, _mm_shuffle_epi8 (Green8, _mm_setr_epi8 (-1 , -1 , 11 , -1 , -1 , 12 , -1 , -1 , 13 , -1 , -1 , 14 , -1 , -1 , 15 , -1 )));
350+ Dest3 = _mm_or_si128 (Dest3, _mm_shuffle_epi8 (Red8, _mm_setr_epi8 (10 , -1 , -1 , 11 , -1 , -1 , 12 , -1 , -1 , 13 , -1 , -1 , 14 , -1 , -1 , 15 )));
351+
215352 _mm_storeu_si128 ((__m128i *)(LinePD + 0 ), Dest1);
216353 _mm_storeu_si128 ((__m128i *)(LinePD + 16 ), Dest2);
217354 _mm_storeu_si128 ((__m128i *)(LinePD + 32 ), Dest3);
355+ LinePS = LinePS + 48 ;
356+ LinePD = LinePD + 48 ;
218357 }
219- for (; X < Width; X ++) {
358+ for (; X < Width; X++) {
220359 int Blue, Green, Red, Max;
221360 Blue = LinePS[0 ], Green = LinePS[1 ], Red = LinePS[2 ];
222361 int Avg = (Blue + Green + Green + Red) >> 2 ;
@@ -246,7 +385,7 @@ void VibranceAlgorithm_SSE(unsigned char *Src, unsigned char *Dest, int Width, i
246385}
247386
248387int main () {
249- Mat src = imread (" F:\\ 1 .jpg" );
388+ Mat src = imread (" F:\\ car .jpg" );
250389 int Height = src.rows ;
251390 int Width = src.cols ;
252391 unsigned char *Src = src.data ;
@@ -255,12 +394,12 @@ int main() {
255394 int Radius = 11 ;
256395 int Adjustment = 50 ;
257396 int64 st = cvGetTickCount ();
258- for (int i = 0 ; i <50 ; i++) {
259- VibranceAlgorithm_SSE (Src, Dest, Width, Height, Stride, Adjustment);
397+ for (int i = 0 ; i <100 ; i++) {
398+ VibranceAlgorithm_SSE_OpenMP (Src, Dest, Width, Height, Stride, Adjustment);
260399 }
261- double duration = (cv::getTickCount () - st) / cv::getTickFrequency () * 20 ;
400+ double duration = (cv::getTickCount () - st) / cv::getTickFrequency () * 10 ;
262401 printf (" %.5f\n " , duration);
263- VibranceAlgorithm_SSE (Src, Dest, Width, Height, Stride, Adjustment);
402+ VibranceAlgorithm_SSE_OpenMP (Src, Dest, Width, Height, Stride, Adjustment);
264403 Mat dst (Height, Width, CV_8UC3, Dest);
265404 imshow (" origin" , src);
266405 imshow (" result" , dst);
0 commit comments