1+ #include < stdio.h>
2+ #include < iostream>
3+ #include < immintrin.h>
4+ #include < opencv2/opencv.hpp>
5+ using namespace cv ;
6+ using namespace std ;
7+ // 所有代码针对灰度图,RGB分为3个通道处理
8+ // 中值滤波串行代码
9+ Mat speed_MedianFilter (Mat src) {
10+ int row = src.rows ;
11+ int col = src.cols ;
12+ Mat dst (row, col, CV_8UC1);
13+ for (int i = 1 ; i < row - 1 ; i++) {
14+ for (int j = 1 ; j < col - 1 ; j++) {
15+ unsigned char a[9 ];
16+ a[0 ] = src.at <uchar>(i, j);
17+ a[1 ] = src.at <uchar>(i, j + 1 );
18+ a[2 ] = src.at <uchar>(i, j - 1 );
19+
20+ a[3 ] = src.at <uchar>(i + 1 , j);
21+ a[4 ] = src.at <uchar>(i + 1 , j + 1 );
22+ a[5 ] = src.at <uchar>(i + 1 , j - 1 );
23+
24+ a[6 ] = src.at <uchar>(i - 1 , j);
25+ a[7 ] = src.at <uchar>(i - 1 , j + 1 );
26+ a[8 ] = src.at <uchar>(i - 1 , j - 1 );
27+ for (int ii = 0 ; ii < 5 ; ii++) {
28+ for (int jj = ii + 1 ; jj < 9 ; jj++) {
29+ if (a[ii] > a[jj]) {
30+ unsigned char tmp = a[ii];
31+ a[jj] = a[ii];
32+ a[jj] = tmp;
33+ }
34+ }
35+ }
36+ dst.at <uchar>(i, j) = a[4 ];
37+ }
38+ }
39+ for (int i = 0 ; i < row; i++) {
40+ dst.at <uchar>(i, 0 ) = src.at <uchar>(i, 0 );
41+ dst.at <uchar>(i, col - 1 ) = src.at <uchar>(i, col - 1 );
42+ }
43+ for (int i = 0 ; i < col; i++) {
44+ dst.at <uchar>(0 , i) = src.at <uchar>(0 , i);
45+ dst.at <uchar>(row - 1 , i) = src.at <uchar>(row - 1 , i);
46+ }
47+ return dst;
48+ }
49+
50+ // 使用AVX指令集优化中值滤波
51+
52+ Mat AVX_MedianFilter (Mat src, unsigned char * __restrict temp) {
53+ int row = src.rows ;
54+ int col = src.cols ;
55+ Mat dst (row, col, CV_8UC3);
56+ unsigned char * __restrict temp2 = new unsigned char [row * col];
57+ for (int i = 0 ; i < row; i++) {
58+ for (int j = 0 ; j < col; j++) {
59+ temp2[i * col + j] = src.at <uchar>(i, j);
60+ }
61+ }
62+ for (int i = 1 ; i < row - 1 ; i++) {
63+ int j;
64+ for (j = 1 ; j < col - 1 - 32 ; j += 32 ) {
65+ __m256i a[9 ];
66+ a[0 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + i * col + j));
67+ a[1 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + i * col + j + 1 ));
68+ a[2 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + i * col + j - 1 ));
69+
70+ a[3 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + (i + 1 ) * col + j));
71+ a[4 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + (i + 1 ) * col + j + 1 ));
72+ a[5 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + (i + 1 ) * col + j - 1 ));
73+
74+ a[6 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + (i - 1 ) * col + j));
75+ a[7 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + (i - 1 ) * col + j + 1 ));
76+ a[8 ] = _mm256_loadu_si256 ((__m256i*)(temp2 + (i - 1 ) * col + j - 1 ));
77+
78+ for (int ii = 0 ; ii < 5 ; ii++) {
79+ for (int jj = ii + 1 ; jj < 9 ; jj++) {
80+ __m256i larger = _mm256_max_epu8 (a[ii], a[jj]);
81+ __m256i smaller = _mm256_min_epu8 (a[ii], a[jj]);
82+ a[ii] = smaller;
83+ a[jj] = larger;
84+ }
85+ }
86+ _mm256_storeu_si256 ((__m256i*)(temp + i * col + j), a[4 ]);
87+ }
88+ for (int je = j; je < col - 1 ; je++) {
89+ unsigned char a[9 ];
90+ a[0 ] = src.at <uchar>(i, je);
91+ a[1 ] = src.at <uchar>(i, je + 1 );
92+ a[2 ] = src.at <uchar>(i, je - 1 );
93+
94+ a[3 ] = src.at <uchar>(i + 1 , je);
95+ a[4 ] = src.at <uchar>(i + 1 , je + 1 );
96+ a[5 ] = src.at <uchar>(i + 1 , je - 1 );
97+
98+ a[6 ] = src.at <uchar>(i - 1 , je);
99+ a[7 ] = src.at <uchar>(i - 1 , je + 1 );
100+ a[8 ] = src.at <uchar>(i - 1 , je - 1 );
101+ for (int ii = 0 ; ii < 5 ; ii++) {
102+ for (int jj = ii + 1 ; jj < 9 ; jj++) {
103+ unsigned char large = std::max<unsigned char >(a[ii], a[jj]);
104+ unsigned char small = std::min<unsigned char >(a[ii], a[jj]);
105+ a[ii] = small;
106+ a[jj] = large;
107+ }
108+ }
109+ temp[i * col + je] = a[4 ];
110+ }
111+ }
112+ for (int i = 1 ; i < row - 1 ; i++) {
113+ for (int j = 1 ; j < col - 1 ; j++) {
114+ dst.at <uchar>(i, j) = (temp[i * col + j]);
115+ }
116+ }
117+ for (int i = 0 ; i < row; i++) {
118+ dst.at <uchar>(i, 0 ) = src.at <uchar>(i, 0 );
119+ dst.at <uchar>(i, col - 1 ) = src.at <uchar>(i, col - 1 );
120+ }
121+ for (int i = 0 ; i < col; i++) {
122+ dst.at <uchar>(0 , i) = src.at <uchar>(0 , i);
123+ dst.at <uchar>(row - 1 , i) = src.at <uchar>(row - 1 , i);
124+ }
125+ return dst;
126+ }
127+
128+ // 使用Openmp加速中值滤波,在6核Core i7 3930K上,12线程的加速比为8.4
129+
130+ Mat Openmp_MedianFilter (Mat src) {
131+ int row = src.rows ;
132+ int col = src.cols ;
133+ Mat dst (row, col, CV_8UC1);
134+ #pragma omp parallel default(none) shared(src, dst, row, col) num_threads(12)
135+ {
136+ #pragma omp for nowait
137+ for (int i = 1 ; i < row - 1 ; i++) {
138+ for (int j = 1 ; j < col - 1 ; j++) {
139+ unsigned char a[9 ];
140+ a[0 ] = src.at <uchar>(i, j);
141+ a[1 ] = src.at <uchar>(i, j + 1 );
142+ a[2 ] = src.at <uchar>(i, j - 1 );
143+
144+ a[3 ] = src.at <uchar>(i + 1 , j);
145+ a[4 ] = src.at <uchar>(i + 1 , j + 1 );
146+ a[5 ] = src.at <uchar>(i + 1 , j - 1 );
147+
148+ a[6 ] = src.at <uchar>(i - 1 , j);
149+ a[7 ] = src.at <uchar>(i - 1 , j + 1 );
150+ a[8 ] = src.at <uchar>(i - 1 , j - 1 );
151+ for (int ii = 0 ; ii < 5 ; ii++) {
152+ for (int jj = ii + 1 ; jj < 9 ; jj++) {
153+ if (a[ii] > a[jj]) {
154+ unsigned char tmp = a[ii];
155+ a[jj] = a[ii];
156+ a[jj] = tmp;
157+ }
158+ }
159+ }
160+ dst.at <uchar>(i, j) = a[4 ];
161+ }
162+ }
163+ #pragma omp for nowait
164+ for (int i = 0 ; i < row; i++) {
165+ dst.at <uchar>(i, 0 ) = src.at <uchar>(i, 0 );
166+ dst.at <uchar>(i, col - 1 ) = src.at <uchar>(i, col - 1 );
167+ }
168+ #pragma omp for nowait
169+ for (int i = 0 ; i < col; i++) {
170+ dst.at <uchar>(0 , i) = src.at <uchar>(0 , i);
171+ dst.at <uchar>(row - 1 , i) = src.at <uchar>(row - 1 , i);
172+ }
173+ }
174+ return dst;
175+ }
176+
177+ Mat speed_rgb2gray (Mat src) {
178+ Mat dst (src.rows , src.cols , CV_8UC1);
179+ #pragma omp parallel for num_threads(4)
180+ for (int i = 0 ; i < src.rows ; i++) {
181+ for (int j = 0 ; j < src.cols ; j++) {
182+ dst.at <uchar>(i, j) = ((src.at <Vec3b>(i, j)[0 ] << 18 ) + (src.at <Vec3b>(i, j)[0 ] << 15 ) + (src.at <Vec3b>(i, j)[0 ] << 14 ) +
183+ (src.at <Vec3b>(i, j)[0 ] << 11 ) + (src.at <Vec3b>(i, j)[0 ] << 7 ) + (src.at <Vec3b>(i, j)[0 ] << 7 ) + (src.at <Vec3b>(i, j)[0 ] << 5 ) +
184+ (src.at <Vec3b>(i, j)[0 ] << 4 ) + (src.at <Vec3b>(i, j)[0 ] << 2 ) +
185+ (src.at <Vec3b>(i, j)[1 ] << 19 ) + (src.at <Vec3b>(i, j)[1 ] << 16 ) + (src.at <Vec3b>(i, j)[1 ] << 14 ) + (src.at <Vec3b>(i, j)[1 ] << 13 ) +
186+ (src.at <Vec3b>(i, j)[1 ] << 10 ) + (src.at <Vec3b>(i, j)[1 ] << 8 ) + (src.at <Vec3b>(i, j)[1 ] << 4 ) + (src.at <Vec3b>(i, j)[1 ] << 3 ) + (src.at <Vec3b>(i, j)[1 ] << 1 ) +
187+ (src.at <Vec3b>(i, j)[2 ] << 16 ) + (src.at <Vec3b>(i, j)[2 ] << 15 ) + (src.at <Vec3b>(i, j)[2 ] << 14 ) + (src.at <Vec3b>(i, j)[2 ] << 12 ) +
188+ (src.at <Vec3b>(i, j)[2 ] << 9 ) + (src.at <Vec3b>(i, j)[2 ] << 7 ) + (src.at <Vec3b>(i, j)[2 ] << 6 ) + (src.at <Vec3b>(i, j)[2 ] << 5 ) + (src.at <Vec3b>(i, j)[2 ] << 4 ) + (src.at <Vec3b>(i, j)[2 ] << 1 ) >> 20 );
189+ }
190+ }
191+ return dst;
192+ }
193+
194+ int main () {
195+ Mat src = cv::imread (" F:\\ 1.jpg" );
196+ src = speed_rgb2gray (src);
197+ int row = src.rows ;
198+ int col = src.cols ;
199+ unsigned char * __restrict temp = new unsigned char [row * col];
200+ Mat dst1 = speed_MedianFilter (src);
201+ cv::imshow (" res1" , dst1);
202+ Mat dst2 = AVX_MedianFilter (src, temp);
203+ cv::imshow (" res2" , dst2);
204+ Mat dst3 = Openmp_MedianFilter (src);
205+ cv::imshow (" res3" , dst3);
206+ cv::waitKey (0 );
207+ return 0 ;
208+ }
0 commit comments