Skip to content

Commit d57fbff

Browse files
committed
Update speed_medianFilter_in_gray_image.cpp
1 parent 113434c commit d57fbff

File tree

2 files changed

+211
-1
lines changed

2 files changed

+211
-1
lines changed

Algorithm optimization/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22

33
- speed_filter2d_in_gray_image.cpp 使用OpenBLAS和OpenMP优化对灰度图的卷积算法(filter2D)
44
- speed_filter2d_in_rgb_image.cpp 使用OpenBLAS和OpenMP优化对RGB图的卷积算法(filter2D)
5-
- speed_meanFilter_in_gray_image.cpp 使用x86循环展开,openmp优化均值滤波算法
5+
- speed_meanFilter_in_gray_image.cpp 使用x86循环展开,openmp优化均值滤波算法
6+
7+
- speed_medianFilter_in_gray_image.cpp AVX和Openmp,优化中值滤波算法
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
#include <stdio.h>
2+
#include <iostream>
3+
#include <immintrin.h>
4+
#include <opencv2/opencv.hpp>
5+
using namespace cv;
6+
using namespace std;
7+
//所有代码针对灰度图,RGB分为3个通道处理
8+
//中值滤波串行代码
9+
Mat speed_MedianFilter(Mat src) {
10+
int row = src.rows;
11+
int col = src.cols;
12+
Mat dst(row, col, CV_8UC1);
13+
for (int i = 1; i < row - 1; i++) {
14+
for (int j = 1; j < col - 1; j++) {
15+
unsigned char a[9];
16+
a[0] = src.at<uchar>(i, j);
17+
a[1] = src.at<uchar>(i, j + 1);
18+
a[2] = src.at<uchar>(i, j - 1);
19+
20+
a[3] = src.at<uchar>(i + 1, j);
21+
a[4] = src.at<uchar>(i + 1, j + 1);
22+
a[5] = src.at<uchar>(i + 1, j - 1);
23+
24+
a[6] = src.at<uchar>(i - 1, j);
25+
a[7] = src.at<uchar>(i - 1, j + 1);
26+
a[8] = src.at<uchar>(i - 1, j - 1);
27+
for (int ii = 0; ii < 5; ii++) {
28+
for (int jj = ii + 1; jj < 9; jj++) {
29+
if (a[ii] > a[jj]) {
30+
unsigned char tmp = a[ii];
31+
a[jj] = a[ii];
32+
a[jj] = tmp;
33+
}
34+
}
35+
}
36+
dst.at<uchar>(i, j) = a[4];
37+
}
38+
}
39+
for (int i = 0; i < row; i++) {
40+
dst.at<uchar>(i, 0) = src.at<uchar>(i, 0);
41+
dst.at<uchar>(i, col - 1) = src.at<uchar>(i, col - 1);
42+
}
43+
for (int i = 0; i < col; i++) {
44+
dst.at<uchar>(0, i) = src.at<uchar>(0, i);
45+
dst.at<uchar>(row - 1, i) = src.at<uchar>(row - 1, i);
46+
}
47+
return dst;
48+
}
49+
50+
//使用AVX指令集优化中值滤波
51+
52+
Mat AVX_MedianFilter(Mat src, unsigned char * __restrict temp) {
53+
int row = src.rows;
54+
int col = src.cols;
55+
Mat dst(row, col, CV_8UC3);
56+
unsigned char * __restrict temp2 = new unsigned char[row * col];
57+
for (int i = 0; i < row; i++) {
58+
for (int j = 0; j < col; j++) {
59+
temp2[i * col + j] = src.at<uchar>(i, j);
60+
}
61+
}
62+
for (int i = 1; i < row - 1; i++) {
63+
int j;
64+
for (j = 1; j < col - 1 - 32; j += 32) {
65+
__m256i a[9];
66+
a[0] = _mm256_loadu_si256((__m256i*)(temp2 + i * col + j));
67+
a[1] = _mm256_loadu_si256((__m256i*)(temp2 + i * col + j + 1));
68+
a[2] = _mm256_loadu_si256((__m256i*)(temp2 + i * col + j - 1));
69+
70+
a[3] = _mm256_loadu_si256((__m256i*)(temp2 + (i + 1) * col + j));
71+
a[4] = _mm256_loadu_si256((__m256i*)(temp2 + (i + 1) * col + j + 1));
72+
a[5] = _mm256_loadu_si256((__m256i*)(temp2 + (i + 1) * col + j - 1));
73+
74+
a[6] = _mm256_loadu_si256((__m256i*)(temp2 + (i - 1) * col + j));
75+
a[7] = _mm256_loadu_si256((__m256i*)(temp2 + (i - 1) * col + j + 1));
76+
a[8] = _mm256_loadu_si256((__m256i*)(temp2 + (i - 1) * col + j - 1));
77+
78+
for (int ii = 0; ii < 5; ii++) {
79+
for (int jj = ii + 1; jj < 9; jj++) {
80+
__m256i larger = _mm256_max_epu8(a[ii], a[jj]);
81+
__m256i smaller = _mm256_min_epu8(a[ii], a[jj]);
82+
a[ii] = smaller;
83+
a[jj] = larger;
84+
}
85+
}
86+
_mm256_storeu_si256((__m256i*)(temp + i * col + j), a[4]);
87+
}
88+
for (int je = j; je < col - 1; je++) {
89+
unsigned char a[9];
90+
a[0] = src.at<uchar>(i, je);
91+
a[1] = src.at<uchar>(i, je + 1);
92+
a[2] = src.at<uchar>(i, je - 1);
93+
94+
a[3] = src.at<uchar>(i + 1, je);
95+
a[4] = src.at<uchar>(i + 1, je + 1);
96+
a[5] = src.at<uchar>(i + 1, je - 1);
97+
98+
a[6] = src.at<uchar>(i - 1, je);
99+
a[7] = src.at<uchar>(i - 1, je + 1);
100+
a[8] = src.at<uchar>(i - 1, je - 1);
101+
for (int ii = 0; ii < 5; ii++) {
102+
for (int jj = ii + 1; jj < 9; jj++) {
103+
unsigned char large = std::max<unsigned char>(a[ii], a[jj]);
104+
unsigned char small = std::min<unsigned char>(a[ii], a[jj]);
105+
a[ii] = small;
106+
a[jj] = large;
107+
}
108+
}
109+
temp[i * col + je] = a[4];
110+
}
111+
}
112+
for (int i = 1; i < row - 1; i++) {
113+
for (int j = 1; j < col - 1; j++) {
114+
dst.at<uchar>(i, j) = (temp[i * col + j]);
115+
}
116+
}
117+
for (int i = 0; i < row; i++) {
118+
dst.at<uchar>(i, 0) = src.at<uchar>(i, 0);
119+
dst.at<uchar>(i, col - 1) = src.at<uchar>(i, col - 1);
120+
}
121+
for (int i = 0; i < col; i++) {
122+
dst.at<uchar>(0, i) = src.at<uchar>(0, i);
123+
dst.at<uchar>(row - 1, i) = src.at<uchar>(row - 1, i);
124+
}
125+
return dst;
126+
}
127+
128+
//使用Openmp加速中值滤波,在6核Core i7 3930K上,12线程的加速比为8.4
129+
130+
Mat Openmp_MedianFilter(Mat src) {
131+
int row = src.rows;
132+
int col = src.cols;
133+
Mat dst(row, col, CV_8UC1);
134+
#pragma omp parallel default(none) shared(src, dst, row, col) num_threads(12)
135+
{
136+
#pragma omp for nowait
137+
for (int i = 1; i < row - 1; i++) {
138+
for (int j = 1; j < col - 1; j++) {
139+
unsigned char a[9];
140+
a[0] = src.at<uchar>(i, j);
141+
a[1] = src.at<uchar>(i, j + 1);
142+
a[2] = src.at<uchar>(i, j - 1);
143+
144+
a[3] = src.at<uchar>(i + 1, j);
145+
a[4] = src.at<uchar>(i + 1, j + 1);
146+
a[5] = src.at<uchar>(i + 1, j - 1);
147+
148+
a[6] = src.at<uchar>(i - 1, j);
149+
a[7] = src.at<uchar>(i - 1, j + 1);
150+
a[8] = src.at<uchar>(i - 1, j - 1);
151+
for (int ii = 0; ii < 5; ii++) {
152+
for (int jj = ii + 1; jj < 9; jj++) {
153+
if (a[ii] > a[jj]) {
154+
unsigned char tmp = a[ii];
155+
a[jj] = a[ii];
156+
a[jj] = tmp;
157+
}
158+
}
159+
}
160+
dst.at<uchar>(i, j) = a[4];
161+
}
162+
}
163+
#pragma omp for nowait
164+
for (int i = 0; i < row; i++) {
165+
dst.at<uchar>(i, 0) = src.at<uchar>(i, 0);
166+
dst.at<uchar>(i, col - 1) = src.at<uchar>(i, col - 1);
167+
}
168+
#pragma omp for nowait
169+
for (int i = 0; i < col; i++) {
170+
dst.at<uchar>(0, i) = src.at<uchar>(0, i);
171+
dst.at<uchar>(row - 1, i) = src.at<uchar>(row - 1, i);
172+
}
173+
}
174+
return dst;
175+
}
176+
177+
Mat speed_rgb2gray(Mat src) {
178+
Mat dst(src.rows, src.cols, CV_8UC1);
179+
#pragma omp parallel for num_threads(4)
180+
for (int i = 0; i < src.rows; i++) {
181+
for (int j = 0; j < src.cols; j++) {
182+
dst.at<uchar>(i, j) = ((src.at<Vec3b>(i, j)[0] << 18) + (src.at<Vec3b>(i, j)[0] << 15) + (src.at<Vec3b>(i, j)[0] << 14) +
183+
(src.at<Vec3b>(i, j)[0] << 11) + (src.at<Vec3b>(i, j)[0] << 7) + (src.at<Vec3b>(i, j)[0] << 7) + (src.at<Vec3b>(i, j)[0] << 5) +
184+
(src.at<Vec3b>(i, j)[0] << 4) + (src.at<Vec3b>(i, j)[0] << 2) +
185+
(src.at<Vec3b>(i, j)[1] << 19) + (src.at<Vec3b>(i, j)[1] << 16) + (src.at<Vec3b>(i, j)[1] << 14) + (src.at<Vec3b>(i, j)[1] << 13) +
186+
(src.at<Vec3b>(i, j)[1] << 10) + (src.at<Vec3b>(i, j)[1] << 8) + (src.at<Vec3b>(i, j)[1] << 4) + (src.at<Vec3b>(i, j)[1] << 3) + (src.at<Vec3b>(i, j)[1] << 1) +
187+
(src.at<Vec3b>(i, j)[2] << 16) + (src.at<Vec3b>(i, j)[2] << 15) + (src.at<Vec3b>(i, j)[2] << 14) + (src.at<Vec3b>(i, j)[2] << 12) +
188+
(src.at<Vec3b>(i, j)[2] << 9) + (src.at<Vec3b>(i, j)[2] << 7) + (src.at<Vec3b>(i, j)[2] << 6) + (src.at<Vec3b>(i, j)[2] << 5) + (src.at<Vec3b>(i, j)[2] << 4) + (src.at<Vec3b>(i, j)[2] << 1) >> 20);
189+
}
190+
}
191+
return dst;
192+
}
193+
194+
int main() {
195+
Mat src = cv::imread("F:\\1.jpg");
196+
src = speed_rgb2gray(src);
197+
int row = src.rows;
198+
int col = src.cols;
199+
unsigned char * __restrict temp = new unsigned char[row * col];
200+
Mat dst1 = speed_MedianFilter(src);
201+
cv::imshow("res1", dst1);
202+
Mat dst2 = AVX_MedianFilter(src, temp);
203+
cv::imshow("res2", dst2);
204+
Mat dst3 = Openmp_MedianFilter(src);
205+
cv::imshow("res3", dst3);
206+
cv::waitKey(0);
207+
return 0;
208+
}

0 commit comments

Comments
 (0)