Skip to content

Commit a776fd2

Browse files
committed
Update speed_filter2d_in_gray_image.cpp
1 parent 2c2e89e commit a776fd2

File tree

1 file changed

+62
-5
lines changed

1 file changed

+62
-5
lines changed

Algorithm optimization/speed_filter2d_in_gray_image.cpp

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ using namespace cv;
1010
//RGB转化为灰度图
1111
Mat speed_rgb2gray(Mat src) {
1212
Mat dst(src.rows, src.cols, CV_8UC1);
13-
#pragma omp parallel for num_threads(4)
13+
//#pragma omp parallel for num_threads(4)
1414
for (int i = 0; i < src.rows; i++) {
1515
for (int j = 0; j < src.cols; j++) {
1616
dst.at<uchar>(i, j) = ((src.at<Vec3b>(i, j)[0] << 18) + (src.at<Vec3b>(i, j)[0] << 15) + (src.at<Vec3b>(i, j)[0] << 14) +
@@ -27,8 +27,8 @@ Mat speed_rgb2gray(Mat src) {
2727

2828
//A增加Pad的运算
2929
void get_Pad(int pad_Height, int pad_Width, int row, int col, float *A_pad, float *A) {
30-
int pad_x = pad_Height - row >> 1;
31-
int pad_y = pad_Width - col >> 1;
30+
int pad_x = (pad_Height - row) >> 1;
31+
int pad_y = (pad_Width - col) >> 1;
3232
printf("pad_x: %d pad_y: %d\n", pad_x, pad_y);
3333
for (int i = 0; i < pad_Height; i++) {
3434
for (int j = 0; j < pad_Width; j++) {
@@ -48,7 +48,45 @@ void get_Pad(int pad_Height, int pad_Width, int row, int col, float *A_pad, floa
4848
}
4949
}
5050

51+
// Unrolls the padded image into the patch matrix consumed by the GEMM-based
// 3x3 convolution (the "row2col" / im2col idea, prepared for OpenBLAS).
//
// For every output pixel (i, j) the 3x3 window of A_pad whose top-left corner
// is (i, j) is copied, row by row, into one contiguous run of 9 floats.
//
// Parameters:
//   A_convert  - destination, row-major, (OutHeight * OutWidth) rows by 9
//                columns; row index is i * OutWidth + j.
//   OutHeight  - number of output rows.
//   OutWidth   - number of output columns.
//   pad_Height - number of rows in A_pad (not needed for indexing; kept so the
//                signature stays unchanged for existing callers).
//   pad_Width  - number of columns in A_pad, i.e. its row stride.
//   A_pad      - padded input, read as row-major pad_Height x pad_Width.
//
// The caller must guarantee OutHeight + 2 <= pad_Height and
// OutWidth + 2 <= pad_Width, otherwise the 3x3 window reads past A_pad.
void convert_A(float *A_convert, const int OutHeight, const int OutWidth, const int pad_Height, const int pad_Width, float *A_pad) {
    const int kKernelSize = 3;                         // 3x3 convolution only
    const int kPatchSize = kKernelSize * kKernelSize;  // 9 values per output pixel
    (void)pad_Height;  // the row stride of a row-major buffer is its width

    for (int i = 0; i < OutHeight; i++) {
        for (int j = 0; j < OutWidth; j++) {
            // Each output pixel owns exactly one 9-float row of A_convert, so
            // the offset is (linear pixel index) * 9. The previous formula
            // (i * OutHeight * pad_Height + j * pad_Width) overlapped other
            // patches and ran past the end of the buffer.
            float *patch = A_convert + (i * OutWidth + j) * kPatchSize;
            for (int ki = 0; ki < kKernelSize; ki++) {
                // Step through A_pad rows with stride pad_Width (not
                // pad_Height) so non-square images are handled correctly.
                const float *src_row = A_pad + (i + ki) * pad_Width + j;
                for (int kj = 0; kj < kKernelSize; kj++) {
                    patch[ki * kKernelSize + kj] = src_row[kj];
                }
            }
        }
    }
}
74+
//OpenBlas调用sgemm算法
75+
void Matrixmul_blas(const int convAh, const int convAw, float *A_convert, float *B, float *C) {
76+
const enum CBLAS_ORDER Order = CblasRowMajor;
77+
const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;
78+
const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;
79+
const int M = convAh;//A的行数,C的行数
80+
const int N = 1;//B的列数,C的列数
81+
const int K = convAw;//A的列数,B的行数
82+
const float alpha = 1;
83+
const float beta = 0;
84+
const int lda = K;//A的列
85+
const int ldb = N;//B的列
86+
const int ldc = N;//C的列
87+
88+
cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A_convert, lda, B, ldb, beta, C, ldc);
89+
}
5290

5391
int main() {
5492
Mat src = cv::imread("F:\\1.jpg");
@@ -68,13 +106,32 @@ int main() {
68106
//卷积核参数初始化为
69107
const int pad = (KernelHeight - 1) / 2; //需要pad的长度
70108
const int stride = 1; //卷积核滑动的步长
71-
//计算卷积输出矩阵的长宽
109+
//计算卷积输出矩阵的长宽
72110
const int OutHeight = (row - KernelHeight + 2 * pad) / stride + 1;
73111
const int OutWidth = (col - KernelWidth + 2 * pad) / stride + 1;
74112
//计算pad_A
75113
const int pad_Height = row + 2 * pad;
76114
const int pad_Width = col + 2 * pad;
77115
float *A_pad = new float[pad_Height * pad_Width];
78116
get_Pad(pad_Height, pad_Width, row, col, A_pad, A);
79-
117+
//定义被卷积矩阵宽高
118+
const int convAw = KernelHeight * KernelWidth;
119+
const int convAh = OutHeight * OutWidth;
120+
//转换被卷积矩阵
121+
float *A_convert = new float[convAh * convAw];
122+
convert_A(A_convert, OutHeight, OutWidth, pad_Height, pad_Width, A_pad);
123+
//定义卷积输出矩阵
124+
float *C = new float[convAh * 1];
125+
//sgemm算法计算输出矩阵
126+
Matrixmul_blas(convAh, convAw, A_convert, B, C);
127+
//输出验证
128+
Mat dst(OutHeight, OutWidth, CV_32FC1);
129+
for (int i = 0; i < OutHeight; i++) {
130+
for (int j = 0; j < OutWidth; j++) {
131+
dst.at<float>(i, j) = C[i * OutHeight + j];
132+
}
133+
}
134+
cv::imshow("result", dst);
135+
cv::waitKey(0);
136+
return 0;
80137
}

0 commit comments

Comments
 (0)