@@ -10,7 +10,7 @@ using namespace cv;
1010// RGB转化为灰度图
1111Mat speed_rgb2gray (Mat src) {
1212 Mat dst (src.rows , src.cols , CV_8UC1);
13- #pragma omp parallel for num_threads(4)
13+ // #pragma omp parallel for num_threads(4)
1414 for (int i = 0 ; i < src.rows ; i++) {
1515 for (int j = 0 ; j < src.cols ; j++) {
1616 dst.at <uchar>(i, j) = ((src.at <Vec3b>(i, j)[0 ] << 18 ) + (src.at <Vec3b>(i, j)[0 ] << 15 ) + (src.at <Vec3b>(i, j)[0 ] << 14 ) +
@@ -27,8 +27,8 @@ Mat speed_rgb2gray(Mat src) {
2727
2828// A增加Pad的运算
2929void get_Pad (int pad_Height, int pad_Width, int row, int col, float *A_pad, float *A) {
30- int pad_x = pad_Height - row >> 1 ;
31- int pad_y = pad_Width - col >> 1 ;
30+ int pad_x = ( pad_Height - row) >> 1 ;
31+ int pad_y = ( pad_Width - col) >> 1 ;
3232 printf (" pad_x: %d pad_y: %d\n " , pad_x, pad_y);
3333 for (int i = 0 ; i < pad_Height; i++) {
3434 for (int j = 0 ; j < pad_Width; j++) {
@@ -48,7 +48,45 @@ void get_Pad(int pad_Height, int pad_Width, int row, int col, float *A_pad, floa
4848 }
4949}
5050
// Unrolls the padded input A_pad into an im2col ("row2col") matrix so that a
// 3x3, stride-1 convolution can be computed as a single matrix product.
//
//   A_convert  - output, row-major (OutHeight * OutWidth) x 9 matrix; row
//                (i * OutWidth + j) receives the 3x3 window of A_pad whose
//                top-left corner is at (i, j), flattened row by row.
//   OutHeight  - number of output rows of the convolution.
//   OutWidth   - number of output columns of the convolution.
//   pad_Height - row count of A_pad; kept for interface compatibility, not
//                needed for addressing.
//   pad_Width  - column count of A_pad, i.e. its row stride.
//   A_pad      - padded input, read as a row-major matrix with row stride
//                pad_Width; it must have at least OutHeight + 2 rows and
//                OutWidth + 2 columns.
void convert_A(float *A_convert, const int OutHeight, const int OutWidth, const int pad_Height, const int pad_Width, float *A_pad) {
    const int kKernelSize = 3;  // the unrolling is fixed to a 3x3 kernel
    (void)pad_Height;           // the row stride of a row-major matrix is its width

    // Output rows are written in order, so a running pointer replaces the
    // index arithmetic; each row holds kKernelSize * kKernelSize values, which
    // matches the leading dimension (lda = 9) passed to sgemm.
    float *dst = A_convert;
    for (int i = 0; i < OutHeight; i++) {
        for (int j = 0; j < OutWidth; j++) {
            for (int ki = 0; ki < kKernelSize; ki++) {
                // Row (i + ki) of the padded input, starting at column j.
                const float *src = A_pad + (i + ki) * pad_Width + j;
                for (int kj = 0; kj < kKernelSize; kj++) {
                    *dst++ = src[kj];
                }
            }
        }
    }
}
74+ // OpenBlas调用sgemm算法
75+ void Matrixmul_blas (const int convAh, const int convAw, float *A_convert, float *B, float *C) {
76+ const enum CBLAS_ORDER Order = CblasRowMajor;
77+ const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;
78+ const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;
79+ const int M = convAh;// A的行数,C的行数
80+ const int N = 1 ;// B的列数,C的列数
81+ const int K = convAw;// A的列数,B的行数
82+ const float alpha = 1 ;
83+ const float beta = 0 ;
84+ const int lda = K;// A的列
85+ const int ldb = N;// B的列
86+ const int ldc = N;// C的列
87+
88+ cblas_sgemm (Order, TransA, TransB, M, N, K, alpha, A_convert, lda, B, ldb, beta, C, ldc);
89+ }
5290
5391int main () {
5492 Mat src = cv::imread (" F:\\ 1.jpg" );
@@ -68,13 +106,32 @@ int main() {
68106 // 卷积核参数初始化为
69107 const int pad = (KernelHeight - 1 ) / 2 ; // 需要pad的长度
70108 const int stride = 1 ; // 卷积核滑动的步长
71- // 计算卷积输出矩阵的长宽
109+ // 计算卷积输出矩阵的长宽
72110 const int OutHeight = (row - KernelHeight + 2 * pad) / stride + 1 ;
73111 const int OutWidth = (col - KernelWidth + 2 * pad) / stride + 1 ;
74112 // 计算pad_A
75113 const int pad_Height = row + 2 * pad;
76114 const int pad_Width = col + 2 * pad;
77115 float *A_pad = new float [pad_Height * pad_Width];
78116 get_Pad (pad_Height, pad_Width, row, col, A_pad, A);
79-
117+ // 定义被卷积矩阵宽高
118+ const int convAw = KernelHeight * KernelWidth;
119+ const int convAh = OutHeight * OutWidth;
120+ // 转换被卷积矩阵
121+ float *A_convert = new float [convAh * convAw];
122+ convert_A (A_convert, OutHeight, OutWidth, pad_Height, pad_Width, A_pad);
123+ // 定义卷积输出矩阵
124+ float *C = new float [convAh * 1 ];
125+ // sgemm算法计算输出矩阵
126+ Matrixmul_blas (convAh, convAw, A_convert, B, C);
127+ // 输出验证
128+ Mat dst (OutHeight, OutWidth, CV_32FC1);
129+ for (int i = 0 ; i < OutHeight; i++) {
130+ for (int j = 0 ; j < OutWidth; j++) {
131+ dst.at <float >(i, j) = C[i * OutHeight + j];
132+ }
133+ }
134+ cv::imshow (" result" , dst);
135+ cv::waitKey (0 );
136+ return 0 ;
80137}
0 commit comments