Change CMakeLists.txt for FPGA track
zhangyang committed Oct 20, 2018
1 parent 5c28936 commit c4cce98
Showing 19 changed files with 108 additions and 208 deletions.
16 changes: 11 additions & 5 deletions CMakeLists.txt
@@ -2,9 +2,9 @@ cmake_minimum_required(VERSION 3.0)
project(paddle-mobile)

# select the platform to build
option(CPU "armv7 with neon support" ON)
option(CPU "armv7 with neon support" OFF)
option(MALI_GPU "mali gpu support" OFF)
option(FPGA "fpga support" OFF)
option(FPGA "fpga support" ON)

option(USE_OPENMP "openmp support" OFF)
option(DEBUGING "enable debug mode" ON)
@@ -29,7 +29,10 @@ if(DEBUGING)
message(STATUS "debugging mode")
add_definitions(-DPADDLE_MOBILE_DEBUG)
else()
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
if(FPGA)
else()
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
endif()
endif()

if(USE_EXCEPTION)
@@ -93,8 +96,7 @@ else()
endif()

if(FPGA)
set(DEBUGING ON)
add_definitions(-DPADDLE_MOBILE_DEBUG)
message("FPGA mode enabled")
add_definitions(-DPADDLE_MOBILE_FPGA)
else()
file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
@@ -177,6 +179,10 @@ if(DEBUGING)
else()
add_subdirectory(test)
endif()
elseif(FPGA)
add_subdirectory(test)
endif()
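The definitions toggled above (PADDLE_MOBILE_DEBUG, PADDLE_MOBILE_FPGA) are consumed as preprocessor guards in the sources. The snippet below is only an illustration of that guard pattern; the macro names come from this commit, but the guarded bodies are placeholders rather than real paddle-mobile code.

#include <cstdio>

// Illustrative guard pattern only; bodies are placeholders.
void DispatchConvExample() {
#ifdef PADDLE_MOBILE_FPGA
  std::printf("dispatching to the FPGA conv kernel\n");
#else
  std::printf("dispatching to the CPU conv kernel\n");
#endif
#ifdef PADDLE_MOBILE_DEBUG
  std::printf("debug logging enabled\n");
#endif
}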




2 changes: 1 addition & 1 deletion src/common/variant.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include <cstdlib>

#include <cstring>
#include "common/enforce.h"
#include "common/log.h"

16 changes: 8 additions & 8 deletions src/fpga/api.cpp
@@ -22,7 +22,7 @@ limitations under the License. */
#include "fpga/filter.h"
#include "fpga/image.h"
#define FPGA_TEST_MODE
//#define PADDLE_MOBILE_OS_LINUX
// #define PADDLE_MOBILE_OS_LINUX

namespace paddle_mobile {
namespace fpga {
@@ -149,7 +149,7 @@ int ComputeBasicConv(const struct ConvArgs &args) {
return do_ioctl(IOCTL_CONFIG_CONV, &args);
}

int ComputeFpgaConv(const struct WrapperConvArgs &args) {
int ComputeFpgaConv(const struct SplitConvArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFPGAConv===========";
DLOG << " filter_num:" << args.filter_num
@@ -194,8 +194,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFpgaEWAdd===========";
DLOG << " relu_enabled:" << args.relu_enabled
<< " const0:" << fp16_2_fp32(short(args.const0))
<< " const1:" << fp16_2_fp32(short(args.const1));
<< " const0:" << fp16_2_fp32(int16_t(args.const0))
<< " const1:" << fp16_2_fp32(int16_t(args.const1));
DLOG << " image0_address:" << args.image0.address
<< " image0_scale_address:" << args.image0.scale_address
<< " image0_channels:" << args.image0.channels
@@ -383,10 +383,10 @@ void format_concat_output(framework::Tensor *out, int height, int width,
out->reset_data_ptr(data_ptr);
}

void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h, int stride_w,
int padding_h, int padding_w, float *bs_ptr) {
void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float *bs_ptr) {
auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>();
auto out_ptr = out->data<float>();
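The EW-add logging above now routes const0/const1 through int16_t before calling fp16_2_fp32, i.e. the raw 16-bit pattern is handed over unchanged. For reference, a minimal software fp16-to-fp32 conversion looks roughly like the sketch below; it is not the implementation behind fp16_2_fp32 in this file (which may differ, e.g. in subnormal handling), only an illustration of what the bit pattern encodes.

#include <cstdint>
#include <cstring>

// Minimal fp16 -> fp32 sketch; flushes subnormals to signed zero.
float fp16_to_fp32_sketch(int16_t h) {
  uint16_t bits = static_cast<uint16_t>(h);
  uint32_t sign = (bits & 0x8000u) << 16;
  uint32_t exp  = (bits >> 10) & 0x1Fu;
  uint32_t mant = bits & 0x3FFu;
  uint32_t out;
  if (exp == 0) {            // zero / subnormal
    out = sign;
  } else if (exp == 31) {    // inf / NaN
    out = sign | 0x7F800000u | (mant << 13);
  } else {                   // normal: re-bias exponent 15 -> 127
    out = sign | ((exp + 112) << 23) | (mant << 13);
  }
  float f;
  std::memcpy(&f, &out, sizeof(f));
  return f;
}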
44 changes: 14 additions & 30 deletions src/fpga/api.h
@@ -89,7 +89,7 @@ struct ConcatArgs {
uint32_t width;
};

struct WrapperConvArgs {
struct SplitConvArgs {
uint32_t split_num;
uint32_t group_num;
uint32_t filter_num;
@@ -98,6 +98,14 @@ struct WrapperConvArgs {
struct ConcatArgs concat_arg;
};

struct GroupConvArgs {
uint32_t group_num;
uint32_t filter_num;
struct ImageOutputArgs output;
struct SplitConvArgs* conv_args;
struct ConcatArgs concat_arg;
};

struct PoolingArgs {
int16_t mode; // mode: 0:max, 1:avg
half kernel_reciprocal;
@@ -159,30 +167,6 @@ struct MemoryCacheArgs {
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)

enum FPGA_ERR_TYPE {
ERR_IOCTL_CMD = -1,
ERR_TIMEOUT = -2,
ERR_COMPLETION_TIMEOUT = -3,
ERR_INVALID_FPGA_ADDR = -4,
ERR_NOMEM = -5,
ERR_NO_RESERVE_MEM = -6,
ERR_COPY_FROM_USER = -7,
ERR_COPY_TO_USER = -8,
ERR_DEL_TIMER = -9,
ERR_ENABLE_MSI = -10,
ERR_REGISTER_IRQ = -11,
ERR_PCIE_REGISTER = -12,
ERR_PCIE_PROBE = -13,
ERR_REGISTER_BLOCK = -14,
ERR_ALLOC_GENDISK = -15,
ERR_INIT_QUEUE = -16,
ERR_WAIT = -17,
ERR_ECC_ERROR = -31,
ERR_FPGA_FAIL_STOP = -64,
ERR_FPGA_DEBUG_STOP = -113,
DEV_TMP_UNAVAILABLE = -128
};

//============================== API =============================

int open_device();
@@ -195,7 +179,7 @@ int fpga_flush(void* address, size_t size);
int fpga_invalidate(void* address, size_t size);

int PerformBypass(const struct BypassArgs& args);
int ComputeFpgaConv(const struct WrapperConvArgs& args);
int ComputeFpgaConv(const struct SplitConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args);
int ComputeFpgaEWAdd(const struct EWAddArgs& args);
int ComputeFPGAConcat(const struct ConcatArgs& args);
@@ -220,10 +204,10 @@ void format_bias_scale_array(float** bias_scale_array,
void format_concat_output(framework::Tensor* out, int height, int width,
int image_num, uint32_t* channel_num);

void fill_conv_arg(struct WrapperConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h, int stride_w,
int padding_h, int padding_w, float* bs_ptr);
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float* bs_ptr);

half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num);
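The new GroupConvArgs bundles a group_num, a per-group array of SplitConvArgs, and a ConcatArgs for the combined output. One plausible way such a struct could be consumed is sketched below, built only from the declarations visible in this header; this commit does not actually add such a dispatcher, so treat the function name as hypothetical.

#include <cstdint>
#include "fpga/api.h"

// Sketch only: run each group's SplitConvArgs, then concatenate the outputs.
int ComputeGroupConvSketch(const paddle_mobile::fpga::GroupConvArgs &args) {
  for (uint32_t g = 0; g < args.group_num; ++g) {
    int ret = paddle_mobile::fpga::ComputeFpgaConv(args.conv_args[g]);
    if (ret != 0) {
      return ret;  // propagate the first failure
    }
  }
  return paddle_mobile::fpga::ComputeFPGAConcat(args.concat_arg);
}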
5 changes: 4 additions & 1 deletion src/fpga/filter.cpp
@@ -21,7 +21,10 @@ namespace paddle_mobile {
namespace fpga {
namespace filter {

int calc_division_capacity(int chw) { return 2048 / ((chw + 15) / 16) * 32; }
int calc_division_capacity(int chw) {
int n = 2048 / ((chw + 15) / 16) * 32;
return n < 2048 ? n : 2048;
}

int calc_split_num(int num, int division_capacity) {
return (num + division_capacity - 1) / division_capacity;
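The only functional change here is the clamp to 2048. A quick self-contained check of the formula (the example chw values are mine, chosen for illustration): small filters used to report capacities far above 2048, and the cap now bounds them.

#include <cassert>

// Mirrors the new calc_division_capacity to show where the 2048 cap bites.
int calc_division_capacity_sketch(int chw) {
  int n = 2048 / ((chw + 15) / 16) * 32;
  return n < 2048 ? n : 2048;
}

int main() {
  assert(calc_division_capacity_sketch(4608) == 224);   // e.g. 512x3x3 filter: below the cap
  assert(calc_division_capacity_sketch(27) == 2048);    // e.g. 3x3x3 filter: 32768 clamped to 2048
  return 0;
}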
9 changes: 5 additions & 4 deletions src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -66,10 +66,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);

return true;
9 changes: 5 additions & 4 deletions src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
@@ -65,10 +65,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(

fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
9 changes: 5 additions & 4 deletions src/operators/kernel/fpga/conv_add_relu_kernel.cpp
@@ -47,10 +47,11 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {

fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
9 changes: 5 additions & 4 deletions src/operators/kernel/fpga/conv_bn_kernel.cpp
@@ -59,10 +59,11 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {

fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
9 changes: 5 additions & 4 deletions src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
@@ -59,10 +59,11 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {

fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
6 changes: 3 additions & 3 deletions src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -53,9 +53,9 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
0, bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
6 changes: 3 additions & 3 deletions src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -54,9 +54,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);

fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
0, bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
71 changes: 0 additions & 71 deletions src/operators/kernel/fpga/mul_kernel.cpp

This file was deleted.

4 changes: 2 additions & 2 deletions src/operators/math/gemm_int8.cpp
@@ -652,7 +652,7 @@ void Gemm::WriteBasic(int32_t mc, int32_t nc, int32_t *c, int32_t *C,
int32_t *C0, *c0;
c_ptr = c;
C_ptr = C;
if (nc1 > 0) {
/*if (nc1 > 0) {
asm volatile(
"subs %[mc], %[mc], #1 \n\t"
"blt end_mc_%= \n\t"
@@ -684,7 +684,7 @@
: [C_ptr] "r"(C_ptr), [c_ptr] "r"(c_ptr), [mc] "r"(m), [nc1] "r"(nc1),
[step] "r"(step), [step1] "r"(step1)
: "memory", "r5", "r6", "q0", "q1", "q2", "q3");
}
}*/

if (_nc1 != 0) {
for (int32_t i = 0; i < mc; i++) {
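With the NEON block copy in Gemm::WriteBasic commented out, only the scalar _nc1 tail path remains active in this function. For orientation, an equivalent scalar fallback for the full-width part would look roughly like the sketch below, assuming c holds mc rows of nc contiguous int32 results and C has leading dimension ldc; the real member function tracks its own step/step1/nc1 strides, so the names and layout here are assumptions.

#include <cstdint>
#include <cstring>

// Scalar fallback sketch for the block copy the disabled assembly performed.
void WriteBasicScalarSketch(int32_t mc, int32_t nc, const int32_t *c,
                            int32_t *C, int32_t ldc) {
  for (int32_t i = 0; i < mc; ++i) {
    // copy one full row of results into the output matrix
    std::memcpy(C + i * ldc, c + i * nc, sizeof(int32_t) * nc);
  }
}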
