Skip to content

Commit

Permalink
Merge pull request Tencent#1 from Tencent/master
Browse files Browse the repository at this point in the history
update forks
  • Loading branch information
teslawho committed Sep 17, 2020
2 parents e69e85e + c286af0 commit 614fc4b
Show file tree
Hide file tree
Showing 46 changed files with 852 additions and 167 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/android-arm-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: android-arm-cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
android:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: configure
run: sudo apt-get install attr
- name: build
run: export ANDROID_NDK=$ANDROID_HOME/ndk-bundle && ./scripts/build_android.sh
15 changes: 15 additions & 0 deletions .github/workflows/ios-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: ios-cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
ios-iphone-os:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: build
run: ./scripts/build_ios.sh
19 changes: 19 additions & 0 deletions .github/workflows/linux-x86-cpu-gcc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: linux-x86-cpu-gcc
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
linux-gcc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: update
run: sudo apt-get update
- name: gcc-multilib
run: sudo apt-get install gcc-multilib g++-multilib
- name: build
run: ./scripts/build_linux.sh
17 changes: 17 additions & 0 deletions .github/workflows/macos-x64-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: macos-x64-cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
macos-clang:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: protobuf
run: brew install protobuf opencv3
- name: build
run: ./scripts/build_macos.sh
43 changes: 0 additions & 43 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,49 +11,6 @@ addons:

matrix:
include:
- name: "Linux | build"
os: linux
language: cpp
compiler: gcc
script: ./scripts/build_linux.sh

- name: "MacOS | build"
os: osx
language: cpp
osx_image: xcode11.2
compiler: clang
script: ./scripts/build_macos.sh

- name: "IOS | build"
os: osx
language: cpp
osx_image: xcode11.2
compiler: clang
script: ./scripts/build_ios.sh

- name: "Linux | Android | build"
os: linux
language: android
compiler: clang
android:
components:
- tools
- build-tools
- platform-tools
- android-21
licenses:
- 'android-sdk-preview-license-.+'
- 'android-sdk-license-.+'
- 'google-gdk-license-.+'
before_script:
- sudo apt-get remove cmake
- echo yes | sdkmanager "ndk-bundle"
- echo yes | sdkmanager "cmake;3.10.2.4988404"
- export ANDROID_NDK=$ANDROID_HOME/ndk-bundle
- export PATH=/usr/local/android-sdk/cmake/3.10.2.4988404/bin/:$PATH
script:
- ./scripts/build_android.sh

- name: "Linux | Arm64 | build"
os: linux
arch: arm64
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ COPY . $TNN_ROOT/
#RUN cd $TOOLS_ROOT/onnx2tnn/onnx-converter && ./build.sh
RUN cd $TOOLS_ROOT/convert2tnn && bash ./build.sh


RUN python3 $TOOLS_ROOT/convert2tnn/converter.py -h

WORKDIR $TOOLS_ROOT/convert2tnn/
31 changes: 29 additions & 2 deletions benchmark/benchmark_android/benchmark_models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function usage() {
echo " -b build targets only"
echo " -f build profiling targets "
echo " -d run with specified device"
echo " -t CPU/GPU specify the platform to run"
echo " -t CPU/GPU/HUAWEI_NPU specify the platform to run"
}

function exit_with_msg() {
Expand All @@ -56,6 +56,20 @@ function build_android_bench() {
if [ "-c" == "$CLEAN" ]; then
clean_build $BUILD_DIR
fi
if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ]; then
echo "NPU Enable"
# set c++ shared
STL="c++_shared"
HUAWEI_NPU_ENABLE="ON"
#start to cp
if [ ! -d ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/ ]; then
mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/
fi
mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/armeabi-v7a
mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/arm64-v8a
cp $ANDROID_NDK/sources/cxx-stl/llvm-libc++/libs/armeabi-v7a/libc++_shared.so ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/armeabi-v7a/
cp $ANDROID_NDK/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/arm64-v8a/
fi
mkdir -p build
cd $BUILD_DIR
cmake ../../.. \
Expand All @@ -67,6 +81,7 @@ function build_android_bench() {
-DANDROID_TOOLCHAIN=clang \
-DTNN_ARM_ENABLE:BOOL=ON \
-DTNN_OPENCL_ENABLE:BOOL=ON \
-DTNN_HUAWEI_NPU_ENABLE:BOOL=${HUAWEI_NPU_ENABLE} \
-DTNN_OPENMP_ENABLE:BOOL=ON \
-DTNN_TEST_ENABLE:BOOL=ON \
-DTNN_BENCHMARK_MODE:BOOL=ON \
Expand Down Expand Up @@ -105,7 +120,7 @@ function bench_android() {
benchmark_model_list=`ls *.tnnproto`
fi

if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ];then
if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ] && [ "$DEVICE_TYPE" != "HUAWEI_NPU" ]; then
DEVICE_TYPE=""
fi

Expand Down Expand Up @@ -133,6 +148,18 @@ function bench_android() {
done
fi

if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ];then
echo "Run Huawei Npu"
device=HUAWEI_NPU
$ADB push ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/${ABI}/* $ANDROID_DIR/
$ADB push ${WORK_DIR}/../../third_party/huawei_npu/hiai_ddk_latest/${ABI}/* $ANDROID_DIR/
$ADB shell "echo '\nbenchmark device: ${device} \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE"
for benchmark_model in ${benchmark_model_list[*]}
do
$ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. ./TNNTest -th ${THREAD_NUM} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -nt ${device} -mt ${MODEL_TYPE} -mp ${ANDROID_DATA_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE"
done
fi

$ADB shell "echo '' >> $ANDROID_DIR/$OUTPUT_LOG_FILE"
$ADB shell "date >> $ANDROID_DIR/$OUTPUT_LOG_FILE"

Expand Down
5 changes: 3 additions & 2 deletions doc/cn/development/profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,9 @@ cp mobilenet_v1.tnnproto .
-c 删除之前的编译文件,重新编译
-b 仅编译,不执行
-f 打印每一层的耗时,否则是整个网络的平均耗时。
-t 指定执行的平台。需要加上<CPU/GPU>
-t 指定执行的平台。需要加上<CPU/GPU/HUAWEI_NPU>
```
P.S. 不指定 -t 时默认跑 CPU 和 GPU,华为 NPU benchmark 需通过 -t HUAWEI_NPU 特殊指定。
#### 4.1 全网络性能分析:
分析整体网络耗时,执行多次,获取平均性能。
执行脚本:
Expand All @@ -117,7 +118,7 @@ cp mobilenet_v1.tnnproto .
<div align=left ><img src="https://gitee.com/darren3d/tnn-resource/raw/master/doc/cn/development/resource/opencl_profiling.jpg" width = "75%" height = "75%"/>

执行结果会保存在`benchmark_models_result.txt`中。

P.S. 华为npu不支持每层分析。

### 5. 特殊说明
* 对于OpenCL平台,逐层性能分析的目的是分析kernel的耗时分布,其中为了打印每层耗时,有额外开销,只有kernel时间具有参考意义。如果要看整体实际性能,需要参考全网络性能分析。
47 changes: 39 additions & 8 deletions doc/cn/user/convert.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

<div align=left ><img src="https://gitee.com/darren3d/tnn-resource/raw/master/doc/cn/user/resource/convert.png"/>

目前 TNN 支持业界主流的模型文件格式,包括ONNX、PyTorch、TensorFlow 以及 Caffe 等。如上图所示,TNN 将 ONNX 作为中间层,借助于ONNX 开源社区的力量,来支持多种模型文件格式。如果要将PyTorch、TensorFlow 以及 Caffe 等模型文件格式转换为 TNN,首先需要使用对应的模型转换工具,统一将各种模型格式转换成为 ONNX 模型格式,然后将 ONNX 模型转换成 TNN 模型。
目前 TNN 支持业界主流的模型文件格式,包括ONNX、PyTorch、TensorFlow、TensorFlow-Lite 以及 Caffe 等。如上图所示,TNN 将 ONNX 作为中间层,借助于ONNX 开源社区的力量,来支持多种模型文件格式。如果要将PyTorch、TensorFlow 以及 Caffe 等模型文件格式转换为 TNN,首先需要使用对应的模型转换工具,统一将各种模型格式转换成为 ONNX 模型格式,然后将 ONNX 模型转换成 TNN 模型。

| 原始模型 | 转换工具 | 目标模型 |
|------------|-----------------|----------|
| PyTorch | pytorch export | ONNX |
| TensorFlow | tensorflow-onnx | ONNX |
| Caffe | caffe2onnx | ONNX |
| ONNX | onnx2tnn | TNN |

| TensorFlow-Lite | tflite2tnn | TNN |
目前 TNN 仅支持 CNN 等常用网络结构,RNN、GAN 等网络结构正在逐步开发中。

# TNN 模型转换工具
Expand Down Expand Up @@ -91,6 +91,7 @@ positional arguments:
onnx2tnn convert onnx model to tnn model
caffe2tnn convert caffe model to tnn model
tf2tnn convert tensorflow model to tnn model
tflite2tnn convert tensorflow-lite model to tnn model
optional arguments:
-h, --help show this help message and exit
Expand Down Expand Up @@ -187,6 +188,15 @@ docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter
-align \
-input_file /workspace/in.txt \
-ref_file /workspace/ref.txt

# convert tflite
docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter.py tflite2tnn \
/workspace/mobilenet_v1_1.0_224.tflite \
-v v1.0 \
-align \
-input_file /workspace/in.txt \
-ref_file /workspace/ref.txt


```

Expand Down Expand Up @@ -446,14 +456,34 @@ optional arguments:
the reference file path which contains the reference
data to compare the results.
```
- tensorflow-lite2tnn

当前 tensorflow-lite2tnn 的转换支持tflite格式文件,从而方便移动端部署。

``` shell script
python3 converter.py tflite2tnn -h
```
usage 信息如下:
```
usage: convert tflite2tnn [-h] TF_PATH [-o OUTPUT_DIR] [-v v1.0] [-align]
optional arguments:
-h, --help show this help message and exit
TF_PATH the path for tensorflow-lite graphdef file
-o OUTPUT_DIR the output tnn directory
-v v1.0 the version for model
-align align the onnx model with tnn model
-input_file INPUT_FILE_PATH
the input file path which contains the input data for
the inference model.
-ref_file REFER_FILE_PATH
the reference file path which contains the reference
data to compare the results.
```
示例:
```shell script
python3 converter.py tf2tnn \
-tp ~/tf-model/test.pb \
-in "input0[1,32,32,3];input1[1,32,32,3]" \
-on output0 \
-v v2.0 \
-optimize \
python3 converter.py tflite2tnn \
~/tf-model/test.tflite \
-o ~/tf-model/ \
-align \
-input_file in.txt \
Expand Down Expand Up @@ -556,4 +586,5 @@ convert2tnn 只是对多种模型转换的工具的封装,根据第一部分
- [pytorch2tnn](onnx2tnn.md)
- [tf2tnn](tf2tnn.md)
- [caffe2tnn](caffe2tnn.md)
- [tflite2tnn](tflite2tnn.md)

62 changes: 62 additions & 0 deletions doc/cn/user/support_tflite_mode.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@


# TFLite 算子的支持


| tflite operator | tnn operator | support |
|-------------------------+----------------------+---------|
| Add | Add | yes |
| Average_Pool_2d | Pooling | yes |
| Concatenation | Concat | yes |
| Conv_2d | Convolution | yes |
| Cos | Cos | yes |
| Depthwise_Conv_2d | Convolution | yes |
| Detection_Post_Process | DetectionPostProcess | yes |
| Div | Div | yes |
| Exp | Exp | yes |
| Full_Connected | InnerProduct | yes |
| LeakyRelu | Prelu | yes |
| Log | Logistic | yes |
| Logistic | Sigmoid | yes |
| Max_Pool_2d | Pooling | yes |
| Maximum | Maximum | yes |
| Mean | ReduceMean | yes |
| Minimum | Minimum | yes |
| Mul | Mul | yes |
| Neg | Neg | yes |
| Pad | Pad | yes |
| Padv2 | Pad | yes |
| Prelu | Prelu | yes |
| Reshape | Reshape | yes |
| Resize_Bilinear | Upsample | yes |
| Resize_Nearest_Neighbor | Upsample | yes |
| Sin | Sin | yes |
| Softmax | Softmax | yes |
| Split | SplitV | yes |
| SplitV | SplitV | yes |
| Squeeze | Squeeze | yes |
| StridedSlice | StridedSlice | yes |
| Sub | Sub | yes |
| Tanh | Tanh | yes |
| Transpose_Conv | Deconvolution | yes |


# TFLite 模型的支持


| tflite model | support align |
|----------------------------------------+---------------|
| alexnet | yes |
| densenet_2018_04_27 | yes |
| face_landmark(media pipe) | yes |
| inception_v3_2018_04_27 | yes |
| inception_v4_2018_04_27 | yes |
| mobilenet_v1_1.0_224 | yes |
| mobilenet_v2_1.0_224 | yes |
| object_detection_3d(shoes, media pipe) | yes |
| resnet_v2_101_229 | yes |
| squeezenet_2018_04_26 | yes |
| ssd | yes |
| vgg16 | yes |
| yolo_tiny | yes |
| yolov2_tiny | yes |
Loading

0 comments on commit 614fc4b

Please sign in to comment.