Skip to content

Commit

Permalink
Merge pull request Tencent#1 from Tencent/master
Browse files Browse the repository at this point in the history
update forks
  • Loading branch information
teslawho committed Sep 17, 2020
2 parents e69e85e + c286af0 commit 614fc4b
Show file tree
Hide file tree
Showing 46 changed files with 852 additions and 167 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/android-arm-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: android-arm-cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
android:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: configure
run: sudo apt-get install attr
- name: build
run: export ANDROID_NDK=$ANDROID_HOME/ndk-bundle && ./scripts/build_android.sh
15 changes: 15 additions & 0 deletions .github/workflows/ios-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: ios-cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
ios-iphone-os:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: build
run: ./scripts/build_ios.sh
19 changes: 19 additions & 0 deletions .github/workflows/linux-x86-cpu-gcc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: linux-x86-cpu-gcc
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
linux-gcc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: update
run: sudo apt-get update
- name: gcc-multilib
run: sudo apt-get install gcc-multilib g++-multilib
- name: build
run: ./scripts/build_linux.sh
17 changes: 17 additions & 0 deletions .github/workflows/macos-x64-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: macos-x64-cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
macos-clang:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: protobuf
run: brew install protobuf opencv3
- name: build
run: ./scripts/build_macos.sh
43 changes: 0 additions & 43 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,49 +11,6 @@ addons:

matrix:
include:
- name: "Linux | build"
os: linux
language: cpp
compiler: gcc
script: ./scripts/build_linux.sh

- name: "MacOS | build"
os: osx
language: cpp
osx_image: xcode11.2
compiler: clang
script: ./scripts/build_macos.sh

- name: "IOS | build"
os: osx
language: cpp
osx_image: xcode11.2
compiler: clang
script: ./scripts/build_ios.sh

- name: "Linux | Android | build"
os: linux
language: android
compiler: clang
android:
components:
- tools
- build-tools
- platform-tools
- android-21
licenses:
- 'android-sdk-preview-license-.+'
- 'android-sdk-license-.+'
- 'google-gdk-license-.+'
before_script:
- sudo apt-get remove cmake
- echo yes | sdkmanager "ndk-bundle"
- echo yes | sdkmanager "cmake;3.10.2.4988404"
- export ANDROID_NDK=$ANDROID_HOME/ndk-bundle
- export PATH=/usr/local/android-sdk/cmake/3.10.2.4988404/bin/:$PATH
script:
- ./scripts/build_android.sh

- name: "Linux | Arm64 | build"
os: linux
arch: arm64
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ COPY . $TNN_ROOT/
#RUN cd $TOOLS_ROOT/onnx2tnn/onnx-converter && ./build.sh
RUN cd $TOOLS_ROOT/convert2tnn && bash ./build.sh


RUN python3 $TOOLS_ROOT/convert2tnn/converter.py -h

WORKDIR $TOOLS_ROOT/convert2tnn/
31 changes: 29 additions & 2 deletions benchmark/benchmark_android/benchmark_models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function usage() {
echo " -b build targets only"
echo " -f build profiling targets "
echo " -d run with specified device"
echo " -t CPU/GPU specify the platform to run"
echo " -t CPU/GPU/HUAWEI_NPU specify the platform to run"
}

function exit_with_msg() {
Expand All @@ -56,6 +56,20 @@ function build_android_bench() {
if [ "-c" == "$CLEAN" ]; then
clean_build $BUILD_DIR
fi
if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ]; then
echo "NPU Enable"
# set c++ shared
STL="c++_shared"
HUAWEI_NPU_ENABLE="ON"
#start to cp
if [ ! -d ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/ ]; then
mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/
fi
mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/armeabi-v7a
mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/arm64-v8a
cp $ANDROID_NDK/sources/cxx-stl/llvm-libc++/libs/armeabi-v7a/libc++_shared.so ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/armeabi-v7a/
cp $ANDROID_NDK/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/arm64-v8a/
fi
mkdir -p build
cd $BUILD_DIR
cmake ../../.. \
Expand All @@ -67,6 +81,7 @@ function build_android_bench() {
-DANDROID_TOOLCHAIN=clang \
-DTNN_ARM_ENABLE:BOOL=ON \
-DTNN_OPENCL_ENABLE:BOOL=ON \
-DTNN_HUAWEI_NPU_ENABLE:BOOL=${HUAWEI_NPU_ENABLE} \
-DTNN_OPENMP_ENABLE:BOOL=ON \
-DTNN_TEST_ENABLE:BOOL=ON \
-DTNN_BENCHMARK_MODE:BOOL=ON \
Expand Down Expand Up @@ -105,7 +120,7 @@ function bench_android() {
benchmark_model_list=`ls *.tnnproto`
fi

if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ];then
if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ] && [ "$DEVICE_TYPE" != "HUAWEI_NPU" ]; then
DEVICE_TYPE=""
fi

Expand Down Expand Up @@ -133,6 +148,18 @@ function bench_android() {
done
fi

if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ];then
echo "Run Huawei Npu"
device=HUAWEI_NPU
$ADB push ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/${ABI}/* $ANDROID_DIR/
$ADB push ${WORK_DIR}/../../third_party/huawei_npu/hiai_ddk_latest/${ABI}/* $ANDROID_DIR/
$ADB shell "echo '\nbenchmark device: ${device} \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE"
for benchmark_model in ${benchmark_model_list[*]}
do
$ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. ./TNNTest -th ${THREAD_NUM} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -nt ${device} -mt ${MODEL_TYPE} -mp ${ANDROID_DATA_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE"
done
fi

$ADB shell "echo '' >> $ANDROID_DIR/$OUTPUT_LOG_FILE"
$ADB shell "date >> $ANDROID_DIR/$OUTPUT_LOG_FILE"

Expand Down
5 changes: 3 additions & 2 deletions doc/cn/development/profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,9 @@ cp mobilenet_v1.tnnproto .
-c 删除之前的编译文件,重新编译
-b 仅编译,不执行
-f 打印每一层的耗时,否则是整个网络的平均耗时。
-t 指定执行的平台。需要加上<CPU/GPU>
-t 指定执行的平台。需要加上<CPU/GPU/HUAWEI_NPU>
```
P.S. 不指定 -t 时默认跑 CPU 和 GPU,华为 NPU benchmark 需通过 -t HUAWEI_NPU 特殊指定。
#### 4.1 全网络性能分析:
分析整体网络耗时,执行多次,获取平均性能。
执行脚本:
Expand All @@ -117,7 +118,7 @@ cp mobilenet_v1.tnnproto .
<div align=left ><img src="https://gitee.com/darren3d/tnn-resource/raw/master/doc/cn/development/resource/opencl_profiling.jpg" width = "75%" height = "75%"/>

执行结果会保存在`benchmark_models_result.txt`中。

P.S. 华为npu不支持每层分析。

### 5. 特殊说明
* 对于OpenCL平台,逐层性能分析的目的是分析kernel的耗时分布,其中为了打印每层耗时,有额外开销,只有kernel时间具有参考意义。如果要看整体实际性能,需要参考全网络性能分析。
47 changes: 39 additions & 8 deletions doc/cn/user/convert.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

<div align=left ><img src="https://gitee.com/darren3d/tnn-resource/raw/master/doc/cn/user/resource/convert.png"/>

目前 TNN 支持业界主流的模型文件格式,包括ONNX、PyTorch、TensorFlow 以及 Caffe 等。如上图所示,TNN 将 ONNX 作为中间层,借助于ONNX 开源社区的力量,来支持多种模型文件格式。如果要将PyTorch、TensorFlow 以及 Caffe 等模型文件格式转换为 TNN,首先需要使用对应的模型转换工具,统一将各种模型格式转换成为 ONNX 模型格式,然后将 ONNX 模型转换成 TNN 模型。
目前 TNN 支持业界主流的模型文件格式,包括ONNX、PyTorch、TensorFlow、TensorFlow-Lite 以及 Caffe 等。如上图所示,TNN 将 ONNX 作为中间层,借助于ONNX 开源社区的力量,来支持多种模型文件格式。如果要将PyTorch、TensorFlow 以及 Caffe 等模型文件格式转换为 TNN,首先需要使用对应的模型转换工具,统一将各种模型格式转换成为 ONNX 模型格式,然后将 ONNX 模型转换成 TNN 模型。

| 原始模型 | 转换工具 | 目标模型 |
|------------|-----------------|----------|
| PyTorch | pytorch export | ONNX |
| TensorFlow | tensorflow-onnx | ONNX |
| Caffe | caffe2onnx | ONNX |
| ONNX | onnx2tnn | TNN |

| TensorFlow-Lite | tflite2tnn | TNN |
目前 TNN 仅支持 CNN 等常用网络结构,RNN、GAN 等网络结构正在逐步开发中。

# TNN 模型转换工具
Expand Down Expand Up @@ -91,6 +91,7 @@ positional arguments:
onnx2tnn convert onnx model to tnn model
caffe2tnn convert caffe model to tnn model
tf2tnn convert tensorflow model to tnn model
tflite2tnn convert tensorflow-lite model to tnn model
optional arguments:
-h, --help show this help message and exit
Expand Down Expand Up @@ -187,6 +188,15 @@ docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter
-align \
-input_file /workspace/in.txt \
-ref_file /workspace/ref.txt

# convert tflite
docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter.py tflite2tnn \
/workspace/mobilenet_v1_1.0_224.tflite \
-v v1.0 \
-align \
-input_file /workspace/in.txt \
-ref_file /workspace/ref.txt


```

Expand Down Expand Up @@ -446,14 +456,34 @@ optional arguments:
the reference file path which contains the reference
data to compare the results.
```
- tensorflow-lite2tnn

当前 tensorflow-lite2tnn 的转换支持tflite格式文件,从而方便移动端部署。

``` shell script
python3 converter.py tflite2tnn -h
```
usage 信息如下:
```
usage: convert tflite2tnn [-h] TF_PATH [-o OUTPUT_DIR] [-v v1.0] [-align]
optional arguments:
-h, --help show this help message and exit
TF_PATH the path for tensorflow-lite graphdef file
-o OUTPUT_DIR the output tnn directory
-v v1.0 the version for model
-align align the onnx model with tnn model
-input_file INPUT_FILE_PATH
the input file path which contains the input data for
the inference model.
-ref_file REFER_FILE_PATH
the reference file path which contains the reference
data to compare the results.
```
示例:
```shell script
python3 converter.py tf2tnn \
-tp ~/tf-model/test.pb \
-in "input0[1,32,32,3];input1[1,32,32,3]" \
-on output0 \
-v v2.0 \
-optimize \
python3 converter.py tflite2tnn \
~/tf-model/test.tflite \
-o ~/tf-model/ \
-align \
-input_file in.txt \
Expand Down Expand Up @@ -556,4 +586,5 @@ convert2tnn 只是对多种模型转换的工具的封装,根据第一部分
- [pytorch2tnn](onnx2tnn.md)
- [tf2tnn](tf2tnn.md)
- [caffe2tnn](caffe2tnn.md)
- [tflite2tnn](tflite2tnn.md)

62 changes: 62 additions & 0 deletions doc/cn/user/support_tflite_mode.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@


# TFLite 算子的支持


| tflite operator | tnn operator | support |
|-------------------------+----------------------+---------|
| Add | Add | yes |
| Average_Pool_2d | Pooling | yes |
| Concatenation | Concat | yes |
| Conv_2d | Convolution | yes |
| Cos | Cos | yes |
| Depthwise_Conv_2d | Convolution | yes |
| Detection_Post_Process | DetectionPostProcess | yes |
| Div | Div | yes |
| Exp | Exp | yes |
| Full_Connected | InnerProduct | yes |
| LeakyRelu | Prelu | yes |
| Log | Logistic | yes |
| Logistic | Sigmoid | yes |
| Max_Pool_2d | Pooling | yes |
| Maximum | Maximum | yes |
| Mean | ReduceMean | yes |
| Minimum | Minimum | yes |
| Mul | Mul | yes |
| Neg | Neg | yes |
| Pad | Pad | yes |
| Padv2 | Pad | yes |
| Prelu | Prelu | yes |
| Reshape | Reshape | yes |
| Resize_Bilinear | Upsample | yes |
| Resize_Nearest_Neighbor | Upsample | yes |
| Sin | Sin | yes |
| Softmax | Softmax | yes |
| Split | SplitV | yes |
| SplitV | SplitV | yes |
| Squeeze | Squeeze | yes |
| StridedSlice | StridedSlice | yes |
| Sub | Sub | yes |
| Tanh | Tanh | yes |
| Transpose_Conv | Deconvolution | yes |


# TFLite 模型的支持


| tflite model | support align |
|----------------------------------------+---------------|
| alexnet | yes |
| densenet_2018_04_27 | yes |
| face_landmark(media pipe) | yes |
| inception_v3_2018_04_27 | yes |
| inception_v4_2018_04_27 | yes |
| mobilenet_v1_1.0_224 | yes |
| mobilenet_v2_1.0_224 | yes |
| object_detection_3d(shoes, media pipe) | yes |
| resnet_v2_101_229 | yes |
| squeezenet_2018_04_26 | yes |
| ssd | yes |
| vgg16 | yes |
| yolo_tiny | yes |
| yolov2_tiny | yes |
Loading

0 comments on commit 614fc4b

Please sign in to comment.