diff --git a/.clang-format b/.clang-format index c91ec19fb1e..4ea70510d26 100644 --- a/.clang-format +++ b/.clang-format @@ -1,180 +1,180 @@ -# --- -# Language: Cpp -# # BasedOnStyle: LLVM -# AccessModifierOffset: -1 -# AlignAfterOpenBracket: Align -# AlignArrayOfStructures: None -# AlignConsecutiveMacros: None -# AlignConsecutiveAssignments: None -# AlignConsecutiveBitFields: None -# AlignConsecutiveDeclarations: None -# AlignEscapedNewlines: Right -# AlignOperands: Align -# AlignTrailingComments: true -# AllowAllArgumentsOnNextLine: true -# AllowAllConstructorInitializersOnNextLine: true -# AllowAllParametersOfDeclarationOnNextLine: true -# AllowShortEnumsOnASingleLine: true -# AllowShortBlocksOnASingleLine: Never -# AllowShortCaseLabelsOnASingleLine: false -# AllowShortFunctionsOnASingleLine: All -# AllowShortLambdasOnASingleLine: All -# AllowShortIfStatementsOnASingleLine: Never -# AllowShortLoopsOnASingleLine: false -# AlwaysBreakAfterDefinitionReturnType: None -# AlwaysBreakAfterReturnType: None -# AlwaysBreakBeforeMultilineStrings: false -# AlwaysBreakTemplateDeclarations: MultiLine -# AttributeMacros: -# - __capability -# BinPackArguments: true -# BinPackParameters: true -# BraceWrapping: -# AfterCaseLabel: false -# AfterClass: false -# AfterControlStatement: Never -# AfterEnum: false -# AfterFunction: false -# AfterNamespace: false -# AfterObjCDeclaration: false -# AfterStruct: false -# AfterUnion: false -# AfterExternBlock: false -# BeforeCatch: false -# BeforeElse: false -# BeforeLambdaBody: false -# BeforeWhile: false -# IndentBraces: false -# SplitEmptyFunction: true -# SplitEmptyRecord: true -# SplitEmptyNamespace: true -# BreakBeforeBinaryOperators: None -# BreakBeforeConceptDeclarations: true -# BreakBeforeBraces: Attach -# BreakBeforeInheritanceComma: false -# BreakInheritanceList: BeforeColon -# BreakBeforeTernaryOperators: true -# BreakConstructorInitializersBeforeComma: false -# BreakConstructorInitializers: BeforeColon -# BreakAfterJavaFieldAnnotations: false -# BreakStringLiterals: true -# ColumnLimit: 80 -# # CommentPragmas: '^ IWYU pragma:' -# # CommentPragmas: '^[^ ]' -# CommentPragmas: '^\\.+' -# CompactNamespaces: false -# ConstructorInitializerAllOnOneLineOrOnePerLine: false -# ConstructorInitializerIndentWidth: 4 -# ContinuationIndentWidth: 4 -# Cpp11BracedListStyle: true -# DeriveLineEnding: true -# DerivePointerAlignment: false -# DisableFormat: false -# EmptyLineAfterAccessModifier: Never -# EmptyLineBeforeAccessModifier: LogicalBlock -# ExperimentalAutoDetectBinPacking: false -# FixNamespaceComments: true -# ForEachMacros: -# - foreach -# - Q_FOREACH -# - BOOST_FOREACH -# IfMacros: -# - KJ_IF_MAYBE -# IncludeBlocks: Preserve -# IncludeCategories: -# - Regex: '^"(llvm|llvm-c|clang|clang-c)/' -# Priority: 2 -# SortPriority: 0 -# CaseSensitive: false -# - Regex: '^(<|"(gtest|gmock|isl|json)/)' -# Priority: 3 -# SortPriority: 0 -# CaseSensitive: false -# - Regex: '.*' -# Priority: 1 -# SortPriority: 0 -# CaseSensitive: false -# IncludeIsMainRegex: '(Test)?$' -# IncludeIsMainSourceRegex: '' -# IndentAccessModifiers: false -# IndentCaseLabels: false -# IndentCaseBlocks: false -# IndentGotoLabels: true -# IndentPPDirectives: None -# IndentExternBlock: AfterExternBlock -# IndentRequires: false -# IndentWidth: 2 -# IndentWrappedFunctionNames: false -# InsertTrailingCommas: None -# JavaScriptQuotes: Leave -# JavaScriptWrapImports: true -# KeepEmptyLinesAtTheStartOfBlocks: true -# LambdaBodyIndentation: Signature -# MacroBlockBegin: '' -# MacroBlockEnd: '' -# 
MaxEmptyLinesToKeep: 1 -# NamespaceIndentation: None -# ObjCBinPackProtocolList: Auto -# ObjCBlockIndentWidth: 2 -# ObjCBreakBeforeNestedBlockParam: true -# ObjCSpaceAfterProperty: false -# ObjCSpaceBeforeProtocolList: true -# PenaltyBreakAssignment: 2 -# PenaltyBreakBeforeFirstCallParameter: 19 -# PenaltyBreakComment: 300 -# PenaltyBreakFirstLessLess: 120 -# PenaltyBreakString: 1000 -# PenaltyBreakTemplateDeclaration: 10 -# PenaltyExcessCharacter: 1000000 -# PenaltyReturnTypeOnItsOwnLine: 60 -# PenaltyIndentedWhitespace: 0 -# PointerAlignment: Left -# PPIndentWidth: -1 -# ReferenceAlignment: Pointer -# ReflowComments: false -# ShortNamespaceLines: 1 -# SortIncludes: CaseSensitive -# SortJavaStaticImport: Before -# SortUsingDeclarations: true -# SpaceAfterCStyleCast: false -# SpaceAfterLogicalNot: false -# SpaceAfterTemplateKeyword: true -# SpaceBeforeAssignmentOperators: true -# SpaceBeforeCaseColon: false -# SpaceBeforeCpp11BracedList: false -# SpaceBeforeCtorInitializerColon: true -# SpaceBeforeInheritanceColon: true -# SpaceBeforeParens: ControlStatements -# SpaceAroundPointerQualifiers: Default -# SpaceBeforeRangeBasedForLoopColon: true -# SpaceInEmptyBlock: false -# SpaceInEmptyParentheses: false -# SpacesBeforeTrailingComments: 2 -# SpacesInAngles: Never -# SpacesInConditionalStatement: false -# SpacesInContainerLiterals: true -# SpacesInCStyleCastParentheses: false -# SpacesInLineCommentPrefix: -# Minimum: 1 -# Maximum: -1 -# SpacesInParentheses: false -# SpacesInSquareBrackets: false -# SpaceBeforeSquareBrackets: false -# BitFieldColonSpacing: Both -# Standard: Latest -# StatementAttributeLikeMacros: -# - Q_EMIT -# StatementMacros: -# - Q_UNUSED -# - QT_REQUIRE_VERSION -# TabWidth: 8 -# UseCRLF: false -# UseTab: Never -# WhitespaceSensitiveMacros: -# - STRINGIZE -# - PP_STRINGIZE -# - BOOST_PP_STRINGIZE -# - NS_SWIFT_NAME -# - CF_SWIFT_NAME -# ... 
-# +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +# CommentPragmas: '^ IWYU pragma:' +# CommentPragmas: '^[^ ]' +CommentPragmas: '^\\.+' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: false +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 
+PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Left +PPIndentWidth: -1 +ReferenceAlignment: Pointer +ReflowComments: false +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0a9168eea21..3e60016ef96 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,14 +24,14 @@ repos: files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ exclude: (?!.*third_party)^.*$ -# - repo: local -# hooks: -# - id: clang-format-with-version-check -# name: clang-format -# description: Format files with ClangFormat. -# entry: bash .clang_format.hook -i -# language: system -# files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ +- repo: local + hooks: + - id: clang-format-with-version-check + name: clang-format + description: Format files with ClangFormat. + entry: bash .clang_format.hook -i + language: system + files: \.(c|cc|cxx|cpp|cu|hxx|proto)$ - repo: local hooks: diff --git a/CMakeLists.txt b/CMakeLists.txt index 549d7b708d6..42bc600bb89 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ PROJECT(fastdeploy C CXX) CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + option(CSRCS_DIR_NAME "Name of source code directory") option(LIBRARY_NAME "Name of build library name") option(PY_LIBRARY_NAME "Name of build python library name") @@ -60,10 +61,12 @@ option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF) option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF) option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF) option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF) +option(ENABLE_SOPHGO_BACKEND "Whether to enable SOPHON backend." OFF) option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF) option(ENABLE_VISION "Whether to enable vision models usage." OFF) option(ENABLE_TEXT "Whether to enable text models usage." OFF) option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF) +option(ENABLE_ENCRYPTION "Whether to enable ENCRYPTION." OFF) option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." 
OFF) @@ -193,13 +196,15 @@ file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fast file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc) file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc) +file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc) file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc) file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) +file(GLOB_RECURSE DEPLOY_ENCRYPTION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/encryption/*.cc) file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc) file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cu) file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc) file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc) -list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS}) +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS} ${DEPLOY_SOPHGO_SRCS} ${DEPLOY_ENCRYPTION_SRCS}) set(DEPEND_LIBS "") @@ -240,12 +245,12 @@ if(ENABLE_PADDLE_BACKEND) add_definitions(-DENABLE_PADDLE_BACKEND) list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS}) include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake) - if(NOT APPLE) - list(APPEND DEPEND_LIBS external_paddle_inference external_dnnl external_omp) - else() - # no third parties libs(mkldnn and omp) need to - # link into paddle_inference on MacOS OSX. 
- list(APPEND DEPEND_LIBS external_paddle_inference) + list(APPEND DEPEND_LIBS external_paddle_inference) + if(external_dnnl_FOUND) + list(APPEND DEPEND_LIBS external_dnnl external_omp) + endif() + if(external_ort_FOUND) + list(APPEND DEPEND_LIBS external_p2o external_ort) endif() endif() @@ -263,6 +268,13 @@ if(ENABLE_RKNPU2_BACKEND) list(APPEND DEPEND_LIBS ${RKNN_RT_LIB}) endif() +if(ENABLE_SOPHGO_BACKEND) + add_definitions(-DENABLE_SOPHGO_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_SOPHGO_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/sophgo.cmake) + list(APPEND DEPEND_LIBS ${SOPHGO_RT_LIB}) +endif() + if(ENABLE_POROS_BACKEND) set(CMAKE_CXX_STANDARD 14) add_definitions(-DENABLE_POROS_BACKEND) @@ -387,9 +399,9 @@ if(ENABLE_TRT_BACKEND) find_package(Python COMPONENTS Interpreter Development REQUIRED) message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) - file(GLOB_RECURSE TRT_STAIC_LIBS ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib/*.a) + file(GLOB_RECURSE TRT_STATIC_LIBS ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib/*.a) if(TRT_STATIC_LIBS) - file(REMOVE ${TRT_STAIC_LIBS}) + file(REMOVE ${TRT_STATIC_LIBS}) endif() if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib @@ -441,6 +453,14 @@ if(ENABLE_TEXT) include(${PROJECT_SOURCE_DIR}/cmake/fast_tokenizer.cmake) endif() +if(ENABLE_ENCRYPTION) + add_definitions(-DENABLE_ENCRYPTION) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ENCRYPTION_SRCS}) + include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake) + include(${PROJECT_SOURCE_DIR}/cmake/openssl.cmake) + list(APPEND DEPEND_LIBS ${OPENSSL_LIBRARIES}) +endif() + if(ENABLE_PADDLE_FRONTEND) add_definitions(-DENABLE_PADDLE_FRONTEND) include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake) @@ -653,6 +673,11 @@ if(BUILD_FASTDEPLOY_PYTHON) list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${VISION_PYBIND_SRCS} ${PIPELINE_PYBIND_SRCS}) endif() + if(NOT ENABLE_ENCRYPTION) + file(GLOB_RECURSE ENCRYPTION_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/encryption/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${ENCRYPTION_PYBIND_SRCS}) + endif() + if (NOT ENABLE_TEXT) file(GLOB_RECURSE TEXT_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*_pybind.cc) list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${TEXT_PYBIND_SRCS}) diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 456a4d3217f..6ba0b430781 100755 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -3,6 +3,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.8) set(WITH_GPU @WITH_GPU@) set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@) set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@) +set(ENABLE_SOPHGO_BACKEND @ENABLE_SOPHGO_BACKEND@) set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@) set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@) set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@) @@ -13,6 +14,7 @@ set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@) set(ENABLE_VISION @ENABLE_VISION@) set(ENABLE_FLYCV @ENABLE_FLYCV@) set(ENABLE_TEXT @ENABLE_TEXT@) +set(ENABLE_ENCRYPTION @ENABLE_ENCRYPTION@) set(BUILD_ON_JETSON @BUILD_ON_JETSON@) set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) set(OPENVINO_VERSION @OPENVINO_VERSION@) @@ -74,10 +76,9 @@ if(ENABLE_PADDLE_BACKEND) set(DNNL_LIB 
"${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/libmkldnn.so.0") set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.so") endif() - if(NOT APPLE) - list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB} ${DNNL_LIB} ${IOMP_LIB}) - else() - list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB}) + list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB}) + if(EXISTS "${DNNL_LIB}") + list(APPEND FASTDEPLOY_LIBS ${DNNL_LIB} ${IOMP_LIB}) endif() endif() @@ -271,6 +272,7 @@ message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") message(STATUS " WITH_GPU : ${WITH_GPU}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") +message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}") message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") @@ -288,6 +290,7 @@ endif() message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") +message(STATUS " ENABLE_ENCRYPTION : ${ENABLE_ENCRYPTION}") if(WITH_GPU) message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}") endif() diff --git a/README.md b/README.md index e8f014f6ffd..b1dd06cd5cb 120000 --- a/README.md +++ b/README.md @@ -1 +1 @@ -README_CN.md +README_EN.md diff --git a/README_CN.md b/README_CN.md index 886118c2ee3..251f019a6ab 100755 --- a/README_CN.md +++ b/README_CN.md @@ -274,7 +274,7 @@ int main(int argc, char* argv[]) { | 任务场景 | 模型 | 大小(MB) | Linux | Android | Linux | Linux | Linux | Linux | Linux | 更新中... | |:------------------:|:-----------------------------------------------------------------------------------------:|:--------:|:-------:|:-------:|:-------:|:-----------------------:|:------------------------------:|:---------------------------:|:--------------------------------:|:-------:| -| --- | --- | --- | ARM CPU | [ARM CPU](./java/android) | [瑞芯微NPU
RK3568/RK3588](./docs/cn/build_and_install/rknpu2.md) | [瑞芯微NPU<br>RV1109/RV1126/RK1808](./docs/cn/build_and_install/rv1126.md) | [晶晨NPU<br>A311D/S905D/C308X](./docs/cn/build_and_install/a311d.md) | 恩智浦NPU<br>i.MX 8M Plus | 更新中...| | +| --- | --- | --- | ARM CPU | [ARM CPU](./java/android) | [瑞芯微NPU<br>RK3588/RK3568/RK3566](./docs/cn/build_and_install/rknpu2.md) | [瑞芯微NPU<br>RV1109/RV1126/RK1808](./docs/cn/build_and_install/rv1126.md) | [晶晨NPU<br>A311D/S905D/C308X](./docs/cn/build_and_install/a311d.md) | 恩智浦NPU
i.MX 8M Plus | 更新中...| | | Classification | [PaddleClas/ResNet50](examples/vision/classification/paddleclas) | 98 | ✅ | ✅ | [✅](./examples/vision/classification/paddleclas/rknpu2) | ✅ | | | | | Classification | [PaddleClas/PP-LCNet](examples/vision/classification/paddleclas) | 11.9 | ✅ | ✅ | ❔ | ✅ | -- | -- | -- | | Classification | [PaddleClas/PP-LCNetv2](examples/vision/classification/paddleclas) | 26.6 | ✅ | ✅ | ❔ | ✅ | -- | -- | -- | diff --git a/benchmark/benchmark_uie.py b/benchmark/benchmark_uie.py new file mode 100644 index 00000000000..44c562d7e66 --- /dev/null +++ b/benchmark/benchmark_uie.py @@ -0,0 +1,321 @@ +import numpy as np +import os +import time +import distutils.util +import sys +import json + +import fastdeploy as fd +from fastdeploy.text import UIEModel, SchemaLanguage + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="The directory of model and tokenizer.") + parser.add_argument( + "--data_path", required=True, help="The path of uie data.") + parser.add_argument( + "--device", + type=str, + default='cpu', + choices=['gpu', 'cpu'], + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--backend", + type=str, + default='paddle', + choices=['ort', 'paddle', 'trt', 'paddle_trt', 'ov'], + help="The inference runtime backend.") + parser.add_argument( + "--device_id", type=int, default=0, help="device(gpu) id") + parser.add_argument( + "--batch_size", type=int, default=1, help="The batch size of data.") + parser.add_argument( + "--max_length", + type=int, + default=128, + help="The max length of sequence.") + parser.add_argument( + "--cpu_num_threads", + type=int, + default=8, + help="The number of threads when inferring on cpu.") + parser.add_argument( + "--enable_trt_fp16", + type=distutils.util.strtobool, + default=False, + help="whether enable fp16 in trt backend") + parser.add_argument( + "--epoch", type=int, default=1, help="The epoch of test") + parser.add_argument( + "--enable_collect_memory_info", + type=ast.literal_eval, + default=False, + help="whether enable collect memory info") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device == 'cpu': + option.use_cpu() + option.set_cpu_thread_num(args.cpu_num_threads) + else: + option.use_gpu(args.device_id) + if args.backend == 'paddle': + option.use_paddle_backend() + elif args.backend == 'ort': + option.use_ort_backend() + elif args.backend == 'ov': + option.use_openvino_backend() + else: + option.use_trt_backend() + if args.backend == 'paddle_trt': + option.enable_paddle_to_trt() + option.enable_paddle_trt_collect_shape() + trt_file = os.path.join(args.model_dir, "infer.trt") + option.set_trt_input_shape( + 'input_ids', + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], + max_shape=[args.batch_size, args.max_length]) + option.set_trt_input_shape( + 'token_type_ids', + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], + max_shape=[args.batch_size, args.max_length]) + option.set_trt_input_shape( + 'pos_ids', + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], + max_shape=[args.batch_size, args.max_length]) + option.set_trt_input_shape( + 'att_mask', + min_shape=[1, 1], + opt_shape=[args.batch_size, args.max_length // 2], + max_shape=[args.batch_size, args.max_length]) + if args.enable_trt_fp16: + option.enable_trt_fp16() + trt_file = trt_file + ".fp16" + 
option.set_trt_cache_file(trt_file) + return option + + +class StatBase(object): + """StatBase""" + nvidia_smi_path = "nvidia-smi" + gpu_keys = ('index', 'uuid', 'name', 'timestamp', 'memory.total', + 'memory.free', 'memory.used', 'utilization.gpu', + 'utilization.memory') + nu_opt = ',nounits' + cpu_keys = ('cpu.util', 'memory.util', 'memory.used') + + +class Monitor(StatBase): + """Monitor""" + + def __init__(self, use_gpu=False, gpu_id=0, interval=0.1): + self.result = {} + self.gpu_id = gpu_id + self.use_gpu = use_gpu + self.interval = interval + self.cpu_stat_q = multiprocessing.Queue() + + def start(self): + cmd = '%s --id=%s --query-gpu=%s --format=csv,noheader%s -lms 50' % ( + StatBase.nvidia_smi_path, self.gpu_id, ','.join(StatBase.gpu_keys), + StatBase.nu_opt) + if self.use_gpu: + self.gpu_stat_worker = subprocess.Popen( + cmd, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + shell=True, + close_fds=True, + preexec_fn=os.setsid) + # cpu stat + pid = os.getpid() + self.cpu_stat_worker = multiprocessing.Process( + target=self.cpu_stat_func, + args=(self.cpu_stat_q, pid, self.interval)) + self.cpu_stat_worker.start() + + def stop(self): + try: + if self.use_gpu: + os.killpg(self.gpu_stat_worker.pid, signal.SIGUSR1) + # os.killpg(p.pid, signal.SIGTERM) + self.cpu_stat_worker.terminate() + self.cpu_stat_worker.join(timeout=0.01) + except Exception as e: + print(e) + return + + # gpu + if self.use_gpu: + lines = self.gpu_stat_worker.stdout.readlines() + lines = [ + line.strip().decode("utf-8") for line in lines + if line.strip() != '' + ] + gpu_info_list = [{ + k: v + for k, v in zip(StatBase.gpu_keys, line.split(', ')) + } for line in lines] + if len(gpu_info_list) == 0: + return + result = gpu_info_list[0] + for item in gpu_info_list: + for k in item.keys(): + if k not in ["name", "uuid", "timestamp"]: + result[k] = max(int(result[k]), int(item[k])) + else: + result[k] = max(result[k], item[k]) + self.result['gpu'] = result + + # cpu + cpu_result = {} + if self.cpu_stat_q.qsize() > 0: + cpu_result = { + k: v + for k, v in zip(StatBase.cpu_keys, self.cpu_stat_q.get()) + } + while not self.cpu_stat_q.empty(): + item = { + k: v + for k, v in zip(StatBase.cpu_keys, self.cpu_stat_q.get()) + } + for k in StatBase.cpu_keys: + cpu_result[k] = max(cpu_result[k], item[k]) + cpu_result['name'] = cpuinfo.get_cpu_info()['brand_raw'] + self.result['cpu'] = cpu_result + + def output(self): + return self.result + + def cpu_stat_func(self, q, pid, interval=0.0): + """cpu stat function""" + stat_info = psutil.Process(pid) + while True: + # pid = os.getpid() + cpu_util, mem_util, mem_use = stat_info.cpu_percent( + ), stat_info.memory_percent(), round(stat_info.memory_info().rss / + 1024.0 / 1024.0, 4) + q.put([cpu_util, mem_util, mem_use]) + time.sleep(interval) + return + + +def get_dataset(data_path, max_seq_len=512): + json_lines = [] + with open(data_path, 'r', encoding='utf-8') as f: + for line in f: + json_line = json.loads(line) + content = json_line['content'].strip() + prompt = json_line['prompt'] + # Model Input is aslike: [CLS] Prompt [SEP] Content [SEP] + # It include three summary tokens. 
+ if max_seq_len <= len(prompt) + 3: + raise ValueError( + "The value of max_seq_len is too small, please set a larger value" + ) + json_lines.append(json_line) + + return json_lines + + +if __name__ == '__main__': + args = parse_arguments() + runtime_option = build_option(args) + model_path = os.path.join(args.model_dir, "inference.pdmodel") + param_path = os.path.join(args.model_dir, "inference.pdiparams") + vocab_path = os.path.join(args.model_dir, "vocab.txt") + + gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info + dump_result = dict() + end2end_statis = list() + cpu_mem = list() + gpu_mem = list() + gpu_util = list() + if args.device == "cpu": + file_path = args.model_dir + "_model_" + args.backend + "_" + \ + args.device + "_" + str(args.cpu_num_threads) + ".txt" + else: + if args.enable_trt_fp16: + file_path = args.model_dir + "_model_" + \ + args.backend + "_fp16_" + args.device + ".txt" + else: + file_path = args.model_dir + "_model_" + args.backend + "_" + args.device + ".txt" + f = open(file_path, "w") + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) + + ds = get_dataset(args.data_path) + schema = ["时间"] + uie = UIEModel( + model_path, + param_path, + vocab_path, + position_prob=0.5, + max_length=args.max_length, + batch_size=args.batch_size, + schema=schema, + runtime_option=runtime_option, + schema_language=SchemaLanguage.ZH) + + try: + if enable_collect_memory_info: + import multiprocessing + import subprocess + import psutil + import signal + import cpuinfo + enable_gpu = args.device == "gpu" + monitor = Monitor(enable_gpu, gpu_id) + monitor.start() + uie.enable_record_time_of_runtime() + + for ep in range(args.epoch): + for i, sample in enumerate(ds): + curr_start = time.time() + uie.set_schema([sample['prompt']]) + result = uie.predict([sample['content']]) + end2end_statis.append(time.time() - curr_start) + runtime_statis = uie.print_statis_info_of_runtime() + + warmup_iter = args.epoch * len(ds) // 5 + + end2end_statis_repeat = end2end_statis[warmup_iter:] + if enable_collect_memory_info: + monitor.stop() + mem_info = monitor.output() + dump_result["cpu_rss_mb"] = mem_info['cpu'][ + 'memory.used'] if 'cpu' in mem_info else 0 + dump_result["gpu_rss_mb"] = mem_info['gpu'][ + 'memory.used'] if 'gpu' in mem_info else 0 + dump_result["gpu_util"] = mem_info['gpu'][ + 'utilization.gpu'] if 'gpu' in mem_info else 0 + + dump_result["runtime"] = runtime_statis["avg_time"] * 1000 + dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 + + time_cost_str = f"Runtime(ms): {dump_result['runtime']}\n" \ + f"End2End(ms): {dump_result['end2end']}\n" + f.writelines(time_cost_str) + print(time_cost_str) + + if enable_collect_memory_info: + mem_info_str = f"cpu_rss_mb: {dump_result['cpu_rss_mb']}\n" \ + f"gpu_rss_mb: {dump_result['gpu_rss_mb']}\n" \ + f"gpu_util: {dump_result['gpu_util']}\n" + f.writelines(mem_info_str) + print(mem_info_str) + except: + f.writelines("!!!!!Infer Failed\n") + + f.close() diff --git a/benchmark/run_benchmark_uie.sh b/benchmark/run_benchmark_uie.sh new file mode 100644 index 00000000000..51eb5d97328 --- /dev/null +++ b/benchmark/run_benchmark_uie.sh @@ -0,0 +1,27 @@ +# wget https://bj.bcebos.com/fastdeploy/benchmark/uie/reimbursement_form_data.txt +# wget https://bj.bcebos.com/fastdeploy/models/uie/uie-base.tgz +# tar xvfz uie-base.tgz + +DEVICE_ID=0 + +echo "[FastDeploy] Running UIE benchmark..." 
+ +# GPU +echo "-------------------------------GPU Benchmark---------------------------------------" +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device_id $DEVICE_ID --device gpu --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle_trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 False --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle_trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True +python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend trt --device_id $DEVICE_ID --device gpu --enable_trt_fp16 True --enable_collect_memory_info True +echo "-----------------------------------------------------------------------------------" + +# CPU +echo "-------------------------------CPU Benchmark---------------------------------------" +for cpu_num_threads in 1 8; +do + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend paddle --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ort --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True + python benchmark_uie.py --epoch 5 --model_dir uie-base --data_path reimbursement_form_data.txt --backend ov --device cpu --cpu_num_threads ${cpu_num_threads} --enable_collect_memory_info True +done +echo "-----------------------------------------------------------------------------------" diff --git a/cmake/ascend.cmake b/cmake/ascend.cmake index 01582d4186e..ed5aa5d933c 100644 --- a/cmake/ascend.cmake +++ b/cmake/ascend.cmake @@ -16,13 +16,4 @@ else () if(NOT PADDLELITE_URL) set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/test/lite-linux_arm64_huawei_ascend_npu_python_1207.tgz") endif() - execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/paddlelite/lib - COMMAND sh -c "xargs ${PATCHELF_EXE} --set-rpath '$ORIGIN'" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/paddlelite/lib - RESULT_VARIABLE result - OUTPUT_VARIABLE curr_out - ERROR_VARIABLE curr_out) - if(ret EQUAL "1") - message(FATAL_ERROR "Failed to patchelf Paddle Lite libraries when using Ascend.") - endif() - message(STATUS "result:${result} out:${curr_out}") endif() diff --git a/cmake/openssl.cmake b/cmake/openssl.cmake new file mode 100755 index 00000000000..2d7eb4402c0 --- /dev/null +++ b/cmake/openssl.cmake @@ -0,0 +1,50 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SET(OPENSSL_URL_PREFIX "https://bj.bcebos.com/paddlex/tools") +IF(CMAKE_SYSTEM_NAME MATCHES "Windows") + set(OPENSSL_FILENAME "windows_openssl-1.1.0k") + set(COMPRESSED_SUFFIX ".zip") + add_definitions(-DWIN32) +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(OPENSSL_FILENAME "openssl-1.1.0k") + set(COMPRESSED_SUFFIX ".tar.gz") + add_definitions(-DLINUX) +ENDIF() +set(OPENSSL_URL ${OPENSSL_URL_PREFIX}/${OPENSSL_FILENAME}${COMPRESSED_SUFFIX}) +if(THIRD_PARTY_PATH) + SET(OPENSSL_INSTALL_DIR ${THIRD_PARTY_PATH}) + SET(OPENSSL_ROOT_DIR ${THIRD_PARTY_PATH}/openssl-1.1.0k/install-${CMAKE_SYSTEM_PROCESSOR}) +else() + SET(OPENSSL_INSTALL_DIR ${FASTDEPLOY_INSTALL_DIR}/installed_fastdeploy/cmake) + SET(OPENSSL_ROOT_DIR ${FASTDEPLOY_INSTALL_DIR}/installed_fastdeploy/cmake/openssl-1.1.0k/install-${CMAKE_SYSTEM_PROCESSOR}) +endif() +download_and_decompress(${OPENSSL_URL} ${CMAKE_CURRENT_BINARY_DIR}/${OPENSSL_FILENAME}${COMPRESSED_SUFFIX} ${OPENSSL_INSTALL_DIR}) +SET(OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" CACHE PATH "openssl include directory." FORCE) +include_directories(${OPENSSL_INCLUDE_DIR}) +IF(CMAKE_SYSTEM_NAME MATCHES "Windows") + set(OPENSSL_LIBRARIES + "${OPENSSL_ROOT_DIR}/lib/libssl_static.lib" + "${OPENSSL_ROOT_DIR}/lib/libcrypto_static.lib" + ${GFLAGS_LIBRARIES} + shlwapi + CACHE FILEPATH "OPENSSL_LIBRARIES" FORCE) +ELSEIF (CMAKE_SYSTEM_NAME MATCHES "Linux") + set(OPENSSL_LIBRARIES + "${OPENSSL_ROOT_DIR}/lib/libssl.a" + "${OPENSSL_ROOT_DIR}/lib/libcrypto.a" + ${GFLAGS_LIBRARIES} + -ldl -lpthread + CACHE FILEPATH "OPENSSL_LIBRARIES" FORCE) +ENDIF() \ No newline at end of file diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake index 3ab45454a51..6d5c8f0c134 100644 --- a/cmake/paddle_inference.cmake +++ b/cmake/paddle_inference.cmake @@ -40,16 +40,24 @@ if(WIN32) CACHE FILEPATH "paddle_inference compile library." FORCE) set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/mkldnn.lib") set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5md.lib") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/paddle2onnx.lib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/onnxruntime.lib") elseif(APPLE) set(PADDLEINFERENCE_COMPILE_LIB "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.dylib" CACHE FILEPATH "paddle_inference compile library." FORCE) + set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") + set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.dylib") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.dylib") else() set(PADDLEINFERENCE_COMPILE_LIB "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so" CACHE FILEPATH "paddle_inference compile library." 
FORCE) set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2") set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so") + set(P2O_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so") + set(ORT_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so") endif(WIN32) @@ -59,10 +67,16 @@ if(PADDLEINFERENCE_DIRECTORY) endif() find_package(Python COMPONENTS Interpreter Development REQUIRED) message(STATUS "Copying ${PADDLEINFERENCE_DIRECTORY} to ${THIRD_PARTY_PATH}/install/paddle_inference ...") - execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference) + if(WIN32) + message(FATAL_ERROR "Define PADDLEINFERENCE_DIRECTORY is not supported on Windows platform.") + else() + execute_process(COMMAND mkdir -p ${THIRD_PARTY_PATH}/install) + execute_process(COMMAND cp -r ${PADDLEINFERENCE_DIRECTORY} ${THIRD_PARTY_PATH}/install/paddle_inference) + execute_process(COMMAND rm -rf ${THIRD_PARTY_PATH}/install/paddle_inference/paddle/lib/*.a) + endif() else() set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") - set(PADDLEINFERENCE_VERSION "2.4-dev3") + set(PADDLEINFERENCE_VERSION "2.4-dev4") if(WIN32) if (WITH_GPU) set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip") @@ -116,16 +130,23 @@ set_property(TARGET external_paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLEINFERENCE_COMPILE_LIB}) add_dependencies(external_paddle_inference ${PADDLEINFERENCE_PROJECT}) -if (NOT APPLE) - # no third parties libs(mkldnn and omp) need to - # link into paddle_inference on MacOS OSX. 
- add_library(external_dnnl STATIC IMPORTED GLOBAL) - set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION - ${DNNL_LIB}) - add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT}) - add_library(external_omp STATIC IMPORTED GLOBAL) - set_property(TARGET external_omp PROPERTY IMPORTED_LOCATION - ${OMP_LIB}) - add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT}) -endif() +add_library(external_p2o STATIC IMPORTED GLOBAL) +set_property(TARGET external_p2o PROPERTY IMPORTED_LOCATION + ${P2O_LIB}) +add_dependencies(external_p2o ${PADDLEINFERENCE_PROJECT}) + +add_library(external_ort STATIC IMPORTED GLOBAL) +set_property(TARGET external_ort PROPERTY IMPORTED_LOCATION + ${ORT_LIB}) +add_dependencies(external_ort ${PADDLEINFERENCE_PROJECT}) + +add_library(external_dnnl STATIC IMPORTED GLOBAL) +set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION + ${DNNL_LIB}) +add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT}) + +add_library(external_omp STATIC IMPORTED GLOBAL) +set_property(TARGET external_omp PROPERTY IMPORTED_LOCATION + ${OMP_LIB}) +add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT}) diff --git a/cmake/paddlelite.cmake b/cmake/paddlelite.cmake index 74525b7a9d3..d546cbf38f8 100755 --- a/cmake/paddlelite.cmake +++ b/cmake/paddlelite.cmake @@ -114,6 +114,10 @@ else() BUILD_BYPRODUCTS ${PADDLELITE_LIB}) endif() +if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND BUILD_FASTDEPLOY_PYTHON) + add_custom_target(patchelf_paddle_lite ALL COMMAND bash -c "PATCHELF_EXE=${PATCHELF_EXE} python ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_lite.py ${PADDLELITE_INSTALL_DIR}/lib/" DEPENDS ${LIBRARY_NAME}) +endif() + add_library(external_paddle_lite STATIC IMPORTED GLOBAL) set_property(TARGET external_paddle_lite PROPERTY IMPORTED_LOCATION ${PADDLELITE_LIB}) add_dependencies(external_paddle_lite ${PADDLELITE_PROJECT}) diff --git a/cmake/sophgo.cmake b/cmake/sophgo.cmake new file mode 100644 index 00000000000..d51d61368bd --- /dev/null +++ b/cmake/sophgo.cmake @@ -0,0 +1,7 @@ +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +find_package(libsophon REQUIRED) +message(${LIBSOPHON_LIB_DIRS}) +include_directories(${LIBSOPHON_INCLUDE_DIRS}) +message(${LIBSOPHON_LIB_DIRS}) +set(SOPHGO_RT_LIB ${LIBSOPHON_LIB_DIRS}/libbmrt.so) \ No newline at end of file diff --git a/cmake/summary.cmake b/cmake/summary.cmake index 85c829e04c7..9c3b1981cbd 100755 --- a/cmake/summary.cmake +++ b/cmake/summary.cmake @@ -32,6 +32,7 @@ function(fastdeploy_summary) message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") + message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}") message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}") message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") @@ -60,6 +61,7 @@ function(fastdeploy_summary) endif() message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") + message(STATUS " ENABLE_ENCRYPTION : ${ENABLE_ENCRYPTION}") message(STATUS " ENABLE_DEBUG : ${ENABLE_DEBUG}") message(STATUS " ENABLE_VISION_VISUALIZE : ${ENABLE_VISION_VISUALIZE}") if(ANDROID) diff --git a/cmake/toolchain.cmake b/cmake/toolchain.cmake index 4b3485748b7..85bd057981b 100755 --- a/cmake/toolchain.cmake +++ b/cmake/toolchain.cmake @@ -10,7 +10,7 @@ if (DEFINED TARGET_ABI) set(OPENCV_URL 
"https://bj.bcebos.com/fastdeploy/third_libs/opencv-linux-armv7hf-4.6.0.tgz") set(OPENCV_FILENAME "opencv-linux-armv7hf-4.6.0") if(WITH_TIMVX) - set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-armhf-timvx-1130.tgz") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-armhf-timvx-20221229.tgz") else() message(STATUS "PADDLELITE_URL will be configured if WITH_TIMVX=ON.") endif() diff --git a/docs/cn/build_and_install/README.md b/docs/cn/build_and_install/README.md index 0ca92592ad7..5175f50d947 100755 --- a/docs/cn/build_and_install/README.md +++ b/docs/cn/build_and_install/README.md @@ -6,10 +6,10 @@ - [FastDeploy预编译库下载安装](download_prebuilt_libraries.md) ## 自行编译安装 -- [GPU部署环境](gpu.md) +- [NVIDIA GPU部署环境](gpu.md) - [CPU部署环境](cpu.md) - [IPU部署环境](ipu.md) -- [Jetson部署环境](jetson.md) +- [Nvidia Jetson部署环境](jetson.md) - [Android平台部署环境](android.md) - [瑞芯微RV1126部署环境](rv1126.md) - [瑞芯微RK3588部署环境](rknpu2.md) @@ -26,6 +26,7 @@ | ENABLE_PADDLE_BACKEND | 默认OFF,是否编译集成Paddle Inference后端(CPU/GPU上推荐打开) | | ENABLE_LITE_BACKEND | 默认OFF,是否编译集成Paddle Lite后端(编译Android库时需要设置为ON) | | ENABLE_RKNPU2_BACKEND | 默认OFF,是否编译集成RKNPU2后端(RK3588/RK3568/RK3566上推荐打开) | +| ENABLE_SOPHGO_BACKEND | 默认OFF,是否编译集成SOPHGO后端, 当在SOPHGO TPU上部署时,需要设置为ON | | WITH_ASCEND | 默认OFF,当在华为昇腾NPU上部署时, 需要设置为ON | | WITH_KUNLUNXIN | 默认OFF,当在昆仑芯XPU上部署时,需设置为ON | | WITH_TIMVX | 默认OFF,需要在RV1126/RV1109/A311D上部署时,需设置为ON | diff --git a/docs/cn/build_and_install/download_prebuilt_libraries.md b/docs/cn/build_and_install/download_prebuilt_libraries.md index c3c733467da..30ad094f977 100755 --- a/docs/cn/build_and_install/download_prebuilt_libraries.md +++ b/docs/cn/build_and_install/download_prebuilt_libraries.md @@ -22,7 +22,7 @@ FastDeploy提供各平台预编译库,供开发者直接下载安装使用。 ### Python安装 -Release版本(当前最新1.0.1)安装 +Release版本(当前最新1.0.2)安装 ```bash pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html ``` @@ -43,8 +43,8 @@ Release版本 | 平台 | 文件 | 说明 | | :--- | :--- | :---- | -| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.1.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2编译产出 | -| Windows x64 | [fastdeploy-win-x64-gpu-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.1.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2编译产出 | +| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.2.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2编译产出 | +| Windows x64 | [fastdeploy-win-x64-gpu-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.2.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2编译产出 | Develop版本(Nightly build) @@ -65,7 +65,7 @@ Develop版本(Nightly build) ### Python安装 -Release版本(当前最新1.0.1)安装 +Release版本(当前最新1.0.2)安装 ```bash pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html ``` @@ -81,11 +81,11 @@ Release版本 | 平台 | 文件 | 说明 | | :--- | :--- | :---- | -| Linux x64 | [fastdeploy-linux-x64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.1.tgz) | g++ 8.2编译产出 | -| Windows x64 | [fastdeploy-win-x64-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.1.zip) | Visual Studio 16 2019编译产出 | -| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.1.tgz) | clang++ 10.0.0编译产出| -| Mac OSX arm64 | 
[fastdeploy-osx-arm64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.1.tgz) | clang++ 13.0.0编译产出 | -| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.1.tgz) | gcc 6.3编译产出 | +| Linux x64 | [fastdeploy-linux-x64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz) | g++ 8.2编译产出 | +| Windows x64 | [fastdeploy-win-x64-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.2.zip) | Visual Studio 16 2019编译产出 | +| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.2.tgz) | clang++ 10.0.0编译产出| +| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.2.tgz) | clang++ 13.0.0编译产出 | +| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.2.tgz) | gcc 6.3编译产出 | | Android armv7&v8 | [fastdeploy-android-1.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz) | NDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a | ## Java SDK安装 diff --git a/docs/cn/build_and_install/huawei_ascend.md b/docs/cn/build_and_install/huawei_ascend.md index b2a6b2ea402..ce0c8021155 100644 --- a/docs/cn/build_and_install/huawei_ascend.md +++ b/docs/cn/build_and_install/huawei_ascend.md @@ -94,6 +94,12 @@ python setup.py bdist_wheel #编译完成后,请用户自行安装当前目录的dist文件夹内的whl包. ``` +## 五.昇腾部署时开启FlyCV +[FlyCV](https://github.com/PaddlePaddle/FlyCV) 是一款高性能计算机图像处理库, 针对ARM架构做了很多优化, 相比其他图像处理库性能更为出色. +FastDeploy现在已经集成FlyCV, 用户可以在支持的硬件平台上使用FlyCV, 实现模型端到端推理性能的加速. +模型端到端推理中, 预处理和后处理阶段为CPU计算, 当用户使用ARM CPU + 昇腾的硬件平台时, 我们推荐用户使用FlyCV, 可以实现端到端的推理性能加速, 详见[FLyCV使用文档](./boost_cv_by_flycv.md). 
-- 华为昇腾NPU 上使用C++部署 PaddleClas 分类模型请参考:[PaddleClas 华为升腾NPU C++ 部署示例](../../../examples/vision/classification/paddleclas/ascend/cpp/README.md) -- 华为昇腾NPU 上使用Python部署 PaddleClas 分类模型请参考:[PaddleClas 华为升腾NPU Python 部署示例](../../../examples/vision/classification/paddleclas/ascend/python/README.md) + +## 六.昇腾部署Demo参考 +- 华为昇腾NPU 上使用C++部署 PaddleClas 分类模型请参考:[PaddleClas 华为升腾NPU C++ 部署示例](../../../examples/vision/classification/paddleclas/cpp/README.md) +- 华为昇腾NPU 上使用Python部署 PaddleClas 分类模型请参考:[PaddleClas 华为升腾NPU Python 部署示例](../../../examples/vision/classification/paddleclas/python/README.md) diff --git a/docs/cn/build_and_install/sophgo.md b/docs/cn/build_and_install/sophgo.md new file mode 100644 index 00000000000..f27432e71dc --- /dev/null +++ b/docs/cn/build_and_install/sophgo.md @@ -0,0 +1,78 @@ +# SOPHGO 部署库编译 + +## SOPHGO 环境准备 +SOPHGO支持linux下进行编译,系统为Debian/Ubuntu +安装包由三个文件构成 +- [sophon-driver\_0.4.2\_$arch.deb](http://219.142.246.77:65000/sharing/KWqbmEcKp) +- [sophon-libsophon\_0.4.2\_$arch.deb](http://219.142.246.77:65000/sharing/PlvlBXhWY) +- [sophon-libsophon-dev\_0.4.2\_$arch.deb](http://219.142.246.77:65000/sharing/zTErLlpS7) + +其中“$arch”为当前机器的硬件架构,使用以下命令可以获取当前的服务器arch: +```shell +uname -m +``` +通常x86_64 机器对应的硬件架构为amd64,arm64 机器对应的硬件架构为 arm64: +```text +- sophon-driver_0.4.2_$arch.deb +- sophon-libsophon_0.4.2_$arch.deb +- sophon-libsophon-dev_0.4.2_$arch.deb +``` + +其中:sophon-driver 包含了 PCIe 加速卡驱动;sophon-libsophon 包含了运行时环境(库文 +件、工具等);sophon-libsophon-dev 包含了开发环境(头文件等)。如果只是在部署环境上安 +装,则不需要安装 sophon-libsophon-dev。 +可以通过如下步骤安装: +```shell +#安装依赖库,只需要执行一次: +sudo apt install dkms libncurses5 +#安装 libsophon: +sudo dpkg -i sophon-*.deb +#在终端执行如下命令,或者登出再登入当前用户后即可使用 bm-smi 等命令: +source /etc/profile +``` +安装位置为: +```text +/opt/sophon/ +├── driver-0.4.2 +├── libsophon-0.4.2 +| ├──bin +| ├──data +| ├──include +| └──lib +└── libsophon-current->/opt/sophon/libsophon-0.4.2 +``` + +## C++ SDK编译安装 +搭建好编译环境之后,编译命令如下: +```bash +# Download the latest source code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy +mkdir build && cd build + +# CMake configuration with Ascend +cmake -DENABLE_SOPHGO_BACKEND=ON \ + -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-sophgo \ + -DENABLE_VISION=ON \ + .. + +# Build FastDeploy Ascend C++ SDK +make -j8 +make install +``` +编译完成之后,会在当前的build目录下生成 fastdeploy-sophgo 目录,编译完成。 + +## Python FastDeploy 库编译 +搭建好编译环境之后,编译命令如下: +```bash +# Download the latest source code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/python +export ENABLE_SOPHGO_BACKEND=ON +export ENABLE_VISION=ON + +python setup.py build +python setup.py bdist_wheel + +#编译完成后,请用户自行安装当前目录的dist文件夹内的whl包. +``` diff --git a/docs/cn/faq/boost_cv_by_flycv.md b/docs/cn/faq/boost_cv_by_flycv.md new file mode 100644 index 00000000000..591a5fa0a73 --- /dev/null +++ b/docs/cn/faq/boost_cv_by_flycv.md @@ -0,0 +1,68 @@ +[English](../../en/faq/boost_cv_by_flycv.md) | 中文 + + +# 使用FlyCV加速端到端推理性能 + +[FlyCV](https://github.com/PaddlePaddle/FlyCV) 是一款高性能计算机图像处理库, 针对ARM架构做了很多优化, 相比其他图像处理库性能更为出色. +FastDeploy现在已经集成FlyCV, 用户可以在支持的硬件平台上使用FlyCV, 实现模型端到端推理性能的加速. + +## 已支持的系统与硬件架构 + +| 系统 | 硬件架构 | +| :-----------| :-------- | +| Android | armeabi-v7a, arm64-v8a | +| Linux | aarch64, armhf, x86_64| + + +## 使用方式 +使用FlyCV,首先需要在编译时开启FlyCV编译选项,之后在部署时新增一行代码即可开启. +本文以Linux系统为例,说明如何开启FlyCV编译选项, 之后在部署时, 新增一行代码使用FlyCV. + +用户可以按照如下方式,在编译预测库时,开启FlyCV编译选项. +```bash +# 编译C++预测库时, 开启FlyCV编译选项. +-DENABLE_VISION=ON \ + +# 在编译Python预测库时, 开启FlyCV编译选项 +export ENABLE_FLYCV=ON +``` + +用户可以按照如下方式,在部署代码中新增一行代码启用FlyCV. 
+```bash +# C++部署代码. +# 新增一行代码启用FlyCV +fastdeploy::vision::EnableFlyCV(); +# 其他部署代码...(以昇腾部署为例) +fastdeploy::RuntimeOption option; +option.UseAscend(); +... + + +# Python部署代码 +# 新增一行代码启用FlyCV +fastdeploy.vision.enable_flycv() +# 其他部署代码...(以昇腾部署为例) +runtime_option = build_option() +option.use_ascend() +... +``` + +## 部分平台FlyCV 端到端性能数据 + +鲲鹏920 CPU + Atlas 300I Pro 推理卡. +| 模型 | OpenCV 端到端性能(ms) | FlyCV 端到端性能(ms) | +| :-----------| :-------- | :-------- | +| ResNet50 | 2.78 | 1.63 | +| PP-LCNetV2 | 2.50 | 1.39 | +| YOLOv7 | 27.00 | 21.36 | +| PP_HumanSegV2_Lite | 2.76 | 2.10 | + + +瑞芯微RV1126. + +| 模型 | OpenCV 端到端性能(ms) | FlyCV 端到端性能(ms) | +| :-----------| :-------- | :-------- | +| ResNet50 | 9.23 | 6.01 | +| mobilenetv1_ssld_量化模型 | 9.23 | 6.01 | +| yolov5s_量化模型 | 28.33 | 14.25 | +| PP_LiteSeg_量化模型 | 132.25 | 60.31 | diff --git a/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md b/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md new file mode 100755 index 00000000000..7ff5f91aa92 --- /dev/null +++ b/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md @@ -0,0 +1,36 @@ +[English](../../en/faq/heterogeneous_computing_on_timvx_npu.md) | 中文 + +# 在芯原系列 NPU 上实现异构计算 +在芯原系列 NPU 上,例如 RV1126 或者 A311D 上部署全量化模型时,有可能会有精度下降的问题,那么就需要在 NPU 和 ARM CPU 上进行异构计算,FastDeploy 中的异构计算是通过 subgraph.txt 配置文件来完成的,如果在更换全量化模型后,发现精度有较大的下降,可以参考本文档来定义异构配置文件。 + +异构配置文件的更新步骤: +1. 确定模型量化后在 ARM CPU 上的精度。 +- 如果在 ARM CPU 上,精度都无法满足,那量化本身就是失败的,此时可以考虑修改训练集或者更改量化方法。 +- 只需要修改 demo 中的代码,将 NPU 推理的部分改为使用 ARM CPU int8 推理,便可实现使用ARM CPU进行计算 + ``` + # 如下接口表示使用 NPU 进行推理 + fastdeploy::RuntimeOption option; + option.UseTimVX(); # 开启 TIMVX 进行 NPU 推理 + option.SetLiteSubgraphPartitionPath(subgraph_file); # 加载异构计算配置文件 + + # 如下接口表示使用 ARM CPU int8 推理 + fastdeploy::RuntimeOption option; + option.UseLiteBackend(); + option.EnableLiteInt8(); + ``` + 如果 ARM CPU 计算结果精度达标,则继续下面的步骤。 + +2. 获取整网拓扑信息。 +- 回退第一步中的修改,使用 NPU 进行推理的 API 接口,加载异构计算配置文件的开关保持关闭。 +- 将所有的日志信息写入到 log.txt中,在 log.txt 中搜索关键字 "subgraph operators" 随后的一段便是整个模型的拓扑信息 +- 它的格式如下: + - 每行记录由 ”算子类型:输入张量名列表:输出张量名列表“ 组成(即以分号分隔算子类型、输入和输出张量名列表),以逗号分隔输入、输出张量名列表中的每个张量名; + - 示例说明: + ``` + op_type0:var_name0,var_name1:var_name2 # 表示将算子类型为 op_type0、输入张量为var_name0 和 var_name1、输出张量为 var_name2 的节点强制运行在 ARM CPU 上 + ``` + +3. 
修改异构配置文件 +- 将所有的 Subgraph operators 写到在 subgraph.txt 中,并打开加载异构计算配置文件的接口 +- 逐行删除、成片删除、二分法,发挥开发人员的耐心,找到引入 NPU 精度异常的 layer,将其留在 subgraph.txt 中 +- 在 txt 中的结点都是需要异构到 ARM CPU 上的 layer,不用特别担心性能问题,Paddle Lite 的 ARM kernel 性能也是非常卓越的 diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md index e0384e05136..fcd3e7c35c2 100644 --- a/docs/cn/faq/rknpu2/rknpu2.md +++ b/docs/cn/faq/rknpu2/rknpu2.md @@ -23,4 +23,4 @@ ONNX模型不能直接调用RK芯片中的NPU进行运算,需要把ONNX模型 | Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 | | Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | | Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | -| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/92 | +| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | diff --git a/docs/cn/faq/use_sdk_on_ascend.md b/docs/cn/faq/use_sdk_on_ascend.md index 8f87d5a229d..fc0ccc45e37 100644 --- a/docs/cn/faq/use_sdk_on_ascend.md +++ b/docs/cn/faq/use_sdk_on_ascend.md @@ -1,4 +1,4 @@ -[English](../../en/faq/use_sdk_on_linux.md) | 中文 +[English](../../en/faq/use_sdk_on_ascend.md) | 中文 # Linux上使用C++在华为昇腾部署 diff --git a/docs/cn/faq/use_sdk_on_linux.md b/docs/cn/faq/use_sdk_on_linux.md index 0ceb5ca0c52..ccf46f5e041 100644 --- a/docs/cn/faq/use_sdk_on_linux.md +++ b/docs/cn/faq/use_sdk_on_linux.md @@ -3,7 +3,8 @@ # Linux上C++部署 -1. 编译完成运行,提示找不到.so文件 +1. 编译完成运行,提示找不到.so文件 "cannot open shared object file: No such file or directory" + 在执行二进制文件时,需要能够在环境变量中找到FastDeploy相关的库文件。FastDeploy提供了辅助脚本来帮助完成。 diff --git a/docs/en/build_and_install/download_prebuilt_libraries.md b/docs/en/build_and_install/download_prebuilt_libraries.md index 19da3ae1018..b8e40035f37 100644 --- a/docs/en/build_and_install/download_prebuilt_libraries.md +++ b/docs/en/build_and_install/download_prebuilt_libraries.md @@ -4,7 +4,7 @@ English | [中文](../../cn/build_and_install/download_prebuilt_libraries.md) FastDeploy provides pre-built libraries for developers to download and install directly. Meanwhile, FastDeploy also offers easy access to compile so that developers can compile FastDeploy according to their own needs. 
-This article is divided into two parts: +This document is divided into two parts: - [1.GPU Deployment Environment](#1) - [2.CPU Deployment Environment](#2) @@ -23,7 +23,7 @@ FastDeploy supports Computer Vision, Text and NLP model deployment on CPU and Nv ### Python SDK -Install the released version(the newest 1.0.1 for now) +Install the released version(the newest 1.0.2 for now) ``` pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html @@ -43,12 +43,12 @@ conda config --add channels conda-forge && conda install cudatoolkit=11.2 cudnn= ### C++ SDK -Install the released version(Latest 1.0.1) +Install the released version(Latest 1.0.2) | Platform | File | Description | |:----------- |:--------------------------------------------------------------------------------------------------------------------- |:--------------------------------------------------------- | -| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.1.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2 | -| Windows x64 | [fastdeploy-win-x64-gpu-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.1.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 | +| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.2.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2 | +| Windows x64 | [fastdeploy-win-x64-gpu-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.2.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 | Install the Develop version(Nightly build) @@ -70,7 +70,7 @@ FastDeploy supports computer vision, text and NLP model deployment on CPU with P ### Python SDK -Install the released version(Latest 1.0.1 for now) +Install the released version(Latest 1.0.2 for now) ``` pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html @@ -84,15 +84,15 @@ pip install fastdeploy-python==0.0.0 -f https://www.paddlepaddle.org.cn/whl/fast ### C++ SDK -Install the released version(Latest 1.0.1 for now, Android is 1.0.1) +Install the released version(Latest 1.0.2 for now, Android is 1.0.0) | Platform | File | Description | |:------------- |:--------------------------------------------------------------------------------------------------------------------- |:------------------------------ | -| Linux x64 | [fastdeploy-linux-x64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.1.tgz) | g++ 8.2 | -| Windows x64 | [fastdeploy-win-x64-1.0.1.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.1.zip) | Visual Studio 16 2019 | -| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.1.tgz) | clang++ 10.0.0| -| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.1.tgz) | clang++ 13.0.0 | -| Linux aarch64 | [fastdeploy-osx-arm64-1.0.1.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.1.tgz) | gcc 6.3 | +| Linux x64 | [fastdeploy-linux-x64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz) | g++ 8.2 | +| Windows x64 | [fastdeploy-win-x64-1.0.2.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.2.zip) | Visual Studio 16 2019 | +| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.2.tgz) | 
clang++ 10.0.0|
+| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.2.tgz) | clang++ 13.0.0 |
+| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.2.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.2.tgz) | gcc 6.3 |
| Android armv7&v8 | [fastdeploy-android-1.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz)| NDK 25, clang++, support arm64-v8a and armeabi-v7a |

## Java SDK
diff --git a/docs/en/build_and_install/huawei_ascend.md b/docs/en/build_and_install/huawei_ascend.md
index 3a0bd8911fe..48653939d5d 100644
--- a/docs/en/build_and_install/huawei_ascend.md
+++ b/docs/en/build_and_install/huawei_ascend.md
@@ -93,6 +93,14 @@ python setup.py bdist_wheel
#After the compilation is complete, please install the whl package in the dist folder of the current directory.
```
-Deploying PaddleClas Classification Model on Huawei Ascend NPU using C++ please refer to: [PaddleClas Huawei Ascend NPU C++ Deployment Example](../../../examples/vision/classification/paddleclas/ascend/cpp/README.md)
+## Enable FlyCV for Ascend deployment
-Deploying PaddleClas classification model on Huawei Ascend NPU using Python please refer to: [PaddleClas Huawei Ascend NPU Python Deployment Example](../../../examples/vision/classification/paddleclas/ascend/python/README.md)
+[FlyCV](https://github.com/PaddlePaddle/FlyCV) is a high-performance image processing library that provides better performance than other image processing libraries, especially on ARM architectures.
+FastDeploy is now integrated with FlyCV, allowing users to use FlyCV on supported hardware platforms to accelerate model end-to-end inference performance.
+In end-to-end model inference, the pre-processing and post-processing phases are CPU computation, so we recommend using FlyCV for end-to-end inference acceleration when you are using an ARM CPU + Ascend hardware platform. See the [Enable FlyCV](./boost_cv_by_flycv.md) documentation for details.
+
+
+## Deployment demo reference
+- To deploy the PaddleClas classification model on Huawei Ascend NPU using C++, please refer to: [PaddleClas Huawei Ascend NPU C++ Deployment Example](../../../examples/vision/classification/paddleclas/cpp/README.md)
+
+- To deploy the PaddleClas classification model on Huawei Ascend NPU using Python, please refer to: [PaddleClas Huawei Ascend NPU Python Deployment Example](../../../examples/vision/classification/paddleclas/python/README.md)
diff --git a/docs/en/build_and_install/sophgo.md b/docs/en/build_and_install/sophgo.md
new file mode 100644
index 00000000000..08d18122c0f
--- /dev/null
+++ b/docs/en/build_and_install/sophgo.md
@@ -0,0 +1,77 @@
+
+# How to Build SOPHGO Deployment Environment
+
+## SOPHGO Environment Preparation
+SOPHGO supports compilation on Linux; the following uses Debian/Ubuntu as an example.
+The installation package consists of three files:
+- [sophon-driver\_0.4.2\_$arch.deb](http://219.142.246.77:65000/sharing/KWqbmEcKp)
+- [sophon-libsophon\_0.4.2\_$arch.deb](http://219.142.246.77:65000/sharing/PlvlBXhWY)
+- [sophon-libsophon-dev\_0.4.2\_$arch.deb](http://219.142.246.77:65000/sharing/zTErLlpS7)
+
+$arch indicates the hardware architecture of the current machine.
Run the following command to obtain the current server arch:
+```shell
+uname -m
+```
+Generally, the hardware architecture of an x86_64 machine is amd64, while that of an ARM machine is arm64, so the corresponding packages are:
+```text
+- sophon-driver_0.4.2_$arch.deb
+- sophon-libsophon_0.4.2_$arch.deb
+- sophon-libsophon-dev_0.4.2_$arch.deb
+```
+
+sophon-driver contains the PCIe acceleration card drivers; sophon-libsophon contains the runtime environment (libraries, tools, etc.); sophon-libsophon-dev contains the development environment (header files, etc.). If you install packages only on a deployment environment, you do not need to install sophon-libsophon-dev.
+You can perform the following steps to install:
+```shell
+# To install the dependency libraries, you only need to do this once:
+sudo apt install dkms libncurses5
+# Install libsophon:
+sudo dpkg -i sophon-*.deb
+# Run the following command in the terminal, then log out and log back in as the current user to use commands such as bm-smi:
+source /etc/profile
+```
+The installation location is as follows:
+```text
+/opt/sophon/
+├── driver-0.4.2
+├── libsophon-0.4.2
+|    ├──bin
+|    ├──data
+|    ├──include
+|    └──lib
+└── libsophon-current->/opt/sophon/libsophon-0.4.2
+```
+
+## How to Build and Install C++ SDK
+After setting up the compilation environment, the compilation command is as follows:
+```bash
+# Download the latest source code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+mkdir build && cd build
+
+# CMake configuration with the SOPHGO backend
+cmake -DENABLE_SOPHGO_BACKEND=ON \
+      -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-sophgo \
+      -DENABLE_VISION=ON \
+      ..
+
+# Build the FastDeploy SOPHGO C++ SDK
+make -j8
+make install
+```
+When the compilation is complete, the fastdeploy-sophgo directory is created in the current build directory, indicating that the FastDeploy library has been compiled.
+
+## Compiling Python FastDeploy Libraries
+After setting up the compilation environment, the compilation command is as follows:
+```bash
+# Download the latest source code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/python
+export ENABLE_SOPHGO_BACKEND=ON
+export ENABLE_VISION=ON
+
+python setup.py build
+python setup.py bdist_wheel
+
+# After the compilation is complete, please install the whl package in the dist folder of the current directory.
+```
diff --git a/docs/en/faq/boost_cv_by_flycv.md b/docs/en/faq/boost_cv_by_flycv.md
new file mode 100644
index 00000000000..2a06a362933
--- /dev/null
+++ b/docs/en/faq/boost_cv_by_flycv.md
@@ -0,0 +1,66 @@
+[简体中文](../../cn/faq/boost_cv_by_flycv.md) | English
+
+
+# Accelerate end-to-end inference performance using FlyCV
+
+[FlyCV](https://github.com/PaddlePaddle/FlyCV) is a high-performance image processing library that provides better performance than other image processing libraries, especially on ARM architectures.
+FastDeploy is now integrated with FlyCV, allowing users to use FlyCV on supported hardware platforms to accelerate model end-to-end inference performance.
+
+## Supported OS and Architectures
+
+| OS | Architectures |
+| :-----------| :-------- |
+| Android | armeabi-v7a, arm64-v8a |
+| Linux | aarch64, armhf, x86_64|
+
+
+## Usage
+To use FlyCV, you first need to turn on the FlyCV compile option when building FastDeploy, and then add one line of code in your deployment program to enable it.
+This document uses Linux as an example to show how to enable the FlyCV compile option and how to use FlyCV during deployment.
+
+You can turn on the FlyCV compile option when compiling the FastDeploy library as follows.
+```bash
+# When compiling the C++ library
+-DENABLE_FLYCV=ON
+
+# When compiling the Python library
+export ENABLE_FLYCV=ON
+```
+
+You can then enable FlyCV by adding one line of code to the deployment code as follows.
+```bash
+# C++ code
+fastdeploy::vision::EnableFlyCV();
+# Other options (e.g. with Huawei Ascend)
+fastdeploy::RuntimeOption option;
+option.UseAscend();
+...
+
+
+# Python code
+fastdeploy.vision.enable_flycv()
+# Other options (e.g. with Huawei Ascend)
+runtime_option = build_option()
+runtime_option.use_ascend()
+...
+```
+
+## FlyCV End-to-End Inference Performance on Some Platforms
+
+KunPeng 920 CPU + Atlas 300I Pro.
+| Model | OpenCV E2E Performance(ms) | FlyCV E2E Performance(ms) |
+| :-----------| :-------- | :-------- |
+| ResNet50 | 2.78 | 1.63 |
+| PP-LCNetV2 | 2.50 | 1.39 |
+| YOLOv7 | 27.00 | 21.36 |
+| PP_HumanSegV2_Lite | 2.76 | 2.10 |
+
+
+Rockchip RV1126.
+
+| Model | OpenCV E2E Performance(ms) | FlyCV E2E Performance(ms) |
+| :-----------| :-------- | :-------- |
+| ResNet50 | 9.23 | 6.01 |
+| mobilenetv1_ssld (quantized) | 9.23 | 6.01 |
+| yolov5s (quantized) | 28.33 | 14.25 |
+| PP_LiteSeg (quantized) | 132.25 | 60.31 |
diff --git a/docs/en/faq/heterogeneous_computing_on_timvx_npu.md b/docs/en/faq/heterogeneous_computing_on_timvx_npu.md
new file mode 100755
index 00000000000..467fd45fae2
--- /dev/null
+++ b/docs/en/faq/heterogeneous_computing_on_timvx_npu.md
@@ -0,0 +1,36 @@
+English | [中文](../../cn/faq/heterogeneous_computing_on_timvx_npu.md)
+
+# Heterogeneous Computing on VeriSilicon Series NPUs
+When deploying a quantized model on a VeriSilicon series NPU, such as the RV1126 or A311D, accuracy may drop, so heterogeneous computing between the NPU and the ARM CPU is needed. Heterogeneous computing in FastDeploy is implemented by loading a subgraph.txt configuration file. If you find that the accuracy has dropped significantly after replacing the quantized model, you can refer to this document to define the heterogeneous configuration file.
+
+Update steps for the heterogeneous configuration file:
+1. Determine the accuracy of the quantized model on an ARM CPU.
+- If the accuracy requirement cannot be met on the ARM CPU, there is a problem with the quantized model itself; in that case, consider modifying the dataset or changing the quantization method.
+- You only need to modify a few lines of code in the demo to switch the NPU inference part to ARM CPU int8 inference.
+  ```
+  # The following interface represents the use of NPU for inference
+  fastdeploy::RuntimeOption option;
+  option.UseTimVX(); # Turn on TIMVX for NPU inference
+  option.SetLiteSubgraphPartitionPath(subgraph_file); # Load the heterogeneous computing configuration file
+
+  # The following interface indicates the use of ARM CPU int8 inference
+  fastdeploy::RuntimeOption option;
+  option.UseLiteBackend();
+  option.EnableLiteInt8();
+  ```
+  If the ARM CPU accuracy is up to standard, continue with the next steps.
+
+2. Obtain the topology information of the entire network.
+- Roll back the modification made in the first step, use the NPU inference API again, and keep the switch for loading the heterogeneous computing configuration file off.
+- Write all the log information to log.txt, then search for the keyword "subgraph operators" in log.txt; the paragraph that follows is the topology information of the entire model.
+- It has the following format:
+  - Each record line has the form "operator type:list of input tensor names:list of output tensor names", that is, the operator type, the input tensor name list and the output tensor name list are separated by colons, and the tensor names within each list are separated by commas;
+  - Example:
+  ```
+  op_type0:var_name0,var_name1:var_name2 # Indicates that the node whose operator type is op_type0, whose input tensors are var_name0 and var_name1, and whose output tensor is var_name2 is forced to run on the ARM CPU
+  ```
+
+3. Modify the heterogeneous configuration file.
+- Write all the subgraph operators into subgraph.txt and turn on the interface for loading the heterogeneous computing configuration file.
+- Delete entries line by line, in blocks, or by binary search; with a bit of patience, find the layer that introduces the NPU accuracy anomaly and leave it in subgraph.txt.
+- The nodes listed in the txt file are the layers that are offloaded to the ARM CPU, so there is no need to worry much about performance; the ARM kernels of Paddle Lite also perform very well.
diff --git a/docs/en/faq/use_sdk_on_ascend.md b/docs/en/faq/use_sdk_on_ascend.md
index e3cd55b40c6..c1ac864b85b 100644
--- a/docs/en/faq/use_sdk_on_ascend.md
+++ b/docs/en/faq/use_sdk_on_ascend.md
@@ -1,4 +1,4 @@
-[简体中文](../../cn/faq/use_sdk_on_linux.md) | English
+[简体中文](../../cn/faq/use_sdk_on_ascend.md) | English
# # Linux deployment with C++ on Huawei Ascend
diff --git a/examples/text/uie/serving/models/uie/1/model.py b/examples/text/uie/serving/models/uie/1/model.py
index 5bb1c816417..b839ae0657e 100644
--- a/examples/text/uie/serving/models/uie/1/model.py
+++ b/examples/text/uie/serving/models/uie/1/model.py
@@ -141,7 +141,7 @@ def execute(self, requests):
        self.uie_model_.set_schema(schema)
        results = self.uie_model_.predict(texts, return_dict=True)
-        results = np.array(results, dtype=np.object)
+        results = np.array(results, dtype=np.object_)
        out_tensor = pb_utils.Tensor(self.output_names[0], results)
        inference_response = pb_utils.InferenceResponse(
            output_tensors=[out_tensor, ])
diff --git a/examples/vision/classification/paddleclas/a311d/cpp/README.md b/examples/vision/classification/paddleclas/a311d/cpp/README.md
index 11279ccb000..c7f6afa1e45 100755
--- a/examples/vision/classification/paddleclas/a311d/cpp/README.md
+++ b/examples/vision/classification/paddleclas/a311d/cpp/README.md
@@ -3,12 +3,13 @@
## 部署准备
### FastDeploy 交叉编译环境准备
-- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建)
+1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建)

### 量化模型准备
-- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。
-- 2. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的inference_cls.yaml文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从 FP32 模型文件夹下复制此 yaml 文件到量化后的模型文件夹内即可.)
-- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md)
+1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。
+2. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的inference_cls.yaml文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从 FP32 模型文件夹下复制此 yaml 文件到量化后的模型文件夹内即可.)
+ +更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 A311D 上部署量化后的 ResNet50_Vd 分类模型 请按照以下步骤完成在 A311D 上部署 ResNet50_Vd 量化模型: diff --git a/examples/vision/classification/paddleclas/rknpu2/README.md b/examples/vision/classification/paddleclas/rknpu2/README.md index bd4305dc0a5..d61e6c57064 100644 --- a/examples/vision/classification/paddleclas/rknpu2/README.md +++ b/examples/vision/classification/paddleclas/rknpu2/README.md @@ -3,6 +3,7 @@ ## 转换模型 下面以 ResNet50_vd为例子,教大家如何转换分类模型到RKNN模型。 +### 导出ONNX模型 ```bash # 安装 paddle2onnx pip install paddle2onnx @@ -17,34 +18,59 @@ paddle2onnx --model_dir ResNet50_vd_infer \ --params_filename inference.pdiparams \ --save_file ResNet50_vd_infer/ResNet50_vd_infer.onnx \ --enable_dev_version True \ - --opset_version 12 \ + --opset_version 10 \ --enable_onnx_checker True # 固定shape,注意这里的inputs得对应netron.app展示的 inputs 的 name,有可能是image 或者 x python -m paddle2onnx.optimize --input_model ResNet50_vd_infer/ResNet50_vd_infer.onnx \ --output_model ResNet50_vd_infer/ResNet50_vd_infer.onnx \ --input_shape_dict "{'inputs':[1,3,224,224]}" -``` +``` - ### 编写模型导出配置文件 -以转化RK3588的RKNN模型为例子,我们需要编辑tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml,来转换ONNX模型到RKNN模型。 +### 编写模型导出配置文件 +以转化RK3588的RKNN模型为例子,我们需要编辑tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml,来转换ONNX模型到RKNN模型。 -默认的 mean=0, std=1是在内存做normalize,如果你需要在NPU上执行normalize操作,请根据你的模型配置normalize参数,例如: +如果你需要在NPU上执行normalize操作,请根据你的模型配置normalize参数,例如: ```yaml -model_path: ./ResNet50_vd_infer.onnx -output_folder: ./ -target_platform: RK3588 -normalize: - mean: [[0.485,0.456,0.406]] - std: [[0.229,0.224,0.225]] -outputs: [] -outputs_nodes: [] +model_path: ./ResNet50_vd_infer/ResNet50_vd_infer.onnx +output_folder: ./ResNet50_vd_infer +mean: + - + - 123.675 + - 116.28 + - 103.53 +std: + - + - 58.395 + - 57.12 + - 57.375 +outputs_nodes: do_quantization: False -dataset: +dataset: "./ResNet50_vd_infer/dataset.txt" ``` +**在CPU上做normalize**可以参考以下yaml: +```yaml +model_path: ./ResNet50_vd_infer/ResNet50_vd_infer.onnx +output_folder: ./ResNet50_vd_infer +mean: + - + - 0 + - 0 + - 0 +std: + - + - 1 + - 1 + - 1 +outputs_nodes: +do_quantization: False +dataset: "./ResNet50_vd_infer/dataset.txt" +``` +这里我们选择在NPU上执行normalize操作. 
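+
+The NPU-side mean/std values above are simply the common ImageNet statistics rescaled from the 0-1 range to the 0-255 pixel range used by the RKNN config; a minimal Python sketch of that conversion, for illustration only:
+
+```python
+# ImageNet mean/std on the 0-1 scale (as in the earlier version of this config example)
+imagenet_mean = [0.485, 0.456, 0.406]
+imagenet_std = [0.229, 0.224, 0.225]
+
+# Rescale to the 0-255 pixel range expected by the normalize parameters above
+npu_mean = [round(m * 255, 3) for m in imagenet_mean]  # [123.675, 116.28, 103.53]
+npu_std = [round(s * 255, 3) for s in imagenet_std]    # [58.395, 57.12, 57.375]
+print(npu_mean, npu_std)
+```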
+ -# ONNX模型转RKNN模型 +### ONNX模型转RKNN模型 ```shell python tools/rknpu2/export.py \ --config_path tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml \ @@ -54,4 +80,4 @@ python tools/rknpu2/export.py \ ## 其他链接 - [Cpp部署](./cpp) - [Python部署](./python) -- [视觉模型预测结果](../../../../../docs/api/vision_results/) \ No newline at end of file +- [视觉模型预测结果](../../../../../docs/api/vision_results/) diff --git a/examples/vision/classification/paddleclas/rknpu2/cpp/README.md b/examples/vision/classification/paddleclas/rknpu2/cpp/README.md index 1e1883486d1..c21d1d77b37 100644 --- a/examples/vision/classification/paddleclas/rknpu2/cpp/README.md +++ b/examples/vision/classification/paddleclas/rknpu2/cpp/README.md @@ -64,8 +64,8 @@ cd ./build/install ## 运行结果展示 ClassifyResult( -label_ids: 153, -scores: 0.684570, +label_ids: 153, +scores: 0.684570, ) ## 注意事项 @@ -75,4 +75,4 @@ DisablePermute(C++)或`disable_permute(Python),在预处理阶段禁用数据 ## 其它文档 - [ResNet50_vd Python 部署](../python) - [模型预测结果说明](../../../../../../docs/api/vision_results/) -- [转换ResNet50_vd RKNN模型文档](../README.md) \ No newline at end of file +- [转换ResNet50_vd RKNN模型文档](../README.md) diff --git a/examples/vision/classification/paddleclas/rknpu2/python/README.md b/examples/vision/classification/paddleclas/rknpu2/python/README.md index b85bb81f70a..f1f0994d857 100644 --- a/examples/vision/classification/paddleclas/rknpu2/python/README.md +++ b/examples/vision/classification/paddleclas/rknpu2/python/README.md @@ -19,8 +19,8 @@ python3 infer.py --model_file ./ResNet50_vd_infer/ResNet50_vd_infer_rk3588.rknn # 运行完成后返回结果如下所示 ClassifyResult( -label_ids: 153, -scores: 0.684570, +label_ids: 153, +scores: 0.684570, ) ``` @@ -32,4 +32,4 @@ DisablePermute(C++)或`disable_permute(Python),在预处理阶段禁用数据 ## 其它文档 - [ResNet50_vd C++部署](../cpp) - [模型预测结果说明](../../../../../../docs/api/vision_results/) -- [转换ResNet50_vd RKNN模型文档](../README.md) \ No newline at end of file +- [转换ResNet50_vd RKNN模型文档](../README.md) diff --git a/examples/vision/classification/paddleclas/rv1126/cpp/README.md b/examples/vision/classification/paddleclas/rv1126/cpp/README.md index b0e4623a704..b621ff72003 100755 --- a/examples/vision/classification/paddleclas/rv1126/cpp/README.md +++ b/examples/vision/classification/paddleclas/rv1126/cpp/README.md @@ -3,12 +3,13 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) ### 量化模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的inference_cls.yaml文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从 FP32 模型文件夹下复制此 yaml 文件到量化后的模型文件夹内即可.) -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 +2. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的inference_cls.yaml文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从 FP32 模型文件夹下复制此 yaml 文件到量化后的模型文件夹内即可.) 
+
+更多量化相关信息可查阅[模型量化](../../quantize/README.md)
## 在 RV1126 上部署量化后的 ResNet50_Vd 分类模型
请按照以下步骤完成在 RV1126 上部署 ResNet50_Vd 量化模型:
diff --git a/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py b/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py
index 0ab7dcdc4ae..de000f6eee8 100755
--- a/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py
+++ b/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py
@@ -92,7 +92,7 @@ def execute(self, requests):
        results = self.postprocess_.run([infer_outputs, ])
        r_str = fd.vision.utils.fd_result_to_json(results)
-        r_np = np.array(r_str, dtype=np.object)
+        r_np = np.array(r_str, dtype=np.object_)
        out_tensor = pb_utils.Tensor(self.output_names[0], r_np)
        inference_response = pb_utils.InferenceResponse(
            output_tensors=[out_tensor, ])
diff --git a/examples/vision/classification/paddleclas/sophgo/README.md b/examples/vision/classification/paddleclas/sophgo/README.md
new file mode 100644
index 00000000000..32bb3bfbf19
--- /dev/null
+++ b/examples/vision/classification/paddleclas/sophgo/README.md
@@ -0,0 +1,84 @@
+# PaddleClas SOPHGO部署示例
+
+## 支持模型列表
+
+目前FastDeploy支持如下模型的部署:[ResNet系列模型](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.4/docs/zh_CN/models/ResNet_and_vd.md)
+
+## 准备ResNet部署模型以及转换模型
+
+SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步骤如下:
+- Paddle动态图模型转换为ONNX模型,请参考[Paddle2ONNX模型转换](https://github.com/PaddlePaddle/Paddle2ONNX/tree/develop/model_zoo/classification)
+- ONNX模型转换bmodel模型的过程,请参考[TPU-MLIR](https://github.com/sophgo/tpu-mlir)。
+
+## 模型转换example
+
+下面以[ResNet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz)为例子,教大家如何转换Paddle模型到SOPHGO-TPU模型。
+
+## 导出ONNX模型
+
+### 下载Paddle ResNet50_vd静态图模型并解压
+```shell
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
+tar xvf ResNet50_vd_infer.tgz
+```
+
+### 静态图转ONNX模型,注意,这里的save_file请和压缩包名对齐
+```shell
+paddle2onnx --model_dir ResNet50_vd_infer \
+            --model_filename inference.pdmodel \
+            --params_filename inference.pdiparams \
+            --save_file ResNet50_vd_infer.onnx \
+            --enable_dev_version True
+```
+### 导出bmodel模型
+
+以转化BM1684x的bmodel模型为例子,我们需要下载[TPU-MLIR](https://github.com/sophgo/tpu-mlir)工程,安装过程具体参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。
+### 1. 安装
+``` shell
+docker pull sophgo/tpuc_dev:latest
+
+# myname1234是一个示例,也可以设置其他名字
+docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest
+
+source ./envsetup.sh
+./build.sh
+```
+
+### 2. ONNX模型转换为bmodel模型
+``` shell
+mkdir ResNet50_vd_infer && cd ResNet50_vd_infer
+
+# 在该文件夹中放入测试图片,同时将上一步转换好的ResNet50_vd_infer.onnx放入该文件夹中
+cp -rf ${REGRESSION_PATH}/dataset/COCO2017 .
+cp -rf ${REGRESSION_PATH}/image .
+# 放入onnx模型文件ResNet50_vd_infer.onnx + +mkdir workspace && cd workspace + +# 将ONNX模型转换为mlir模型,其中参数--output_names可以通过NETRON查看 +model_transform.py \ + --model_name ResNet50_vd_infer \ + --model_def ../ResNet50_vd_infer.onnx \ + --input_shapes [[1,3,224,224]] \ + --mean 0.0,0.0,0.0 \ + --scale 0.0039216,0.0039216,0.0039216 \ + --keep_aspect_ratio \ + --pixel_format rgb \ + --output_names save_infer_model/scale_0.tmp_1 \ + --test_input ../image/dog.jpg \ + --test_result ResNet50_vd_infer_top_outputs.npz \ + --mlir ResNet50_vd_infer.mlir + +# 将mlir模型转换为BM1684x的F32 bmodel模型 +model_deploy.py \ + --mlir ResNet50_vd_infer.mlir \ + --quantize F32 \ + --chip bm1684x \ + --test_input ResNet50_vd_infer_in_f32.npz \ + --test_reference ResNet50_vd_infer_top_outputs.npz \ + --model ResNet50_vd_infer_1684x_f32.bmodel +``` +最终获得可以在BM1684x上能够运行的bmodel模型ResNet50_vd_infer_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 + +## 其他链接 +- [Cpp部署](./cpp) diff --git a/examples/vision/classification/paddleclas/sophgo/cpp/CMakeLists.txt b/examples/vision/classification/paddleclas/sophgo/cpp/CMakeLists.txt new file mode 100644 index 00000000000..53837058969 --- /dev/null +++ b/examples/vision/classification/paddleclas/sophgo/cpp/CMakeLists.txt @@ -0,0 +1,17 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +set(ENABLE_LITE_BACKEND OFF) +#set(FDLIB ${FASTDEPLOY_INSTALL_DIR}) + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) +include_directories(${FastDeploy_INCLUDE_DIRS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/classification/paddleclas/sophgo/cpp/README.md b/examples/vision/classification/paddleclas/sophgo/cpp/README.md new file mode 100644 index 00000000000..7edfd2c94b6 --- /dev/null +++ b/examples/vision/classification/paddleclas/sophgo/cpp/README.md @@ -0,0 +1,61 @@ +# PaddleClas C++部署示例 + +本目录下提供`infer.cc`快速完成ResNet50_vd模型在SOPHGO BM1684x板子上加速部署的示例。 + +在部署前,需确认以下两个步骤: + +1. 软硬件环境满足要求 +2. 根据开发环境,从头编译FastDeploy仓库 + +以上步骤请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)实现 + +## 生成基本目录文件 + +该例程由以下几个部分组成 +```text +. +├── CMakeLists.txt +├── build # 编译文件夹 +├── image # 存放图片的文件夹 +├── infer.cc +├── preprocess_config.yaml #示例前处理配置文件 +└── model # 存放模型文件的文件夹 +``` + +## 编译 + +### 编译并拷贝SDK到thirdpartys文件夹 + +请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录. + +### 拷贝模型文件,以及配置文件至model文件夹 +将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md) +将转换后的SOPHGO bmodel模型文件拷贝至model中 +将前处理配置文件也拷贝到model中 +```bash +cp preprocess_config.yaml ./model +``` + +### 准备测试图片至image文件夹 +```bash +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg +cp ILSVRC2012_val_00000010.jpeg ./images +``` + +### 编译example + +```bash +cd build +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-0.0.3 +make +``` + +## 运行例程 + +```bash +./infer_demo model images/ILSVRC2012_val_00000010.jpeg +``` + + +- [模型介绍](../../) +- [模型转换](../) diff --git a/examples/vision/classification/paddleclas/sophgo/cpp/infer.cc b/examples/vision/classification/paddleclas/sophgo/cpp/infer.cc new file mode 100644 index 00000000000..b9281ffb0c1 --- /dev/null +++ b/examples/vision/classification/paddleclas/sophgo/cpp/infer.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include "fastdeploy/vision.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void InitAndInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "resnet50_1684x_f32.bmodel"; + auto params_file = model_dir + sep + ""; + auto config_file = model_dir + sep + "preprocess_config.yaml"; + + fastdeploy::RuntimeOption option; + option.UseSophgo(); + auto model_format = fastdeploy::ModelFormat::SOPHGO; + auto model = fastdeploy::vision::classification::PaddleClasModel( + model_file, params_file, config_file, option, model_format); + + assert(model.Initialized()); + + auto im = cv::imread(image_file); + + fastdeploy::vision::ClassifyResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + +} + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: infer_demo path/to/model " + "path/to/image " + "run_option, " + "e.g ./infer_demo ./bmodel ./test.jpeg" + << std::endl; + return -1; + } + + std::string model_dir = argv[1]; + std::string test_image = argv[2]; + InitAndInfer(model_dir, test_image); + return 0; +} diff --git a/examples/vision/classification/paddleclas/sophgo/python/README.md b/examples/vision/classification/paddleclas/sophgo/python/README.md new file mode 100644 index 00000000000..f495e583095 --- /dev/null +++ b/examples/vision/classification/paddleclas/sophgo/python/README.md @@ -0,0 +1,29 @@ +# PaddleClas Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 
软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/sophgo.md) + +本目录下提供`infer.py`快速完成 ResNet50_vd 在SOPHGO TPU上部署的示例。执行如下脚本即可完成 + +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/classification/paddleclas/sophgo/python + +# 下载图片 +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# 推理 +python3 infer.py --model_file ./bmodel/resnet50_1684x_f32.bmodel --config_file ResNet50_vd_infer/inference_cls.yaml --image ILSVRC2012_val_00000010.jpeg + +# 运行完成后返回结果如下所示 +ClassifyResult( +label_ids: 153, +scores: 0.684570, +) +``` + +## 其它文档 +- [ResNet50_vd C++部署](../cpp) +- [转换ResNet50_vd SOPHGO模型文档](../README.md) diff --git a/examples/vision/classification/paddleclas/sophgo/python/infer.py b/examples/vision/classification/paddleclas/sophgo/python/infer.py new file mode 100644 index 00000000000..5bc84789ec7 --- /dev/null +++ b/examples/vision/classification/paddleclas/sophgo/python/infer.py @@ -0,0 +1,41 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, help="Path of model.") + parser.add_argument( + "--config_file", required=True, help="Path of config file.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--topk", type=int, default=1, help="Return topk results.") + + return parser.parse_args() + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = fd.RuntimeOption() +runtime_option.use_sophgo() + +model_file = args.model +params_file = "" +config_file = args.config_file + +model = fd.vision.classification.PaddleClasModel( + model_file, + params_file, + config_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.SOPHGO) + +# 预测图片分类结果 +im = cv2.imread(args.image) +result = model.predict(im, args.topk) +print(result) diff --git a/examples/vision/detection/nanodet_plus/python/README.md b/examples/vision/detection/nanodet_plus/python/README.md index b5085662ce2..a89e15d1b17 100644 --- a/examples/vision/detection/nanodet_plus/python/README.md +++ b/examples/vision/detection/nanodet_plus/python/README.md @@ -69,7 +69,7 @@ NanoDetPlus模型加载和初始化,其中model_file为导出的ONNX模型格 > > * **padding_value**(list[float]): 通过此参数可以修改图片在resize时候做填充(padding)的值, 包含三个浮点型元素, 分别表示三个通道的值, 默认值为[0, 0, 0] > > * **keep_ratio**(bool): 通过此参数指定resize时是否保持宽高比例不变,默认是fasle. > > * **reg_max**(int): GFL回归中的reg_max参数,默认是7. 
-> > * **downsample_strides**(list[int]): 通过此参数可以修改生成anchor的特征图的下采样倍数, 包含三个整型元素, 分别表示默认的生成anchor的下采样倍数, 默认值为[8, 16, 32, 64] +> > * **downsample_strides**(list[int]): 通过此参数可以修改生成anchor的特征图的下采样倍数, 包含四个整型元素, 分别表示默认的生成anchor的下采样倍数, 默认值为[8, 16, 32, 64] diff --git a/examples/vision/detection/paddledetection/README.md b/examples/vision/detection/paddledetection/README.md index ae4ba494c4b..11b616ba90d 100644 --- a/examples/vision/detection/paddledetection/README.md +++ b/examples/vision/detection/paddledetection/README.md @@ -20,6 +20,15 @@ - [YOLOv6系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.5/configs/yolov6) - [YOLOv7系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.5/configs/yolov7) - [RTMDet系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.5/configs/rtmdet) +- [CascadeRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/cascade_rcnn) +- [PSSDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/rcnn_enhance) +- [RetinaNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/retinanet) +- [PPYOLOESOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/smalldet) +- [FCOS系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/fcos) +- [TTFNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ttfnet) +- [TOOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/tood) +- [GFL系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/gfl) + ## 导出部署模型 @@ -59,6 +68,16 @@ | [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68M | Box AP 43.4%| | | [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145M | Box AP 51.0%| | | [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277M | Box AP 53.0%| | +| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271M | Box AP 41.1%| 暂不支持TensorRT、ORT | +| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271M | Box AP 45.0%| 暂不支持TensorRT、ORT | +| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119M | Box AP 41.5%| 暂不支持TensorRT、ORT | +| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129M | Box AP 39.6%| 暂不支持TensorRT | +| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128M | Box AP 41.0%| 暂不支持TensorRT | +| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200M | Box AP 31.9%| | +| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210M | Box AP 40.6%| 暂不支持TensorRT、ORT | +| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136M | Box AP 37.5%| 暂不支持TensorRT、ORT | +| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130M | Box AP 42.5%| 暂不支持TensorRT、ORT | +| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178M | Box AP 33.5%| 暂不支持TensorRT、ORT | ## 详细部署文档 diff --git a/examples/vision/detection/paddledetection/a311d/cpp/README.md 
b/examples/vision/detection/paddledetection/a311d/cpp/README.md index 41e4ab3d287..baf2016f4a2 100755 --- a/examples/vision/detection/paddledetection/a311d/cpp/README.md +++ b/examples/vision/detection/paddledetection/a311d/cpp/README.md @@ -4,13 +4,15 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) ### 模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以先使用 PaddleDetection 自行导出 Float32 模型,注意导出模型模型时设置参数:use_shared_conv=False,更多细节请参考:[PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) -- 3. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的检测模型仍然需要FP32模型文件夹下的 infer_cfg.yml 文件,自行量化的模型文件夹内不包含此 yaml 文件,用户从 FP32 模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。) -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 +2. 用户可以先使用 PaddleDetection 自行导出 Float32 模型,注意导出模型模型时设置参数:use_shared_conv=False,更多细节请参考:[PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) +3. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的检测模型仍然需要FP32模型文件夹下的 infer_cfg.yml 文件,自行量化的模型文件夹内不包含此 yaml 文件,用户从 FP32 模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。) +4. 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 + +更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 A311D 上部署量化后的 PP-YOLOE 检测模型 请按照以下步骤完成在 A311D 上部署 PP-YOLOE 量化模型: diff --git a/examples/vision/detection/paddledetection/cpp/CMakeLists.txt b/examples/vision/detection/paddledetection/cpp/CMakeLists.txt index 9382931a146..6dcbb7cc884 100644 --- a/examples/vision/detection/paddledetection/cpp/CMakeLists.txt +++ b/examples/vision/detection/paddledetection/cpp/CMakeLists.txt @@ -44,3 +44,27 @@ target_link_libraries(infer_yolov7_demo ${FASTDEPLOY_LIBS}) add_executable(infer_rtmdet_demo ${PROJECT_SOURCE_DIR}/infer_rtmdet.cc) target_link_libraries(infer_rtmdet_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_cascadercnn_demo ${PROJECT_SOURCE_DIR}/infer_cascadercnn.cc) +target_link_libraries(infer_cascadercnn_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_pssdet_demo ${PROJECT_SOURCE_DIR}/infer_pssdet.cc) +target_link_libraries(infer_pssdet_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_retinanet_demo ${PROJECT_SOURCE_DIR}/infer_retinanet.cc) +target_link_libraries(infer_retinanet_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_ppyoloesod_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloesod.cc) +target_link_libraries(infer_ppyoloesod_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_fcos_demo ${PROJECT_SOURCE_DIR}/infer_fcos.cc) +target_link_libraries(infer_fcos_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_ttfnet_demo ${PROJECT_SOURCE_DIR}/infer_ttfnet.cc) +target_link_libraries(infer_ttfnet_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_tood_demo ${PROJECT_SOURCE_DIR}/infer_tood.cc) +target_link_libraries(infer_tood_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_gfl_demo ${PROJECT_SOURCE_DIR}/infer_gfl.cc) +target_link_libraries(infer_gfl_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/detection/paddledetection/cpp/README.md b/examples/vision/detection/paddledetection/cpp/README.md index 5dbee5bf2df..0e944a465aa 100755 --- 
a/examples/vision/detection/paddledetection/cpp/README.md +++ b/examples/vision/detection/paddledetection/cpp/README.md @@ -1,6 +1,6 @@ # PaddleDetection C++部署示例 -本目录下提供`infer_xxx.cc`快速完成PaddleDetection模型包括PPYOLOE/PicoDet/YOLOX/YOLOv3/PPYOLO/FasterRCNN/YOLOv5/YOLOv6/YOLOv7/RTMDet在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 +本目录下提供`infer_xxx.cc`快速完成PaddleDetection模型包括PPYOLOE/PicoDet/YOLOX/YOLOv3/PPYOLO/FasterRCNN/YOLOv5/YOLOv6/YOLOv7/RTMDet/CascadeRCNN/PSSDet/RetinaNet/PPYOLOESOD/FCOS/TTFNet/TOOD/GFL在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 在部署前,需确认以下两个步骤 @@ -34,16 +34,21 @@ tar xvf ppyoloe_crn_l_300e_coco.tgz ./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2 # 昆仑芯XPU推理 ./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 3 +# 华为昇腾推理 +./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 4 ``` 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: - [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + ## PaddleDetection C++接口 ### 模型类 -PaddleDetection目前支持6种模型系列,类名分别为`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`所有类名的构造函数和预测函数在参数上完全一致,本文档以PPYOLOE为例讲解API +PaddleDetection目前支持6种模型系列,类名分别为`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`所有类名的构造函数和预测函数在参数上完全一致,本文档以PPYOLOE为例讲解API ```c++ fastdeploy::vision::detection::PPYOLOE( const string& model_file, diff --git a/examples/vision/detection/paddledetection/cpp/infer_cascadercnn.cc b/examples/vision/detection/paddledetection/cpp/infer_cascadercnn.cc new file mode 100644 index 00000000000..35043aa35c1 --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_cascadercnn.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::CascadeRCNN(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::CascadeRCNN(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_fcos.cc b/examples/vision/detection/paddledetection/cpp/infer_fcos.cc new file mode 100644 index 00000000000..9c9bd18cb6b --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_fcos.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::FCOS(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::FCOS(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_gfl.cc b/examples/vision/detection/paddledetection/cpp/infer_gfl.cc new file mode 100644 index 00000000000..ae772f63a30 --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_gfl.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::GFL(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::GFL(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc b/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc index 088e984027f..6db82aaca77 100755 --- a/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc @@ -102,6 +102,33 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseAscend(); + auto model = fastdeploy::vision::detection::PPYOLO(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + int main(int argc, char* argv[]) { if (argc < 4) { std::cout @@ -120,6 +147,8 @@ int main(int argc, char* argv[]) { GpuInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 2) { KunlunXinInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 3) { + AscendInfer(argv[1], argv[2]); } return 0; } diff --git a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc index 99922e22a94..c7efaa7c5ba 100755 --- a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc @@ -131,6 +131,33 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseAscend(); + auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + int main(int argc, char* argv[]) { if (argc < 4) { std::cout @@ -151,6 +178,8 @@ int main(int argc, char* argv[]) { TrtInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 3) { KunlunXinInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 4) { + AscendInfer(argv[1], argv[2]); } return 0; } diff --git a/examples/vision/detection/paddledetection/cpp/infer_ppyoloesod.cc b/examples/vision/detection/paddledetection/cpp/infer_ppyoloesod.cc new file mode 100644 index 00000000000..2ef3d0d1bff --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_ppyoloesod.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
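+
+// Demo for the PP-YOLOE-SOD (small object detection) model exported from PaddleDetection:
+// it loads model.pdmodel / model.pdiparams / infer_cfg.yml from the given directory and
+// runs detection on a single image. The third command-line argument selects the backend:
+// 0 = CPU, 1 = GPU, 2 = GPU with the TensorRT backend.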
+ +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::PPYOLOESOD(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::PPYOLOESOD(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void TrtInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + option.UseTrtBackend(); + auto model = fastdeploy::vision::detection::PPYOLOESOD(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." 
+ << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 2) { + TrtInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_pssdet.cc b/examples/vision/detection/paddledetection/cpp/infer_pssdet.cc new file mode 100644 index 00000000000..5d282d86104 --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_pssdet.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::PSSDet(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::PSSDet(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." 
+ << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_retinanet.cc b/examples/vision/detection/paddledetection/cpp/infer_retinanet.cc new file mode 100644 index 00000000000..4accb51c88a --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_retinanet.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::RetinaNet(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::RetinaNet(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." 
+ << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_ssd.cc b/examples/vision/detection/paddledetection/cpp/infer_ssd.cc index b71bf266cf1..6c050da7f69 100755 --- a/examples/vision/detection/paddledetection/cpp/infer_ssd.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_ssd.cc @@ -104,6 +104,33 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseAscend(); + auto model = fastdeploy::vision::detection::SSD(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + int main(int argc, char* argv[]) { if (argc < 4) { std::cout @@ -122,6 +149,8 @@ int main(int argc, char* argv[]) { GpuInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 2) { KunlunXinInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 3) { + AscendInfer(argv[1], argv[2]); } return 0; } diff --git a/examples/vision/detection/paddledetection/cpp/infer_tood.cc b/examples/vision/detection/paddledetection/cpp/infer_tood.cc new file mode 100644 index 00000000000..fb0fd778b71 --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_tood.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::TOOD(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." 
<< std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::TOOD(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_ttfnet.cc b/examples/vision/detection/paddledetection/cpp/infer_ttfnet.cc new file mode 100644 index 00000000000..144486f159e --- /dev/null +++ b/examples/vision/detection/paddledetection/cpp/infer_ttfnet.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::detection::TTFNet(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." 
<< std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::detection::TTFNet(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc b/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc index 3ec4410146d..2f866bf5e51 100755 --- a/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc @@ -102,6 +102,34 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } + +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseAscend(); + auto model = fastdeploy::vision::detection::YOLOv3(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + int main(int argc, char* argv[]) { if (argc < 4) { std::cout @@ -120,6 +148,8 @@ int main(int argc, char* argv[]) { GpuInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 2) { KunlunXinInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 3) { + AscendInfer(argv[1], argv[2]); } return 0; } diff --git a/examples/vision/detection/paddledetection/python/README.md b/examples/vision/detection/paddledetection/python/README.md index b926dd9eea5..856629a5686 100755 --- a/examples/vision/detection/paddledetection/python/README.md +++ b/examples/vision/detection/paddledetection/python/README.md @@ -25,6 +25,8 @@ python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439 python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device gpu --use_trt True # 昆仑芯XPU推理 python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device kunlunxin +# 华为昇腾推理 +python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device ascend ``` 运行完成可视化结果如下图所示 @@ -47,6 +49,14 @@ fastdeploy.vision.detection.PaddleYOLOv5(model_file, params_file, config_file, r fastdeploy.vision.detection.PaddleYOLOv6(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) fastdeploy.vision.detection.PaddleYOLOv7(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) fastdeploy.vision.detection.RTMDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.CascadeRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.PSSDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.RetinaNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.PPYOLOESOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.FCOS(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.TTFNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.TOOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +fastdeploy.vision.detection.GFL(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) ``` PaddleDetection模型加载和初始化,其中model_file, params_file为导出的Paddle部署模型格式, config_file为PaddleDetection同时导出的部署配置yaml文件 diff --git a/examples/vision/detection/paddledetection/python/infer_cascadercnn.py b/examples/vision/detection/paddledetection/python/infer_cascadercnn.py new file mode 100644 index 00000000000..26541a8a7a6 --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_cascadercnn.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", 
required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.CascadeRCNN( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_fcos.py b/examples/vision/detection/paddledetection/python/infer_fcos.py new file mode 100644 index 00000000000..f8ff6ffa333 --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_fcos.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.FCOS( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_gfl.py b/examples/vision/detection/paddledetection/python/infer_gfl.py new file mode 100644 index 00000000000..36194ea630b --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_gfl.py @@ -0,0 +1,51 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, 
"model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.GFL( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im.copy()) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_ppyolo.py b/examples/vision/detection/paddledetection/python/infer_ppyolo.py index 279c5bb9fc1..77feb8072f4 100755 --- a/examples/vision/detection/paddledetection/python/infer_ppyolo.py +++ b/examples/vision/detection/paddledetection/python/infer_ppyolo.py @@ -32,6 +32,9 @@ def build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + if args.device.lower() == "gpu": option.use_gpu() diff --git a/examples/vision/detection/paddledetection/python/infer_ppyoloe.py b/examples/vision/detection/paddledetection/python/infer_ppyoloe.py index 2b0971f1289..9f3f6283263 100755 --- a/examples/vision/detection/paddledetection/python/infer_ppyoloe.py +++ b/examples/vision/detection/paddledetection/python/infer_ppyoloe.py @@ -33,6 +33,9 @@ def build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + if args.device.lower() == "gpu": option.use_gpu() diff --git a/examples/vision/detection/paddledetection/python/infer_ppyoloesod.py b/examples/vision/detection/paddledetection/python/infer_ppyoloesod.py new file mode 100644 index 00000000000..6dece21387f --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_ppyoloesod.py @@ -0,0 +1,59 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.use_trt: + option.use_trt_backend() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.PPYOLOESOD( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im.copy()) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_pssdet.py 
b/examples/vision/detection/paddledetection/python/infer_pssdet.py new file mode 100644 index 00000000000..00ab7fb73b3 --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_pssdet.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.PSSDet( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_retinanet.py b/examples/vision/detection/paddledetection/python/infer_retinanet.py new file mode 100644 index 00000000000..65edc78c57f --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_retinanet.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.RetinaNet( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_ssd.py b/examples/vision/detection/paddledetection/python/infer_ssd.py index 536bf59447e..410f469dd7d 100755 --- a/examples/vision/detection/paddledetection/python/infer_ssd.py +++ b/examples/vision/detection/paddledetection/python/infer_ssd.py @@ -26,6 +26,9 @@ def build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + 
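+    # Note: running with "--device ascend" assumes a FastDeploy package built
+    # with Huawei Ascend (CANN) support; on a CPU/GPU-only build, use_ascend()
+    # is expected to fail when the runtime is created.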
if args.device.lower() == "gpu": option.use_gpu() return option diff --git a/examples/vision/detection/paddledetection/python/infer_tood.py b/examples/vision/detection/paddledetection/python/infer_tood.py new file mode 100644 index 00000000000..dc5d2253218 --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_tood.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.TOOD( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_ttfnet.py b/examples/vision/detection/paddledetection/python/infer_ttfnet.py new file mode 100644 index 00000000000..c3ec8a7f57c --- /dev/null +++ b/examples/vision/detection/paddledetection/python/infer_ttfnet.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.TTFNet( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片检测结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/detection/paddledetection/python/infer_yolov3.py b/examples/vision/detection/paddledetection/python/infer_yolov3.py index 8f6b509f98a..26c12f7f2fc 100755 --- a/examples/vision/detection/paddledetection/python/infer_yolov3.py +++ b/examples/vision/detection/paddledetection/python/infer_yolov3.py @@ -32,6 +32,9 @@ def 
build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + if args.device.lower() == "gpu": option.use_gpu() diff --git a/examples/vision/detection/paddledetection/rv1126/cpp/README.md b/examples/vision/detection/paddledetection/rv1126/cpp/README.md index de47ec8bfb8..c662ecb4409 100755 --- a/examples/vision/detection/paddledetection/rv1126/cpp/README.md +++ b/examples/vision/detection/paddledetection/rv1126/cpp/README.md @@ -4,13 +4,15 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) ### 模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以先使用 PaddleDetection 自行导出 Float32 模型,注意导出模型模型时设置参数:use_shared_conv=False,更多细节请参考:[PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) -- 3. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的检测模型仍然需要FP32模型文件夹下的 infer_cfg.yml 文件,自行量化的模型文件夹内不包含此 yaml 文件,用户从 FP32 模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。) -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 +2. 用户可以先使用 PaddleDetection 自行导出 Float32 模型,注意导出模型模型时设置参数:use_shared_conv=False,更多细节请参考:[PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe) +3. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的检测模型仍然需要FP32模型文件夹下的 infer_cfg.yml 文件,自行量化的模型文件夹内不包含此 yaml 文件,用户从 FP32 模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。) +4. 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 + +更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 RV1126 上部署量化后的 PP-YOLOE 检测模型 请按照以下步骤完成在 RV1126 上部署 PP-YOLOE 量化模型: diff --git a/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py b/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py index 4872b0dee2c..35054e51657 100644 --- a/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py +++ b/examples/vision/detection/paddledetection/serving/models/postprocess/1/model.py @@ -95,7 +95,7 @@ def execute(self, requests): results = self.postprocess_.run(infer_outputs) r_str = fd.vision.utils.fd_result_to_json(results) - r_np = np.array(r_str, dtype=np.object) + r_np = np.array(r_str, dtype=np.object_) out_tensor = pb_utils.Tensor(self.output_names[0], r_np) inference_response = pb_utils.InferenceResponse( output_tensors=[out_tensor, ]) diff --git a/examples/vision/detection/paddledetection/serving/models/runtime/ppyoloe_runtime_config.pbtxt b/examples/vision/detection/paddledetection/serving/models/runtime/ppyoloe_runtime_config.pbtxt index 39b2c6045f5..dc8d15845ce 100644 --- a/examples/vision/detection/paddledetection/serving/models/runtime/ppyoloe_runtime_config.pbtxt +++ b/examples/vision/detection/paddledetection/serving/models/runtime/ppyoloe_runtime_config.pbtxt @@ -1,5 +1,5 @@ # optional, If name is specified it must match the name of the model repository directory containing the model. 
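# Here the runtime model is stored under the repository directory models/runtime/,
# so the name below is set to "runtime"; a name that does not match that directory
# (such as the old "ppyoloe_runtime") is rejected when the server loads the repository.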
-name: "ppyoloe_runtime" +name: "runtime" backend: "fastdeploy" # Input configuration of the model diff --git a/examples/vision/detection/scaledyolov4/README.md b/examples/vision/detection/scaledyolov4/README.md index 36ec1af0ce0..df3799282ef 100644 --- a/examples/vision/detection/scaledyolov4/README.md +++ b/examples/vision/detection/scaledyolov4/README.md @@ -23,19 +23,18 @@ ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了ScaledYOLOv4导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [ScaledYOLOv4-P5-896](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5-896.onnx) | 271MB | 51.2% | -| [ScaledYOLOv4-P5+BoF-896](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5_-896.onnx) | 271MB | 51.7% | -| [ScaledYOLOv4-P6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6-1280.onnx) | 487MB | 53.9% | -| [ScaledYOLOv4-P6+BoF-1280](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6_-1280.onnx) | 487MB | 54.4% | -| [ScaledYOLOv4-P7-1536](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p7-1536.onnx) | 1.1GB | 55.0% | -| [ScaledYOLOv4-P5](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5.onnx) | 271MB | - | -| [ScaledYOLOv4-P5+BoF](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5_.onnx) | 271MB | -| -| [ScaledYOLOv4-P6](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6.onnx) | 487MB | - | -| [ScaledYOLOv4-P6+BoF](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6_.onnx) | 487MB | - | -| [ScaledYOLOv4-P7](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p7.onnx) | 1.1GB | - | - +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- |:----- | +| [ScaledYOLOv4-P5-896](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5-896.onnx) | 271MB | 51.2% | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P5+BoF-896](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5_-896.onnx) | 271MB | 51.7% | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6-1280.onnx) | 487MB | 53.9% | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P6+BoF-1280](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6_-1280.onnx) | 487MB | 54.4% | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P7-1536](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p7-1536.onnx) | 1.1GB | 55.0% | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P5](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5.onnx) | 271MB | - | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P5+BoF](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p5_.onnx) | 271MB | -| 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P6](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6.onnx) | 487MB | - | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| [ScaledYOLOv4-P6+BoF](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p6_.onnx) | 487MB | - | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | +| 
[ScaledYOLOv4-P7](https://bj.bcebos.com/paddlehub/fastdeploy/scaled_yolov4-p7.onnx) | 1.1GB | - | 此模型文件来源于[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolor/README.md b/examples/vision/detection/yolor/README.md index ffe29f39f5b..c9749a0886f 100644 --- a/examples/vision/detection/yolor/README.md +++ b/examples/vision/detection/yolor/README.md @@ -22,19 +22,18 @@ ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOR导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [YOLOR-P6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-p6-paper-541-1280-1280.onnx) | 143MB | 54.1% | -| [YOLOR-W6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-w6-paper-555-1280-1280.onnx) | 305MB | 55.5% | -| [YOLOR-E6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-e6-paper-564-1280-1280.onnx ) | 443MB | 56.4% | -| [YOLOR-D6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-570-1280-1280.onnx) | 580MB | 57.0% | -| [YOLOR-D6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-573-1280-1280.onnx) | 580MB | 57.3% | -| [YOLOR-P6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-p6-paper-541-640-640.onnx) | 143MB | - | -| [YOLOR-W6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-w6-paper-555-640-640.onnx) | 305MB | - | -| [YOLOR-E6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-e6-paper-564-640-640.onnx ) | 443MB | - | -| [YOLOR-D6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-570-640-640.onnx) | 580MB | - | -| [YOLOR-D6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-573-640-640.onnx) | 580MB | - | - +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- |:----- | +| [YOLOR-P6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-p6-paper-541-1280-1280.onnx) | 143MB | 54.1% | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-W6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-w6-paper-555-1280-1280.onnx) | 305MB | 55.5% | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-E6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-e6-paper-564-1280-1280.onnx ) | 443MB | 56.4% | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-D6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-570-1280-1280.onnx) | 580MB | 57.0% | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-D6-1280](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-573-1280-1280.onnx) | 580MB | 57.3% | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-P6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-p6-paper-541-640-640.onnx) | 143MB | - | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-W6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-w6-paper-555-640-640.onnx) | 305MB | - | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-E6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-e6-paper-564-640-640.onnx ) | 443MB | - | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| [YOLOR-D6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-570-640-640.onnx) | 580MB | - | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | +| 
[YOLOR-D6](https://bj.bcebos.com/paddlehub/fastdeploy/yolor-d6-paper-573-640-640.onnx) | 580MB | - | 此模型文件来源于[YOLOR](https://github.com/WongKinYiu/yolor),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolov5/README.md b/examples/vision/detection/yolov5/README.md index 095c8071d00..5ffe3f046a2 100644 --- a/examples/vision/detection/yolov5/README.md +++ b/examples/vision/detection/yolov5/README.md @@ -8,13 +8,13 @@ ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOv5导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [YOLOv5n](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5n.onnx) | 7.6MB | 28.0% | -| [YOLOv5s](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx) | 28MB | 37.4% | -| [YOLOv5m](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5m.onnx) | 82MB | 45.4% | -| [YOLOv5l](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5l.onnx) | 178MB | 49.0% | -| [YOLOv5x](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5x.onnx) | 332MB | 50.7% | +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- |:---- | +| [YOLOv5n](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5n.onnx) | 7.6MB | 28.0% | 此模型文件来源于[YOLOv5](https://github.com/ultralytics/yolov5),GPL-3.0 License | +| [YOLOv5s](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx) | 28MB | 37.4% | 此模型文件来源于[YOLOv5](https://github.com/ultralytics/yolov5),GPL-3.0 License | +| [YOLOv5m](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5m.onnx) | 82MB | 45.4% | 此模型文件来源于[YOLOv5](https://github.com/ultralytics/yolov5),GPL-3.0 License | +| [YOLOv5l](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5l.onnx) | 178MB | 49.0% | 此模型文件来源于[YOLOv5](https://github.com/ultralytics/yolov5),GPL-3.0 License | +| [YOLOv5x](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5x.onnx) | 332MB | 50.7% | 此模型文件来源于[YOLOv5](https://github.com/ultralytics/yolov5),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolov5/a311d/README.md b/examples/vision/detection/yolov5/a311d/README.md index fc2ddd40f0e..2f8451c60e7 100755 --- a/examples/vision/detection/yolov5/a311d/README.md +++ b/examples/vision/detection/yolov5/a311d/README.md @@ -1,8 +1,5 @@ # YOLOv5 量化模型在 A311D 上的部署 -目前 FastDeploy 已经支持基于 Paddle Lite 部署 YOLOv5 量化模型到 A311D 上。 - -模型的量化和量化模型的下载请参考:[模型量化](../quantize/README.md) - +目前 FastDeploy 已经支持基于 Paddle Lite 部署 [YOLOv5](https://github.com/ultralytics/yolov5/releases/tag/v6.1) 量化模型到 A311D 上。 ## 详细部署文档 diff --git a/examples/vision/detection/yolov5/a311d/cpp/README.md b/examples/vision/detection/yolov5/a311d/cpp/README.md index 0fad86f485b..2271af43500 100755 --- a/examples/vision/detection/yolov5/a311d/cpp/README.md +++ b/examples/vision/detection/yolov5/a311d/cpp/README.md @@ -4,12 +4,31 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) ### 量化模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。 -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +可以直接使用由 FastDeploy 提供的量化模型进行部署,也可以按照如下步骤准备量化模型: +1. 按照 [YOLOv5](https://github.com/ultralytics/yolov5/releases/tag/v6.1) 官方导出方式导出 ONNX 模型,或者直接使用如下命令下载 +```bash +wget https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx +``` +2. 
准备 300 张左右量化用的图片,也可以使用如下命令下载我们准备好的数据。 +```bash +wget https://bj.bcebos.com/fastdeploy/models/COCO_val_320.tar.gz +tar -xf COCO_val_320.tar.gz +``` +3. 使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。 +```bash +fastdeploy compress --config_path=./configs/detection/yolov5s_quant.yaml --method='PTQ' --save_dir='./yolov5s_ptq_model_new/' +``` +4. YOLOv5 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了 YOLOv5 模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 +```bash +# 先下载我们提供的模型,解压后将其中的 subgraph.txt 文件拷贝到新量化的模型目录中 +wget https://bj.bcebos.com/fastdeploy/models/yolov5s_ptq_model.tar.gz +tar -xvf yolov5s_ptq_model.tar.gz +``` + +更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 A311D 上部署量化后的 YOLOv5 检测模型 请按照以下步骤完成在 A311D 上部署 YOLOv5 量化模型: diff --git a/examples/vision/detection/yolov5/cpp/README.md b/examples/vision/detection/yolov5/cpp/README.md index 8f03a39ad03..c70d0d11885 100755 --- a/examples/vision/detection/yolov5/cpp/README.md +++ b/examples/vision/detection/yolov5/cpp/README.md @@ -31,6 +31,8 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000 ./infer_paddle_demo yolov5s_infer 000000014439.jpg 2 # 昆仑芯XPU推理 ./infer_paddle_demo yolov5s_infer 000000014439.jpg 3 +# 华为昇腾推理 +./infer_paddle_demo yolov5s_infer 000000014439.jpg 4 ``` 上述的模型为 Paddle 模型的推理,如果想要做 ONNX 模型的推理,可以按照如下步骤: @@ -53,6 +55,9 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: - [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + ## YOLOv5 C++接口 ### YOLOv5类 diff --git a/examples/vision/detection/yolov5/cpp/infer_paddle_model.cc b/examples/vision/detection/yolov5/cpp/infer_paddle_model.cc index e4c02af8ade..e5302eca20c 100755 --- a/examples/vision/detection/yolov5/cpp/infer_paddle_model.cc +++ b/examples/vision/detection/yolov5/cpp/infer_paddle_model.cc @@ -130,6 +130,35 @@ void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + fastdeploy::RuntimeOption option; + option.UseAscend(); + auto model = fastdeploy::vision::detection::YOLOv5( + model_file, params_file, option, fastdeploy::ModelFormat::PADDLE); + + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im = fastdeploy::vision::VisDetection(im, res); + + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + + int main(int argc, char* argv[]) { if (argc < 4) { std::cout << "Usage: infer_demo path/to/model path/to/image run_option, " @@ -149,6 +178,8 @@ int main(int argc, char* argv[]) { TrtInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 3) { KunlunXinInfer(argv[1], argv[2]); - } + } else if (std::atoi(argv[3]) == 4) { + AscendInfer(argv[1], argv[2]); + } return 0; } diff --git a/examples/vision/detection/yolov5/python/README.md b/examples/vision/detection/yolov5/python/README.md index 77f9027d584..de93ef0ecb9 100755 --- a/examples/vision/detection/yolov5/python/README.md +++ b/examples/vision/detection/yolov5/python/README.md @@ -25,6 +25,8 @@ python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu --use_trt True # 昆仑芯XPU推理 python infer.py --model yolov5s_infer --image 000000014439.jpg --device kunlunxin +# 华为昇腾推理 +python infer.py --model yolov5s_infer --image 000000014439.jpg --device ascend ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/detection/yolov5/python/infer.py b/examples/vision/detection/yolov5/python/infer.py index 7f0823d8abe..74fd96e6c99 100755 --- a/examples/vision/detection/yolov5/python/infer.py +++ b/examples/vision/detection/yolov5/python/infer.py @@ -31,6 +31,9 @@ def build_option(args): if args.device.lower() == "gpu": option.use_gpu() + if args.device.lower() == "ascend": + option.use_ascend() + if args.use_trt: option.use_trt_backend() option.set_trt_input_shape("images", [1, 3, 640, 640]) diff --git a/examples/vision/detection/yolov5/rv1126/README.md b/examples/vision/detection/yolov5/rv1126/README.md index b92c9903b63..5bac651e207 100755 --- a/examples/vision/detection/yolov5/rv1126/README.md +++ b/examples/vision/detection/yolov5/rv1126/README.md @@ -1,8 +1,5 @@ # YOLOv5 量化模型在 RV1126 上的部署 -目前 FastDeploy 已经支持基于 Paddle Lite 部署 YOLOv5 量化模型到 RV1126 上。 - -模型的量化和量化模型的下载请参考:[模型量化](../quantize/README.md) - +目前 FastDeploy 已经支持基于 Paddle Lite 部署 [YOLOv5](https://github.com/ultralytics/yolov5/releases/tag/v6.1) 量化模型到 RV1126 上。 ## 详细部署文档 diff --git a/examples/vision/detection/yolov5/rv1126/cpp/README.md b/examples/vision/detection/yolov5/rv1126/cpp/README.md index 437c4c0b968..469bb6060b6 100755 --- a/examples/vision/detection/yolov5/rv1126/cpp/README.md +++ b/examples/vision/detection/yolov5/rv1126/cpp/README.md @@ -4,12 +4,31 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) ### 量化模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。 -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +可以直接使用由 FastDeploy 提供的量化模型进行部署,也可以按照如下步骤准备量化模型: +1. 按照 [YOLOv5](https://github.com/ultralytics/yolov5/releases/tag/v6.1) 官方导出方式导出 ONNX 模型,或者直接使用如下命令下载 +```bash +wget https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx +``` +2. 准备 300 张左右量化用的图片,也可以使用如下命令下载我们准备好的数据。 +```bash +wget https://bj.bcebos.com/fastdeploy/models/COCO_val_320.tar.gz +tar -xf COCO_val_320.tar.gz +``` +3. 
使用 FastDeploy 提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。 +```bash +fastdeploy compress --config_path=./configs/detection/yolov5s_quant.yaml --method='PTQ' --save_dir='./yolov5s_ptq_model_new/' +``` +4. YOLOv5 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了 YOLOv5 模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 +```bash +# 先下载我们提供的模型,解压后将其中的 subgraph.txt 文件拷贝到新量化的模型目录中 +wget https://bj.bcebos.com/fastdeploy/models/yolov5s_ptq_model.tar.gz +tar -xvf yolov5s_ptq_model.tar.gz +``` + +更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 RV1126 上部署量化后的 YOLOv5 检测模型 请按照以下步骤完成在 RV1126 上部署 YOLOv5 量化模型: diff --git a/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py b/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py index 7c608db4372..1204446c438 100644 --- a/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py +++ b/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py @@ -96,7 +96,7 @@ def execute(self, requests): results = self.postprocessor_.run([infer_outputs], im_infos) r_str = fd.vision.utils.fd_result_to_json(results) - r_np = np.array(r_str, dtype=np.object) + r_np = np.array(r_str, dtype=np.object_) out_tensor = pb_utils.Tensor(self.output_names[0], r_np) inference_response = pb_utils.InferenceResponse( diff --git a/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py b/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py index cf4f7e8e8b3..d60de6541bc 100644 --- a/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py +++ b/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py @@ -95,7 +95,7 @@ def execute(self, requests): dlpack_tensor) output_tensor_1 = pb_utils.Tensor( self.output_names[1], np.array( - im_infos, dtype=np.object)) + im_infos, dtype=np.object_)) inference_response = pb_utils.InferenceResponse( output_tensors=[output_tensor_0, output_tensor_1]) responses.append(inference_response) diff --git a/examples/vision/detection/yolov5/sophgo/README.md b/examples/vision/detection/yolov5/sophgo/README.md new file mode 100644 index 00000000000..d4fa4f7a8b0 --- /dev/null +++ b/examples/vision/detection/yolov5/sophgo/README.md @@ -0,0 +1,75 @@ +# YOLOv5 SOPHGO部署示例 + +## 支持模型列表 + +YOLOv5 v6.0部署模型实现来自[YOLOv5](https://github.com/ultralytics/yolov5/tree/v6.0),和[基于COCO的预训练模型](https://github.com/ultralytics/yolov5/releases/tag/v6.0) + +## 准备YOLOv5部署模型以及转换模型 + +SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步骤如下: +- 下载预训练ONNX模型,请参考[YOLOv5准备部署模型](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/vision/detection/yolov5) +- ONNX模型转换bmodel模型的过程,请参考[TPU-MLIR](https://github.com/sophgo/tpu-mlir) + +## 模型转换example + +下面以YOLOv5s为例子,教大家如何转换ONNX模型到SOPHGO-TPU模型 + +## 下载YOLOv5s模型 + +### 下载ONNX YOLOv5s静态图模型 +```shell +wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx + +``` +### 导出bmodel模型 + +以转化BM1684x的bmodel模型为例子,我们需要下载[TPU-MLIR](https://github.com/sophgo/tpu-mlir)工程,安装过程具体参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 +### 1. 安装 +``` shell +docker pull sophgo/tpuc_dev:latest + +# myname1234是一个示例,也可以设置其他名字 +docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest + +source ./envsetup.sh +./build.sh +``` + +### 2. ONNX模型转换为bmodel模型 +``` shell +mkdir YOLOv5s && cd YOLOv5s + +# 在该文件中放入测试图片,同时将上一步下载的yolov5s.onnx放入该文件夹中 +cp -rf ${REGRESSION_PATH}/dataset/COCO2017 . 
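+# ${REGRESSION_PATH} is assumed to be exported by tpu-mlir's envsetup.sh in the
+# previous step; it points at the regression directory that ships the COCO2017
+# calibration images and the sample pictures (e.g. dog.jpg) copied below.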
+cp -rf ${REGRESSION_PATH}/image . +# 放入onnx模型文件yolov5s.onnx + +mkdir workspace && cd workspace + +# 将ONNX模型转换为mlir模型,其中参数--output_names可以通过NETRON查看 +model_transform.py \ + --model_name yolov5s \ + --model_def ../yolov5s.onnx \ + --input_shapes [[1,3,640,640]] \ + --mean 0.0,0.0,0.0 \ + --scale 0.0039216,0.0039216,0.0039216 \ + --keep_aspect_ratio \ + --pixel_format rgb \ + --output_names output,350,498,646 \ + --test_input ../image/dog.jpg \ + --test_result yolov5s_top_outputs.npz \ + --mlir yolov5s.mlir + +# 将mlir模型转换为BM1684x的F32 bmodel模型 +model_deploy.py \ + --mlir yolov5s.mlir \ + --quantize F32 \ + --chip bm1684x \ + --test_input yolov5s_in_f32.npz \ + --test_reference yolov5s_top_outputs.npz \ + --model yolov5s_1684x_f32.bmodel +``` +最终获得可以在BM1684x上能够运行的bmodel模型yolov5s_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 + +## 其他链接 +- [Cpp部署](./cpp) diff --git a/examples/vision/detection/yolov5/sophgo/cpp/CMakeLists.txt b/examples/vision/detection/yolov5/sophgo/cpp/CMakeLists.txt new file mode 100644 index 00000000000..53837058969 --- /dev/null +++ b/examples/vision/detection/yolov5/sophgo/cpp/CMakeLists.txt @@ -0,0 +1,17 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +set(ENABLE_LITE_BACKEND OFF) +#set(FDLIB ${FASTDEPLOY_INSTALL_DIR}) + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) +include_directories(${FastDeploy_INCLUDE_DIRS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/detection/yolov5/sophgo/cpp/README.md b/examples/vision/detection/yolov5/sophgo/cpp/README.md new file mode 100644 index 00000000000..e313da85559 --- /dev/null +++ b/examples/vision/detection/yolov5/sophgo/cpp/README.md @@ -0,0 +1,56 @@ +# YOLOv5 C++部署示例 + +本目录下提供`infer.cc`快速完成yolov5s模型在SOPHGO BM1684x板子上加速部署的示例。 + +在部署前,需确认以下两个步骤: + +1. 软硬件环境满足要求 +2. 根据开发环境,从头编译FastDeploy仓库 + +以上步骤请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)实现 + +## 生成基本目录文件 + +该例程由以下几个部分组成 +```text +. +├── CMakeLists.txt +├── build # 编译文件夹 +├── image # 存放图片的文件夹 +├── infer.cc +└── model # 存放模型文件的文件夹 +``` + +## 编译 + +### 编译并拷贝SDK到thirdpartys文件夹 + +请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录. + +### 拷贝模型文件,以及配置文件至model文件夹 +将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md) +将转换后的SOPHGO bmodel模型文件拷贝至model中 + +### 准备测试图片至image文件夹 +```bash +wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg +cp 000000014439.jpg ./images +``` + +### 编译example + +```bash +cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-0.0.3 +make +``` + +## 运行例程 + +```bash +./infer_demo model images/000000014439.jpg +``` + + +- [模型介绍](../../) +- [模型转换](../) diff --git a/examples/vision/detection/yolov5/sophgo/cpp/infer.cc b/examples/vision/detection/yolov5/sophgo/cpp/infer.cc new file mode 100644 index 00000000000..f1f63bcdc40 --- /dev/null +++ b/examples/vision/detection/yolov5/sophgo/cpp/infer.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include <string> +#include "fastdeploy/vision.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void InitAndInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "yolov5s_1684x_f32.bmodel"; + auto params_file = model_dir + sep + ""; + + fastdeploy::RuntimeOption option; + option.UseSophgo(); + auto model_format = fastdeploy::ModelFormat::SOPHGO; + + auto model = fastdeploy::vision::detection::YOLOv5( + model_file, params_file, option, model_format); + + assert(model.Initialized()); + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + +} + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: infer_demo path/to/model " + "path/to/image " + "run_option, " + "e.g ./infer_demo ./model ./test.jpeg" + << std::endl; + return -1; + } + + std::string model_dir = argv[1]; + std::string test_image = argv[2]; + InitAndInfer(model_dir, test_image); + return 0; +} diff --git a/examples/vision/detection/yolov5/sophgo/python/README.md b/examples/vision/detection/yolov5/sophgo/python/README.md new file mode 100644 index 00000000000..ccf8ed7e872 --- /dev/null +++ b/examples/vision/detection/yolov5/sophgo/python/README.md @@ -0,0 +1,46 @@ +# YOLOv5 Python部署示例 + +在部署前,需确认以下步骤 + +- 1.
软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/sophgo.md) + +本目录下提供`infer.py`快速完成 YOLOv5 在SOPHGO TPU上部署的示例。执行如下脚本即可完成 + +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/detection/yolov5/sophgo/python + +# 下载图片 +wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg + +# 推理 +python3 infer.py --model_file ./bmodel/yolov5s_1684x_f32.bmodel --image 000000014439.jpg + +# 运行完成后返回结果如下所示 +DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] +268.480255,81.053055, 298.694794, 169.439026, 0.896569, 0 +104.731163,45.661972, 127.583824, 93.449387, 0.869531, 0 +378.909363,39.750137, 395.608643, 84.243454, 0.868430, 0 +158.552979,80.361511, 199.185760, 168.181915, 0.842988, 0 +414.375305,90.948090, 506.321899, 280.405182, 0.835842, 0 +364.003448,56.608932, 381.978607, 115.968216, 0.815136, 0 +351.725128,42.635330, 366.910309, 98.048386, 0.808936, 0 +505.888306,114.366791, 593.124878, 275.995270, 0.801361, 0 +327.708618,38.363693, 346.849915, 80.893021, 0.794725, 0 +583.493408,114.532883, 612.354614, 175.873535, 0.760649, 0 +186.470657,44.941360, 199.664505, 61.037643, 0.632591, 0 +169.615891,48.014603, 178.141556, 60.888596, 0.613938, 0 +25.810200,117.199692, 59.888783, 152.850128, 0.590614, 0 +352.145294,46.712723, 381.946075, 106.752151, 0.505329, 0 +1.875000,150.734375, 37.968750, 173.781250, 0.404573, 24 +464.657288,15.901413, 472.512939, 34.116409, 0.346033, 0 +64.625000,135.171875, 84.500000, 154.406250, 0.332831, 24 +57.812500,151.234375, 103.000000, 174.156250, 0.332566, 24 +165.906250,88.609375, 527.906250, 339.953125, 0.259424, 33 +101.406250,152.562500, 118.890625, 169.140625, 0.253891, 24 +``` + +## 其它文档 +- [YOLOv5 C++部署](../cpp) +- [转换YOLOv5 SOPHGO模型文档](../README.md) diff --git a/examples/vision/detection/yolov5/sophgo/python/infer.py b/examples/vision/detection/yolov5/sophgo/python/infer.py new file mode 100644 index 00000000000..d1ea190c586 --- /dev/null +++ b/examples/vision/detection/yolov5/sophgo/python/infer.py @@ -0,0 +1,40 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, help="Path of model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + + return parser.parse_args() + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = fd.RuntimeOption() +runtime_option.use_sophgo() + +model_file = args.model +params_file = "" + +model = fd.vision.detection.YOLOv5( + model_file, + params_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.SOPHGO) + +# 预测图片分类结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_detection(im, result) +cv2.imwrite("sophgo_result.jpg", vis_im) +print("Visualized result save in ./sophgo_result.jpg") diff --git a/examples/vision/detection/yolov5lite/README.md b/examples/vision/detection/yolov5lite/README.md index e8f72099bd8..0429758b844 100644 --- a/examples/vision/detection/yolov5lite/README.md +++ b/examples/vision/detection/yolov5lite/README.md @@ -52,12 +52,12 @@ ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOv5Lite导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [YOLOv5Lite-e](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-e-sim-320.onnx) | 3.1MB | 35.1% | -| 
[YOLOv5Lite-s](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-s-sim-416.onnx) | 6.3MB | 42.0% | -| [YOLOv5Lite-c](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-c-sim-512.onnx) | 18MB | 50.9% | -| [YOLOv5Lite-g](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-g-sim-640.onnx) | 21MB | 57.6% | +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- |:----- | +| [YOLOv5Lite-e](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-e-sim-320.onnx) | 3.1MB | 35.1% | 此模型文件来源于[YOLOv5-Lite](https://github.com/ppogg/YOLOv5-Lite),GPL-3.0 License | +| [YOLOv5Lite-s](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-s-sim-416.onnx) | 6.3MB | 42.0% | 此模型文件来源于[YOLOv5-Lite](https://github.com/ppogg/YOLOv5-Lite),GPL-3.0 License | +| [YOLOv5Lite-c](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-c-sim-512.onnx) | 18MB | 50.9% | 此模型文件来源于[YOLOv5-Lite](https://github.com/ppogg/YOLOv5-Lite),GPL-3.0 License | +| [YOLOv5Lite-g](https://bj.bcebos.com/paddlehub/fastdeploy/v5Lite-g-sim-640.onnx) | 21MB | 57.6% | 此模型文件来源于[YOLOv5-Lite](https://github.com/ppogg/YOLOv5-Lite),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolov6/README.md b/examples/vision/detection/yolov6/README.md index 6c0ca042218..d31622a8b35 100644 --- a/examples/vision/detection/yolov6/README.md +++ b/examples/vision/detection/yolov6/README.md @@ -11,13 +11,12 @@ ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOv6导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [YOLOv6s](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s.onnx) | 66MB | 43.1% | -| [YOLOv6s_640](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s-640x640.onnx) | 66MB | 43.1% | -| [YOLOv6t](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6t.onnx) | 58MB | 41.3% | -| [YOLOv6n](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6n.onnx) | 17MB | 35.0% | - +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- |:----- | +| [YOLOv6s](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s.onnx) | 66MB | 43.1% | 此模型文件来源于[YOLOv6](https://github.com/meituan/YOLOv6),GPL-3.0 License | +| [YOLOv6s_640](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s-640x640.onnx) | 66MB | 43.1% | 此模型文件来源于[YOLOv6](https://github.com/meituan/YOLOv6),GPL-3.0 License | +| [YOLOv6t](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6t.onnx) | 58MB | 41.3% | 此模型文件来源于[YOLOv6](https://github.com/meituan/YOLOv6),GPL-3.0 License | +| [YOLOv6n](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6n.onnx) | 17MB | 35.0% | 此模型文件来源于[YOLOv6](https://github.com/meituan/YOLOv6),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolov6/cpp/README.md b/examples/vision/detection/yolov6/cpp/README.md index 973b6a54693..eceb5bc46f3 100755 --- a/examples/vision/detection/yolov6/cpp/README.md +++ b/examples/vision/detection/yolov6/cpp/README.md @@ -29,8 +29,14 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000 ./infer_paddle_demo yolov6s_infer 000000014439.jpg 1 # 昆仑芯XPU推理 ./infer_paddle_demo yolov6s_infer 000000014439.jpg 2 +# 华为昇腾推理 +./infer_paddle_demo yolov6s_infer 000000014439.jpg 3 ``` +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + + 如果想要验证ONNX模型的推理,可以参考如下命令: ```bash #下载官方转换好的YOLOv6 ONNX模型文件和测试图片 diff --git a/examples/vision/detection/yolov6/cpp/infer_paddle_model.cc 
b/examples/vision/detection/yolov6/cpp/infer_paddle_model.cc index fc43ee0ae63..bd5d0b0a6b7 100755 --- a/examples/vision/detection/yolov6/cpp/infer_paddle_model.cc +++ b/examples/vision/detection/yolov6/cpp/infer_paddle_model.cc @@ -96,6 +96,32 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + fastdeploy::RuntimeOption option; + option.UseAscend(); + auto model = fastdeploy::vision::detection::YOLOv6( + model_file, params_file, option, fastdeploy::ModelFormat::PADDLE); + + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} int main(int argc, char* argv[]) { if (argc < 4) { @@ -114,6 +140,8 @@ int main(int argc, char* argv[]) { GpuInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 2) { KunlunXinInfer(argv[1], argv[2]); - } + } else if (std::atoi(argv[3]) == 3) { + AscendInfer(argv[1], argv[2]); + } return 0; } diff --git a/examples/vision/detection/yolov6/python/README.md b/examples/vision/detection/yolov6/python/README.md index a12bb902084..37a06fd5cec 100755 --- a/examples/vision/detection/yolov6/python/README.md +++ b/examples/vision/detection/yolov6/python/README.md @@ -22,6 +22,8 @@ python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --d python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device gpu # 昆仑芯XPU推理 python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device kunlunxin +# 华为昇腾推理 +python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device ascend ``` 如果想要验证ONNX模型的推理,可以参考如下命令: ```bash diff --git a/examples/vision/detection/yolov6/python/infer_paddle_model.py b/examples/vision/detection/yolov6/python/infer_paddle_model.py index 25048620117..9fd1c382631 100755 --- a/examples/vision/detection/yolov6/python/infer_paddle_model.py +++ b/examples/vision/detection/yolov6/python/infer_paddle_model.py @@ -28,6 +28,9 @@ def build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + return option diff --git a/examples/vision/detection/yolov7/README.md b/examples/vision/detection/yolov7/README.md index 925c26100cf..d07350fbc5d 100644 --- a/examples/vision/detection/yolov7/README.md +++ b/examples/vision/detection/yolov7/README.md @@ -27,16 +27,14 @@ python models/export.py --grid --dynamic --end2end --weights PATH/TO/yolov7.pt ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOv7导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [YOLOv7](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx) | 141MB | 51.4% | -| [YOLOv7x](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x.onnx) | 273MB | 53.1% | -| [YOLOv7-w6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6.onnx) | 269MB | 54.9% | -| 
[YOLOv7-e6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6.onnx) | 372MB | 56.0% | -| [YOLOv7-d6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6.onnx) | 511MB | 56.6% | -| [YOLOv7-e6e](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e.onnx) | 579MB | 56.8% | - - +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- | :----- | +| [YOLOv7](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx) | 141MB | 51.4% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [YOLOv7x](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x.onnx) | 273MB | 53.1% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [YOLOv7-w6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6.onnx) | 269MB | 54.9% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [YOLOv7-e6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6.onnx) | 372MB | 56.0% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [YOLOv7-d6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6.onnx) | 511MB | 56.6% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [YOLOv7-e6e](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e.onnx) | 579MB | 56.8% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolov7/README_EN.md b/examples/vision/detection/yolov7/README_EN.md index 83394f52cc9..04001beb7b3 100644 --- a/examples/vision/detection/yolov7/README_EN.md +++ b/examples/vision/detection/yolov7/README_EN.md @@ -24,14 +24,14 @@ python models/export.py --grid --dynamic --end2end --weights PATH/TO/yolov7.pt To facilitate testing for developers, we provide below the models exported by YOLOv7, which developers can download and use directly. 
(The accuracy of the models in the table is sourced from the official library) -| Model | Size | Accuracy | -| ------------------------------------------------------------------------ | ----- | -------- | -| [YOLOv7](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx) | 141MB | 51.4% | -| [YOLOv7x](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x.onnx) | 273MB | 53.1% | -| [YOLOv7-w6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6.onnx) | 269MB | 54.9% | -| [YOLOv7-e6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6.onnx) | 372MB | 56.0% | -| [YOLOv7-d6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6.onnx) | 511MB | 56.6% | -| [YOLOv7-e6e](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e.onnx) | 579MB | 56.8% | +| Model | Size | Accuracy | Note | +| ------------------------------------------------------------------------ | ----- | -------- | -------- | +| [YOLOv7](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx) | 141MB | 51.4% | This model file comes from [YOLOv7](https://github.com/WongKinYiu/yolov7), GPL-3.0 License | +| [YOLOv7x](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x.onnx) | 273MB | 53.1% | This model file comes from [YOLOv7](https://github.com/WongKinYiu/yolov7), GPL-3.0 License | +| [YOLOv7-w6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6.onnx) | 269MB | 54.9% | This model file comes from [YOLOv7](https://github.com/WongKinYiu/yolov7), GPL-3.0 License | +| [YOLOv7-e6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6.onnx) | 372MB | 56.0% | This model file comes from [YOLOv7](https://github.com/WongKinYiu/yolov7), GPL-3.0 License | +| [YOLOv7-d6](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6.onnx) | 511MB | 56.6% | This model file comes from [YOLOv7](https://github.com/WongKinYiu/yolov7), GPL-3.0 License | +| [YOLOv7-e6e](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e.onnx) | 579MB | 56.8% | This model file comes from [YOLOv7](https://github.com/WongKinYiu/yolov7), GPL-3.0 License | ## Detailed Deployment Tutorials diff --git a/examples/vision/detection/yolov7/cpp/README.md b/examples/vision/detection/yolov7/cpp/README.md index 8fc7928a8d6..5308f7ddbe7 100755 --- a/examples/vision/detection/yolov7/cpp/README.md +++ b/examples/vision/detection/yolov7/cpp/README.md @@ -28,7 +28,13 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000 ./infer_paddle_model_demo yolov7_infer 000000014439.jpg 1 # 昆仑芯XPU推理 ./infer_paddle_model_demo yolov7_infer 000000014439.jpg 2 +# 华为昇腾推理 +./infer_paddle_model_demo yolov7_infer 000000014439.jpg 3 ``` + +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + 如果想要验证ONNX模型的推理,可以参考如下命令: ```bash #下载官方转换好的yolov7 ONNX模型文件和测试图片 diff --git a/examples/vision/detection/yolov7/cpp/infer_paddle_model.cc b/examples/vision/detection/yolov7/cpp/infer_paddle_model.cc index f88f0d3a0f4..baed4acce41 100755 --- a/examples/vision/detection/yolov7/cpp/infer_paddle_model.cc +++ b/examples/vision/detection/yolov7/cpp/infer_paddle_model.cc @@ -31,7 +31,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." 
<< std::endl; return; } @@ -68,7 +68,9 @@ int main(int argc, char* argv[]) { option.UseTrtBackend(); } else if (flag == 2) { option.UseKunlunXin(); - } + } else if (flag == 3) { + option.UseAscend(); + } std::string model_dir = argv[1]; std::string test_image = argv[2]; diff --git a/examples/vision/detection/yolov7/python/README.md b/examples/vision/detection/yolov7/python/README.md index c0aa78337a2..a925e78efa0 100755 --- a/examples/vision/detection/yolov7/python/README.md +++ b/examples/vision/detection/yolov7/python/README.md @@ -24,6 +24,8 @@ python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --dev python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device gpu # 昆仑芯XPU推理 python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device kunlunxin +# 华为昇腾推理 +python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device ascend ``` 如果想要验证ONNX模型的推理,可以参考如下命令: ```bash diff --git a/examples/vision/detection/yolov7/python/README_EN.md b/examples/vision/detection/yolov7/python/README_EN.md index 64b6fcc98d3..ee7b486b3ce 100755 --- a/examples/vision/detection/yolov7/python/README_EN.md +++ b/examples/vision/detection/yolov7/python/README_EN.md @@ -24,6 +24,8 @@ python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --dev python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device gpu # KunlunXin XPU python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device kunlunxin +# Huawei Ascend +python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device ascend ``` If you want to test ONNX model: ```bash diff --git a/examples/vision/detection/yolov7/python/infer_paddle_model.py b/examples/vision/detection/yolov7/python/infer_paddle_model.py index 2a3351e595e..28f1b747a4d 100755 --- a/examples/vision/detection/yolov7/python/infer_paddle_model.py +++ b/examples/vision/detection/yolov7/python/infer_paddle_model.py @@ -28,6 +28,9 @@ def build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + return option diff --git a/examples/vision/detection/yolov7end2end_ort/README.md b/examples/vision/detection/yolov7end2end_ort/README.md index fdfd0801936..8b733bd25f4 100644 --- a/examples/vision/detection/yolov7end2end_ort/README.md +++ b/examples/vision/detection/yolov7end2end_ort/README.md @@ -20,14 +20,14 @@ python export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOv7End2EndORT导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [yolov7-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-end2end-ort-nms.onnx) | 141MB | 51.4% | -| [yolov7x-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x-end2end-ort-nms.onnx) | 273MB | 53.1% | -| [yolov7-w6-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6-end2end-ort-nms.onnx) | 269MB | 54.9% | -| [yolov7-e6-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6-end2end-ort-nms.onnx) | 372MB | 56.0% | -| [yolov7-d6-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6-end2end-ort-nms.onnx) | 511MB | 56.6% | -| [yolov7-e6e-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e-end2end-ort-nms.onnx) | 579MB | 56.8% | +| 模型 | 大小 | 精度 | 备注 | 
+|:---------------------------------------------------------------- |:----- |:----- |:----- | +| [yolov7-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-end2end-ort-nms.onnx) | 141MB | 51.4% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7x-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x-end2end-ort-nms.onnx) | 273MB | 53.1% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-w6-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6-end2end-ort-nms.onnx) | 269MB | 54.9% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-e6-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6-end2end-ort-nms.onnx) | 372MB | 56.0% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-d6-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6-end2end-ort-nms.onnx) | 511MB | 56.6% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-e6e-end2end-ort-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e-end2end-ort-nms.onnx) | 579MB | 56.8% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/detection/yolov7end2end_trt/README.md b/examples/vision/detection/yolov7end2end_trt/README.md index 9e6d30cdf08..04be6f5687a 100644 --- a/examples/vision/detection/yolov7end2end_trt/README.md +++ b/examples/vision/detection/yolov7end2end_trt/README.md @@ -22,14 +22,14 @@ python export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 ## 下载预训练ONNX模型 为了方便开发者的测试,下面提供了YOLOv7End2EndTRT 导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库) -| 模型 | 大小 | 精度 | -|:---------------------------------------------------------------- |:----- |:----- | -| [yolov7-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-end2end-trt-nms.onnx) | 141MB | 51.4% | -| [yolov7x-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x-end2end-trt-nms.onnx) | 273MB | 53.1% | -| [yolov7-w6-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6-end2end-trt-nms.onnx) | 269MB | 54.9% | -| [yolov7-e6-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6-end2end-trt-nms.onnx) | 372MB | 56.0% | -| [yolov7-d6-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6-end2end-trt-nms.onnx) | 511MB | 56.6% | -| [yolov7-e6e-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e-end2end-trt-nms.onnx) | 579MB | 56.8% | +| 模型 | 大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- |:----- | +| [yolov7-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-end2end-trt-nms.onnx) | 141MB | 51.4% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7x-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7x-end2end-trt-nms.onnx) | 273MB | 53.1% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-w6-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-w6-end2end-trt-nms.onnx) | 269MB | 54.9% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-e6-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6-end2end-trt-nms.onnx) | 372MB | 56.0% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| 
[yolov7-d6-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-d6-end2end-trt-nms.onnx) | 511MB | 56.6% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | +| [yolov7-e6e-end2end-trt-nms](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7-e6e-end2end-trt-nms.onnx) | 579MB | 56.8% | 此模型文件来源于[YOLOv7](https://github.com/WongKinYiu/yolov7),GPL-3.0 License | ## 详细部署文档 diff --git a/examples/vision/generation/anemigan/README.md b/examples/vision/generation/anemigan/README.md new file mode 100644 index 00000000000..721ed5644b4 --- /dev/null +++ b/examples/vision/generation/anemigan/README.md @@ -0,0 +1,36 @@ +# 图像生成模型 + +FastDeploy目前支持PaddleHub预训练模型库中如下风格迁移模型的部署 + +| 模型 | 说明 | 模型格式 | +| :--- | :--- | :------- | +|[animegan_v1_hayao_60](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v1_hayao_60&en_category=GANs)|可将输入的图像转换成宫崎骏动漫风格,模型权重转换自AnimeGAN V1官方开源项目|paddle| +|[animegan_v2_paprika_97](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_97&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_hayao_64](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_hayao_64&en_category=GANs)|可将输入的图像转换成宫崎骏动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_shinkai_53](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_shinkai_53&en_category=GANs)|可将输入的图像转换成新海诚动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_shinkai_33](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_shinkai_33&en_category=GANs)|可将输入的图像转换成新海诚动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_paprika_54](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_54&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_hayao_99](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_hayao_99&en_category=GANs)|可将输入的图像转换成宫崎骏动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_paprika_74](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_74&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| +|[animegan_v2_paprika_98](https://www.paddlepaddle.org.cn/hubdetail?name=animegan_v2_paprika_98&en_category=GANs)|可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自AnimeGAN V2官方开源项目|paddle| + +## FastDeploy paddle backend部署和hub速度对比(ips, 越高越好) +| Device | FastDeploy | Hub | +| :--- | :--- | :------- | +| CPU | 0.075 | 0.069| +| GPU | 8.33 | 8.26 | + + + +## 下载预训练模型 +使用fastdeploy.download_model即可以下载模型, 例如下载animegan_v1_hayao_60 +```python +import fastdeploy as fd +fd.download_model(name='animegan_v1_hayao_60', path='./', format='paddle') +``` +将会在当前目录获得animegan_v1_hayao_60的预训练模型。 + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/vision/generation/anemigan/cpp/CMakeLists.txt b/examples/vision/generation/anemigan/cpp/CMakeLists.txt new file mode 100755 index 00000000000..7d1bd2ee11a --- /dev/null +++ b/examples/vision/generation/anemigan/cpp/CMakeLists.txt @@ -0,0 +1,13 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") +include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake) +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} ${GFLAGS_LIBRARIES}) diff --git a/examples/vision/generation/anemigan/cpp/README.md b/examples/vision/generation/anemigan/cpp/README.md new file mode 
100755 index 00000000000..9d58c6ad3f0 --- /dev/null +++ b/examples/vision/generation/anemigan/cpp/README.md @@ -0,0 +1,84 @@ +# AnimeGAN C++部署示例 + +本目录下提供`infer.cc`快速完成AnimeGAN在CPU/GPU部署的示例。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +以Linux上AnimeGAN推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.2以上(x.x.x>=1.0.2) + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载准备好的模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_testimg.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/animegan_v1_hayao_60_v1.0.0.tgz +tar xvfz animegan_v1_hayao_60_v1.0.0.tgz + +# CPU推理 +./infer_demo --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device cpu +# GPU推理 +./infer_demo --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device gpu +``` + +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## AnimeGAN C++接口 + +### AnimeGAN类 + +```c++ +fastdeploy::vision::generation::AnimeGAN( + const string& model_file, + const string& params_file = "", + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +AnimeGAN模型加载和初始化,其中model_file为导出的Paddle模型结构文件,params_file为模型参数文件。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +#### Predict函数 + +> ```c++ +> bool AnimeGAN::Predict(cv::Mat& image, cv::Mat* result) +> ``` +> +> 模型预测入口,输入图像输出风格迁移后的结果。 +> +> **参数** +> +> > * **image**: 输入数据,注意需为HWC,BGR格式 +> > * **result**: 风格转换后的图像,BGR格式 + +#### BatchPredict函数 + +> ```c++ +> bool AnimeGAN::BatchPredict(const std::vector& images, std::vector* results); +> ``` +> +> 模型预测入口,输入一组图像并输出风格迁移后的结果。 +> +> **参数** +> +> > * **images**: 输入数据,一组图像数据,注意需为HWC,BGR格式 +> > * **results**: 风格转换后的一组图像,BGR格式 + +- [模型介绍](../../) +- [Python部署](../python) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/generation/anemigan/cpp/infer.cc b/examples/vision/generation/anemigan/cpp/infer.cc new file mode 100644 index 00000000000..ad10797e973 --- /dev/null +++ b/examples/vision/generation/anemigan/cpp/infer.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
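+
+// 本示例流程: 通过 gflags 解析 --model/--image/--device 参数, 加载从 PaddleHub 下载的
+// AnimeGAN Paddle 模型, 对单张输入图片进行风格迁移, 并将结果保存为 style_transfer_result.png。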
+ +#include "fastdeploy/vision.h" +#include "gflags/gflags.h" + +DEFINE_string(model, "", "Directory of the inference model."); +DEFINE_string(image, "", "Path of the image file."); +DEFINE_string(device, "cpu", + "Type of inference device, support 'cpu' or 'gpu'."); + +void PrintUsage() { + std::cout << "Usage: infer_demo --model model_path --image img_path --device [cpu|gpu]" + << std::endl; + std::cout << "Default value of device: cpu" << std::endl; +} + +bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { + if (FLAGS_device == "gpu") { + option->UseGpu(); + } + else if (FLAGS_device == "cpu") { + option->SetPaddleMKLDNN(false); + return true; + } else { + std::cerr << "Only support device CPU/GPU now, " << FLAGS_device << " is not supported." << std::endl; + return false; + } + + return true; +} + +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option)) { + PrintUsage(); + return -1; + } + + auto model = fastdeploy::vision::generation::AnimeGAN(FLAGS_model+"/model.pdmodel", FLAGS_model+"/model.pdiparams", option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return -1; + } + + auto im = cv::imread(FLAGS_image); + cv::Mat res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return -1; + } + + cv::imwrite("style_transfer_result.png", res); + std::cout << "Visualized result saved in ./style_transfer_result.png" << std::endl; + + return 0; +} diff --git a/examples/vision/generation/anemigan/python/README.md b/examples/vision/generation/anemigan/python/README.md new file mode 100644 index 00000000000..9c4562402db --- /dev/null +++ b/examples/vision/generation/anemigan/python/README.md @@ -0,0 +1,70 @@ +# AnimeGAN Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本目录下提供`infer.py`快速完成AnimeGAN在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 + +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/generation/anemigan/python +# 下载准备好的测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_testimg.jpg + +# CPU推理 +python infer.py --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device cpu +# GPU推理 +python infer.py --model animegan_v1_hayao_60 --image style_transfer_testimg.jpg --device gpu +``` + +## AnimeGAN Python接口 + +```python +fd.vision.generation.AnimeGAN(model_file, params_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +AnimeGAN模型加载和初始化,其中model_file和params_file为用于Paddle inference的模型结构文件和参数文件。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + + +### predict函数 + +> ```python +> AnimeGAN.predict(input_image) +> ``` +> +> 模型预测入口,输入图像输出风格迁移后的结果。 +> +> **参数** +> +> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** np.ndarray, 风格转换后的图像,BGR格式 + +### batch_predict函数 +> ```python +> AnimeGAN.batch_predict函数(input_images) +> ``` +> +> 模型预测入口,输入一组图像并输出风格迁移后的结果。 +> +> **参数** +> +> > * **input_images**(list(np.ndarray)): 输入数据,一组图像数据,注意需为HWC,BGR格式 + +> **返回** list(np.ndarray), 风格转换后的一组图像,BGR格式 + +## 其它文档 + +- [风格迁移 模型介绍](..) 
+- [C++部署](../cpp) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/generation/anemigan/python/infer.py b/examples/vision/generation/anemigan/python/infer.py new file mode 100644 index 00000000000..69f610eda14 --- /dev/null +++ b/examples/vision/generation/anemigan/python/infer.py @@ -0,0 +1,43 @@ +import cv2 +import os +import fastdeploy as fd + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, help="Name of the model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + else: + option.set_paddle_mkldnn(False) + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +fd.download_model(name=args.model, path='./', format='paddle') +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +model = fd.vision.generation.AnimeGAN( + model_file, params_file, runtime_option=runtime_option) + +# 预测图片并保存结果 +im = cv2.imread(args.image) +result = model.predict(im) +cv2.imwrite('style_transfer_result.png', result) diff --git a/examples/vision/matting/rvm/README.md b/examples/vision/matting/rvm/README.md index 16f33aae44f..56d371c5c90 100755 --- a/examples/vision/matting/rvm/README.md +++ b/examples/vision/matting/rvm/README.md @@ -16,10 +16,10 @@ | 模型 | 参数大小 | 精度 | 备注 | |:---------------------------------------------------------------- |:----- |:----- | :------ | -| [rvm_mobilenetv3_fp32.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_mobilenetv3_fp32.onnx) | 15MB | - | -| [rvm_resnet50_fp32.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_resnet50_fp32.onnx) | 103MB | - | -| [rvm_mobilenetv3_trt.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_mobilenetv3_trt.onnx) | 15MB | - | -| [rvm_resnet50_trt.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_resnet50_trt.onnx) | 103MB | - | +| [rvm_mobilenetv3_fp32.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_mobilenetv3_fp32.onnx) | 15MB ||exported from [RobustVideoMatting](https://github.com/PeterL1n/RobustVideoMatting/commit/81a1093),GPL-3.0 License | +| [rvm_resnet50_fp32.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_resnet50_fp32.onnx) | 103MB | |exported from [RobustVideoMatting](https://github.com/PeterL1n/RobustVideoMatting/commit/81a1093),GPL-3.0 License | +| [rvm_mobilenetv3_trt.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_mobilenetv3_trt.onnx) | 15MB | |exported from [RobustVideoMatting](https://github.com/PeterL1n/RobustVideoMatting/commit/81a1093),GPL-3.0 License | +| [rvm_resnet50_trt.onnx](https://bj.bcebos.com/paddlehub/fastdeploy/rvm_resnet50_trt.onnx) | 103MB | | exported from [RobustVideoMatting](https://github.com/PeterL1n/RobustVideoMatting/commit/81a1093),GPL-3.0 License | **Note**: - 如果要使用 TensorRT 进行推理,需要下载后缀为 trt 的 onnx 模型文件 diff --git a/examples/vision/ocr/PP-OCRv2/cpp/CMakeLists.txt b/examples/vision/ocr/PP-OCRv2/cpp/CMakeLists.txt index 93540a7e83e..8b2f7aa6100 100644 --- a/examples/vision/ocr/PP-OCRv2/cpp/CMakeLists.txt +++ b/examples/vision/ocr/PP-OCRv2/cpp/CMakeLists.txt @@ -12,3 +12,7 @@ include_directories(${FASTDEPLOY_INCS}) 
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) # 添加FastDeploy库依赖 target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_static_shape_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/ocr/PP-OCRv2/cpp/README.md b/examples/vision/ocr/PP-OCRv2/cpp/README.md index fbde53fffce..9052dd80ec6 100755 --- a/examples/vision/ocr/PP-OCRv2/cpp/README.md +++ b/examples/vision/ocr/PP-OCRv2/cpp/README.md @@ -43,11 +43,16 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_ ./infer_demo ./ch_PP-OCRv2_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 3 # 昆仑芯XPU推理 ./infer_demo ./ch_PP-OCRv2_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 4 +# 华为昇腾推理, 需要使用静态shape的demo, 若用户需要连续地预测图片, 输入图片尺寸需要准备为统一尺寸 +./infer_static_shape_demo ./ch_PP-OCRv2_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 1 ``` 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: - [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + 运行完成可视化结果如下图所示 diff --git a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc index 3406246aaef..72a7fcf7ea9 100755 --- a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc +++ b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc @@ -65,9 +65,9 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model // Set inference batch size for cls model and rec model, the value could be -1 and 1 to positive infinity. // When inference batch size is set to -1, it means that the inference batch size - // of the cls and rec models will be the same as the number of boxes detected by the det model. + // of the cls and rec models will be the same as the number of boxes detected by the det model. ppocr_v2.SetClsBatchSize(cls_batch_size); - ppocr_v2.SetRecBatchSize(rec_batch_size); + ppocr_v2.SetRecBatchSize(rec_batch_size); if(!ppocr_v2.Initialized()){ std::cerr << "Failed to initialize PP-OCR." << std::endl; diff --git a/examples/vision/ocr/PP-OCRv2/cpp/infer_static_shape.cc b/examples/vision/ocr/PP-OCRv2/cpp/infer_static_shape.cc new file mode 100755 index 00000000000..ba5527a2e90 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv2/cpp/infer_static_shape.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
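+
+// This demo enables static shape inference for the recognition model, which is
+// needed on hardware such as Huawei Ascend that does not handle dynamic shapes
+// well. When predicting several images in a row, prepare them with a uniform
+// size (see ../README.md).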
+ +#include "fastdeploy/vision.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model_dir, const std::string& rec_model_dir, const std::string& rec_label_file, const std::string& image_file, const fastdeploy::RuntimeOption& option) { + auto det_model_file = det_model_dir + sep + "inference.pdmodel"; + auto det_params_file = det_model_dir + sep + "inference.pdiparams"; + + auto cls_model_file = cls_model_dir + sep + "inference.pdmodel"; + auto cls_params_file = cls_model_dir + sep + "inference.pdiparams"; + + auto rec_model_file = rec_model_dir + sep + "inference.pdmodel"; + auto rec_params_file = rec_model_dir + sep + "inference.pdiparams"; + + auto det_option = option; + auto cls_option = option; + auto rec_option = option; + + auto det_model = fastdeploy::vision::ocr::DBDetector(det_model_file, det_params_file, det_option); + auto cls_model = fastdeploy::vision::ocr::Classifier(cls_model_file, cls_params_file, cls_option); + auto rec_model = fastdeploy::vision::ocr::Recognizer(rec_model_file, rec_params_file, rec_label_file, rec_option); + + // Users could enable static shape infer for rec model when deploy PP-OCR on hardware + // which can not support dynamic shape infer well, like Huawei Ascend series. + rec_model.GetPreprocessor().SetStaticShapeInfer(true); + + assert(det_model.Initialized()); + assert(cls_model.Initialized()); + assert(rec_model.Initialized()); + + // The classification model is optional, so the PP-OCR can also be connected in series as follows + // auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &rec_model); + auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model); + + // When users enable static shape infer for rec model, the batch size of cls and rec model must to be set to 1. + ppocr_v2.SetClsBatchSize(1); + ppocr_v2.SetRecBatchSize(1); + + if(!ppocr_v2.Initialized()){ + std::cerr << "Failed to initialize PP-OCR." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::OCRResult result; + if (!ppocr_v2.Predict(im, &result)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << result.Str() << std::endl; + + auto vis_im = fastdeploy::vision::VisOcr(im, result); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 7) { + std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model " + "path/to/rec_model path/to/rec_label_file path/to/image " + "run_option, " + "e.g ./infer_demo ./ch_PP-OCRv2_det_infer " + "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer " + "./ppocr_keys_v1.txt ./12.jpg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with ascend." 
+ << std::endl; + return -1; + } + + fastdeploy::RuntimeOption option; + int flag = std::atoi(argv[6]); + + if (flag == 0) { + option.UseCpu(); + } else if (flag == 1) { + option.UseAscend(); + } + + std::string det_model_dir = argv[1]; + std::string cls_model_dir = argv[2]; + std::string rec_model_dir = argv[3]; + std::string rec_label_file = argv[4]; + std::string test_image = argv[5]; + InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file, test_image, option); + return 0; +} diff --git a/examples/vision/ocr/PP-OCRv2/python/README.md b/examples/vision/ocr/PP-OCRv2/python/README.md index 66bba9e5bfd..1ea95695f3a 100755 --- a/examples/vision/ocr/PP-OCRv2/python/README.md +++ b/examples/vision/ocr/PP-OCRv2/python/README.md @@ -36,6 +36,8 @@ python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2 python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu --backend trt # 昆仑芯XPU推理 python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device kunlunxin +# 华为昇腾推理,需要使用静态shape脚本, 若用户需要连续地预测图片, 输入图片尺寸需要准备为统一尺寸 +python infer_static_shape.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device ascend ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/ocr/PP-OCRv2/python/infer.py b/examples/vision/ocr/PP-OCRv2/python/infer.py index b8c731ef364..6e8fe62b157 100755 --- a/examples/vision/ocr/PP-OCRv2/python/infer.py +++ b/examples/vision/ocr/PP-OCRv2/python/infer.py @@ -58,39 +58,113 @@ def parse_arguments(): type=int, default=9, help="Number of threads while inference on CPU.") + parser.add_argument( + "--cls_bs", + type=int, + default=1, + help="Classification model inference batch size.") + parser.add_argument( + "--rec_bs", + type=int, + default=6, + help="Recognition model inference batch size") return parser.parse_args() def build_option(args): - option = fd.RuntimeOption() - if args.device.lower() == "gpu": - option.use_gpu(0) - option.set_cpu_thread_num(args.cpu_thread_num) + det_option = fd.RuntimeOption() + cls_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + + det_option.set_cpu_thread_num(args.cpu_thread_num) + cls_option.set_cpu_thread_num(args.cpu_thread_num) + rec_option.set_cpu_thread_num(args.cpu_thread_num) + + if args.device.lower() == "gpu": + det_option.use_gpu(args.device_id) + cls_option.use_gpu(args.device_id) + rec_option.use_gpu(args.device_id) if args.device.lower() == "kunlunxin": - option.use_kunlunxin() - return option + det_option.use_kunlunxin() + cls_option.use_kunlunxin() + rec_option.use_kunlunxin() + + return det_option, cls_option, rec_option if args.backend.lower() == "trt": assert args.device.lower( ) == "gpu", "TensorRT backend require inference on device GPU." - option.use_trt_backend() + det_option.use_trt_backend() + cls_option.use_trt_backend() + rec_option.use_trt_backend() + + # 设置trt input shape + # 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数. 
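+        # set_trt_input_shape 的三个 shape 依次为最小/最优/最大输入 shape,
+        # TensorRT 将在该范围内构建动态 shape 引擎; 下列数值仅为示例, 可按实际输入分辨率调整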
+ det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], + [1, 3, 960, 960]) + cls_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.cls_bs, 3, 48, 320], + [args.cls_bs, 3, 48, 1024]) + rec_option.set_trt_input_shape("x", [1, 3, 32, 10], + [args.rec_bs, 3, 32, 320], + [args.rec_bs, 3, 32, 2304]) + + # 用户可以把TRT引擎文件保存至本地 + det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt") + cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt") + rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt") + elif args.backend.lower() == "pptrt": assert args.device.lower( ) == "gpu", "Paddle-TensorRT backend require inference on device GPU." - option.use_trt_backend() - option.enable_paddle_trt_collect_shape() - option.enable_paddle_to_trt() + det_option.use_trt_backend() + det_option.enable_paddle_trt_collect_shape() + det_option.enable_paddle_to_trt() + + cls_option.use_trt_backend() + cls_option.enable_paddle_trt_collect_shape() + cls_option.enable_paddle_to_trt() + + rec_option.use_trt_backend() + rec_option.enable_paddle_trt_collect_shape() + rec_option.enable_paddle_to_trt() + + # 设置trt input shape + # 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数. + det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], + [1, 3, 960, 960]) + cls_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.cls_bs, 3, 48, 320], + [args.cls_bs, 3, 48, 1024]) + rec_option.set_trt_input_shape("x", [1, 3, 32, 10], + [args.rec_bs, 3, 32, 320], + [args.rec_bs, 3, 32, 2304]) + + # 用户可以把TRT引擎文件保存至本地 + det_option.set_trt_cache_file(args.det_model) + cls_option.set_trt_cache_file(args.cls_model) + rec_option.set_trt_cache_file(args.rec_model) + elif args.backend.lower() == "ort": - option.use_ort_backend() + det_option.use_ort_backend() + cls_option.use_ort_backend() + rec_option.use_ort_backend() + elif args.backend.lower() == "paddle": - option.use_paddle_infer_backend() + det_option.use_paddle_infer_backend() + cls_option.use_paddle_infer_backend() + rec_option.use_paddle_infer_backend() + elif args.backend.lower() == "openvino": assert args.device.lower( ) == "cpu", "OpenVINO backend require inference on device CPU." - option.use_openvino_backend() - return option + det_option.use_openvino_backend() + cls_option.use_openvino_backend() + rec_option.use_openvino_backend() + + return det_option, cls_option, rec_option args = parse_arguments() @@ -107,40 +181,15 @@ def build_option(args): rec_label_file = args.rec_label_file # 对于三个模型,均采用同样的部署配置 -# 用户也可根据自行需求分别配置 -runtime_option = build_option(args) - -# PPOCR的cls和rec模型现在已经支持推理一个Batch的数据 -# 定义下面两个变量后, 可用于设置trt输入shape, 并在PPOCR模型初始化后, 完成Batch推理设置 -cls_batch_size = 1 -rec_batch_size = 6 - -# 当使用TRT时,分别给三个模型的runtime设置动态shape,并完成模型的创建. -# 注意: 需要在检测模型创建完成后,再设置分类模型的动态输入并创建分类模型, 识别模型同理. -# 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数. 
-det_option = runtime_option -det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], - [1, 3, 960, 960]) -# 用户可以把TRT引擎文件保存至本地 -# det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt") +# 用户也可根据自己的需求,个性化配置 +det_option, cls_option, rec_option = build_option(args) + det_model = fd.vision.ocr.DBDetector( det_model_file, det_params_file, runtime_option=det_option) -cls_option = runtime_option -cls_option.set_trt_input_shape("x", [1, 3, 48, 10], - [cls_batch_size, 3, 48, 320], - [cls_batch_size, 3, 48, 1024]) -# 用户可以把TRT引擎文件保存至本地 -# cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt") cls_model = fd.vision.ocr.Classifier( cls_model_file, cls_params_file, runtime_option=cls_option) -rec_option = runtime_option -rec_option.set_trt_input_shape("x", [1, 3, 32, 10], - [rec_batch_size, 3, 32, 320], - [rec_batch_size, 3, 32, 2304]) -# 用户可以把TRT引擎文件保存至本地 -# rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt") rec_model = fd.vision.ocr.Recognizer( rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option) @@ -151,8 +200,8 @@ def build_option(args): # 给cls和rec模型设置推理时的batch size # 此值能为-1, 和1到正无穷 # 当此值为-1时, cls和rec模型的batch size将默认和det模型检测出的框的数量相同 -ppocr_v2.cls_batch_size = cls_batch_size -ppocr_v2.rec_batch_size = rec_batch_size +ppocr_v2.cls_batch_size = args.cls_bs +ppocr_v2.rec_batch_size = args.rec_bs # 预测图片准备 im = cv2.imread(args.image) diff --git a/examples/vision/ocr/PP-OCRv2/python/infer_static_shape.py b/examples/vision/ocr/PP-OCRv2/python/infer_static_shape.py new file mode 100755 index 00000000000..29055fdaae4 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv2/python/infer_static_shape.py @@ -0,0 +1,114 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--det_model", required=True, help="Path of Detection model of PPOCR.") + parser.add_argument( + "--cls_model", + required=True, + help="Path of Classification model of PPOCR.") + parser.add_argument( + "--rec_model", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--rec_label_file", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.") + parser.add_argument( + "--cpu_thread_num", + type=int, + default=9, + help="Number of threads while inference on CPU.") + return parser.parse_args() + + +def build_option(args): + + det_option = fd.RuntimeOption() + cls_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + + # 当前需要对PP-OCR启用静态shape推理的硬件只有昇腾. 
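+    # 其他设备则保持 RuntimeOption 默认配置(即使用 CPU 推理)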
+ if args.device.lower() == "ascend": + det_option.use_ascend() + cls_option.use_ascend() + rec_option.use_ascend() + + return det_option, cls_option, rec_option + + +args = parse_arguments() + +# Detection模型, 检测文字框 +det_model_file = os.path.join(args.det_model, "inference.pdmodel") +det_params_file = os.path.join(args.det_model, "inference.pdiparams") +# Classification模型,方向分类,可选 +cls_model_file = os.path.join(args.cls_model, "inference.pdmodel") +cls_params_file = os.path.join(args.cls_model, "inference.pdiparams") +# Recognition模型,文字识别模型 +rec_model_file = os.path.join(args.rec_model, "inference.pdmodel") +rec_params_file = os.path.join(args.rec_model, "inference.pdiparams") +rec_label_file = args.rec_label_file + +det_option, cls_option, rec_option = build_option(args) + +det_model = fd.vision.ocr.DBDetector( + det_model_file, det_params_file, runtime_option=det_option) + +cls_model = fd.vision.ocr.Classifier( + cls_model_file, cls_params_file, runtime_option=cls_option) + +rec_model = fd.vision.ocr.Recognizer( + rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option) + +# Rec模型启用静态shape推理 +rec_model.preprocessor.static_shape_infer = True + +# 创建PP-OCR,串联3个模型,其中cls_model可选,如无需求,可设置为None +ppocr_v2 = fd.vision.ocr.PPOCRv2( + det_model=det_model, cls_model=cls_model, rec_model=rec_model) + +# Cls模型和Rec模型的batch size 必须设置为1, 开启静态shape推理 +ppocr_v2.cls_batch_size = 1 +ppocr_v2.rec_batch_size = 1 + +# 预测图片准备 +im = cv2.imread(args.image) + +#预测并打印结果 +result = ppocr_v2.predict(im) + +print(result) + +# 可视化结果 +vis_im = fd.vision.vis_ppocr(im, result) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/ocr/PP-OCRv3/cpp/CMakeLists.txt b/examples/vision/ocr/PP-OCRv3/cpp/CMakeLists.txt index 93540a7e83e..8b2f7aa6100 100644 --- a/examples/vision/ocr/PP-OCRv3/cpp/CMakeLists.txt +++ b/examples/vision/ocr/PP-OCRv3/cpp/CMakeLists.txt @@ -12,3 +12,7 @@ include_directories(${FASTDEPLOY_INCS}) add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) # 添加FastDeploy库依赖 target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_static_shape_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/ocr/PP-OCRv3/cpp/README.md b/examples/vision/ocr/PP-OCRv3/cpp/README.md index 9c5eff4ef57..7f557a213b7 100755 --- a/examples/vision/ocr/PP-OCRv3/cpp/README.md +++ b/examples/vision/ocr/PP-OCRv3/cpp/README.md @@ -43,11 +43,16 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_ ./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 3 # 昆仑芯XPU推理 ./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 4 +# 华为昇腾推理,需要使用静态shape的demo, 若用户需要连续地预测图片, 输入图片尺寸需要准备为统一尺寸 +./infer_static_shape_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 1 ``` 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: - [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + 运行完成可视化结果如下图所示 diff --git a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc index fd25eca7e09..3b35c1d44eb 100755 --- a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc +++ 
b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc @@ -34,7 +34,7 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model auto rec_option = option; // The cls and rec model can inference a batch of images now. - // User could initialize the inference batch size and set them after create PPOCR model. + // User could initialize the inference batch size and set them after create PP-OCR model. int cls_batch_size = 1; int rec_batch_size = 6; @@ -66,9 +66,9 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model // Set inference batch size for cls model and rec model, the value could be -1 and 1 to positive infinity. // When inference batch size is set to -1, it means that the inference batch size - // of the cls and rec models will be the same as the number of boxes detected by the det model. + // of the cls and rec models will be the same as the number of boxes detected by the det model. ppocr_v3.SetClsBatchSize(cls_batch_size); - ppocr_v3.SetRecBatchSize(rec_batch_size); + ppocr_v3.SetRecBatchSize(rec_batch_size); if(!ppocr_v3.Initialized()){ std::cerr << "Failed to initialize PP-OCR." << std::endl; diff --git a/examples/vision/ocr/PP-OCRv3/cpp/infer_static_shape.cc b/examples/vision/ocr/PP-OCRv3/cpp/infer_static_shape.cc new file mode 100755 index 00000000000..aea3f5699ca --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/cpp/infer_static_shape.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model_dir, const std::string& rec_model_dir, const std::string& rec_label_file, const std::string& image_file, const fastdeploy::RuntimeOption& option) { + auto det_model_file = det_model_dir + sep + "inference.pdmodel"; + auto det_params_file = det_model_dir + sep + "inference.pdiparams"; + + auto cls_model_file = cls_model_dir + sep + "inference.pdmodel"; + auto cls_params_file = cls_model_dir + sep + "inference.pdiparams"; + + auto rec_model_file = rec_model_dir + sep + "inference.pdmodel"; + auto rec_params_file = rec_model_dir + sep + "inference.pdiparams"; + + auto det_option = option; + auto cls_option = option; + auto rec_option = option; + + auto det_model = fastdeploy::vision::ocr::DBDetector(det_model_file, det_params_file, det_option); + auto cls_model = fastdeploy::vision::ocr::Classifier(cls_model_file, cls_params_file, cls_option); + auto rec_model = fastdeploy::vision::ocr::Recognizer(rec_model_file, rec_params_file, rec_label_file, rec_option); + + // Users could enable static shape infer for rec model when deploy PP-OCR on hardware + // which can not support dynamic shape infer well, like Huawei Ascend series. 
+ rec_model.GetPreprocessor().SetStaticShapeInfer(true); + + assert(det_model.Initialized()); + assert(cls_model.Initialized()); + assert(rec_model.Initialized()); + + // The classification model is optional, so the PP-OCR can also be connected in series as follows + // auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model); + auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model); + + // When users enable static shape infer for rec model, the batch size of cls and rec model must to be set to 1. + ppocr_v3.SetClsBatchSize(1); + ppocr_v3.SetRecBatchSize(1); + + if(!ppocr_v3.Initialized()){ + std::cerr << "Failed to initialize PP-OCR." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::OCRResult result; + if (!ppocr_v3.Predict(im, &result)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << result.Str() << std::endl; + + auto vis_im = fastdeploy::vision::VisOcr(im, result); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 7) { + std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model " + "path/to/rec_model path/to/rec_label_file path/to/image " + "run_option, " + "e.g ./infer_demo ./ch_PP-OCRv3_det_infer " + "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer " + "./ppocr_keys_v1.txt ./12.jpg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with ascend." + << std::endl; + return -1; + } + + fastdeploy::RuntimeOption option; + int flag = std::atoi(argv[6]); + + if (flag == 0) { + option.UseCpu(); + } else if (flag == 1) { + option.UseAscend(); + } + + std::string det_model_dir = argv[1]; + std::string cls_model_dir = argv[2]; + std::string rec_model_dir = argv[3]; + std::string rec_label_file = argv[4]; + std::string test_image = argv[5]; + InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file, test_image, option); + return 0; +} diff --git a/examples/vision/ocr/PP-OCRv3/python/README.md b/examples/vision/ocr/PP-OCRv3/python/README.md index e87729353d6..3fcf372e05d 100755 --- a/examples/vision/ocr/PP-OCRv3/python/README.md +++ b/examples/vision/ocr/PP-OCRv3/python/README.md @@ -35,6 +35,8 @@ python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2 python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu --backend trt # 昆仑芯XPU推理 python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device kunlunxin +# 华为昇腾推理,需要使用静态shape脚本, 若用户需要连续地预测图片, 输入图片尺寸需要准备为统一尺寸 +python infer_static_shape.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device ascend ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/ocr/PP-OCRv3/python/infer.py b/examples/vision/ocr/PP-OCRv3/python/infer.py index 97ee1d07030..6dabce80ee9 100755 --- a/examples/vision/ocr/PP-OCRv3/python/infer.py +++ b/examples/vision/ocr/PP-OCRv3/python/infer.py @@ -58,39 +58,113 @@ def parse_arguments(): type=int, default=9, help="Number of threads while inference on CPU.") + parser.add_argument( + "--cls_bs", + type=int, + default=1, + 
help="Classification model inference batch size.") + parser.add_argument( + "--rec_bs", + type=int, + default=6, + help="Recognition model inference batch size") return parser.parse_args() def build_option(args): - option = fd.RuntimeOption() - if args.device.lower() == "gpu": - option.use_gpu(0) - option.set_cpu_thread_num(args.cpu_thread_num) + det_option = fd.RuntimeOption() + cls_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + + det_option.set_cpu_thread_num(args.cpu_thread_num) + cls_option.set_cpu_thread_num(args.cpu_thread_num) + rec_option.set_cpu_thread_num(args.cpu_thread_num) + + if args.device.lower() == "gpu": + det_option.use_gpu(args.device_id) + cls_option.use_gpu(args.device_id) + rec_option.use_gpu(args.device_id) if args.device.lower() == "kunlunxin": - option.use_kunlunxin() - return option + det_option.use_kunlunxin() + cls_option.use_kunlunxin() + rec_option.use_kunlunxin() + + return det_option, cls_option, rec_option if args.backend.lower() == "trt": assert args.device.lower( ) == "gpu", "TensorRT backend require inference on device GPU." - option.use_trt_backend() + det_option.use_trt_backend() + cls_option.use_trt_backend() + rec_option.use_trt_backend() + + # 设置trt input shape + # 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数. + det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], + [1, 3, 960, 960]) + cls_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.cls_bs, 3, 48, 320], + [args.cls_bs, 3, 48, 1024]) + rec_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.rec_bs, 3, 48, 320], + [args.rec_bs, 3, 48, 2304]) + + # 用户可以把TRT引擎文件保存至本地 + det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt") + cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt") + rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt") + elif args.backend.lower() == "pptrt": assert args.device.lower( ) == "gpu", "Paddle-TensorRT backend require inference on device GPU." - option.use_trt_backend() - option.enable_paddle_trt_collect_shape() - option.enable_paddle_to_trt() + det_option.use_trt_backend() + det_option.enable_paddle_trt_collect_shape() + det_option.enable_paddle_to_trt() + + cls_option.use_trt_backend() + cls_option.enable_paddle_trt_collect_shape() + cls_option.enable_paddle_to_trt() + + rec_option.use_trt_backend() + rec_option.enable_paddle_trt_collect_shape() + rec_option.enable_paddle_to_trt() + + # 设置trt input shape + # 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数. + det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], + [1, 3, 960, 960]) + cls_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.cls_bs, 3, 48, 320], + [args.cls_bs, 3, 48, 1024]) + rec_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.rec_bs, 3, 48, 320], + [args.rec_bs, 3, 48, 2304]) + + # 用户可以把TRT引擎文件保存至本地 + det_option.set_trt_cache_file(args.det_model) + cls_option.set_trt_cache_file(args.cls_model) + rec_option.set_trt_cache_file(args.rec_model) + elif args.backend.lower() == "ort": - option.use_ort_backend() + det_option.use_ort_backend() + cls_option.use_ort_backend() + rec_option.use_ort_backend() + elif args.backend.lower() == "paddle": - option.use_paddle_infer_backend() + det_option.use_paddle_infer_backend() + cls_option.use_paddle_infer_backend() + rec_option.use_paddle_infer_backend() + elif args.backend.lower() == "openvino": assert args.device.lower( ) == "cpu", "OpenVINO backend require inference on device CPU." 
- option.use_openvino_backend() - return option + det_option.use_openvino_backend() + cls_option.use_openvino_backend() + rec_option.use_openvino_backend() + + return det_option, cls_option, rec_option args = parse_arguments() @@ -107,40 +181,15 @@ def build_option(args): rec_label_file = args.rec_label_file # 对于三个模型,均采用同样的部署配置 -# 用户也可根据自行需求分别配置 -runtime_option = build_option(args) - -# PPOCR的cls和rec模型现在已经支持推理一个Batch的数据 -# 定义下面两个变量后, 可用于设置trt输入shape, 并在PPOCR模型初始化后, 完成Batch推理设置 -cls_batch_size = 1 -rec_batch_size = 6 - -# 当使用TRT时,分别给三个模型的runtime设置动态shape,并完成模型的创建. -# 注意: 需要在检测模型创建完成后,再设置分类模型的动态输入并创建分类模型, 识别模型同理. -# 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数. -det_option = runtime_option -det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], - [1, 3, 960, 960]) -# 用户可以把TRT引擎文件保存至本地 -# det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt") +# 用户也可根据自己的需求,个性化配置 +det_option, cls_option, rec_option = build_option(args) + det_model = fd.vision.ocr.DBDetector( det_model_file, det_params_file, runtime_option=det_option) -cls_option = runtime_option -cls_option.set_trt_input_shape("x", [1, 3, 48, 10], - [cls_batch_size, 3, 48, 320], - [cls_batch_size, 3, 48, 1024]) -# 用户可以把TRT引擎文件保存至本地 -# cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt") cls_model = fd.vision.ocr.Classifier( cls_model_file, cls_params_file, runtime_option=cls_option) -rec_option = runtime_option -rec_option.set_trt_input_shape("x", [1, 3, 48, 10], - [rec_batch_size, 3, 48, 320], - [rec_batch_size, 3, 48, 2304]) -# 用户可以把TRT引擎文件保存至本地 -# rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt") rec_model = fd.vision.ocr.Recognizer( rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option) @@ -151,8 +200,8 @@ def build_option(args): # 给cls和rec模型设置推理时的batch size # 此值能为-1, 和1到正无穷 # 当此值为-1时, cls和rec模型的batch size将默认和det模型检测出的框的数量相同 -ppocr_v3.cls_batch_size = cls_batch_size -ppocr_v3.rec_batch_size = rec_batch_size +ppocr_v3.cls_batch_size = args.cls_bs +ppocr_v3.rec_batch_size = args.rec_bs # 预测图片准备 im = cv2.imread(args.image) diff --git a/examples/vision/ocr/PP-OCRv3/python/infer_static_shape.py b/examples/vision/ocr/PP-OCRv3/python/infer_static_shape.py new file mode 100755 index 00000000000..e707d378c38 --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/python/infer_static_shape.py @@ -0,0 +1,114 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
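The PP-OCRv3 counterpart of the static-shape script is listed next and follows the same pattern. As the updated READMEs note for Huawei Ascend, consecutive predictions with static shape inference expect inputs of one uniform size; a purely illustrative pre-sizing helper is sketched here (the 960x960 target and plain `cv2.resize` are assumptions, not something this patch prescribes):

```python
import cv2

# Illustrative pre-sizing: bring every image to one fixed size before feeding
# the static-shape PP-OCR pipeline on Ascend (the target size is an assumption).
TARGET_SIZE = (960, 960)

def load_uniform(path):
    im = cv2.imread(path)
    return cv2.resize(im, TARGET_SIZE, interpolation=cv2.INTER_LINEAR)

# Example usage, with ppocr_v3 built exactly as in the script below:
# results = [ppocr_v3.predict(load_uniform(p)) for p in image_paths]
```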
+ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--det_model", required=True, help="Path of Detection model of PPOCR.") + parser.add_argument( + "--cls_model", + required=True, + help="Path of Classification model of PPOCR.") + parser.add_argument( + "--rec_model", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--rec_label_file", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.") + parser.add_argument( + "--cpu_thread_num", + type=int, + default=9, + help="Number of threads while inference on CPU.") + return parser.parse_args() + + +def build_option(args): + + det_option = fd.RuntimeOption() + cls_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + + # 当前需要对PP-OCR启用静态shape推理的硬件只有昇腾. + if args.device.lower() == "ascend": + det_option.use_ascend() + cls_option.use_ascend() + rec_option.use_ascend() + + return det_option, cls_option, rec_option + + +args = parse_arguments() + +# Detection模型, 检测文字框 +det_model_file = os.path.join(args.det_model, "inference.pdmodel") +det_params_file = os.path.join(args.det_model, "inference.pdiparams") +# Classification模型,方向分类,可选 +cls_model_file = os.path.join(args.cls_model, "inference.pdmodel") +cls_params_file = os.path.join(args.cls_model, "inference.pdiparams") +# Recognition模型,文字识别模型 +rec_model_file = os.path.join(args.rec_model, "inference.pdmodel") +rec_params_file = os.path.join(args.rec_model, "inference.pdiparams") +rec_label_file = args.rec_label_file + +det_option, cls_option, rec_option = build_option(args) + +det_model = fd.vision.ocr.DBDetector( + det_model_file, det_params_file, runtime_option=det_option) + +cls_model = fd.vision.ocr.Classifier( + cls_model_file, cls_params_file, runtime_option=cls_option) + +rec_model = fd.vision.ocr.Recognizer( + rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option) + +# Rec模型启用静态shape推理 +rec_model.preprocessor.static_shape_infer = True + +# 创建PP-OCR,串联3个模型,其中cls_model可选,如无需求,可设置为None +ppocr_v3 = fd.vision.ocr.PPOCRv3( + det_model=det_model, cls_model=cls_model, rec_model=rec_model) + +# Cls模型和Rec模型的batch size 必须设置为1, 开启静态shape推理 +ppocr_v3.cls_batch_size = 1 +ppocr_v3.rec_batch_size = 1 + +# 预测图片准备 +im = cv2.imread(args.image) + +#预测并打印结果 +result = ppocr_v3.predict(im) + +print(result) + +# 可视化结果 +vis_im = fd.vision.vis_ppocr(im, result) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py b/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py index faaca906737..9cfe2583e02 100644 --- a/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py +++ b/examples/vision/ocr/PP-OCRv3/serving/models/det_postprocess/1/model.py @@ -217,7 +217,7 @@ def execute(self, requests): out_tensor_0 = pb_utils.Tensor( self.output_names[0], np.array( - batch_rec_texts, dtype=np.object)) + batch_rec_texts, dtype=np.object_)) out_tensor_1 = pb_utils.Tensor(self.output_names[1], np.array(batch_rec_scores)) inference_response = pb_utils.InferenceResponse( diff --git 
a/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py b/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py index fe66e8c3f3c..c046cd929b7 100755 --- a/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py +++ b/examples/vision/ocr/PP-OCRv3/serving/models/rec_postprocess/1/model.py @@ -96,7 +96,7 @@ def execute(self, requests): results = self.postprocessor.run([infer_outputs]) out_tensor_0 = pb_utils.Tensor( self.output_names[0], np.array( - results[0], dtype=np.object)) + results[0], dtype=np.object_)) out_tensor_1 = pb_utils.Tensor(self.output_names[1], np.array(results[1])) inference_response = pb_utils.InferenceResponse( diff --git a/examples/vision/segmentation/paddleseg/a311d/cpp/README.md b/examples/vision/segmentation/paddleseg/a311d/cpp/README.md index c4eabf471d4..8bd94e6468e 100755 --- a/examples/vision/segmentation/paddleseg/a311d/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/a311d/cpp/README.md @@ -4,12 +4,14 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) ### 模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 +2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) +3. 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 + +更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 A311D 上部署量化后的 PP-LiteSeg 分割模型 请按照以下步骤完成在 A311D 上部署 PP-LiteSeg 量化模型: diff --git a/examples/vision/segmentation/paddleseg/cpp/README.md b/examples/vision/segmentation/paddleseg/cpp/README.md index 35d288ba498..07f9f4c62dc 100755 --- a/examples/vision/segmentation/paddleseg/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/cpp/README.md @@ -34,6 +34,8 @@ wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png ./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 # 昆仑芯XPU推理 ./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 +# 华为昇腾推理 +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 4 ``` 运行完成可视化结果如下图所示 @@ -44,6 +46,9 @@ wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: - [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + ## PaddleSeg C++接口 ### PaddleSeg类 diff --git a/examples/vision/segmentation/paddleseg/cpp/infer.cc b/examples/vision/segmentation/paddleseg/cpp/infer.cc index 389699a5129..ae97c0406ed 100755 --- a/examples/vision/segmentation/paddleseg/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/cpp/infer.cc @@ -135,6 +135,34 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } +void AscendInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = 
model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + auto option = fastdeploy::RuntimeOption(); + option.UseAscend(); + auto model = fastdeploy::vision::segmentation::PaddleSegModel( + model_file, params_file, config_file, option); + + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::SegmentationResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + int main(int argc, char* argv[]) { if (argc < 4) { std::cout @@ -155,6 +183,8 @@ int main(int argc, char* argv[]) { TrtInfer(argv[1], argv[2]); } else if (std::atoi(argv[3]) == 3) { KunlunXinInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 4) { + AscendInfer(argv[1], argv[2]); } return 0; } diff --git a/examples/vision/segmentation/paddleseg/python/README.md b/examples/vision/segmentation/paddleseg/python/README.md index 9b5163bdfe0..02b2e6ab5f5 100755 --- a/examples/vision/segmentation/paddleseg/python/README.md +++ b/examples/vision/segmentation/paddleseg/python/README.md @@ -27,6 +27,8 @@ python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_ python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True # 昆仑芯XPU推理 python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin +# 华为昇腾推理 +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device ascend ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/segmentation/paddleseg/python/infer.py b/examples/vision/segmentation/paddleseg/python/infer.py index 0b19fedc2f8..6862330ed63 100755 --- a/examples/vision/segmentation/paddleseg/python/infer.py +++ b/examples/vision/segmentation/paddleseg/python/infer.py @@ -33,6 +33,9 @@ def build_option(args): if args.device.lower() == "kunlunxin": option.use_kunlunxin() + if args.device.lower() == "ascend": + option.use_ascend() + if args.use_trt: option.use_trt_backend() option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024], diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc index 834b2ccb3fc..f80d3fc8f5e 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc @@ -62,7 +62,8 @@ void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { std::cerr << "Failed to initialize." 
<< std::endl; return; } - model.GetPreprocessor().DisableNormalizeAndPermute(); + model.GetPreprocessor().DisablePermute(); + model.GetPreprocessor().DisableNormalize(); fastdeploy::TimeCounter tc; tc.Start(); diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py index 4168d591df8..193a6dfb9b3 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py +++ b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py @@ -49,7 +49,8 @@ def build_option(args): runtime_option=runtime_option, model_format=fd.ModelFormat.RKNN) -model.preprocessor.disable_normalize_and_permute() +model.preprocessor.disable_normalize() +model.preprocessor.disable_permute() # 预测图片分割结果 im = cv2.imread(args.image) diff --git a/examples/vision/segmentation/paddleseg/rv1126/cpp/README.md b/examples/vision/segmentation/paddleseg/rv1126/cpp/README.md index 5975ac3f670..a15dd0a6473 100755 --- a/examples/vision/segmentation/paddleseg/rv1126/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/rv1126/cpp/README.md @@ -4,12 +4,14 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -- 1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) ### 模型准备 -- 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -- 2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) -- 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 +2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) +3. 
模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。
+
+更多量化相关信息可查阅[模型量化](../../quantize/README.md)
 ## 在 RV1126 上部署量化后的 PP-LiteSeg 分割模型
 请按照以下步骤完成在 RV1126 上部署 PP-LiteSeg 量化模型:
diff --git a/examples/vision/segmentation/paddleseg/sophgo/README.md b/examples/vision/segmentation/paddleseg/sophgo/README.md
new file mode 100644
index 00000000000..afebe34519a
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/sophgo/README.md
@@ -0,0 +1,89 @@
+# PaddleSeg C++部署示例
+
+## 支持模型列表
+
+- PP-LiteSeg部署模型实现来自[PaddleSeg PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md)
+
+## 准备PP-LiteSeg部署模型以及转换模型
+
+SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步骤如下:
+- 下载Paddle模型[PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz)
+- Paddle模型转换为ONNX模型,请参考[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX)
+- ONNX模型转换bmodel模型的过程,请参考[TPU-MLIR](https://github.com/sophgo/tpu-mlir)
+
+## 模型转换example
+
+下面以[PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz)为例子,教大家如何转换Paddle模型到SOPHGO-TPU模型
+
+### 下载PP-LiteSeg-B(STDC2)-cityscapes-without-argmax模型,并转换为ONNX模型
+```shell
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz
+tar xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz
+
+# 修改PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer模型的输入shape,由动态输入变成固定输入
+python paddle_infer_shape.py --model_dir PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer \
+                             --model_filename model.pdmodel \
+                             --params_filename model.pdiparams \
+                             --save_dir pp_liteseg_fix \
+                             --input_shape_dict="{'x':[1,3,512,512]}"
+
+#将固定输入的Paddle模型转换成ONNX模型
+paddle2onnx --model_dir pp_liteseg_fix \
+            --model_filename model.pdmodel \
+            --params_filename model.pdiparams \
+            --save_file pp_liteseg.onnx \
+            --enable_dev_version True
+```
+
+### 导出bmodel模型
+
+以转换BM1684x的bmodel模型为例子,我们需要下载[TPU-MLIR](https://github.com/sophgo/tpu-mlir)工程,安装过程具体参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。
+### 1. 安装
+``` shell
+docker pull sophgo/tpuc_dev:latest
+
+# myname1234是一个示例,也可以设置其他名字
+docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest
+
+source ./envsetup.sh
+./build.sh
+```
+
+### 2. ONNX模型转换为bmodel模型
+``` shell
+mkdir pp_liteseg && cd pp_liteseg
+
+#在该文件夹中放入测试图片,同时将上一步转换的pp_liteseg.onnx放入该文件夹中
+cp -rf ${REGRESSION_PATH}/dataset/COCO2017 .
+cp -rf ${REGRESSION_PATH}/image .
+#放入onnx模型文件pp_liteseg.onnx + +mkdir workspace && cd workspace + +#将ONNX模型转换为mlir模型,其中参数--output_names可以通过NETRON查看 +model_transform.py \ + --model_name pp_liteseg \ + --model_def ../pp_liteseg.onnx \ + --input_shapes [[1,3,512,512]] \ + --mean 0.0,0.0,0.0 \ + --scale 0.0039216,0.0039216,0.0039216 \ + --keep_aspect_ratio \ + --pixel_format rgb \ + --output_names bilinear_interp_v2_6.tmp_0 \ + --test_input ../image/dog.jpg \ + --test_result pp_liteseg_top_outputs.npz \ + --mlir pp_liteseg.mlir + +#将mlir模型转换为BM1684x的F32 bmodel模型 +model_deploy.py \ + --mlir pp_liteseg.mlir \ + --quantize F32 \ + --chip bm1684x \ + --test_input pp_liteseg_in_f32.npz \ + --test_reference pp_liteseg_top_outputs.npz \ + --model pp_liteseg_1684x_f32.bmodel +``` +最终获得可以在BM1684x上能够运行的bmodel模型pp_liteseg_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 + +## 其他链接 +- [Cpp部署](./cpp) diff --git a/examples/vision/segmentation/paddleseg/sophgo/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/sophgo/cpp/CMakeLists.txt new file mode 100644 index 00000000000..53837058969 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/sophgo/cpp/CMakeLists.txt @@ -0,0 +1,17 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +set(ENABLE_LITE_BACKEND OFF) +#set(FDLIB ${FASTDEPLOY_INSTALL_DIR}) + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) +include_directories(${FastDeploy_INCLUDE_DIRS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md b/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md new file mode 100644 index 00000000000..dac3ed565dd --- /dev/null +++ b/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md @@ -0,0 +1,56 @@ +# PaddleSeg C++部署示例 + +本目录下提供`infer.cc`快速完成pp_liteseg模型在SOPHGO BM1684x板子上加速部署的示例。 + +在部署前,需确认以下两个步骤: + +1. 软硬件环境满足要求 +2. 根据开发环境,从头编译FastDeploy仓库 + +以上步骤请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)实现 + +## 生成基本目录文件 + +该例程由以下几个部分组成 +```text +. +├── CMakeLists.txt +├── build # 编译文件夹 +├── image # 存放图片的文件夹 +├── infer.cc +└── model # 存放模型文件的文件夹 +``` + +## 编译 + +### 编译并拷贝SDK到thirdpartys文件夹 + +请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录. + +### 拷贝模型文件,以及配置文件至model文件夹 +将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md) +将转换后的SOPHGO bmodel模型文件拷贝至model中 + +### 准备测试图片至image文件夹 +```bash +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png +cp cityscapes_demo.png ./images +``` + +### 编译example + +```bash +cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-0.0.3 +make +``` + +## 运行例程 + +```bash +./infer_demo model images/cityscapes_demo.png +``` + + +- [模型介绍](../../) +- [模型转换](../) diff --git a/examples/vision/segmentation/paddleseg/sophgo/cpp/infer.cc b/examples/vision/segmentation/paddleseg/sophgo/cpp/infer.cc new file mode 100644 index 00000000000..934ab648c97 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/sophgo/cpp/infer.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include "fastdeploy/vision.h" + +void SophgoInfer(const std::string& model_dir, const std::string& image_file) { + std::string model_file = model_dir + "/pp_liteseg_1684x_f32.bmodel"; + std::string params_file; + std::string config_file = model_dir + "/deploy.yaml"; + auto option = fastdeploy::RuntimeOption(); + option.UseSophgo(); + auto model_format = fastdeploy::ModelFormat::SOPHGO; + + auto model = fastdeploy::vision::segmentation::PaddleSegModel( + model_file, params_file, config_file, option, model_format); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + //model.GetPreprocessor().DisableNormalizeAndPermute(); + + fastdeploy::TimeCounter tc; + tc.Start(); + auto im_org = cv::imread(image_file); + + //the input of bmodel should be fixed + int new_width = 512; + int new_height = 512; + cv::Mat im; + cv::resize(im_org, im, cv::Size(new_width, new_height), cv::INTER_LINEAR); + + fastdeploy::vision::SegmentationResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + auto vis_im = fastdeploy::vision::VisSegmentation(im, res); + tc.End(); + tc.PrintInfo("PPSeg in Sophgo"); + + cv::imwrite("infer_sophgo.jpg", vis_im); + std::cout + << "Visualized result saved in ./infer_sophgo.jpg" + << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./bmodel ./test.jpeg" + << std::endl; + return -1; + } + + SophgoInfer(argv[1], argv[2]); + return 0; +} + diff --git a/examples/vision/segmentation/paddleseg/sophgo/python/README.md b/examples/vision/segmentation/paddleseg/sophgo/python/README.md new file mode 100644 index 00000000000..e04ad28c4d1 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/sophgo/python/README.md @@ -0,0 +1,26 @@ +# PaddleSeg Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 
软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/sophgo.md) + +本目录下提供`infer.py`快速完成 pp_liteseg 在SOPHGO TPU上部署的示例。执行如下脚本即可完成 + +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/sophgo/python + +# 下载图片 +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# 推理 +python3 infer.py --model_file ./bmodel/pp_liteseg_1684x_f32.bmodel --config_file ./bmodel/deploy.yaml --image cityscapes_demo.png + +# 运行完成后返回结果如下所示 +运行结果保存在sophgo_img.png中 +``` + +## 其它文档 +- [pp_liteseg C++部署](../cpp) +- [转换 pp_liteseg SOPHGO模型文档](../README.md) diff --git a/examples/vision/segmentation/paddleseg/sophgo/python/infer.py b/examples/vision/segmentation/paddleseg/sophgo/python/infer.py new file mode 100644 index 00000000000..1b294da6022 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/sophgo/python/infer.py @@ -0,0 +1,45 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, help="Path of model.") + parser.add_argument( + "--config_file", required=True, help="Path of config file.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + + return parser.parse_args() + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = fd.RuntimeOption() +runtime_option.use_sophgo() + +model_file = args.model +params_file = "" +config_file = args.config_file + +model = fd.vision.segmentation.PaddleSegModel( + model_file, + params_file, + config_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.SOPHGO) + +# 预测图片分类结果 +im_org = cv2.imread(args.image) +#bmodel 是静态模型,模型输入固定,这里设置为[512, 512] +im = cv2.resize(im_org, [512, 512], interpolation=cv2.INTER_LINEAR) +result = model.predict(im) +print(result) + +# 预测结果可视化 +vis_im = fd.vision.vis_segmentation(im, result, weight=0.5) +cv2.imwrite("sophgo_img.png", vis_im) diff --git a/fastdeploy/backends/backend.h b/fastdeploy/backends/backend.h index 02c94875d29..5affeb7567e 100644 --- a/fastdeploy/backends/backend.h +++ b/fastdeploy/backends/backend.h @@ -19,7 +19,6 @@ #include #include -#include "fastdeploy/backends/common/multiclass_nms.h" #include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/core/fd_type.h" diff --git a/fastdeploy/backends/lite/lite_backend.h b/fastdeploy/backends/lite/lite_backend.h index 261a4af396e..7718dbb0f09 100755 --- a/fastdeploy/backends/lite/lite_backend.h +++ b/fastdeploy/backends/lite/lite_backend.h @@ -20,50 +20,10 @@ #include #include "fastdeploy/backends/backend.h" +#include "fastdeploy/backends/lite/option.h" #include "paddle_api.h" // NOLINT namespace fastdeploy { - -struct LiteBackendOption { - // cpu num threads - int threads = 1; - // lite power mode - // 0: LITE_POWER_HIGH - // 1: LITE_POWER_LOW - // 2: LITE_POWER_FULL - // 3: LITE_POWER_NO_BIND - // 4: LITE_POWER_RAND_HIGH - // 5: LITE_POWER_RAND_LOW - int power_mode = 3; - // enable fp16 - bool enable_fp16 = false; - // enable int8 - bool enable_int8 = false; - // optimized model dir for CxxConfig - std::string optimized_model_dir = ""; - // TODO(qiuyanjun): support more options for lite backend. - // Such as fp16, different device target (kARM/kXPU/kNPU/...) 
- std::string nnadapter_subgraph_partition_config_path = ""; - std::string nnadapter_subgraph_partition_config_buffer = ""; - std::string nnadapter_context_properties = ""; - std::string nnadapter_model_cache_dir = ""; - std::string nnadapter_mixed_precision_quantization_config_path = ""; - std::map>> - nnadapter_dynamic_shape_info = {{"", {{0}}}}; - std::vector nnadapter_device_names = {}; - bool enable_timvx = false; - bool enable_ascend = false; - bool enable_kunlunxin = false; - int device_id = 0; - int kunlunxin_l3_workspace_size = 0xfffc00; - bool kunlunxin_locked = false; - bool kunlunxin_autotune = true; - std::string kunlunxin_autotune_file = ""; - std::string kunlunxin_precision = "int16"; - bool kunlunxin_adaptive_seqlen = false; - bool kunlunxin_enable_multi_stream = false; -}; - // Convert data type from paddle lite to fastdeploy FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype); diff --git a/fastdeploy/backends/lite/option.h b/fastdeploy/backends/lite/option.h new file mode 100755 index 00000000000..2a4ba7a330d --- /dev/null +++ b/fastdeploy/backends/lite/option.h @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +namespace fastdeploy { + +struct LiteBackendOption { + // cpu num threads + int threads = 1; + // lite power mode + // 0: LITE_POWER_HIGH + // 1: LITE_POWER_LOW + // 2: LITE_POWER_FULL + // 3: LITE_POWER_NO_BIND + // 4: LITE_POWER_RAND_HIGH + // 5: LITE_POWER_RAND_LOW + int power_mode = 3; + // enable fp16 + bool enable_fp16 = false; + // enable int8 + bool enable_int8 = false; + // optimized model dir for CxxConfig + std::string optimized_model_dir = ""; + // TODO(qiuyanjun): support more options for lite backend. + // Such as fp16, different device target (kARM/kXPU/kNPU/...) 
+ std::string nnadapter_subgraph_partition_config_path = ""; + std::string nnadapter_subgraph_partition_config_buffer = ""; + std::string nnadapter_context_properties = ""; + std::string nnadapter_model_cache_dir = ""; + std::string nnadapter_mixed_precision_quantization_config_path = ""; + std::map>> + nnadapter_dynamic_shape_info = {{"", {{0}}}}; + std::vector nnadapter_device_names = {}; + bool enable_timvx = false; + bool enable_ascend = false; + bool enable_kunlunxin = false; + int device_id = 0; + int kunlunxin_l3_workspace_size = 0xfffc00; + bool kunlunxin_locked = false; + bool kunlunxin_autotune = true; + std::string kunlunxin_autotune_file = ""; + std::string kunlunxin_precision = "int16"; + bool kunlunxin_adaptive_seqlen = false; + bool kunlunxin_enable_multi_stream = false; +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.cu b/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.cu index 2fa63f36d98..3f1abb894a2 100755 --- a/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.cu +++ b/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.cu @@ -2,7 +2,9 @@ namespace fastdeploy { -__global__ void CudaCastKernel(const float* in, float* out, int edge, int out_bc_offset, int in_bc_offset, int ih, int iw, int oh, int ow, bool is_avg) { +__global__ void CudaCastKernel(const float* in, float* out, int edge, + int out_bc_offset, int in_bc_offset, int ih, + int iw, int oh, int ow, bool is_avg) { int position = blockDim.x * blockIdx.x + threadIdx.x; if (position >= edge) { return; @@ -14,38 +16,41 @@ __global__ void CudaCastKernel(const float* in, float* out, int edge, int out_b int hend = ceilf(static_cast((h + 1) * ih) / oh); int wstart = floorf(static_cast(w * iw) / ow); int wend = ceilf(static_cast((w + 1) * iw) / ow); - if(is_avg) { + if (is_avg) { out[position] = 0.0; } else { out[position] = in[offset * in_bc_offset + hstart * iw + wstart]; } for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { + for (int w = wstart; w < wend; ++w) { int input_idx = h * iw + w; - if(is_avg) { + if (is_avg) { out[position] = out[position] + in[offset * in_bc_offset + input_idx]; } else { - out[position] = max(out[position], in[offset * in_bc_offset + input_idx]); + out[position] = + max(out[position], in[offset * in_bc_offset + input_idx]); } } } out[position] = out[position] / ((hend - hstart) * (wend - wstart)); } -void CudaAdaptivePool(const std::vector& input_dims, const std::vector& output_dims, float* output, const float* input, void* compute_stream, const std::string& pooling_type){ +void CudaAdaptivePool(const std::vector& input_dims, + const std::vector& output_dims, float* output, + const float* input, void* compute_stream, + const std::string& pooling_type) { auto casted_compute_stream = reinterpret_cast(compute_stream); int out_bc_offset = output_dims[2] * output_dims[3]; int in_bc_offset = input_dims[2] * input_dims[3]; int jobs = 1; - for(int i : output_dims) { + for (int i : output_dims) { jobs *= i; } bool is_avg = pooling_type == "avg"; int threads = 256; int blocks = ceil(jobs / static_cast(threads)); CudaCastKernel<<>>( - input, - output, - jobs, out_bc_offset, in_bc_offset, int(input_dims[2]), int(input_dims[3]), int(output_dims[2]), int(output_dims[3]), is_avg); + input, output, jobs, out_bc_offset, in_bc_offset, int(input_dims[2]), + int(input_dims[3]), int(output_dims[2]), int(output_dims[3]), is_avg); } } // namespace fastdeploy \ No newline at end of file diff --git 
a/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h b/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h index 3e68908ed61..dc29c07dc0f 100755 --- a/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h +++ b/fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h @@ -15,21 +15,18 @@ #pragma once +#include #include #include -#include #include -#include #include +#include namespace fastdeploy { void CudaAdaptivePool(const std::vector& input_dims, - const std::vector& output_dims, - float* output, - const float* input, - void* compute_stream, + const std::vector& output_dims, float* output, + const float* input, void* compute_stream, const std::string& pooling_type); - } // namespace fastdeploy diff --git a/fastdeploy/backends/openvino/option.h b/fastdeploy/backends/openvino/option.h new file mode 100644 index 00000000000..fa18d5ef900 --- /dev/null +++ b/fastdeploy/backends/openvino/option.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +namespace fastdeploy { + +struct OpenVINOBackendOption { + std::string device = "CPU"; + int cpu_thread_num = -1; + int num_streams = 0; + std::map> shape_infos; + std::set cpu_operators{"MulticlassNms"}; +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/openvino/ov_backend.cc b/fastdeploy/backends/openvino/ov_backend.cc index 6858f85471c..553d116e096 100755 --- a/fastdeploy/backends/openvino/ov_backend.cc +++ b/fastdeploy/backends/openvino/ov_backend.cc @@ -341,8 +341,7 @@ int OpenVINOBackend::NumInputs() const { return input_infos_.size(); } int OpenVINOBackend::NumOutputs() const { return output_infos_.size(); } bool OpenVINOBackend::Infer(std::vector& inputs, - std::vector* outputs, - bool copy_to_fd) { + std::vector* outputs, bool copy_to_fd) { if (inputs.size() != input_infos_.size()) { FDERROR << "[OpenVINOBackend] Size of the inputs(" << inputs.size() << ") should keep same with the inputs of this model(" @@ -365,19 +364,17 @@ bool OpenVINOBackend::Infer(std::vector& inputs, auto out_tensor_shape = out_tensor.get_shape(); std::vector shape(out_tensor_shape.begin(), out_tensor_shape.end()); - if(copy_to_fd) { - (*outputs)[i].Resize(shape, + if (copy_to_fd) { + (*outputs)[i].Resize(shape, OpenVINODataTypeToFD(out_tensor.get_element_type()), - output_infos_[i].name, - Device::CPU); + output_infos_[i].name, Device::CPU); memcpy((*outputs)[i].MutableData(), out_tensor.data(), - (*outputs)[i].Nbytes()); + (*outputs)[i].Nbytes()); } else { (*outputs)[i].name = output_infos_[i].name; - (*outputs)[i].SetExternalData(shape, - OpenVINODataTypeToFD(out_tensor.get_element_type()), - out_tensor.data(), - Device::CPU); + (*outputs)[i].SetExternalData( + shape, OpenVINODataTypeToFD(out_tensor.get_element_type()), + out_tensor.data(), Device::CPU); } } return true; diff --git a/fastdeploy/backends/openvino/ov_backend.h 
b/fastdeploy/backends/openvino/ov_backend.h index 2dadab29d2b..6c07de0f064 100644 --- a/fastdeploy/backends/openvino/ov_backend.h +++ b/fastdeploy/backends/openvino/ov_backend.h @@ -21,18 +21,11 @@ #include "fastdeploy/backends/backend.h" #include "fastdeploy/utils/unique_ptr.h" +#include "fastdeploy/backends/openvino/option.h" #include "openvino/openvino.hpp" namespace fastdeploy { -struct OpenVINOBackendOption { - std::string device = "CPU"; - int cpu_thread_num = -1; - int num_streams = 0; - std::map> shape_infos; - std::set cpu_operators{"MulticlassNms"}; -}; - class OpenVINOBackend : public BaseBackend { public: static ov::Core core_; @@ -47,8 +40,7 @@ class OpenVINOBackend : public BaseBackend { InitFromOnnx(const std::string& model_file, const OpenVINOBackendOption& option = OpenVINOBackendOption()); - bool Infer(std::vector& inputs, - std::vector* outputs, + bool Infer(std::vector& inputs, std::vector* outputs, bool copy_to_fd = true) override; int NumInputs() const override; diff --git a/fastdeploy/backends/ort/ops/adaptive_pool2d.cc b/fastdeploy/backends/ort/ops/adaptive_pool2d.cc index 045b42eb627..6e7a1d6940a 100755 --- a/fastdeploy/backends/ort/ops/adaptive_pool2d.cc +++ b/fastdeploy/backends/ort/ops/adaptive_pool2d.cc @@ -25,30 +25,38 @@ struct OrtTensorDimensions : std::vector { } }; -void AdaptivePool2dKernel::CpuAdaptivePool(const std::vector& input_size, const std::vector& output_size, const float* input_data, float* output_data){ +void AdaptivePool2dKernel::CpuAdaptivePool( + const std::vector& input_size, + const std::vector& output_size, const float* input_data, + float* output_data) { int64_t in_bc_offset = input_size[2] * input_size[3]; int64_t out_bc_offset = output_size[2] * output_size[3]; - for (int64_t b = 0; b < output_size[0] ; b++) { - for (int64_t c = 0; c < output_size[1] ; c++) { - for(int64_t h = 0; h < output_size[2]; h++){ - int64_t hstart = std::floor( static_cast(h * input_size[2]) / output_size[2]); - int64_t hend = std::ceil(static_cast((h + 1) * input_size[2]) / output_size[2]); - for(int64_t w = 0; w < output_size[3]; w++){ - int64_t wstart = std::floor(static_cast(w * input_size[3]) / output_size[3]); - int64_t wend = std::ceil(static_cast((w + 1) * input_size[3]) / output_size[3]); + for (int64_t b = 0; b < output_size[0]; b++) { + for (int64_t c = 0; c < output_size[1]; c++) { + for (int64_t h = 0; h < output_size[2]; h++) { + int64_t hstart = + std::floor(static_cast(h * input_size[2]) / output_size[2]); + int64_t hend = std::ceil(static_cast((h + 1) * input_size[2]) / + output_size[2]); + for (int64_t w = 0; w < output_size[3]; w++) { + int64_t wstart = std::floor(static_cast(w * input_size[3]) / + output_size[3]); + int64_t wend = std::ceil(static_cast((w + 1) * input_size[3]) / + output_size[3]); int64_t out_offset = h * output_size[3] + w; output_data[out_offset] = 0; - for(auto i = hstart; i < hend; i++){ - for(auto j = wstart; j< wend; j++){ - if(pooling_type_ == "avg"){ + for (auto i = hstart; i < hend; i++) { + for (auto j = wstart; j < wend; j++) { + if (pooling_type_ == "avg") { output_data[out_offset] += input_data[i * input_size[3] + j]; } - if(pooling_type_ == "max"){ - output_data[out_offset] = std::max(output_data[out_offset], input_data[i * input_size[3] + j]); + if (pooling_type_ == "max") { + output_data[out_offset] = std::max( + output_data[out_offset], input_data[i * input_size[3] + j]); } } } - if(pooling_type_ == "avg"){ + if (pooling_type_ == "avg") { output_data[out_offset] /= ((hend - hstart) * (wend - wstart)); } } @@ 
-64,26 +72,27 @@ void AdaptivePool2dKernel::Compute(OrtKernelContext* context) { const float* input_data = reinterpret_cast(ort_.GetTensorData(input)); - + OrtTensorDimensions input_dim(ort_, input); output_size_[0] = input_dim[0]; std::vector input_size; - for(auto i: input_dim){ + for (auto i : input_dim) { input_size.push_back(i); } - + OrtValue* output = ort_.KernelContext_GetOutput( context, 0, output_size_.data(), output_size_.size()); - + float* output_data = ort_.GetTensorMutableData(output); - if(!strcmp(this->provider_, "CUDAExecutionProvider")){ + if (!strcmp(this->provider_, "CUDAExecutionProvider")) { #ifdef WITH_GPU auto compute_stream = ort_.KernelContext_GetGPUComputeStream(context); - CudaAdaptivePool(input_size, output_size_, output_data, input_data, compute_stream, pooling_type_); + CudaAdaptivePool(input_size, output_size_, output_data, input_data, + compute_stream, pooling_type_); #else - FDWARNING << "FastDeploy didn't compile with WITH_GPU. " - << "Will force to use CPU to run." << std::endl; - CpuAdaptivePool(input_size, output_size_, input_data, output_data); + FDWARNING << "FastDeploy didn't compile with WITH_GPU. " + << "Will force to use CPU to run." << std::endl; + CpuAdaptivePool(input_size, output_size_, input_data, output_data); #endif } else { CpuAdaptivePool(input_size, output_size_, input_data, output_data); @@ -91,9 +100,13 @@ void AdaptivePool2dKernel::Compute(OrtKernelContext* context) { } void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo* info) { - pooling_type_ = ort_.KernelInfoGetAttribute(info, "pooling_type"); - output_size_ = ort_.KernelInfoGetAttribute>(info, "output_size"); - FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 && output_size_[3] > 0, "The output size of adaptive pool must be positive."); + pooling_type_ = + ort_.KernelInfoGetAttribute(info, "pooling_type"); + output_size_ = + ort_.KernelInfoGetAttribute>(info, "output_size"); + FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 && + output_size_[3] > 0, + "The output size of adaptive pool must be positive."); } } // namespace fastdeploy diff --git a/fastdeploy/backends/ort/ops/adaptive_pool2d.h b/fastdeploy/backends/ort/ops/adaptive_pool2d.h index 556ca033b5f..7d0acda1000 100755 --- a/fastdeploy/backends/ort/ops/adaptive_pool2d.h +++ b/fastdeploy/backends/ort/ops/adaptive_pool2d.h @@ -14,12 +14,12 @@ #pragma once -#include -#include -#include -#include #include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/utils/utils.h" +#include +#include +#include +#include #ifndef NON_64_PLATFORM #include "onnxruntime_cxx_api.h" // NOLINT @@ -38,9 +38,8 @@ struct AdaptivePool2dKernel { const char* provider_; public: - AdaptivePool2dKernel(Ort::CustomOpApi ort, - const OrtKernelInfo* info, - const char* provider) + AdaptivePool2dKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info, + const char* provider) : ort_(ort) { GetAttribute(info); provider_ = provider; @@ -51,9 +50,8 @@ struct AdaptivePool2dKernel { void Compute(OrtKernelContext* context); void CpuAdaptivePool(const std::vector& input_size, - const std::vector& output_size, - const float* input_data, - float* output_data); + const std::vector& output_size, + const float* input_data, float* output_data); }; struct AdaptivePool2dOp @@ -77,9 +75,8 @@ struct AdaptivePool2dOp return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; } - const char* GetExecutionProviderType() const { - return provider_; - } + const char* GetExecutionProviderType() const { return provider_; } + private: const char* provider_; }; diff --git 
a/fastdeploy/backends/ort/ops/multiclass_nms.cc b/fastdeploy/backends/ort/ops/multiclass_nms.cc index 36bc5dadfcd..590dc29a8b5 100644 --- a/fastdeploy/backends/ort/ops/multiclass_nms.cc +++ b/fastdeploy/backends/ort/ops/multiclass_nms.cc @@ -15,9 +15,9 @@ #ifndef NON_64_PLATFORM #include "fastdeploy/backends/ort/ops/multiclass_nms.h" -#include #include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/utils/utils.h" +#include namespace fastdeploy { diff --git a/fastdeploy/backends/ort/option.h b/fastdeploy/backends/ort/option.h new file mode 100644 index 00000000000..db58dbdd738 --- /dev/null +++ b/fastdeploy/backends/ort/option.h @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +namespace fastdeploy { + +struct OrtBackendOption { + // -1 means default + // 0: ORT_DISABLE_ALL + // 1: ORT_ENABLE_BASIC + // 2: ORT_ENABLE_EXTENDED + // 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert) + int graph_optimization_level = -1; + int intra_op_num_threads = -1; + int inter_op_num_threads = -1; + // 0: ORT_SEQUENTIAL + // 1: ORT_PARALLEL + int execution_mode = -1; + bool use_gpu = false; + int gpu_id = 0; + void* external_stream_ = nullptr; + + // inside parameter, maybe remove next version + bool remove_multiclass_nms_ = false; + std::map custom_op_info_; +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/ort/ort_backend.cc b/fastdeploy/backends/ort/ort_backend.cc index 1e6d8bfb581..5ea47d6e4bf 100755 --- a/fastdeploy/backends/ort/ort_backend.cc +++ b/fastdeploy/backends/ort/ort_backend.cc @@ -16,8 +16,8 @@ #include -#include "fastdeploy/backends/ort/ops/multiclass_nms.h" #include "fastdeploy/backends/ort/ops/adaptive_pool2d.h" +#include "fastdeploy/backends/ort/ops/multiclass_nms.h" #include "fastdeploy/backends/ort/utils.h" #include "fastdeploy/core/float16.h" #include "fastdeploy/utils/utils.h" @@ -64,7 +64,7 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { } else { OrtCUDAProviderOptions cuda_options; cuda_options.device_id = option.gpu_id; - if(option.external_stream_) { + if (option.external_stream_) { cuda_options.has_user_compute_stream = 1; cuda_options.user_compute_stream = option.external_stream_; } @@ -91,11 +91,11 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file, strcpy(ops[0].export_op_name, "MultiClassNMS"); strcpy(ops[1].op_name, "pool2d"); strcpy(ops[1].export_op_name, "AdaptivePool2d"); - + if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), &model_content_ptr, &model_content_size, 11, true, - verbose, true, true, true, ops.data(), - 2, "onnxruntime", nullptr, 0, "", &save_external)) { + verbose, true, true, true, ops.data(), 2, + "onnxruntime", nullptr, 0, "", &save_external)) { FDERROR << "Error occured while export PaddlePaddle to ONNX format." 
<< std::endl; return false; @@ -105,11 +105,11 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file, model_content_ptr + model_content_size); delete[] model_content_ptr; model_content_ptr = nullptr; - if(save_external){ + if (save_external) { std::string model_file_name = "model.onnx"; std::fstream f(model_file_name, std::ios::out); FDASSERT(f.is_open(), "Can not open file: %s to save model.", - model_file_name.c_str()); + model_file_name.c_str()); f << onnx_model_proto; f.close(); return InitFromOnnx(model_file_name, option, false); @@ -182,7 +182,7 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file, } void OrtBackend::OrtValueToFDTensor(const Ort::Value& value, FDTensor* tensor, - const std::string& name, bool copy_to_fd) { + const std::string& name, bool copy_to_fd) { const auto info = value.GetTensorTypeAndShapeInfo(); const auto data_type = info.GetElementType(); size_t numel = info.GetElementCount(); @@ -216,15 +216,13 @@ void OrtBackend::OrtValueToFDTensor(const Ort::Value& value, FDTensor* tensor, memcpy(tensor->MutableData(), value_ptr, numel); } else { tensor->name = name; - tensor->SetExternalData( - shape, dtype, - const_cast(value_ptr), Device::CPU); + tensor->SetExternalData(shape, dtype, const_cast(value_ptr), + Device::CPU); } } bool OrtBackend::Infer(std::vector& inputs, - std::vector* outputs, - bool copy_to_fd) { + std::vector* outputs, bool copy_to_fd) { if (inputs.size() != inputs_desc_.size()) { FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size() << ") should keep same with the inputs of this model(" @@ -256,8 +254,8 @@ bool OrtBackend::Infer(std::vector& inputs, std::vector ort_outputs = binding_->GetOutputValues(); outputs->resize(ort_outputs.size()); for (size_t i = 0; i < ort_outputs.size(); ++i) { - OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]), - outputs_desc_[i].name, copy_to_fd); + OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name, + copy_to_fd); } return true; @@ -310,11 +308,13 @@ void OrtBackend::InitCustomOperators() { if (custom_operators_.size() == 0) { MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{}; custom_operators_.push_back(multiclass_nms); - if(option_.use_gpu){ - AdaptivePool2dOp* adaptive_pool2d = new AdaptivePool2dOp{"CUDAExecutionProvider"}; + if (option_.use_gpu) { + AdaptivePool2dOp* adaptive_pool2d = + new AdaptivePool2dOp{"CUDAExecutionProvider"}; custom_operators_.push_back(adaptive_pool2d); - }else{ - AdaptivePool2dOp* adaptive_pool2d = new AdaptivePool2dOp{"CPUExecutionProvider"}; + } else { + AdaptivePool2dOp* adaptive_pool2d = + new AdaptivePool2dOp{"CPUExecutionProvider"}; custom_operators_.push_back(adaptive_pool2d); } } diff --git a/fastdeploy/backends/ort/ort_backend.h b/fastdeploy/backends/ort/ort_backend.h index ab5f38e6128..3b4fae208be 100644 --- a/fastdeploy/backends/ort/ort_backend.h +++ b/fastdeploy/backends/ort/ort_backend.h @@ -18,8 +18,10 @@ #include #include #include +#include #include "fastdeploy/backends/backend.h" +#include "fastdeploy/backends/ort/option.h" #include "onnxruntime_cxx_api.h" // NOLINT namespace fastdeploy { @@ -30,27 +32,6 @@ struct OrtValueInfo { ONNXTensorElementDataType dtype; }; -struct OrtBackendOption { - // -1 means default - // 0: ORT_DISABLE_ALL - // 1: ORT_ENABLE_BASIC - // 2: ORT_ENABLE_EXTENDED - // 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert) - int graph_optimization_level = -1; - int intra_op_num_threads = -1; - int inter_op_num_threads = -1; - // 0: ORT_SEQUENTIAL - // 1: ORT_PARALLEL - int 
execution_mode = -1; - bool use_gpu = false; - int gpu_id = 0; - void* external_stream_ = nullptr; - - // inside parameter, maybe remove next version - bool remove_multiclass_nms_ = false; - std::map custom_op_info_; -}; - class OrtBackend : public BaseBackend { public: OrtBackend() {} @@ -67,8 +48,7 @@ class OrtBackend : public BaseBackend { const OrtBackendOption& option = OrtBackendOption(), bool from_memory_buffer = false); - bool Infer(std::vector& inputs, - std::vector* outputs, + bool Infer(std::vector& inputs, std::vector* outputs, bool copy_to_fd = true) override; int NumInputs() const override { return inputs_desc_.size(); } diff --git a/fastdeploy/backends/paddle/option.h b/fastdeploy/backends/paddle/option.h new file mode 100644 index 00000000000..24fda827795 --- /dev/null +++ b/fastdeploy/backends/paddle/option.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include "fastdeploy/backends/tensorrt/option.h" + + +namespace fastdeploy { + +struct IpuOption { + int ipu_device_num; + int ipu_micro_batch_size; + bool ipu_enable_pipelining; + int ipu_batches_per_step; + bool ipu_enable_fp16; + int ipu_replica_num; + float ipu_available_memory_proportion; + bool ipu_enable_half_partial; +}; + +struct PaddleBackendOption { + std::string model_file = ""; // Path of model file + std::string params_file = ""; // Path of parameters file, can be empty + + std::string model_buffer_ = ""; + std::string params_buffer_ = ""; + size_t model_buffer_size_ = 0; + size_t params_buffer_size_ = 0; + bool model_from_memory_ = false; + +#ifdef WITH_GPU + bool use_gpu = true; +#else + bool use_gpu = false; +#endif + bool enable_mkldnn = true; + + bool enable_log_info = false; + + bool enable_trt = false; + TrtBackendOption trt_option; + bool collect_shape = false; + std::vector trt_disabled_ops_{}; + +#ifdef WITH_IPU + bool use_ipu = true; + IpuOption ipu_option; +#else + bool use_ipu = false; +#endif + + int mkldnn_cache_size = 1; + int cpu_thread_num = 8; + // initialize memory size(MB) for GPU + int gpu_mem_init_size = 100; + // gpu device id + int gpu_id = 0; + bool enable_pinned_memory = false; + void* external_stream_ = nullptr; + + std::vector delete_pass_names = {}; +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc index 67f1eb762ea..de2ac622313 100644 --- a/fastdeploy/backends/paddle/paddle_backend.cc +++ b/fastdeploy/backends/paddle/paddle_backend.cc @@ -13,9 +13,11 @@ // limitations under the License. 
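A minimal usage sketch for the PaddleBackendOption struct introduced above in the new fastdeploy/backends/paddle/option.h. The field names come from that header; the helper name MakeCpuOption and the chosen values are illustrative only, and the template arguments stripped from the rendered diff are assumed.

#include <string>
#include "fastdeploy/backends/paddle/option.h"

// Build a CPU-only option set; every field referenced here is declared in option.h.
fastdeploy::PaddleBackendOption MakeCpuOption(const std::string& model,
                                              const std::string& params) {
  fastdeploy::PaddleBackendOption opt;
  opt.model_file = model;    // path of the *.pdmodel file
  opt.params_file = params;  // path of the *.pdiparams file, may be empty
  opt.use_gpu = false;       // stay on CPU regardless of the WITH_GPU default
  opt.enable_mkldnn = true;  // matches the struct's default
  opt.cpu_thread_num = 8;
  opt.enable_log_info = false;
  return opt;
}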
#include "fastdeploy/backends/paddle/paddle_backend.h" -#include "fastdeploy/utils/path.h" + #include +#include "fastdeploy/utils/path.h" + namespace fastdeploy { void PaddleBackend::BuildOption(const PaddleBackendOption& option) { @@ -26,7 +28,6 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { config_.SetExecStream(option_.external_stream_); } if (option.enable_trt) { -#ifdef ENABLE_TRT_BACKEND config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_); auto precision = paddle_infer::PrecisionType::kFloat32; if (option.trt_option.enable_fp16) { @@ -46,11 +47,6 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { option.trt_option.max_batch_size, 3, precision, use_static); SetTRTDynamicShapeToConfig(option); -#else - FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so " - "will fallback to GPU with Paddle Inference Backend." - << std::endl; -#endif } } else if (option.use_ipu) { #ifdef WITH_IPU @@ -100,11 +96,13 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, return false; } - // The input/output information get from predictor is not right, use PaddleReader instead now + // The input/output information get from predictor is not right, use + // PaddleReader instead now std::string contents; if (option.model_from_memory_) { - config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, params_file.c_str(), option.params_buffer_size_); + config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, + params_file.c_str(), option.params_buffer_size_); contents = model_file; } else { config_.SetModel(model_file, params_file); @@ -115,14 +113,14 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, config_.EnableMemoryOptim(); BuildOption(option); auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size()); - // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to int8 mode + // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to + // int8 mode if (reader.is_quantize_model) { if (option.use_gpu) { FDWARNING << "The loaded model is a quantized model, while inference on " "GPU, please use TensorRT backend to get better performance." << std::endl; if (option.enable_trt) { -#ifdef ENABLE_TRT_BACKEND bool use_static = false; if (option.trt_option.serialize_file != "") { FDWARNING @@ -138,7 +136,6 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, paddle_infer::PrecisionType::kInt8, use_static, false); SetTRTDynamicShapeToConfig(option); -#endif } } if (option.enable_mkldnn) { @@ -162,14 +159,13 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, outputs_desc_.resize(reader.num_outputs); for (int i = 0; i < reader.num_outputs; ++i) { std::string name(reader.outputs[i].name); - std::vector shape(reader.outputs[i].shape, - reader.outputs[i].shape + - reader.outputs[i].rank); + std::vector shape( + reader.outputs[i].shape, + reader.outputs[i].shape + reader.outputs[i].rank); outputs_desc_[i].name = name; outputs_desc_[i].shape.assign(shape.begin(), shape.end()); outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); } -#ifdef ENABLE_TRT_BACKEND if (option.collect_shape) { // Set the shape info file. std::string curr_model_dir = "./"; @@ -182,7 +178,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, FDINFO << "Start generating shape range info file." 
<< std::endl; paddle_infer::Config analysis_config; if (option.model_from_memory_) { - analysis_config.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, params_file.c_str(), option.params_buffer_size_); + analysis_config.SetModelBuffer( + model_file.c_str(), option.model_buffer_size_, params_file.c_str(), + option.params_buffer_size_); } else { analysis_config.SetModel(model_file, params_file); } @@ -202,7 +200,6 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, << " to set TensorRT dynamic shape." << std::endl; config_.EnableTunedTensorRtDynamicShape(shape_range_info, false); } -#endif predictor_ = paddle_infer::CreatePredictor(config_); initialized_ = true; return true; @@ -281,7 +278,6 @@ std::unique_ptr PaddleBackend::Clone(void* stream, int device_id) { return new_backend; } -#ifdef ENABLE_TRT_BACKEND void PaddleBackend::SetTRTDynamicShapeToConfig( const PaddleBackendOption& option) { std::map> max_shape; @@ -351,30 +347,30 @@ void PaddleBackend::CollectShapeRun( tensor->Reshape(shape_value); auto dtype = input_type[name]; switch (dtype) { - case paddle_infer::DataType::FLOAT32: { - std::vector input_data(shape_num, 1.0); - tensor->CopyFromCpu(input_data.data()); - break; - } - case paddle_infer::DataType::INT32: { - std::vector input_data(shape_num, 1); - tensor->CopyFromCpu(input_data.data()); - break; - } - case paddle_infer::DataType::INT64: { - std::vector input_data(shape_num, 1); - tensor->CopyFromCpu(input_data.data()); - break; - } - default: { - FDASSERT(false, "Input data Paddle backend only supports " - "FP32/INT32/INT64 currently."); - break; - } + case paddle_infer::DataType::FLOAT32: { + std::vector input_data(shape_num, 1.0); + tensor->CopyFromCpu(input_data.data()); + break; + } + case paddle_infer::DataType::INT32: { + std::vector input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + break; + } + case paddle_infer::DataType::INT64: { + std::vector input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + break; + } + default: { + FDASSERT(false, + "Input data Paddle backend only supports " + "FP32/INT32/INT64 currently."); + break; + } } } predictor->Run(); } -#endif } // namespace fastdeploy diff --git a/fastdeploy/backends/paddle/paddle_backend.h b/fastdeploy/backends/paddle/paddle_backend.h index 1d4c345966f..47274162c2b 100644 --- a/fastdeploy/backends/paddle/paddle_backend.h +++ b/fastdeploy/backends/paddle/paddle_backend.h @@ -20,74 +20,15 @@ #include #include "fastdeploy/backends/backend.h" +#include "fastdeploy/backends/paddle/option.h" #ifdef ENABLE_PADDLE_FRONTEND #include "paddle2onnx/converter.h" #endif #include "fastdeploy/utils/unique_ptr.h" #include "paddle_inference_api.h" // NOLINT -#ifdef ENABLE_TRT_BACKEND -#include "fastdeploy/backends/tensorrt/trt_backend.h" -#endif - namespace fastdeploy { -struct IpuOption { - int ipu_device_num; - int ipu_micro_batch_size; - bool ipu_enable_pipelining; - int ipu_batches_per_step; - bool ipu_enable_fp16; - int ipu_replica_num; - float ipu_available_memory_proportion; - bool ipu_enable_half_partial; -}; - -struct PaddleBackendOption { - std::string model_file = ""; // Path of model file - std::string params_file = ""; // Path of parameters file, can be empty - - std::string model_buffer_ = ""; - std::string params_buffer_ = ""; - size_t model_buffer_size_ = 0; - size_t params_buffer_size_ = 0; - bool model_from_memory_ = false; - -#ifdef WITH_GPU - bool use_gpu = true; -#else - bool use_gpu = false; -#endif - bool enable_mkldnn = true; - - bool 
enable_log_info = false; - - bool enable_trt = false; -#ifdef ENABLE_TRT_BACKEND - TrtBackendOption trt_option; - bool collect_shape = false; - std::vector trt_disabled_ops_{}; -#endif - -#ifdef WITH_IPU - bool use_ipu = true; - IpuOption ipu_option; -#else - bool use_ipu = false; -#endif - - int mkldnn_cache_size = 1; - int cpu_thread_num = 8; - // initialize memory size(MB) for GPU - int gpu_mem_init_size = 100; - // gpu device id - int gpu_id = 0; - bool enable_pinned_memory = false; - void* external_stream_ = nullptr; - - std::vector delete_pass_names = {}; -}; - // convert FD device to paddle place type paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device); @@ -132,7 +73,6 @@ class PaddleBackend : public BaseBackend { std::vector GetOutputInfos() override; private: -#ifdef ENABLE_TRT_BACKEND void CollectShapeRun(paddle_infer::Predictor* predictor, const std::map>& shape) const; @@ -142,7 +82,6 @@ class PaddleBackend : public BaseBackend { std::map>* min_shape, std::map>* opt_shape) const; void SetTRTDynamicShapeToConfig(const PaddleBackendOption& option); -#endif PaddleBackendOption option_; paddle_infer::Config config_; std::shared_ptr predictor_; diff --git a/fastdeploy/backends/paddle/util.cc b/fastdeploy/backends/paddle/util.cc index eff6a361fb1..fa6e757f382 100644 --- a/fastdeploy/backends/paddle/util.cc +++ b/fastdeploy/backends/paddle/util.cc @@ -30,24 +30,24 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, auto place = ConvertFDDeviceToPlace(fd_tensor.device); if (fd_tensor.dtype == FDDataType::FP32) { if (place == paddle_infer::PlaceType::kGPU) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), - shape, place); + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); } else { tensor->CopyFromCpu(static_cast(fd_tensor.Data())); } return; } else if (fd_tensor.dtype == FDDataType::INT32) { if (place == paddle_infer::PlaceType::kGPU) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), - shape, place); + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); } else { tensor->CopyFromCpu(static_cast(fd_tensor.Data())); } return; } else if (fd_tensor.dtype == FDDataType::INT64) { if (place == paddle_infer::PlaceType::kGPU) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), - shape, place); + tensor->ShareExternalData(static_cast(fd_tensor.Data()), + shape, place); } else { tensor->CopyFromCpu(static_cast(fd_tensor.Data())); } @@ -62,13 +62,12 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, } void PaddleTensorToFDTensor(std::unique_ptr& tensor, - FDTensor* fd_tensor, - bool copy_to_fd) { + FDTensor* fd_tensor, bool copy_to_fd) { auto fd_dtype = PaddleDataTypeToFD(tensor->type()); std::vector shape; auto tmp_shape = tensor->shape(); shape.assign(tmp_shape.begin(), tmp_shape.end()); - if(copy_to_fd) { + if (copy_to_fd) { fd_tensor->Resize(shape, fd_dtype, tensor->name()); if (fd_tensor->dtype == FDDataType::FP32) { tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); @@ -79,9 +78,9 @@ void PaddleTensorToFDTensor(std::unique_ptr& tensor, } else if (fd_tensor->dtype == FDDataType::INT64) { tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); return; - } + } FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", - Str(fd_tensor->dtype).c_str()); + Str(fd_tensor->dtype).c_str()); } else { paddle_infer::PlaceType place; int size = 0; @@ -99,17 +98,17 @@ void PaddleTensorToFDTensor(std::unique_ptr& tensor, } else if (fd_dtype == FDDataType::UINT8) { out_data = 
tensor->data(&place, &size); } else { - FDASSERT(false, "Unexpected data type(%s) while infer shared with PaddleBackend.", + FDASSERT( + false, + "Unexpected data type(%s) while infer shared with PaddleBackend.", Str(fd_dtype).c_str()); } Device device = Device::CPU; - if(place == paddle_infer::PlaceType::kGPU) { + if (place == paddle_infer::PlaceType::kGPU) { device = Device::GPU; } fd_tensor->name = tensor->name(); - fd_tensor->SetExternalData( - shape, fd_dtype, - out_data, device); + fd_tensor->SetExternalData(shape, fd_dtype, out_data, device); } } @@ -153,7 +152,10 @@ FDDataType ReaderDataTypeToFD(int32_t dtype) { } else if (dtype == 6) { fd_dtype = FDDataType::FP16; } else { - FDASSERT(false, "Unexpected data type: %d while call ReaderDataTypeToFD in PaddleBackend.", dtype); + FDASSERT(false, + "Unexpected data type: %d while call ReaderDataTypeToFD in " + "PaddleBackend.", + dtype); } return fd_dtype; } diff --git a/fastdeploy/backends/poros/common/compile.h b/fastdeploy/backends/poros/common/compile.h index c7cbc6756e2..8e09c3664fc 100755 --- a/fastdeploy/backends/poros/common/compile.h +++ b/fastdeploy/backends/poros/common/compile.h @@ -14,14 +14,14 @@ #pragma once -#include #include -#include #include +#include +#include -#include "torch/script.h" #include "iengine.h" #include "poros_module.h" +#include "torch/script.h" namespace baidu { namespace mirana { @@ -36,28 +36,29 @@ namespace poros { * @return porosmodule * @retval !nullptr => succeed nullptr => failed **/ -std::unique_ptr Compile(const torch::jit::Module& module, - const std::vector >& prewarm_datas, +std::unique_ptr +Compile(const torch::jit::Module& module, + const std::vector>& prewarm_datas, const PorosOptions& options); class Compiler { -public: - typedef std::unordered_map engine_map_t; - typedef std::vector > ivalue_vec_t; + public: + typedef std::unordered_map engine_map_t; + typedef std::vector> ivalue_vec_t; - Compiler() : _origin_module(NULL) {} - ~Compiler(); + Compiler() : _origin_module(NULL) {} + ~Compiler(); - /** + /** * @brief initial Compiler * * @param [in] options : poros options * @return int * @retval 0 => succeed <0 => failed **/ - int init(const PorosOptions& options); + int init(const PorosOptions& options); - /** + /** * @brief compile whole graph * * @param [in] origin_module @@ -66,13 +67,12 @@ class Compiler { * @return int * @retval 0 => succeed <0 => failed **/ - int compile(const torch::jit::Module& origin_module, - const ivalue_vec_t& prewarm_datas, - torch::jit::Module* optimized_module); - -private: + int compile(const torch::jit::Module& origin_module, + const ivalue_vec_t& prewarm_datas, + torch::jit::Module* optimized_module); - /** + private: + /** * @brief preprocess this calculation graph * * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue @@ -80,23 +80,25 @@ class Compiler { * @return int * @retval 0 => succeed <0 => failed **/ - int preprocess_graph(const ivalue_vec_t& prewarm_datas, std::shared_ptr& graph); + int preprocess_graph(const ivalue_vec_t& prewarm_datas, + std::shared_ptr& graph); - /** + /** * @brief segement this calculation graph * * @param [in/out] graph * @return int * @retval 0 => succeed <0 => failed **/ - int segment_graph(std::shared_ptr& graph); + int segment_graph(std::shared_ptr& graph); - // Split subgraph(block) - // The divided subgraph, as a subgraph, is associated with the block - int segment_block(torch::jit::Block& block, IEngine* engine, int current_depth); + // Split subgraph(block) + // The divided subgraph, as a subgraph, is 
associated with the block + int segment_block(torch::jit::Block& block, IEngine* engine, + int current_depth); - // Subgraph optimization - /** + // Subgraph optimization + /** * @brief Subgraph optimization * * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue @@ -105,15 +107,15 @@ class Compiler { * @return int * @retval 0 => succeed <0 => failed **/ - int optimize_subgraph(const ivalue_vec_t& prewarm_datas, - const std::shared_ptr& opt_graph, - torch::jit::Module* optimized_module); + int optimize_subgraph(const ivalue_vec_t& prewarm_datas, + const std::shared_ptr& opt_graph, + torch::jit::Module* optimized_module); - // Subgraph optimization(block) - int optimize_subblock(torch::jit::Block* block, - torch::jit::Module* optimized_module); + // Subgraph optimization(block) + int optimize_subblock(torch::jit::Block* block, + torch::jit::Module* optimized_module); - /** + /** * @brief Compile the subgraph into a new graph based on the engine * * @param [in] engine : The engine used by the subgraph @@ -121,32 +123,32 @@ class Compiler { * @return [out] module : Transformed model * @retval 0 => succeed <0 => failed **/ - int transform(IEngine* engine, torch::jit::Node& subgraph_node, - torch::jit::Module& module); + int transform(IEngine* engine, torch::jit::Node& subgraph_node, + torch::jit::Module& module); - /** + /** * @brief Select engine based on subgraph and options * * @param [in] node : Jit Node * @return int * @retval 0 => succeed <0 => failed **/ - IEngine* select_engine(const torch::jit::Node* n); + IEngine* select_engine(const torch::jit::Node* n); - /** + /** * @brief destory * * @return void **/ - void close(); - -private: - int _max_segment_depth{5}; // Maximum subgraph segmentation depth - ivalue_vec_t _prewarm_datas; // Prewarm datas - PorosOptions _options; - engine_map_t _engine_map; // The engine used to record the subgraph - const torch::jit::Module* _origin_module; // Origin_module - std::atomic _engine_index = {0}; // Record engine index + void close(); + + private: + int _max_segment_depth{5}; // Maximum subgraph segmentation depth + ivalue_vec_t _prewarm_datas; // Prewarm datas + PorosOptions _options; + engine_map_t _engine_map; // The engine used to record the subgraph + const torch::jit::Module* _origin_module; // Origin_module + std::atomic _engine_index = {0}; // Record engine index }; /** @@ -158,9 +160,10 @@ class Compiler { * @return optimized_module * @retval !nullptr => succeed nullptr => failed **/ -std::unique_ptr CompileGraph(const torch::jit::Module& module, - const std::vector >& prewarm_datas, - const PorosOptions& options); +std::unique_ptr +CompileGraph(const torch::jit::Module& module, + const std::vector>& prewarm_datas, + const PorosOptions& options); } // namespace poros } // namespace mirana diff --git a/fastdeploy/backends/poros/common/iengine.h b/fastdeploy/backends/poros/common/iengine.h index c945621c1f3..e5368511707 100755 --- a/fastdeploy/backends/poros/common/iengine.h +++ b/fastdeploy/backends/poros/common/iengine.h @@ -17,9 +17,9 @@ #include //from pytorch -#include "torch/script.h" -#include "torch/csrc/jit/ir/ir.h" #include "ATen/core/interned_strings.h" +#include "torch/csrc/jit/ir/ir.h" +#include "torch/script.h" #include "plugin_create.h" @@ -28,50 +28,51 @@ namespace mirana { namespace poros { struct PorosGraph { - torch::jit::Graph* graph = NULL; - torch::jit::Node* node = NULL; + torch::jit::Graph* graph = NULL; + torch::jit::Node* node = NULL; }; typedef uint64_t EngineID; -class IEngine : public IPlugin, public 
torch::CustomClassHolder{ -public: - virtual ~IEngine() {} +class IEngine : public IPlugin, public torch::CustomClassHolder { + public: + virtual ~IEngine() {} - /** + /** * @brief init, initialization must be successful if the init is successful * @return int * @retval 0 => success, <0 => fail **/ - virtual int init() = 0; + virtual int init() = 0; - /** + /** * @brief During compilation, the subgraph is converted into the graph structure of the corresponding engine and stored inside the engine, so that the execute_engine at runtime can be called * @param [in] sub_graph : subgraph * @return [res]int * @retval 0 => success, <0 => fail **/ - virtual int transform(const PorosGraph& sub_graph) = 0; + virtual int transform(const PorosGraph& sub_graph) = 0; - /** + /** * @brief Subgraph execution period logic * @param [in] inputs : input tensor * @return [res] output tensor **/ - virtual std::vector excute_engine(const std::vector& inputs) = 0; - - virtual void register_module_attribute(const std::string& name, torch::jit::Module& module) = 0; + virtual std::vector + excute_engine(const std::vector& inputs) = 0; - // Logo - virtual const std::string who_am_i() = 0; + virtual void register_module_attribute(const std::string& name, + torch::jit::Module& module) = 0; - // Whether the node is supported by the current engine - bool is_node_supported(const torch::jit::Node* node); + // Logo + virtual const std::string who_am_i() = 0; -public: - std::pair _num_io; // Number of input/output parameters - EngineID _id; + // Whether the node is supported by the current engine + bool is_node_supported(const torch::jit::Node* node); + public: + std::pair _num_io; // Number of input/output parameters + EngineID _id; }; } // namespace poros diff --git a/fastdeploy/backends/poros/common/plugin_create.h b/fastdeploy/backends/poros/common/plugin_create.h index d160f2440c0..61b5e8da173 100755 --- a/fastdeploy/backends/poros/common/plugin_create.h +++ b/fastdeploy/backends/poros/common/plugin_create.h @@ -14,52 +14,56 @@ #pragma once -#include #include +#include namespace baidu { namespace mirana { namespace poros { class IPlugin { -public: - virtual ~IPlugin() {} - virtual const std::string who_am_i() = 0; + public: + virtual ~IPlugin() {} + virtual const std::string who_am_i() = 0; }; typedef IPlugin* (*plugin_creator_t)(); typedef std::unordered_map plugin_creator_map_t; IPlugin* create_plugin(const std::string& plugin_name); -IPlugin* create_plugin(const std::string& plugin_name, const plugin_creator_map_t& plugin_creator_map); +IPlugin* create_plugin(const std::string& plugin_name, + const plugin_creator_map_t& plugin_creator_map); void create_all_plugins(const plugin_creator_map_t& plugin_creator_map, - std::unordered_map& plugin_m); + std::unordered_map& plugin_m); //void create_all_plugins(std::unordered_map& plugin_m); -template -IPlugin* default_plugin_creator() { - return new (std::nothrow)PluginType; +template IPlugin* default_plugin_creator() { + return new (std::nothrow) PluginType; } -void register_plugin_creator(const std::string& plugin_name, plugin_creator_t creator); void register_plugin_creator(const std::string& plugin_name, - plugin_creator_t creator, plugin_creator_map_t& plugin_creator_map); + plugin_creator_t creator); +void register_plugin_creator(const std::string& plugin_name, + plugin_creator_t creator, + plugin_creator_map_t& plugin_creator_map); template void register_plugin_class(const std::string& plugin_name) { - return register_plugin_creator(plugin_name, default_plugin_creator); 
+ return register_plugin_creator(plugin_name, + default_plugin_creator); } // This version is recommended template -void register_plugin_class(const std::string& plugin_name, plugin_creator_map_t& plugin_creator_map) { - return register_plugin_creator(plugin_name, default_plugin_creator, plugin_creator_map); +void register_plugin_class(const std::string& plugin_name, + plugin_creator_map_t& plugin_creator_map) { + return register_plugin_creator( + plugin_name, default_plugin_creator, plugin_creator_map); } -}//poros -}//mirana -}//baidu - +} // namespace poros +} // namespace mirana +} // namespace baidu /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/fastdeploy/backends/poros/common/poros_module.h b/fastdeploy/backends/poros/common/poros_module.h index 74ba485d42d..71cabc8e6a6 100755 --- a/fastdeploy/backends/poros/common/poros_module.h +++ b/fastdeploy/backends/poros/common/poros_module.h @@ -14,53 +14,45 @@ #pragma once -#include -#include "torch/script.h" #include "torch/csrc/jit/jit_log.h" +#include "torch/script.h" +#include // #include "ATen/Context.h" namespace baidu { namespace mirana { namespace poros { -enum Device : int8_t { - GPU = 0, - CPU, - XPU, - UNKNOW -}; +enum Device : int8_t { GPU = 0, CPU, XPU, UNKNOW }; struct PorosOptions { - Device device = GPU; - bool debug = false; - bool use_fp16 = false; - bool is_dynamic = false; - bool long_to_int = true; - uint64_t max_workspace_size = 1ULL << 30; - int32_t device_id = -1; - int32_t unconst_ops_thres = -1; - bool use_nvidia_tf32 = false; + Device device = GPU; + bool debug = false; + bool use_fp16 = false; + bool is_dynamic = false; + bool long_to_int = true; + uint64_t max_workspace_size = 1ULL << 30; + int32_t device_id = -1; + int32_t unconst_ops_thres = -1; + bool use_nvidia_tf32 = false; }; class PorosModule : public torch::jit::Module { -public: - PorosModule(torch::jit::Module module) : torch::jit::Module(module) { - } - ~PorosModule() = default; - - void to_device(Device device){ - _options.device = device; - } + public: + PorosModule(torch::jit::Module module) : torch::jit::Module(module) {} + ~PorosModule() = default; - //c10::IValue forward(std::vector inputs); - //void save(const std::string& filename); -public: - PorosOptions _options; + void to_device(Device device) { _options.device = device; } + //c10::IValue forward(std::vector inputs); + //void save(const std::string& filename); + public: + PorosOptions _options; }; //via porosmodule.save -std::unique_ptr Load(const std::string& filename, const PorosOptions& options); +std::unique_ptr Load(const std::string& filename, + const PorosOptions& options); } // namespace poros } // namespace mirana diff --git a/fastdeploy/backends/poros/option.h b/fastdeploy/backends/poros/option.h new file mode 100755 index 00000000000..4d9a11a07a3 --- /dev/null +++ b/fastdeploy/backends/poros/option.h @@ -0,0 +1,47 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
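A minimal sketch of how the plugin registry declared in plugin_create.h above is intended to be used: a concrete IPlugin implements who_am_i(), and register_plugin_class wires default_plugin_creator into a creator map. The DummyEngine and RegisterDummyEngine names are illustrative, and the template arguments (stripped from the rendered diff) are assumed to be the plugin type and std::unordered_map<std::string, plugin_creator_t>.

#include <string>
#include "plugin_create.h"

namespace baidu {
namespace mirana {
namespace poros {

// Hypothetical plugin used only to illustrate registration.
class DummyEngine : public IPlugin {
 public:
  const std::string who_am_i() override { return "DummyEngine"; }
};

// Register the plugin into an explicit creator map (the overload the header
// marks as recommended), so create_plugin("DummyEngine", map) can find it later.
void RegisterDummyEngine(plugin_creator_map_t& creator_map) {
  register_plugin_class<DummyEngine>("DummyEngine", creator_map);
}

}  // namespace poros
}  // namespace mirana
}  // namespace baidu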
+ +#pragma once + +#include +#include +#include +#include + +namespace fastdeploy { + +struct PorosBackendOption { +#ifdef WITH_GPU + bool use_gpu = true; +#else + bool use_gpu = false; +#endif + int gpu_id = 0; + bool long_to_int = true; + // There is calculation precision in tf32 mode on A10, it can bring some + // performance improvement, but there may be diff + bool use_nvidia_tf32 = false; + // Threshold for the number of non-const ops + int32_t unconst_ops_thres = -1; + std::string poros_file = ""; + std::vector prewarm_datatypes = {FDDataType::FP32}; + // TRT options + bool enable_fp16 = false; + bool enable_int8 = false; + bool is_dynamic = false; + size_t max_batch_size = 32; + size_t max_workspace_size = 1 << 30; +}; + +} // namespace fastdeploy diff --git a/fastdeploy/backends/poros/poros_backend.cc b/fastdeploy/backends/poros/poros_backend.cc index a7c96f7cd0e..26a167ebec9 100755 --- a/fastdeploy/backends/poros/poros_backend.cc +++ b/fastdeploy/backends/poros/poros_backend.cc @@ -188,8 +188,7 @@ bool PorosBackend::InitFromPoros(const std::string& model_file, } bool PorosBackend::Infer(std::vector& inputs, - std::vector* outputs, - bool copy_to_fd) { + std::vector* outputs, bool copy_to_fd) { // Convert FD Tensor to PyTorch Tensor std::vector poros_inputs; bool is_backend_cuda = diff --git a/fastdeploy/backends/poros/poros_backend.h b/fastdeploy/backends/poros/poros_backend.h index 00dfe4444cd..3f704179bc2 100755 --- a/fastdeploy/backends/poros/poros_backend.h +++ b/fastdeploy/backends/poros/poros_backend.h @@ -20,35 +20,12 @@ #include #include "fastdeploy/backends/backend.h" - +#include "fastdeploy/backends/poros/option.h" #include "fastdeploy/backends/poros/common/compile.h" #include "fastdeploy/backends/poros/common/poros_module.h" namespace fastdeploy { -struct PorosBackendOption { -#ifdef WITH_GPU - bool use_gpu = true; -#else - bool use_gpu = false; -#endif - int gpu_id = 0; - bool long_to_int = true; - // There is calculation precision in tf32 mode on A10, it can bring some - // performance improvement, but there may be diff - bool use_nvidia_tf32 = false; - // Threshold for the number of non-const ops - int32_t unconst_ops_thres = -1; - std::string poros_file = ""; - std::vector prewarm_datatypes = {FDDataType::FP32}; - // TRT options - bool enable_fp16 = false; - bool enable_int8 = false; - bool is_dynamic = false; - size_t max_batch_size = 32; - size_t max_workspace_size = 1 << 30; -}; - // Convert data type from fastdeploy to poros at::ScalarType GetPorosDtype(const FDDataType& fd_dtype); @@ -74,9 +51,9 @@ class PorosBackend : public BaseBackend { void BuildOption(const PorosBackendOption& option); - bool InitFromTorchScript( - const std::string& model_file, - const PorosBackendOption& option = PorosBackendOption()); + bool + InitFromTorchScript(const std::string& model_file, + const PorosBackendOption& option = PorosBackendOption()); bool InitFromPoros(const std::string& model_file, const PorosBackendOption& option = PorosBackendOption()); @@ -85,8 +62,7 @@ class PorosBackend : public BaseBackend { std::vector>& prewarm_tensors, const PorosBackendOption& option = PorosBackendOption()); - bool Infer(std::vector& inputs, - std::vector* outputs, + bool Infer(std::vector& inputs, std::vector* outputs, bool copy_to_fd = true) override; int NumInputs() const { return _numinputs; } diff --git a/fastdeploy/backends/poros/utils.cc b/fastdeploy/backends/poros/utils.cc index e7b749b5812..ee4b5f681d8 100644 --- a/fastdeploy/backends/poros/utils.cc +++ 
b/fastdeploy/backends/poros/utils.cc @@ -23,32 +23,32 @@ namespace fastdeploy { std::string AtType2String(const at::ScalarType& dtype) { std::string out; switch (dtype) { - case at::kByte: - out = "at::kByte"; - break; - case at::kChar: - out = "at::kChar"; - break; - case at::kShort: - out = "at::kShort"; - break; - case at::kInt: - out = "at::kInt"; - break; - case at::kLong: - out = "at::kLong"; - break; - case at::kHalf: - out = "at::kHalf"; - break; - case at::kFloat: - out = "at::kFloat"; - break; - case at::kDouble: - out = "at::kDouble"; - break; - default: - out = "at::UNKNOWN"; + case at::kByte: + out = "at::kByte"; + break; + case at::kChar: + out = "at::kChar"; + break; + case at::kShort: + out = "at::kShort"; + break; + case at::kInt: + out = "at::kInt"; + break; + case at::kLong: + out = "at::kLong"; + break; + case at::kHalf: + out = "at::kHalf"; + break; + case at::kFloat: + out = "at::kFloat"; + break; + case at::kDouble: + out = "at::kDouble"; + break; + default: + out = "at::UNKNOWN"; } return out; } @@ -129,9 +129,8 @@ at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) { numel * sizeof(double)); } } else { - FDASSERT(false, - "Unrecognized data type while calling " - "PorosBackend::CreatePorosValue()."); + FDASSERT(false, "Unrecognized data type while calling " + "PorosBackend::CreatePorosValue()."); } return poros_value; } diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc index bfda4345197..94a6d42d35e 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc +++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc @@ -27,14 +27,14 @@ RKNPU2Backend::~RKNPU2Backend() { for (uint32_t i = 0; i < io_num.n_input; i++) { rknn_destroy_mem(ctx, input_mems_[i]); } - if(input_mems_ != nullptr){ + if (input_mems_ != nullptr) { free(input_mems_); } for (uint32_t i = 0; i < io_num.n_output; i++) { rknn_destroy_mem(ctx, output_mems_[i]); } - if(output_mems_ != nullptr){ + if (output_mems_ != nullptr) { free(output_mems_); } } @@ -173,16 +173,15 @@ bool RKNPU2Backend::GetModelInputOutputInfos() { // create input tensor memory // rknn_tensor_mem* input_mems[io_num.n_input]; - input_mems_ = (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_input); + input_mems_ = + (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_input); // get input info and copy to input tensor info for (uint32_t i = 0; i < io_num.n_input; i++) { input_attrs_[i].index = i; // query info - ret = rknn_query(ctx, - RKNN_QUERY_INPUT_ATTR, - &(input_attrs_[i]), + ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs_[i]), sizeof(rknn_tensor_attr)); DumpTensorAttr(input_attrs_[i]); @@ -190,12 +189,12 @@ bool RKNPU2Backend::GetModelInputOutputInfos() { printf("rknn_init error! 
ret=%d\n", ret); return false; } - if((input_attrs_[i].fmt != RKNN_TENSOR_NHWC) && - (input_attrs_[i].fmt != RKNN_TENSOR_UNDEFINED)){ - FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" << std::endl; + if ((input_attrs_[i].fmt != RKNN_TENSOR_NHWC) && + (input_attrs_[i].fmt != RKNN_TENSOR_UNDEFINED)) { + FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" + << std::endl; } - // copy input_attrs_ to input tensor info std::string temp_name = input_attrs_[i].name; std::vector temp_shape{}; @@ -203,25 +202,28 @@ bool RKNPU2Backend::GetModelInputOutputInfos() { for (int j = 0; j < input_attrs_[i].n_dims; j++) { temp_shape[j] = (int)input_attrs_[i].dims[j]; } - FDDataType temp_dtype = fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(input_attrs_[i].type); + FDDataType temp_dtype = + fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType( + input_attrs_[i].type); TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; inputs_desc_[i] = temp_input_info; } // Get detailed output parameters - output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output); + output_attrs_ = + (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output); memset(output_attrs_, 0, io_num.n_output * sizeof(rknn_tensor_attr)); outputs_desc_.resize(io_num.n_output); // Create output tensor memory - output_mems_ = (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_output);; + output_mems_ = + (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_output); + ; for (uint32_t i = 0; i < io_num.n_output; i++) { output_attrs_[i].index = i; // query info - ret = rknn_query(ctx, - RKNN_QUERY_OUTPUT_ATTR, - &(output_attrs_[i]), + ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]), sizeof(rknn_tensor_attr)); DumpTensorAttr(output_attrs_[i]); @@ -233,7 +235,7 @@ bool RKNPU2Backend::GetModelInputOutputInfos() { // If the output dimension is 3, the runtime will automatically change it to 4. // Obviously, this is wrong, and manual correction is required here. int n_dims = output_attrs_[i].n_dims; - if((n_dims == 4) && (output_attrs_[i].dims[3] == 1)){ + if ((n_dims == 4) && (output_attrs_[i].dims[3] == 1)) { n_dims--; } @@ -292,8 +294,7 @@ std::vector RKNPU2Backend::GetOutputInfos() { } bool RKNPU2Backend::Infer(std::vector& inputs, - std::vector* outputs, - bool copy_to_fd) { + std::vector* outputs, bool copy_to_fd) { int ret = RKNN_SUCC; // Judge whether the input and output size are the same if (inputs.size() != inputs_desc_.size()) { @@ -303,15 +304,17 @@ bool RKNPU2Backend::Infer(std::vector& inputs, return false; } - if(!this->infer_init){ + if (!this->infer_init) { for (uint32_t i = 0; i < io_num.n_input; i++) { // Judge whether the input and output types are the same rknn_tensor_type input_type = - fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[i].dtype); + fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType( + inputs[i].dtype); if (input_type != input_attrs_[i].type) { FDWARNING << "The input tensor type != model's inputs type." 
- << "The input_type need " << get_type_string(input_attrs_[i].type) - << ",but inputs["<< i << "].type is " << get_type_string(input_type) + << "The input_type need " + << get_type_string(input_attrs_[i].type) << ",but inputs[" + << i << "].type is " << get_type_string(input_type) << std::endl; } @@ -319,10 +322,11 @@ bool RKNPU2Backend::Infer(std::vector& inputs, input_attrs_[i].type = input_type; input_attrs_[i].size = inputs[0].Nbytes(); input_attrs_[i].size_with_stride = inputs[0].Nbytes(); - if(input_attrs_[i].type == RKNN_TENSOR_FLOAT16 || - input_attrs_[i].type == RKNN_TENSOR_FLOAT32){ + if (input_attrs_[i].type == RKNN_TENSOR_FLOAT16 || + input_attrs_[i].type == RKNN_TENSOR_FLOAT32) { FDINFO << "The input model is not a quantitative model. " - "Close the normalize operation." << std::endl; + "Close the normalize operation." + << std::endl; } input_mems_[i] = rknn_create_mem(ctx, inputs[i].Nbytes()); @@ -474,4 +478,4 @@ RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) { FDERROR << "rknn_tensor_type don't support this type" << std::endl; return RKNN_TENSOR_TYPE_MAX; } -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h index af28fdddfbc..33704679c2f 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h +++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h @@ -14,9 +14,9 @@ #pragma once #include "fastdeploy/backends/backend.h" -#include "fastdeploy/core/fd_tensor.h" -#include "rknn_api.h" // NOLINT #include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h" +#include "fastdeploy/core/fd_tensor.h" +#include "rknn_api.h" // NOLINT #include #include #include @@ -71,8 +71,7 @@ class RKNPU2Backend : public BaseBackend { TensorInfo GetOutputInfo(int index) override; std::vector GetInputInfos() override; std::vector GetOutputInfos() override; - bool Infer(std::vector& inputs, - std::vector* outputs, + bool Infer(std::vector& inputs, std::vector* outputs, bool copy_to_fd = true) override; private: diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h b/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h index 60e1a76aab2..7205d0bb4ca 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h +++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h @@ -24,9 +24,9 @@ typedef enum _rknpu2_cpu_name { /*! RKNPU2 core mask for mobile device. */ typedef enum _rknpu2_core_mask { RKNN_NPU_CORE_AUTO = 0, //< default, run on NPU core randomly. - RKNN_NPU_CORE_0 = 1, //< run on NPU core 0. - RKNN_NPU_CORE_1 = 2, //< run on NPU core 1. - RKNN_NPU_CORE_2 = 4, //< run on NPU core 2. + RKNN_NPU_CORE_0 = 1, //< run on NPU core 0. + RKNN_NPU_CORE_1 = 2, //< run on NPU core 1. + RKNN_NPU_CORE_2 = 4, //< run on NPU core 2. RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, //< run on NPU core 1 and core 2. RKNN_NPU_CORE_0_1_2 = diff --git a/fastdeploy/backends/sophgo/option.h b/fastdeploy/backends/sophgo/option.h new file mode 100644 index 00000000000..320cb7ae281 --- /dev/null +++ b/fastdeploy/backends/sophgo/option.h @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +namespace fastdeploy { +struct SophgoBackendOption{ +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/sophgo/sophgo_backend.cc b/fastdeploy/backends/sophgo/sophgo_backend.cc new file mode 100644 index 00000000000..2e52e4e5d28 --- /dev/null +++ b/fastdeploy/backends/sophgo/sophgo_backend.cc @@ -0,0 +1,290 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "fastdeploy/backends/sophgo/sophgo_backend.h" + +#include + +namespace fastdeploy { +SophgoBackend::~SophgoBackend() { bm_dev_free(handle_); } +/*************************************************************** + * @name GetSDKAndDeviceVersion + * @brief get Sophgo sdk and device version + * @param None + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::GetSDKAndDeviceVersion() { return true; } + +/*************************************************************** + * @name BuildOption + * @brief save option + * @param SOPHGOTPU2BackendOption + * @note None + ***************************************************************/ +void SophgoBackend::BuildOption(const SophgoBackendOption& option) { + // this->option_ = option; + // save cpu_name + // this->option_.cpu_name = option.cpu_name; +} + +/*************************************************************** + * @name InitFromSophgo + * @brief Initialize Sophgo model + * @param model_file: Binary data for the Sophgo model. + * params_file: None + * option: config + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::InitFromSophgo(const std::string& model_file, + const SophgoBackendOption& option) { + // LoadModel + if (!this->LoadModel((char*)model_file.data())) { + FDERROR << "load model failed" << std::endl; + return false; + } + + // GetSDKAndDeviceVersion + if (!this->GetSDKAndDeviceVersion()) { + FDERROR << "get SDK and device version failed" << std::endl; + return false; + } + + // BuildOption + this->BuildOption(option); + + // GetModelInputOutputInfos + if (!this->GetModelInputOutputInfos()) { + FDERROR << "get model input output infos failed" << std::endl; + return false; + } + + return true; +} + +/*************************************************************** + * @name LoadModel + * @brief read Sophgo bmodel + * @param model: Binary data for the Sophgo model. 
+ * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::LoadModel(void* model) { + unsigned int card_num = 0; + bm_status_t status = bm_get_card_num(&card_num); + status = bm_dev_request(&handle_, 0); + p_bmrt_ = bmrt_create(handle_); + assert(NULL != p_bmrt_); + + bool load_status = bmrt_load_bmodel(p_bmrt_, (char*)model); + assert(load_status); + + int network_num = bmrt_get_network_number(p_bmrt_); + + const char** net_names = NULL; + bmrt_get_network_names(p_bmrt_, &net_names); + net_name_ = net_names[0]; + free(net_names); + + net_info_ = bmrt_get_network_info(p_bmrt_, net_name_.c_str()); + assert(NULL != net_info_); + + return true; +} + +/*************************************************************** + * @name GetModelInputOutputInfos + * @brief Get the detailed input and output infos of Model + * @param None + * @return bool + * @note None + ***************************************************************/ +bool SophgoBackend::GetModelInputOutputInfos() { + inputs_desc_.resize(net_info_->input_num); + bm_shape_t* input_shapes = net_info_->stages->input_shapes; + for (int idx = 0; idx < net_info_->input_num; idx++) { + std::string temp_name = (net_info_->input_names)[idx]; + std::vector temp_shape{}; + temp_shape.resize(input_shapes[idx].num_dims); + for (int i = 0; i < input_shapes[idx].num_dims; i++) { + temp_shape[i] = input_shapes[idx].dims[i]; + } + bm_data_type_t* input_dtypes = net_info_->input_dtypes; + // SophgoType to FDDataType + FDDataType temp_dtype = SophgoTensorTypeToFDDataType(*input_dtypes); + TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; + inputs_desc_[idx] = temp_input_info; + } + + outputs_desc_.resize(net_info_->output_num); + bm_shape_t* output_shapes = net_info_->stages->output_shapes; + for (int idx = 0; idx < net_info_->output_num; idx++) { + std::string temp_name1 = (net_info_->output_names)[idx]; + std::vector temp_shape1{}; + temp_shape1.resize(output_shapes[idx].num_dims); + for (int i = 0; i < output_shapes[idx].num_dims; i++) { + temp_shape1[i] = output_shapes[idx].dims[i]; + } + bm_data_type_t* output_dtypes = net_info_->output_dtypes; + // SophgoType to FDDataType + FDDataType temp_dtype1 = SophgoTensorTypeToFDDataType(*output_dtypes); + TensorInfo temp_output_info = {temp_name1, temp_shape1, temp_dtype1}; + outputs_desc_[idx] = temp_output_info; + } + return true; +} + +TensorInfo SophgoBackend::GetInputInfo(int index) { + FDASSERT(index < NumInputs(), + "The index: %d should less than the number of inputs: %d.", index, + NumInputs()) + return inputs_desc_[index]; +} + +std::vector SophgoBackend::GetInputInfos() { return inputs_desc_; } + +TensorInfo SophgoBackend::GetOutputInfo(int index) { + FDASSERT(index < NumOutputs(), + "The index: %d should less than the number of outputs %d.", index, + NumOutputs()) + return outputs_desc_[index]; +} + +std::vector SophgoBackend::GetOutputInfos() { + return outputs_desc_; +} + +bool SophgoBackend::Infer(std::vector& inputs, + std::vector* outputs, bool copy_to_fd) { + int input_size = inputs.size(); + assert(input_size != 0); + assert(input_size == NumInputs()); + bm_tensor_t input_tensors[input_size]; + bm_status_t status = BM_SUCCESS; + + bm_data_type_t* input_dtypes = net_info_->input_dtypes; + for (int i = 0; i < input_size; i++) { + status = bm_malloc_device_byte(handle_, &input_tensors[i].device_mem, + net_info_->max_input_bytes[i]); + assert(BM_SUCCESS == status); + input_tensors[i].dtype = input_dtypes[i]; + 
input_tensors[i].st_mode = BM_STORE_1N; + input_tensors[i].shape = *(net_info_->stages[i].input_shapes); + unsigned int input_byte = bmrt_tensor_bytesize(&input_tensors[i]); + bm_memcpy_s2d_partial(handle_, input_tensors[i].device_mem, + (void*)inputs[i].Data(), + bmrt_tensor_bytesize(&input_tensors[i])); + } + + int output_size = NumOutputs(); + bm_tensor_t output_tensors[output_size]; + for (int i = 0; i < output_size; i++) { + status = bm_malloc_device_byte(handle_, &output_tensors[i].device_mem, + net_info_->max_output_bytes[i]); + assert(BM_SUCCESS == status); + } + + bool launch_status = bmrt_launch_tensor_ex( + p_bmrt_, net_name_.c_str(), input_tensors, net_info_->input_num, + output_tensors, net_info_->output_num, true, false); + assert(launch_status); + status = bm_thread_sync(handle_); + assert(status == BM_SUCCESS); + + outputs->resize(outputs_desc_.size()); + bm_data_type_t* output_dtypes = net_info_->output_dtypes; + for (int i = 0; i < output_size; i++) { + int temp_bytesize = bmrt_tensor_bytesize(&output_tensors[i]); // Byte + float* temp_out = (float*)malloc(temp_bytesize); + bm_memcpy_d2s_partial(handle_, temp_out, output_tensors[i].device_mem, + temp_bytesize); + + std::vector temp_shape; + temp_shape.resize(outputs_desc_[i].shape.size()); + for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { + temp_shape[j] = outputs_desc_[i].shape[j]; + } + (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, + outputs_desc_[i].name); + + memcpy((*outputs)[i].MutableData(), temp_out, (*outputs)[i].Nbytes()); + free(temp_out); + } + + return true; +} + +/*************************************************************** + * @name SophgoTensorTypeToFDDataType + * @brief Change SophgoTensorType To FDDataType + * @param bm_data_type_t + * @return None + * @note None + ***************************************************************/ +FDDataType SophgoBackend::SophgoTensorTypeToFDDataType(bm_data_type_t type) { + if (type == BM_FLOAT16) { + return FDDataType::FP32; + } + if (type == BM_FLOAT32) { + return FDDataType::FP32; + } + if (type == BM_INT8) { + return FDDataType::INT8; + } + if (type == BM_INT16) { + return FDDataType::INT16; + } + if (type == BM_INT32) { + return FDDataType::INT32; + } + if (type == BM_UINT8) { + return FDDataType::UINT8; + } + FDERROR << "FDDataType don't support this type" << std::endl; + return FDDataType::UNKNOWN1; +} + +/*************************************************************** + * @name FDDataTypeToSophgoTensorType + * @brief Change FDDataType To SophgoTensorType + * @param FDDataType + * @return None + * @note None + ***************************************************************/ +// Sophgo_tensor_type +bm_data_type_t SophgoBackend::FDDataTypeToSophgoTensorType( + fastdeploy::FDDataType type) { + if (type == FDDataType::FP16) { + return BM_FLOAT16; + } + if (type == FDDataType::FP32) { + return BM_FLOAT32; + } + if (type == FDDataType::INT8) { + return BM_INT8; + } + if (type == FDDataType::INT16) { + return BM_INT16; + } + if (type == FDDataType::INT32) { + return BM_INT32; + } + if (type == FDDataType::UINT8) { + return BM_UINT8; + } + FDERROR << "Sophgo_tensor_type don't support this type" << std::endl; + return BM_FLOAT32; +} + +} // namespace fastdeploy diff --git a/fastdeploy/backends/sophgo/sophgo_backend.h b/fastdeploy/backends/sophgo/sophgo_backend.h new file mode 100644 index 00000000000..348d844af17 --- /dev/null +++ b/fastdeploy/backends/sophgo/sophgo_backend.h @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/backends/backend.h" +#include "fastdeploy/core/fd_tensor.h" +#include "bmruntime_interface.h" // NOLINT +#include "bmlib_runtime.h" // NOLINT +#include "fastdeploy/backends/sophgo/option.h" +#include +#include +#include +#include +#include + +namespace fastdeploy { + +class SophgoBackend : public BaseBackend { + public: + SophgoBackend() = default; + virtual ~SophgoBackend(); + bool LoadModel(void* model); + bool GetSDKAndDeviceVersion(); + bool GetModelInputOutputInfos(); + void BuildOption(const SophgoBackendOption& option); + bool InitFromSophgo(const std::string& model_file, + const SophgoBackendOption& option = SophgoBackendOption()); + + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + bool Infer(std::vector& inputs, + std::vector* outputs, + bool copy_to_fd = true) override; + + private: + std::vector inputs_desc_; + std::vector outputs_desc_; + std::string net_name_; + + bm_handle_t handle_; + void * p_bmrt_ = nullptr; + + bool infer_init = false; + + const bm_net_info_t* net_info_ = nullptr; + + // SophgoTPU2BackendOption option_; + + static FDDataType SophgoTensorTypeToFDDataType(bm_data_type_t type); + static bm_data_type_t FDDataTypeToSophgoTensorType(FDDataType type); +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.cc b/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.cc index bfec5e356ed..191ac156047 100755 --- a/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.cc +++ b/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.cc @@ -17,108 +17,106 @@ namespace fastdeploy { nvinfer1::PluginFieldCollection AdaptivePool2dPluginCreator::mFC{}; -std::vector AdaptivePool2dPluginCreator::mPluginAttributes; +std::vector + AdaptivePool2dPluginCreator::mPluginAttributes; -pluginStatus_t AdaptivePool2dInference(cudaStream_t stream, int32_t n, const void* input, void* output); +pluginStatus_t AdaptivePool2dInference(cudaStream_t stream, int32_t n, + const void* input, void* output); -AdaptivePool2d::AdaptivePool2d(std::vector output_size, std::string pooling_type) { +AdaptivePool2d::AdaptivePool2d(std::vector output_size, + std::string pooling_type) { output_size_ = output_size; pooling_type_ = pooling_type; } AdaptivePool2d::AdaptivePool2d(const void* buffer, size_t length) { - const char *d = reinterpret_cast(buffer), *a = d; - output_size_.resize(4); - for(int64_t i =0 ; i < 4; i++){ - output_size_[i] =read(d); - } - if(read(d) == 0){ - pooling_type_ = "avg"; - }else{ - pooling_type_ = "max"; - } - FDASSERT(d == a + length, "deserialize failed."); + const char *d = reinterpret_cast(buffer), *a = d; + output_size_.resize(4); + for (int64_t i = 
0; i < 4; i++) { + output_size_[i] = read(d); + } + if (read(d) == 0) { + pooling_type_ = "avg"; + } else { + pooling_type_ = "max"; + } + FDASSERT(d == a + length, "deserialize failed."); } -int AdaptivePool2d::getNbOutputs() const noexcept { - return 1; -} +int AdaptivePool2d::getNbOutputs() const noexcept { return 1; } nvinfer1::DimsExprs AdaptivePool2d::getOutputDimensions( - int outputIndex, const nvinfer1::DimsExprs* inputs, - int nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept { + int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept { try { nvinfer1::DimsExprs output(inputs[0]); output.d[2] = exprBuilder.constant(static_cast(output_size_[2])); output.d[3] = exprBuilder.constant(static_cast(output_size_[3])); return output; - } - catch (const std::exception& e) { - FDASSERT(false, "getOutputDimensions failed: %s.",e.what()); + } catch (const std::exception& e) { + FDASSERT(false, "getOutputDimensions failed: %s.", e.what()); } return nvinfer1::DimsExprs{}; } -int AdaptivePool2d::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) noexcept { +int AdaptivePool2d::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, + void* workspace, cudaStream_t stream) noexcept { if (inputDesc[0].type != nvinfer1::DataType::kFLOAT) { - return -1; + return -1; } auto const* data = static_cast(inputs[0]); auto* result = static_cast(outputs[0]); - int nums = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] * outputDesc[0].dims.d[2]* outputDesc[0].dims.d[3]; + int nums = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] * + outputDesc[0].dims.d[2] * outputDesc[0].dims.d[3]; std::vector input_size, output_size; - for(int i =0; i< 4; i++){ + for (int i = 0; i < 4; i++) { input_size.push_back(inputDesc[0].dims.d[i]); output_size.push_back(outputDesc[0].dims.d[i]); } - CudaAdaptivePool(input_size, output_size, result, data, stream, pooling_type_); + CudaAdaptivePool(input_size, output_size, result, data, stream, + pooling_type_); return cudaPeekAtLastError(); } size_t AdaptivePool2d::getSerializationSize() const noexcept { - return 5 * sizeof(int32_t) ; + return 5 * sizeof(int32_t); } -void AdaptivePool2d::serialize(void* buffer) const noexcept { +void AdaptivePool2d::serialize(void* buffer) const noexcept { char *d = reinterpret_cast(buffer), *a = d; - for(int64_t i=0; i< 4; i++){ + for (int64_t i = 0; i < 4; i++) { write(d, output_size_[i]); } int32_t pooling_type_val = 0; - if(pooling_type_ != "avg"){ + if (pooling_type_ != "avg") { pooling_type_val = 1; } write(d, pooling_type_val); FDASSERT(d == a + getSerializationSize(), "d == a + getSerializationSize()"); } -nvinfer1::DataType AdaptivePool2d::getOutputDataType( - int index, const nvinfer1::DataType* inputType, int nbInputs) const noexcept { +nvinfer1::DataType +AdaptivePool2d::getOutputDataType(int index, + const nvinfer1::DataType* inputType, + int nbInputs) const noexcept { return inputType[0]; } bool AdaptivePool2d::supportsFormatCombination( - int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) noexcept { + int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, + int nbOutputs) noexcept { return (inOut[pos].format == nvinfer1::PluginFormat::kLINEAR); } -int AdaptivePool2d::initialize() noexcept { - 
return 0; -} +int AdaptivePool2d::initialize() noexcept { return 0; } -void AdaptivePool2d::terminate() noexcept { - return; -} +void AdaptivePool2d::terminate() noexcept { return; } -size_t AdaptivePool2d::getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, - int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const noexcept { +size_t AdaptivePool2d::getWorkspaceSize( + const nvinfer1::PluginTensorDesc* inputs, int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const noexcept { return 0; } @@ -126,33 +124,32 @@ const char* AdaptivePool2d::getPluginType() const noexcept { return "AdaptivePool2d"; } -const char* AdaptivePool2d::getPluginVersion() const noexcept { - return "1"; -} +const char* AdaptivePool2d::getPluginVersion() const noexcept { return "1"; } -void AdaptivePool2d::destroy() noexcept { +void AdaptivePool2d::destroy() noexcept { return; } +void AdaptivePool2d::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept { return; } -void AdaptivePool2d::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept { - return; -} nvinfer1::IPluginV2DynamicExt* AdaptivePool2d::clone() const noexcept { - try{ - nvinfer1::IPluginV2DynamicExt* plugin = new AdaptivePool2d(output_size_, pooling_type_); - plugin->setPluginNamespace(mNamespace.c_str()); - return plugin; - } - catch (std::exception const& e){ - FDASSERT(false, "clone failed: %s.",e.what()); + try { + nvinfer1::IPluginV2DynamicExt* plugin = + new AdaptivePool2d(output_size_, pooling_type_); + plugin->setPluginNamespace(mNamespace.c_str()); + return plugin; + } catch (std::exception const& e) { + FDASSERT(false, "clone failed: %s.", e.what()); } return nullptr; } AdaptivePool2dPluginCreator::AdaptivePool2dPluginCreator() { mPluginAttributes.clear(); - mPluginAttributes.emplace_back(nvinfer1::PluginField("output_size", nullptr, nvinfer1::PluginFieldType::kINT32, 4)); - mPluginAttributes.emplace_back(nvinfer1::PluginField("pooling_type", nullptr, nvinfer1::PluginFieldType::kCHAR, 3)); + mPluginAttributes.emplace_back(nvinfer1::PluginField( + "output_size", nullptr, nvinfer1::PluginFieldType::kINT32, 4)); + mPluginAttributes.emplace_back(nvinfer1::PluginField( + "pooling_type", nullptr, nvinfer1::PluginFieldType::kCHAR, 3)); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); @@ -166,17 +163,18 @@ const char* AdaptivePool2dPluginCreator::getPluginVersion() const noexcept { return "1"; } -const nvinfer1::PluginFieldCollection* AdaptivePool2dPluginCreator::getFieldNames() noexcept { +const nvinfer1::PluginFieldCollection* +AdaptivePool2dPluginCreator::getFieldNames() noexcept { return &mFC; } -nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(const char* name, - const nvinfer1::PluginFieldCollection* fc) noexcept { - try{ +nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept { + try { const nvinfer1::PluginField* fields = fc->fields; auto const dims = static_cast(fields[0].data); output_size_.resize(4); - for(int64_t i = 0; i < 4; i++){ + for (int64_t i = 0; i < 4; i++) { output_size_[i] = dims[i]; } @@ -184,23 +182,20 @@ nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(const c std::string pooling_type(pooling_type_ptr, 3); pooling_type_ = 
pooling_type; return new AdaptivePool2d(output_size_, pooling_type_); - } - catch (std::exception const& e){ - FDASSERT(false, "createPlugin failed: %s.",e.what()); + } catch (std::exception const& e) { + FDASSERT(false, "createPlugin failed: %s.", e.what()); } return nullptr; } -nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::deserializePlugin(const char* name, - const void* serialData, - size_t serialLength) noexcept { - try{ +nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::deserializePlugin( + const char* name, const void* serialData, size_t serialLength) noexcept { + try { return new AdaptivePool2d(serialData, serialLength); - } - catch (std::exception const& e){ - FDASSERT(false, "deserializePlugin failed: %s.",e.what()); + } catch (std::exception const& e) { + FDASSERT(false, "deserializePlugin failed: %s.", e.what()); } return nullptr; } -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.h b/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.h index 2e6e45e2cce..1fe78892000 100755 --- a/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.h +++ b/fastdeploy/backends/tensorrt/ops/adaptive_pool2d.h @@ -13,98 +13,93 @@ // limitations under the License. #pragma once +#include "common.h" // NOLINT #include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h" -#include "common.h" // NOLINT namespace fastdeploy { class AdaptivePool2d : public BasePlugin { public: - AdaptivePool2d(std::vector output_size, std::string pooling_type); + AdaptivePool2d(std::vector output_size, std::string pooling_type); - AdaptivePool2d(const void* buffer, size_t length); + AdaptivePool2d(const void* buffer, size_t length); - ~AdaptivePool2d() override = default; + ~AdaptivePool2d() override = default; - int getNbOutputs() const noexcept override; + int getNbOutputs() const noexcept override; - nvinfer1::DimsExprs getOutputDimensions( - int outputIndex, - const nvinfer1::DimsExprs* inputs, - int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept override; + nvinfer1::DimsExprs + getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs* inputs, + int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept override; - nvinfer1::DataType getOutputDataType( - int index, - const nvinfer1::DataType* inputType, - int nbInputs) const noexcept override; + nvinfer1::DataType getOutputDataType(int index, + const nvinfer1::DataType* inputType, + int nbInputs) const noexcept override; - bool supportsFormatCombination( - int pos, - const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, - int nbOutputs) noexcept override; + bool supportsFormatCombination(int pos, + const nvinfer1::PluginTensorDesc* inOut, + int nbInputs, int nbOutputs) noexcept override; - int initialize() noexcept override; + int initialize() noexcept override; - void terminate() noexcept override; + void terminate() noexcept override; - size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, - int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const noexcept override; + size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const noexcept override; - int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) noexcept override; + int 
enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, void* workspace, + cudaStream_t stream) noexcept override; - size_t getSerializationSize() const noexcept override; + size_t getSerializationSize() const noexcept override; - void serialize(void* buffer) const noexcept override; + void serialize(void* buffer) const noexcept override; - const char* getPluginType() const noexcept override; + const char* getPluginType() const noexcept override; - const char* getPluginVersion() const noexcept override; - void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, - int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) noexcept override; - void destroy() noexcept override; + const char* getPluginVersion() const noexcept override; + void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* out, + int nbOutputs) noexcept override; + void destroy() noexcept override; - nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; + nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; private: - std::vector output_size_; - std::string pooling_type_; + std::vector output_size_; + std::string pooling_type_; }; class AdaptivePool2dPluginCreator : public BaseCreator { public: - AdaptivePool2dPluginCreator(); + AdaptivePool2dPluginCreator(); - ~AdaptivePool2dPluginCreator() override = default; + ~AdaptivePool2dPluginCreator() override = default; - const char* getPluginName() const noexcept override; + const char* getPluginName() const noexcept override; - const char* getPluginVersion() const noexcept override; + const char* getPluginVersion() const noexcept override; - const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override; + const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override; - nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name, - const nvinfer1::PluginFieldCollection* fc) noexcept override; + nvinfer1::IPluginV2DynamicExt* + createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) noexcept override; - nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, - const void* serialData, - size_t serialLength) noexcept override; + nvinfer1::IPluginV2DynamicExt* + deserializePlugin(const char* name, const void* serialData, + size_t serialLength) noexcept override; private: - static nvinfer1::PluginFieldCollection mFC; - static std::vector mPluginAttributes; - std::vector output_size_; - std::string pooling_type_; + static nvinfer1::PluginFieldCollection mFC; + static std::vector mPluginAttributes; + std::vector output_size_; + std::string pooling_type_; }; REGISTER_TENSORRT_PLUGIN(AdaptivePool2dPluginCreator); diff --git a/fastdeploy/backends/tensorrt/ops/common.h b/fastdeploy/backends/tensorrt/ops/common.h index beada71ff6a..975582ffd4c 100755 --- a/fastdeploy/backends/tensorrt/ops/common.h +++ b/fastdeploy/backends/tensorrt/ops/common.h @@ -17,40 +17,40 @@ #include "NvInferPlugin.h" #include "NvInferRuntimeCommon.h" #include "fastdeploy/utils/utils.h" +#include #include -#include -#include #include -#include #include +#include +#include namespace fastdeploy { class BasePlugin : public nvinfer1::IPluginV2DynamicExt { protected: - void setPluginNamespace(const char* libNamespace) noexcept override { - mNamespace = libNamespace; - } + void setPluginNamespace(const char* libNamespace) noexcept override { + mNamespace = 
libNamespace; + } - const char* getPluginNamespace() const noexcept override { - return mNamespace.c_str(); - } + const char* getPluginNamespace() const noexcept override { + return mNamespace.c_str(); + } - std::string mNamespace; + std::string mNamespace; }; class BaseCreator : public nvinfer1::IPluginCreator { public: - void setPluginNamespace(const char* libNamespace) noexcept override { - mNamespace = libNamespace; - } + void setPluginNamespace(const char* libNamespace) noexcept override { + mNamespace = libNamespace; + } - const char* getPluginNamespace() const noexcept override { - return mNamespace.c_str(); - } + const char* getPluginNamespace() const noexcept override { + return mNamespace.c_str(); + } protected: - std::string mNamespace; + std::string mNamespace; }; typedef enum { @@ -62,19 +62,17 @@ typedef enum { } pluginStatus_t; // Write values into buffer -template -void write(char*& buffer, const T& val) { - std::memcpy(buffer, &val, sizeof(T)); - buffer += sizeof(T); +template void write(char*& buffer, const T& val) { + std::memcpy(buffer, &val, sizeof(T)); + buffer += sizeof(T); } // Read values from buffer -template -T read(const char*& buffer) { - T val{}; - std::memcpy(&val, buffer, sizeof(T)); - buffer += sizeof(T); - return val; +template T read(const char*& buffer) { + T val{}; + std::memcpy(&val, buffer, sizeof(T)); + buffer += sizeof(T); + return val; } } // namespace fastdeploy diff --git a/fastdeploy/backends/tensorrt/option.h b/fastdeploy/backends/tensorrt/option.h new file mode 100755 index 00000000000..3f7c2a20871 --- /dev/null +++ b/fastdeploy/backends/tensorrt/option.h @@ -0,0 +1,41 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
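// --- Illustrative usage sketch (not part of the patch above) ---
// The write<T>/read<T> helpers in common.h are the serialization primitives the
// AdaptivePool2d plugin relies on: serialize() writes four int32_t dims plus a
// pooling-type flag, and the deserializing constructor reads them back in the
// same order. The buffer layout below mirrors that scheme; the concrete values
// are placeholders for the example only.
#include <cstdint>
#include <string>
#include <vector>

#include "fastdeploy/backends/tensorrt/ops/common.h"

void RoundTripPluginFields() {
  char buffer[5 * sizeof(int32_t)];  // matches getSerializationSize()

  char* w = buffer;
  for (int32_t dim : {1, 3, 7, 7}) {
    fastdeploy::write(w, dim);                    // append value, advance cursor
  }
  fastdeploy::write(w, static_cast<int32_t>(0));  // 0 => "avg", 1 => "max"

  const char* r = buffer;
  std::vector<int32_t> output_size(4);
  for (auto& dim : output_size) {
    dim = fastdeploy::read<int32_t>(r);           // read back in the same order
  }
  std::string pooling_type =
      fastdeploy::read<int32_t>(r) == 0 ? "avg" : "max";
}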
+ +#pragma once +#include +#include +#include +#include + +namespace fastdeploy { + +struct TrtBackendOption { + std::string model_file = ""; // Path of model file + std::string params_file = ""; // Path of parameters file, can be empty + // format of input model + ModelFormat model_format = ModelFormat::AUTOREC; + + int gpu_id = 0; + bool enable_fp16 = false; + bool enable_int8 = false; + size_t max_batch_size = 32; + size_t max_workspace_size = 1 << 30; + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + std::string serialize_file = ""; + bool enable_pinned_memory = false; + void* external_stream_ = nullptr; +}; +} // namespace fastdeploy diff --git a/fastdeploy/backends/tensorrt/trt_backend.cc b/fastdeploy/backends/tensorrt/trt_backend.cc index 3a8659acee5..bdd23c8d606 100755 --- a/fastdeploy/backends/tensorrt/trt_backend.cc +++ b/fastdeploy/backends/tensorrt/trt_backend.cc @@ -134,9 +134,9 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file, int calibration_cache_size = 0; if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), &model_content_ptr, &model_content_size, 11, true, - verbose, true, true, true, ops.data(), - 1, "tensorrt", - &calibration_cache_ptr, &calibration_cache_size, "", &save_external_)) { + verbose, true, true, true, ops.data(), 1, "tensorrt", + &calibration_cache_ptr, &calibration_cache_size, "", + &save_external_)) { FDERROR << "Error occured while export PaddlePaddle to ONNX format." << std::endl; return false; @@ -152,11 +152,11 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file, calibration_str_ = calibration_str; delete[] calibration_cache_ptr; } - if(save_external_){ + if (save_external_) { model_file_name_ = "model.onnx"; std::fstream f(model_file_name_, std::ios::out); FDASSERT(f.is_open(), "Can not open file: %s to save model.", - model_file_name_.c_str()); + model_file_name_.c_str()); f << onnx_model_proto; f.close(); return InitFromOnnx(model_file_name_, option, false); @@ -215,13 +215,14 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file, outputs_desc_.resize(onnx_reader.num_outputs); for (int i = 0; i < onnx_reader.num_inputs; ++i) { std::string name(onnx_reader.inputs[i].name); - std::vector shape( - onnx_reader.inputs[i].shape, - onnx_reader.inputs[i].shape + onnx_reader.inputs[i].rank); + std::vector shape(onnx_reader.inputs[i].shape, + onnx_reader.inputs[i].shape + + onnx_reader.inputs[i].rank); inputs_desc_[i].name = name; inputs_desc_[i].shape.assign(shape.begin(), shape.end()); inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype); - inputs_desc_[i].original_dtype = ReaderDtypeToFDDtype(onnx_reader.inputs[i].dtype); + inputs_desc_[i].original_dtype = + ReaderDtypeToFDDtype(onnx_reader.inputs[i].dtype); auto info = ShapeRangeInfo(shape); info.name = name; auto iter_min = option.min_shape.find(name); @@ -237,9 +238,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file, for (int i = 0; i < onnx_reader.num_outputs; ++i) { std::string name(onnx_reader.outputs[i].name); - std::vector shape( - onnx_reader.outputs[i].shape, - onnx_reader.outputs[i].shape + onnx_reader.outputs[i].rank); + std::vector shape(onnx_reader.outputs[i].shape, + onnx_reader.outputs[i].shape + + onnx_reader.outputs[i].rank); outputs_desc_[i].name = name; outputs_desc_[i].shape.assign(shape.begin(), shape.end()); outputs_desc_[i].dtype = @@ -252,10 +253,10 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file, stream_ = reinterpret_cast(option_.external_stream_); } else { 
FDASSERT(cudaStreamCreate(&stream_) == 0, - "[ERROR] Error occurs while calling cudaStreamCreate()."); + "[ERROR] Error occurs while calling cudaStreamCreate()."); } - if(save_external_){ + if (save_external_) { onnx_content.clear(); onnx_content = model_file_name_; } @@ -283,8 +284,7 @@ int TrtBackend::ShapeRangeInfoUpdated(const std::vector& inputs) { } bool TrtBackend::Infer(std::vector& inputs, - std::vector* outputs, - bool copy_to_fd) { + std::vector* outputs, bool copy_to_fd) { if (inputs.size() != NumInputs()) { FDERROR << "Require " << NumInputs() << "inputs, but get " << inputs.size() << "." << std::endl; @@ -297,7 +297,8 @@ bool TrtBackend::Infer(std::vector& inputs, << "TensorRT engine will be rebuilt once shape range information " "changed, this may take lots of time, you can set a proper shape " "range before loading model to avoid rebuilding process. refer " - "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/faq/" + "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/" + "faq/" "tensorrt_tricks.md for more details." << std::endl; BuildTrtEngine(); @@ -314,38 +315,42 @@ bool TrtBackend::Infer(std::vector& inputs, for (size_t i = 0; i < outputs->size(); ++i) { // if the final output tensor's dtype is different from the model output tensor's dtype, // then we need cast the data to the final output's dtype - auto model_output_dtype = GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype()); + auto model_output_dtype = + GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype()); if ((*outputs)[i].dtype != model_output_dtype) { FDTensor output_tensor; - output_tensor.SetExternalData((*outputs)[i].shape, model_output_dtype, - outputs_device_buffer_[(*outputs)[i].name].data(), - Device::GPU); - - casted_output_tensors_[(*outputs)[i].name].Resize((*outputs)[i].shape, (*outputs)[i].dtype, - (*outputs)[i].name, Device::GPU); - function::CudaCast(output_tensor, &casted_output_tensors_[(*outputs)[i].name], stream_); - if(!copy_to_fd) { - (*outputs)[i].SetExternalData((*outputs)[i].shape, model_output_dtype, - casted_output_tensors_[(*outputs)[i].name].MutableData(), - Device::GPU, option_.gpu_id); + output_tensor.SetExternalData( + (*outputs)[i].shape, model_output_dtype, + outputs_device_buffer_[(*outputs)[i].name].data(), Device::GPU); + + casted_output_tensors_[(*outputs)[i].name].Resize( + (*outputs)[i].shape, (*outputs)[i].dtype, (*outputs)[i].name, + Device::GPU); + function::CudaCast(output_tensor, + &casted_output_tensors_[(*outputs)[i].name], stream_); + if (!copy_to_fd) { + (*outputs)[i].SetExternalData( + (*outputs)[i].shape, model_output_dtype, + casted_output_tensors_[(*outputs)[i].name].MutableData(), + Device::GPU, option_.gpu_id); } } else { casted_output_tensors_[(*outputs)[i].name].SetExternalData( (*outputs)[i].shape, model_output_dtype, - outputs_device_buffer_[(*outputs)[i].name].data(), - Device::GPU); + outputs_device_buffer_[(*outputs)[i].name].data(), Device::GPU); } } if (copy_to_fd) { for (size_t i = 0; i < outputs->size(); ++i) { - FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(), - casted_output_tensors_[(*outputs)[i].name].Data(), - (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost, - stream_) == 0, - "[ERROR] Error occurs while copy memory from GPU to CPU."); + FDASSERT( + cudaMemcpyAsync((*outputs)[i].Data(), + casted_output_tensors_[(*outputs)[i].name].Data(), + (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost, + stream_) == 0, + "[ERROR] Error occurs while copy memory from GPU to CPU."); } 
FDASSERT(cudaStreamSynchronize(stream_) == cudaSuccess, - "[ERROR] Error occurs while sync cuda stream."); + "[ERROR] Error occurs while sync cuda stream."); } return true; @@ -356,10 +361,12 @@ void TrtBackend::GetInputOutputInfo() { std::unordered_map inputs_original_dtype_map; std::unordered_map outputs_original_dtype_map; for (size_t i = 0; i < inputs_desc_.size(); ++i) { - inputs_original_dtype_map[inputs_desc_[i].name] = inputs_desc_[i].original_dtype; + inputs_original_dtype_map[inputs_desc_[i].name] = + inputs_desc_[i].original_dtype; } for (size_t i = 0; i < outputs_desc_.size(); ++i) { - outputs_original_dtype_map[outputs_desc_[i].name] = outputs_desc_[i].original_dtype; + outputs_original_dtype_map[outputs_desc_[i].name] = + outputs_desc_[i].original_dtype; } // Re-read the tensor infos from TRT model and write into inputs_desc_ and outputs_desc_ @@ -373,12 +380,18 @@ void TrtBackend::GetInputOutputInfo() { auto shape = ToVec(engine_->getBindingDimensions(i)); auto dtype = engine_->getBindingDataType(i); if (engine_->bindingIsInput(i)) { - auto original_dtype = inputs_original_dtype_map.count(name) ? inputs_original_dtype_map[name] : GetFDDataType(dtype); - inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype, original_dtype}); + auto original_dtype = inputs_original_dtype_map.count(name) + ? inputs_original_dtype_map[name] + : GetFDDataType(dtype); + inputs_desc_.emplace_back( + TrtValueInfo{name, shape, dtype, original_dtype}); inputs_device_buffer_[name] = FDDeviceBuffer(dtype); } else { - auto original_dtype = outputs_original_dtype_map.count(name) ? outputs_original_dtype_map[name] : GetFDDataType(dtype); - outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype, original_dtype}); + auto original_dtype = outputs_original_dtype_map.count(name) + ? 
outputs_original_dtype_map[name] + : GetFDDataType(dtype); + outputs_desc_.emplace_back( + TrtValueInfo{name, shape, dtype, original_dtype}); outputs_device_buffer_[name] = FDDeviceBuffer(dtype); casted_output_tensors_[name] = FDTensor(); } @@ -391,8 +404,9 @@ void TrtBackend::SetInputs(const std::vector& inputs) { for (const auto& item : inputs) { // auto idx = engine_->getBindingIndex(item.name.c_str()); auto iter = io_name_index_.find(item.name); - FDASSERT(iter != io_name_index_.end(), "TRTBackend SetInputs not find name:%s", item.name.c_str()); - auto idx = iter->second; + FDASSERT(iter != io_name_index_.end(), + "TRTBackend SetInputs not find name:%s", item.name.c_str()); + auto idx = iter->second; std::vector shape(item.shape.begin(), item.shape.end()); auto dims = ToDims(shape); context_->setBindingDimensions(idx, dims); @@ -424,9 +438,8 @@ void TrtBackend::SetInputs(const std::vector& inputs) { "Error occurs while copy memory from CPU to GPU."); } else { FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(), - item.Data(), - item.Nbytes(), cudaMemcpyHostToDevice, - stream_) == 0, + item.Data(), item.Nbytes(), + cudaMemcpyHostToDevice, stream_) == 0, "Error occurs while copy memory from CPU to GPU."); } } @@ -443,8 +456,10 @@ void TrtBackend::AllocateOutputsBuffer(std::vector* outputs, for (size_t i = 0; i < outputs_desc_.size(); ++i) { // auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str()); auto idx_iter = io_name_index_.find(outputs_desc_[i].name); - FDASSERT(idx_iter != io_name_index_.end(), "TRTBackend Outputs not find name:%s", outputs_desc_[i].name.c_str()); - auto idx = idx_iter->second; + FDASSERT(idx_iter != io_name_index_.end(), + "TRTBackend Outputs not find name:%s", + outputs_desc_[i].name.c_str()); + auto idx = idx_iter->second; auto output_dims = context_->getBindingDimensions(idx); // find the original index of output @@ -457,23 +472,22 @@ void TrtBackend::AllocateOutputsBuffer(std::vector* outputs, // Allocate output buffer memory outputs_device_buffer_[outputs_desc_[i].name].resize(output_dims); - + // binding output buffer - bindings_[idx] = outputs_device_buffer_[outputs_desc_[i].name].data(); - + bindings_[idx] = outputs_device_buffer_[outputs_desc_[i].name].data(); + // set user's outputs info std::vector shape(output_dims.d, output_dims.d + output_dims.nbDims); - if(copy_to_fd) { + if (copy_to_fd) { (*outputs)[ori_idx].is_pinned_memory = option_.enable_pinned_memory; (*outputs)[ori_idx].Resize(shape, outputs_desc_[i].original_dtype, outputs_desc_[i].name); } else { (*outputs)[ori_idx].name = outputs_desc_[i].name; (*outputs)[ori_idx].SetExternalData( - shape, outputs_desc_[i].original_dtype, - bindings_[idx], Device::GPU, - option_.gpu_id); + shape, outputs_desc_[i].original_dtype, bindings_[idx], Device::GPU, + option_.gpu_id); } } } @@ -587,7 +601,8 @@ bool TrtBackend::BuildTrtEngine() { if (option_.serialize_file != "") { FDINFO << "Serialize TensorRTEngine to local file " << option_.serialize_file << "." << std::endl; - std::ofstream engine_file(option_.serialize_file.c_str(), std::ios::binary | std::ios::out); + std::ofstream engine_file(option_.serialize_file.c_str(), + std::ios::binary | std::ios::out); if (!engine_file) { FDERROR << "Failed to open " << option_.serialize_file << " to write." 
<< std::endl; @@ -628,10 +643,11 @@ bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) { return false; } bool model_parser; - if(save_external_){ - model_parser=!parser_->parseFromFile(onnx_model_buffer.c_str(), 0); - }else{ - model_parser = !parser_->parse(onnx_model_buffer.data(), onnx_model_buffer.size()); + if (save_external_) { + model_parser = !parser_->parseFromFile(onnx_model_buffer.c_str(), 0); + } else { + model_parser = + !parser_->parse(onnx_model_buffer.data(), onnx_model_buffer.size()); } if (model_parser) { FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl; @@ -665,7 +681,8 @@ bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) { "should be noticed that FastDeploy will rebuild the engine while " "new input shape is out of the collected shape range, this may " "bring some time consuming problem, refer " - "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/faq/" + "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/" + "faq/" "tensorrt_tricks.md for more details." << std::endl; initialized_ = true; @@ -721,27 +738,24 @@ std::vector TrtBackend::GetOutputInfos() { return infos; } -std::unique_ptr TrtBackend::Clone(void *stream, int device_id) { +std::unique_ptr TrtBackend::Clone(void* stream, int device_id) { std::unique_ptr new_backend = utils::make_unique(); auto casted_backend = dynamic_cast(new_backend.get()); - if(device_id > 0 && device_id != option_.gpu_id) { + if (device_id > 0 && device_id != option_.gpu_id) { auto clone_option = option_; clone_option.gpu_id = device_id; clone_option.external_stream_ = stream; if (option_.model_format == ModelFormat::ONNX) { FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option), - "Clone model from ONNX failed while initialize TrtBackend."); + "Clone model from ONNX failed while initialize TrtBackend."); } else { - FDASSERT(casted_backend->InitFromPaddle(option_.model_file, - option_.params_file, clone_option), - "Clone model from Paddle failed while initialize TrtBackend."); + FDASSERT(casted_backend->InitFromPaddle( + option_.model_file, option_.params_file, clone_option), + "Clone model from Paddle failed while initialize TrtBackend."); } - FDWARNING << "The target device id:" - << device_id - << " is different from current device id:" - << option_.gpu_id - << ", cannot share memory with current engine." - << std::endl; + FDWARNING << "The target device id:" << device_id + << " is different from current device id:" << option_.gpu_id + << ", cannot share memory with current engine." 
<< std::endl; return new_backend; } cudaSetDevice(option_.gpu_id); @@ -750,12 +764,15 @@ std::unique_ptr TrtBackend::Clone(void *stream, int device_id) { casted_backend->stream_ = reinterpret_cast(stream); } else { FDASSERT(cudaStreamCreate(&casted_backend->stream_) == 0, - "[ERROR] Error occurs while clone calling cudaStreamCreate()."); + "[ERROR] Error occurs while clone calling cudaStreamCreate()."); } casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end()); - casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end()); - casted_backend->outputs_order_.insert(outputs_order_.begin(), outputs_order_.end()); - casted_backend->shape_range_info_.insert(shape_range_info_.begin(), shape_range_info_.end()); + casted_backend->outputs_desc_.assign(outputs_desc_.begin(), + outputs_desc_.end()); + casted_backend->outputs_order_.insert(outputs_order_.begin(), + outputs_order_.end()); + casted_backend->shape_range_info_.insert(shape_range_info_.begin(), + shape_range_info_.end()); casted_backend->engine_ = engine_; casted_backend->context_ = std::shared_ptr( casted_backend->engine_->createExecutionContext()); diff --git a/fastdeploy/backends/tensorrt/trt_backend.h b/fastdeploy/backends/tensorrt/trt_backend.h index 425087fad79..0ea03c65946 100755 --- a/fastdeploy/backends/tensorrt/trt_backend.h +++ b/fastdeploy/backends/tensorrt/trt_backend.h @@ -25,6 +25,7 @@ #include "NvOnnxParser.h" #include "fastdeploy/backends/backend.h" #include "fastdeploy/backends/tensorrt/utils.h" +#include "fastdeploy/backends/tensorrt/option.h" #include "fastdeploy/utils/unique_ptr.h" class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { @@ -58,29 +59,10 @@ namespace fastdeploy { struct TrtValueInfo { std::string name; std::vector shape; - nvinfer1::DataType dtype; // dtype of TRT model + nvinfer1::DataType dtype; // dtype of TRT model FDDataType original_dtype; // dtype of original ONNX/Paddle model }; -struct TrtBackendOption { - std::string model_file = ""; // Path of model file - std::string params_file = ""; // Path of parameters file, can be empty - // format of input model - ModelFormat model_format = ModelFormat::AUTOREC; - - int gpu_id = 0; - bool enable_fp16 = false; - bool enable_int8 = false; - size_t max_batch_size = 32; - size_t max_workspace_size = 1 << 30; - std::map> max_shape; - std::map> min_shape; - std::map> opt_shape; - std::string serialize_file = ""; - bool enable_pinned_memory = false; - void* external_stream_ = nullptr; -}; - std::vector toVec(const nvinfer1::Dims& dim); size_t TrtDataTypeSize(const nvinfer1::DataType& dtype); FDDataType GetFDDataType(const nvinfer1::DataType& dtype); @@ -97,8 +79,7 @@ class TrtBackend : public BaseBackend { bool InitFromOnnx(const std::string& model_file, const TrtBackendOption& option = TrtBackendOption(), bool from_memory_buffer = false); - bool Infer(std::vector& inputs, - std::vector* outputs, + bool Infer(std::vector& inputs, std::vector* outputs, bool copy_to_fd = true) override; int NumInputs() const { return inputs_desc_.size(); } @@ -107,7 +88,7 @@ class TrtBackend : public BaseBackend { TensorInfo GetOutputInfo(int index); std::vector GetInputInfos() override; std::vector GetOutputInfos() override; - std::unique_ptr Clone(void *stream = nullptr, + std::unique_ptr Clone(void* stream = nullptr, int device_id = -1) override; ~TrtBackend() { diff --git a/fastdeploy/backends/tensorrt/utils.h b/fastdeploy/backends/tensorrt/utils.h index af62c445ee6..3d4c11f319e 100644 --- 
a/fastdeploy/backends/tensorrt/utils.h +++ b/fastdeploy/backends/tensorrt/utils.h @@ -32,17 +32,15 @@ namespace fastdeploy { struct FDInferDeleter { - template - void operator()(T* obj) const { + template void operator()(T* obj) const { if (obj) { delete obj; -// obj->destroy(); + // obj->destroy(); } } }; -template -using FDUniquePtr = std::unique_ptr; +template using FDUniquePtr = std::unique_ptr; int64_t Volume(const nvinfer1::Dims& d); @@ -72,17 +70,13 @@ std::ostream& operator<<(std::ostream& out, const std::vector& vec) { return out; } -template -class FDGenericBuffer { +template class FDGenericBuffer { public: //! //! \brief Construct an empty buffer. //! explicit FDGenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT) - : mSize(0), - mCapacity(0), - mType(type), - mBuffer(nullptr), + : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr), mExternal_buffer(nullptr) {} //! @@ -104,9 +98,7 @@ class FDGenericBuffer { } FDGenericBuffer(FDGenericBuffer&& buf) - : mSize(buf.mSize), - mCapacity(buf.mCapacity), - mType(buf.mType), + : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType), mBuffer(buf.mBuffer) { buf.mSize = 0; buf.mCapacity = 0; @@ -133,7 +125,8 @@ class FDGenericBuffer { //! \brief Returns pointer to underlying array. //! void* data() { - if (mExternal_buffer != nullptr) return mExternal_buffer; + if (mExternal_buffer != nullptr) + return mExternal_buffer; return mBuffer; } @@ -141,7 +134,8 @@ class FDGenericBuffer { //! \brief Returns pointer to underlying array. //! const void* data() const { - if (mExternal_buffer != nullptr) return mExternal_buffer; + if (mExternal_buffer != nullptr) + return mExternal_buffer; return mBuffer; } @@ -213,8 +207,8 @@ class FDGenericBuffer { }; using FDDeviceBuffer = FDGenericBuffer; -using FDDeviceHostBuffer = FDGenericBuffer; +using FDDeviceHostBuffer = + FDGenericBuffer; class FDTrtLogger : public nvinfer1::ILogger { public: diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc index 21e3d30c394..420e03ff74d 100755 --- a/fastdeploy/core/fd_type.cc +++ b/fastdeploy/core/fd_type.cc @@ -56,6 +56,9 @@ std::string Str(const Device& d) { case Device::RKNPU: out = "Device::RKNPU"; break; + case Device::SOPHGOTPUD: + out = "Device::SOPHGOTPUD"; + break; case Device::IPU: out = "Device::IPU"; break; @@ -85,6 +88,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){ case Device::RKNPU: out << "Device::RKNPU"; break; + case Device::SOPHGOTPUD: + out << "Device::SOPHGOTPUD"; + break; case Device::TIMVX: out << "Device::TIMVX"; break; @@ -205,8 +211,10 @@ std::string Str(const ModelFormat& f) { return "ModelFormat::PADDLE"; } else if (f == ModelFormat::ONNX) { return "ModelFormat::ONNX"; - }else if (f == ModelFormat::RKNN) { + } else if (f == ModelFormat::RKNN) { return "ModelFormat::RKNN"; + } else if (f == ModelFormat::SOPHGO) { + return "ModelFormat::SOPHGO"; } else if (f == ModelFormat::TORCHSCRIPT) { return "ModelFormat::TORCHSCRIPT"; } @@ -220,6 +228,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { out << "ModelFormat::ONNX"; } else if (format == ModelFormat::RKNN) { out << "ModelFormat::RKNN"; + } else if (format == ModelFormat::SOPHGO) { + out << "ModelFormat::SOPHGO"; } else if (format == ModelFormat::TORCHSCRIPT) { out << "ModelFormat::TORCHSCRIPT"; } diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h index fda26c1c87a..5b49f1e8603 100755 --- a/fastdeploy/core/fd_type.h +++ b/fastdeploy/core/fd_type.h @@ -22,7 +22,8 @@ namespace fastdeploy { -enum 
FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX, KUNLUNXIN, ASCEND}; +enum FASTDEPLOY_DECL Device {CPU, GPU, RKNPU, IPU, TIMVX, KUNLUNXIN, ASCEND, + SOPHGOTPUD}; FASTDEPLOY_DECL std::string Str(const Device& d); @@ -72,6 +73,7 @@ enum ModelFormat { ONNX, ///< Model with ONNX format RKNN, ///< Model with RKNN format TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format }; FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, diff --git a/fastdeploy/encryption.h b/fastdeploy/encryption.h new file mode 100755 index 00000000000..744bc317255 --- /dev/null +++ b/fastdeploy/encryption.h @@ -0,0 +1,20 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/core/config.h" +#ifdef ENABLE_ENCRYPTION +#include "fastdeploy/encryption/include/decrypt.h" +#include "fastdeploy/encryption/include/encrypt.h" +#endif diff --git a/fastdeploy/encryption/encryption_pybind.cc b/fastdeploy/encryption/encryption_pybind.cc new file mode 100755 index 00000000000..29935020469 --- /dev/null +++ b/fastdeploy/encryption/encryption_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { + +void BindEncryption(pybind11::module& m) { + m.def("encrypt", [](const std::string& input, const std::string& key) { + return Encrypt(input, key); + }); + m.def("decrypt", [](const std::string& cipher, const std::string& key) { + return Decrypt(cipher, key); + }); + m.def("generate_key", []() { + return GenerateRandomKey(); + }); +} +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/include/decrypt.h b/fastdeploy/encryption/include/decrypt.h new file mode 100755 index 00000000000..35d88c6f55c --- /dev/null +++ b/fastdeploy/encryption/include/decrypt.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +#include "fastdeploy/utils/utils.h" + +#ifndef PADDLE_MODEL_PROTECT_API_PADDLE_MODEL_DECRYPT_H +#define PADDLE_MODEL_PROTECT_API_PADDLE_MODEL_DECRYPT_H +namespace fastdeploy { +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief check stream is encrypted or not + * + * \param[in] cipher_stream The encrypted stream + * \return 0 if stream is encrypted. + */ +FASTDEPLOY_DECL int CheckStreamEncrypted(std::istream& cipher_stream); + + +/** \brief decrypt an encrypted stream + * + * \param[in] cipher_stream The encrypted stream + * \param[in] plain_stream The decrypted stream + * \param[in] key_base64 The key for decryption + * \return 0 if decrypt success. + */ +FASTDEPLOY_DECL int DecryptStream(std::istream& cipher_stream, + std::ostream& plain_stream, + const std::string& key_base64); + + +/** \brief decrypt an encrypted string + * + * \param[in] cipher The encrypted string + * \param[in] key The key for decryption + * \return The decrypted string + */ +FASTDEPLOY_DECL std::string Decrypt(const std::string& cipher, + const std::string& key); +#ifdef __cplusplus +} +#endif +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_API_PADDLE_MODEL_DECRYPT_H diff --git a/fastdeploy/encryption/include/encrypt.h b/fastdeploy/encryption/include/encrypt.h new file mode 100755 index 00000000000..0357a429c3e --- /dev/null +++ b/fastdeploy/encryption/include/encrypt.h @@ -0,0 +1,58 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +#include "fastdeploy/utils/utils.h" + +#ifndef PADDLE_MODEL_PROTECT_API_PADDLE_MODEL_ENCRYPT_H +#define PADDLE_MODEL_PROTECT_API_PADDLE_MODEL_ENCRYPT_H +namespace fastdeploy { +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief generate a random key(base64-32bytes) for an encrypted model + * + * \return std::string key + */ +FASTDEPLOY_DECL std::string GenerateRandomKey(); + +/** \brief encrypt a std::istream with key + * + * \param[in] keydata The key(base64-32bytes) for encryption + * \param[in] in_stream The plain stream + * \param[in] out_stream The ecrypted stream + * \return true if encrypt successed, otherwise false + */ +FASTDEPLOY_DECL int EncryptStream(const std::string &keydata, + std::istream& in_stream, + std::ostream& out_stream); + +/** \brief encrypt a string with key + * + * \param[in] input The input string for encryption + * \param[in] key If not given by user, generate key automatically. 
+ * \return std::vector [encrypted string, key] + */ +FASTDEPLOY_DECL std::vector Encrypt(const std::string& input, + const std::string& key = GenerateRandomKey()); + +#ifdef __cplusplus +} +#endif +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_API_PADDLE_MODEL_ENCRYPT_H diff --git a/fastdeploy/encryption/include/model_code.h b/fastdeploy/encryption/include/model_code.h new file mode 100755 index 00000000000..4ff904e8cf1 --- /dev/null +++ b/fastdeploy/encryption/include/model_code.h @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef DYGRAPH_DEPLOY_CPP_ENCRYPTION_INCLUDE_MODEL_CODE_H_ +#define DYGRAPH_DEPLOY_CPP_ENCRYPTION_INCLUDE_MODEL_CODE_H_ +namespace fastdeploy { +#ifdef __cplusplus +extern "C" { +#endif + +enum { + CODE_OK = 0, + CODE_OPEN_FAILED = 100, + CODE_READ_FILE_PTR_IS_NULL = 101, + CODE_AES_GCM_ENCRYPT_FIALED = 102, + CODE_AES_GCM_DECRYPT_FIALED = 103, + CODE_KEY_NOT_MATCH = 104, + CODE_KEY_LENGTH_ABNORMAL = 105, + CODE_NOT_EXIST_DIR = 106, + CODE_FILES_EMPTY_WITH_DIR = 107, + CODE_MODEL_FILE_NOT_EXIST = 108, + CODE_PARAMS_FILE_NOT_EXIST = 109, + CODE_MODEL_YML_FILE_NOT_EXIST = 110, + CODE_MKDIR_FAILED = 111 +}; + +#ifdef __cplusplus +} +#endif +} // namespace fastdeploy +#endif // DYGRAPH_DEPLOY_CPP_ENCRYPTION_INCLUDE_MODEL_CODE_H_ diff --git a/fastdeploy/encryption/src/decrypt.cc b/fastdeploy/encryption/src/decrypt.cc new file mode 100755 index 00000000000..9fa917c6418 --- /dev/null +++ b/fastdeploy/encryption/src/decrypt.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
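// --- Illustrative usage sketch (not part of the patch above) ---
// Round-tripping a model buffer through the Encrypt/Decrypt API declared in
// encrypt.h and decrypt.h. Encrypt returns {cipher, key}; omitting the key
// lets GenerateRandomKey() supply one. The file name is a placeholder.
#include <fstream>
#include <sstream>
#include <string>

#include "fastdeploy/encryption/include/decrypt.h"
#include "fastdeploy/encryption/include/encrypt.h"

void EncryptThenDecryptModel() {
  std::ifstream fin("model.pdmodel", std::ios::binary);  // placeholder path
  std::stringstream buffer;
  buffer << fin.rdbuf();

  auto cipher_and_key = fastdeploy::Encrypt(buffer.str());
  const std::string& cipher = cipher_and_key[0];  // base64-encoded ciphertext
  const std::string& key = cipher_and_key[1];     // base64-encoded 32-byte key

  std::string plain = fastdeploy::Decrypt(cipher, key);
  // On success, plain holds the original model bytes again.
}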
+#include +#include +#include +#include +#include +#include +#include + +#include "fastdeploy/encryption/include/decrypt.h" +#include "fastdeploy/encryption/include/model_code.h" +#include "fastdeploy/encryption/util/include/crypto/aes_gcm.h" +#include "fastdeploy/encryption/util/include/crypto/base64.h" +#include "fastdeploy/encryption/util/include/io_utils.h" +#include "fastdeploy/encryption/util/include/log.h" +#include "fastdeploy/encryption/util/include/constant/constant_model.h" +#include "fastdeploy/encryption/util/include/system_utils.h" + +namespace fastdeploy { +/** + * 0 - encrypted + * 1 - unencrypt + */ +int CheckStreamEncrypted(std::istream& cipher_stream) { + return util::SystemUtils::check_file_encrypted(cipher_stream); +} + +int DecryptStream(std::istream& cipher_stream, + std::ostream& plain_stream, + const std::string& key_base64) { + int ret = CheckStreamEncrypted(cipher_stream); + if (ret != CODE_OK) { + LOGD("[M]check file encrypted failed, code: %d", ret); + return ret; + } + + std::string key_str = + baidu::base::base64::base64_decode(key_base64.c_str()); + int ret_check = util::SystemUtils::check_key_match(key_str, cipher_stream); + if (ret_check != CODE_OK) { + LOGD("[M]check key failed in decrypt_file, code: %d", ret_check); + return CODE_KEY_NOT_MATCH; + } + + std::string aes_key = key_str.substr(0, AES_GCM_KEY_LENGTH); + std::string aes_iv = key_str.substr(16, AES_GCM_IV_LENGTH); + + cipher_stream.seekg(0, std::ios::beg); + cipher_stream.seekg(0, std::ios::end); + int data_len = cipher_stream.tellg(); + cipher_stream.seekg(0, std::ios::beg); + size_t pos = constant::MAGIC_NUMBER_LEN + + constant::VERSION_LEN + constant::TAG_LEN; + + size_t cipher_len = data_len - pos; + std::unique_ptr model_cipher( + new unsigned char[cipher_len]); + cipher_stream.seekg(pos); // skip header + cipher_stream.read(reinterpret_cast(model_cipher.get()), + cipher_len); + + size_t plain_len = data_len - AES_GCM_TAG_LENGTH - pos; + std::unique_ptr model_plain(new unsigned char[plain_len]); + + int ret_decrypt_file = util::crypto::AesGcm::decrypt_aes_gcm( + model_cipher.get(), + cipher_len, + reinterpret_cast(aes_key.c_str()), + reinterpret_cast(aes_iv.c_str()), + model_plain.get(), + reinterpret_cast(plain_len)); + + if (ret_decrypt_file != CODE_OK) { + LOGD("[M]decrypt file failed, decrypt ret = %d", ret_decrypt_file); + return ret_decrypt_file; + } + + plain_stream.write(reinterpret_cast(model_plain.get()), + plain_len); + + return CODE_OK; +} + +std::string Decrypt(const std::string& cipher, + const std::string& key) { + std::string input = baidu::base::base64::base64_decode(cipher); + std::istringstream isst_cipher(input); + std::ostringstream osst_plain; + int ret = DecryptStream(isst_cipher, osst_plain, key); + if (ret != 0) { + FDERROR << ret << ", Failed decrypt " << std::endl; + return ""; + } + return osst_plain.str(); +} + +} //namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/src/encrypt.cc b/fastdeploy/encryption/src/encrypt.cc new file mode 100755 index 00000000000..942e88a597d --- /dev/null +++ b/fastdeploy/encryption/src/encrypt.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include + +#include "fastdeploy/encryption/include/model_code.h" +#include "fastdeploy/encryption/include/encrypt.h" +#include "fastdeploy/encryption/util/include/constant/constant_model.h" +#include "fastdeploy/encryption/util/include/crypto/aes_gcm.h" +#include "fastdeploy/encryption/util/include/crypto/sha256_utils.h" +#include "fastdeploy/encryption/util/include/crypto/base64.h" +#include "fastdeploy/encryption/util/include/system_utils.h" +#include "fastdeploy/encryption/util/include/io_utils.h" +#include "fastdeploy/encryption/util/include/log.h" + +namespace fastdeploy { + +std::string GenerateRandomKey() { + std::string tmp = util::SystemUtils::random_key_iv(AES_GCM_KEY_LENGTH); + // return util::crypto::Base64Utils::encode(tmp); + return baidu::base::base64::base64_encode(tmp); +} + +int EncryptStream(std::istream& in_stream, std::ostream& out_stream, + const std::string &keydata) { + std::string key_str = baidu::base::base64::base64_decode(keydata); + if (key_str.length() != 32) { + return CODE_KEY_LENGTH_ABNORMAL; + } + + in_stream.seekg(0, std::ios::beg); + in_stream.seekg(0, std::ios::end); + size_t plain_len = in_stream.tellg(); + in_stream.seekg(0, std::ios::beg); + + std::unique_ptr plain(new unsigned char[plain_len]); + in_stream.read(reinterpret_cast(plain.get()), plain_len); + + std::string aes_key = key_str.substr(0, AES_GCM_KEY_LENGTH); + std::string aes_iv = key_str.substr(16, AES_GCM_IV_LENGTH); + + std::unique_ptr cipher( + new unsigned char[plain_len + AES_GCM_TAG_LENGTH]); + size_t cipher_len = 0; + int ret_encrypt = util::crypto::AesGcm::encrypt_aes_gcm( + plain.get(), + plain_len, + reinterpret_cast(aes_key.c_str()), + reinterpret_cast(aes_iv.c_str()), + cipher.get(), + reinterpret_cast(cipher_len)); + if (ret_encrypt != CODE_OK) { + LOGD("[M]aes encrypt ret code: %d", ret_encrypt); + return CODE_AES_GCM_ENCRYPT_FIALED; + } + + std::string randstr = util::SystemUtils::random_str(constant::TAG_LEN); + std::string aes_key_iv(key_str); + std::string sha256_key_iv = + util::crypto::SHA256Utils::sha256_string(aes_key_iv); + for (int i = 0; i < 64; ++i) { + randstr[i] = sha256_key_iv[i]; + } + + size_t header_len = constant::MAGIC_NUMBER_LEN + + constant::VERSION_LEN + constant::TAG_LEN; + out_stream.write(constant::MAGIC_NUMBER.c_str(), + constant::MAGIC_NUMBER_LEN); + out_stream.write(constant::VERSION.c_str(), constant::VERSION_LEN); + out_stream.write(randstr.c_str(), constant::TAG_LEN); + out_stream.write(reinterpret_cast(cipher.get()), cipher_len); + + return CODE_OK; +} + +std::vector Encrypt(const std::string& input, + const std::string& key) { + + std::istringstream isst(input); + std::ostringstream osst; + int ret = EncryptStream(isst, osst, key); + if (ret != 0) { + FDERROR << ret << ", Failed encrypt " << std::endl; + return {"", ""}; + } + + return {baidu::base::base64::base64_encode(osst.str()), key}; +} + +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/util/include/constant/constant_model.h b/fastdeploy/encryption/util/include/constant/constant_model.h 
new file mode 100755 index 00000000000..aaebf52ba0f --- /dev/null +++ b/fastdeploy/encryption/util/include/constant/constant_model.h @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#ifndef PADDLE_MODEL_PROTECT_CONSTANT_CONSTANT_MODEL_H +#define PADDLE_MODEL_PROTECT_CONSTANT_CONSTANT_MODEL_H +namespace fastdeploy { +namespace constant { + +const static std::string MAGIC_NUMBER = "PADDLE"; // NOLINT +const static std::string VERSION = "1"; // NOLINT + +const static int MAGIC_NUMBER_LEN = 6; // NOLINT +const static int VERSION_LEN = 1; // NOLINT +const static int TAG_LEN = 128; // NOLINT + +} // namespace constant +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_CONSTANT_CONSTANT_MODEL_H diff --git a/fastdeploy/encryption/util/include/crypto/aes_gcm.h b/fastdeploy/encryption/util/include/crypto/aes_gcm.h new file mode 100755 index 00000000000..2c86f98b30f --- /dev/null +++ b/fastdeploy/encryption/util/include/crypto/aes_gcm.h @@ -0,0 +1,130 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef PADDLE_MODEL_PROTECT_UTIL_CRYPTO_AES_GCM_H +#define PADDLE_MODEL_PROTECT_UTIL_CRYPTO_AES_GCM_H + +#include +#include +#include +#include + +#include "fastdeploy/encryption/util/include/crypto/basic.h" + +namespace fastdeploy { +namespace util { +namespace crypto { +// aes key 32 byte for 256 bit +#define AES_GCM_KEY_LENGTH 32 + +// aes tag 16 byte for 128 bit +#define AES_GCM_TAG_LENGTH 16 + +// aes iv 12 byte for 96 bit +#define AES_GCM_IV_LENGTH 16 + +class AesGcm { + public: + /** + * \brief initial aes-gcm-256 context use key & iv + * + * \note initial aes-gcm-256 context use key & iv. gcm mode + * will generate a tag(16 byte), so the ciphertext's length + * should be longer 16 byte than plaintext. 
+ * + * + * \param plaintext plain text to be encrypted(in) + * \param len plain text's length(in) + * \param key aes key (in) + * \param iv aes iv (in) + * \param ciphertext encrypted text(out) + * \param out_len encrypted length(out) + * + * \return return 0 if successful + * -1 EVP_CIPHER_CTX_new or aes_gcm_key error + * -2 EVP_EncryptUpdate error + * -3 EVP_EncryptFinal_ex error + * -4 EVP_CIPHER_CTX_ctrl error + */ + static int encrypt_aes_gcm(const unsigned char* plaintext, const int& len, + const unsigned char* key, const unsigned char* iv, + unsigned char* ciphertext, + int& out_len); // NOLINT + /** + * \brief encrypt using aes-gcm-256 + * + * \note encrypt using aes-gcm-256 + * + * \param ciphertext cipher text to be decrypted(in) + * \param len plain text's length(in) + * \param key aes key (in) + * \param iv aes iv (in) + * \param plaintext decrypted text(out) + * \param out_len decrypted length(out) + * + * \return return 0 if successful + * -1 EVP_CIPHER_CTX_new or aes_gcm_key error + * -2 EVP_DecryptUpdate error + * -3 EVP_CIPHER_CTX_ctrl error + * -4 EVP_DecryptFinal_ex error + */ + static int decrypt_aes_gcm(const unsigned char* ciphertext, const int& len, + const unsigned char* key, const unsigned char* iv, + unsigned char* plaintext, int& out_len); // NOLINT + + private: + /** + * \brief initial aes-gcm-256 context use key & iv + * + * \note initial aes-gcm-256 context use key & iv + * + * \param key aes key (in) + * \param iv aes iv (in) + * \param e_ctx encryption context(out) + * \param d_ctx decryption context(out) + * + * \return return 0 if successful + * -1 EVP_xxcryptInit_ex error + * -2 EVP_CIPHER_CTX_ctrl error + * -3 EVP_xxcryptInit_ex error + */ + static int aes_gcm_key(const unsigned char* key, const unsigned char* iv, + EVP_CIPHER_CTX* e_ctx, EVP_CIPHER_CTX* d_ctx); + + /** + * \brief initial aes-gcm-256 context use key & iv + * + * \note initial aes-gcm-256 context use key & iv + * + * \param key aes key (in) + * \param iv aes iv (in) + * \param e_ctx encryption context(out) + * \param d_ctx decryption context(out) + * + * \return return 0 if successful + * -1 EVP_xxcryptInit_ex error + * -2 EVP_CIPHER_CTX_ctrl error + * -3 EVP_xxcryptInit_ex error + * -4 invalid key length or iv length + * -5 hex_to_byte error + */ + static int aes_gcm_key(const std::string& key_hex, const std::string& iv_hex, + EVP_CIPHER_CTX* e_ctx, EVP_CIPHER_CTX* d_ctx); +}; + +} // namespace crypto +} // namespace util +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_UTIL_CRYPTO_AES_GCM_H diff --git a/fastdeploy/encryption/util/include/crypto/base64.h b/fastdeploy/encryption/util/include/crypto/base64.h new file mode 100755 index 00000000000..a1100d63ef8 --- /dev/null +++ b/fastdeploy/encryption/util/include/crypto/base64.h @@ -0,0 +1,33 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
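As a rough usage sketch, not part of this patch, the AesGcm helper declared above can be driven directly with raw buffers; the sizes follow the AES_GCM_KEY_LENGTH (32), AES_GCM_IV_LENGTH (16) and AES_GCM_TAG_LENGTH (16) macros, and the all-zero key and IV below are placeholders rather than a recommended derivation.

#include <cstring>

#include "fastdeploy/encryption/util/include/crypto/aes_gcm.h"

int AesGcmRoundTripDemo() {
  using fastdeploy::util::crypto::AesGcm;

  unsigned char key[AES_GCM_KEY_LENGTH] = {0};  // placeholder key
  unsigned char iv[AES_GCM_IV_LENGTH] = {0};    // placeholder IV

  const unsigned char plain[] = "hello fastdeploy";
  const int plain_len = static_cast<int>(sizeof(plain));

  // GCM appends a 16 byte tag, so the cipher buffer is plain_len + tag length.
  unsigned char cipher[sizeof(plain) + AES_GCM_TAG_LENGTH];
  int cipher_len = 0;
  if (AesGcm::encrypt_aes_gcm(plain, plain_len, key, iv, cipher, cipher_len) != 0) {
    return -1;
  }

  unsigned char decrypted[sizeof(plain)];
  int decrypted_len = 0;
  if (AesGcm::decrypt_aes_gcm(cipher, cipher_len, key, iv, decrypted,
                              decrypted_len) != 0) {
    return -1;
  }
  // A successful round trip reproduces the original plaintext.
  return std::memcmp(plain, decrypted, plain_len) == 0 ? 0 : -1;
}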
+#pragma once + +#include +#include + +#ifndef PADDLE_MODEL_PROTECT_UTIL_CRYPTO_BASE64_UTILS_H +#define PADDLE_MODEL_PROTECT_UTIL_CRYPTO_BASE64_UTILS_H +namespace fastdeploy { +namespace baidu { +namespace base { +namespace base64 { + +std::string base64_encode(const std::string& input); +std::string base64_decode(const std::string& input); + +} // namespace base64 +} // namespace base +} // namespace baidu +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_BASE64_UTILS_H diff --git a/fastdeploy/encryption/util/include/crypto/basic.h b/fastdeploy/encryption/util/include/crypto/basic.h new file mode 100755 index 00000000000..ed683aa6e09 --- /dev/null +++ b/fastdeploy/encryption/util/include/crypto/basic.h @@ -0,0 +1,78 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef PADDLE_MODEL_PROTECT_UTIL_BASIC_H +#define PADDLE_MODEL_PROTECT_UTIL_BASIC_H + +#include +#include +#include +#include +#include +namespace fastdeploy { +namespace util { +namespace crypto { + +class Basic { + public: + /** + * \brief byte to hex + * + * \note byte to hex. + * + * + * \param in_byte byte array(in) + * \param len byte array length(in) + * \param out_hex the hex string(in) + * + * + * \return return 0 if successful + */ + static int byte_to_hex(const unsigned char* in_byte, int len, + std::string& out_hex); // NOLINT + + /** + * \brief hex to byte + * + * \note hex to byte. + * + * + * \param in_hex the hex string(in) + * \param out_byte byte array(out) + * + * \return return 0 if successful + * -1 invalid in_hex + */ + static int hex_to_byte(const std::string& in_hex, unsigned char* out_byte); + + /** + * \brief get random char for length + * + * \note get random char for length + * + * + * \param array to be random(out) + * \param len array length(in) + * + * \return return 0 if successful + * -1 invalid parameters + */ + static int random(unsigned char* random, int len); +}; + +} // namespace crypto +} // namespace util +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_UTIL_BASIC_H diff --git a/fastdeploy/encryption/util/include/crypto/sha256_utils.h b/fastdeploy/encryption/util/include/crypto/sha256_utils.h new file mode 100755 index 00000000000..61718ab27ed --- /dev/null +++ b/fastdeploy/encryption/util/include/crypto/sha256_utils.h @@ -0,0 +1,40 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
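A small sketch, not part of this patch, of the base64 helpers declared above; the nested fastdeploy::baidu::base::base64 namespace mirrors base64.h, and the encode/decode pair should round-trip arbitrary byte strings without loss.

#include <cassert>
#include <string>

#include "fastdeploy/encryption/util/include/crypto/base64.h"

void Base64RoundTripDemo() {
  namespace b64 = fastdeploy::baidu::base::base64;

  const std::string raw = "fastdeploy";
  std::string encoded = b64::base64_encode(raw);   // "ZmFzdGRlcGxveQ=="
  std::string decoded = b64::base64_decode(encoded);
  assert(decoded == raw);  // encode then decode is lossless
}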
+#pragma once + +#include +#include + +#ifndef PADDLE_MODEL_PROTECT_UTIL_CRYPTO_SHA256_UTILS_H +#define PADDLE_MODEL_PROTECT_UTIL_CRYPTO_SHA256_UTILS_H +namespace fastdeploy { +namespace util { +namespace crypto { + +class SHA256Utils { + public: + static void sha256(const void* data, size_t len, unsigned char* md); + static std::vector sha256(const void* data, size_t len); + static std::vector sha256( + const std::vector& data); + static std::string sha256_string(const void* data, size_t len); + static std::string sha256_string(const std::vector& data); + static std::string sha256_string(const std::string& string); + static std::string sha256_file(const std::string& path); +}; + +} // namespace crypto +} // namespace util +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_UTIL_CRYPTO_SHA256_UTILS_H diff --git a/fastdeploy/encryption/util/include/io_utils.h b/fastdeploy/encryption/util/include/io_utils.h new file mode 100755 index 00000000000..836668e5f21 --- /dev/null +++ b/fastdeploy/encryption/util/include/io_utils.h @@ -0,0 +1,53 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#ifndef PADDLE_MODEL_PROTECT_IO_UTILS_H +#define PADDLE_MODEL_PROTECT_IO_UTILS_H +namespace fastdeploy { +namespace ioutil { + +int read_file(const char* file_path, unsigned char** dataptr, size_t* sizeptr); + +int read_with_pos_and_length(const char* file_path, unsigned char* dataptr, + size_t pos, size_t length); + +int read_with_pos(const char* file_path, size_t pos, unsigned char** dataptr, + size_t* sizeptr); + +int write_file(const char* file_path, const unsigned char* dataptr, + size_t sizeptr); + +int append_file(const char* file_path, const unsigned char* data, size_t len); + +size_t read_file_size(const char* file_path); + +int read_file_to_file(const char* src_path, const char* dst_path); + +int dir_exist_or_mkdir(const char* dir); + +/** + * @return files.size() + */ +int read_dir_files(const char* dir_path, + std::vector& files); // NOLINT + +} // namespace ioutil +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_IO_UTILS_H diff --git a/fastdeploy/encryption/util/include/log.h b/fastdeploy/encryption/util/include/log.h new file mode 100755 index 00000000000..361d44dd828 --- /dev/null +++ b/fastdeploy/encryption/util/include/log.h @@ -0,0 +1,24 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
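A brief sketch, not part of this patch, of SHA256Utils as declared above: sha256_string() yields the 64-character lowercase hex digest, the same form the key-check logic later in this patch embeds in the encrypted header. The file path below is a placeholder.

#include <iostream>
#include <string>

#include "fastdeploy/encryption/util/include/crypto/sha256_utils.h"

void Sha256Demo() {
  using fastdeploy::util::crypto::SHA256Utils;

  std::string digest = SHA256Utils::sha256_string(std::string("fastdeploy"));
  std::cout << digest.size() << std::endl;  // 64 hex characters

  // sha256_file() hashes a file on disk and returns "" if it cannot be opened.
  std::string file_digest = SHA256Utils::sha256_file("model.pdmodel");
  std::cout << (file_digest.empty() ? "open failed" : file_digest) << std::endl;
}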
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef PADDLE_MODEL_PROTECT_UTIL_LOG_H +#define PADDLE_MODEL_PROTECT_UTIL_LOG_H + +#include +namespace fastdeploy { +#define LOGD(fmt, ...) \ + printf("{%s:%u}:" fmt "\n", __FUNCTION__, __LINE__, ##__VA_ARGS__) +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_UTIL_LOG_H diff --git a/fastdeploy/encryption/util/include/system_utils.h b/fastdeploy/encryption/util/include/system_utils.h new file mode 100755 index 00000000000..96d0ee20bf5 --- /dev/null +++ b/fastdeploy/encryption/util/include/system_utils.h @@ -0,0 +1,42 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include + +#ifndef PADDLE_MODEL_PROTECT_SYSTEM_UTIL_H +#define PADDLE_MODEL_PROTECT_SYSTEM_UTIL_H +namespace fastdeploy { +namespace util { + +class SystemUtils { + public: + static std::string random_key_iv(int len); + static std::string random_str(int len); + static int check_key_match(const char* key, const char* filepath); + static int check_key_match(const std::string& key, + std::istream& cipher_stream); + static int check_file_encrypted(const char* filepath); + static int check_file_encrypted(std::istream& cipher_stream); + static int check_pattern_exist(const std::vector& vecs, + const std::string& pattern); + + private: + inline static int intN(int n); +}; + +} // namespace util +} // namespace fastdeploy +#endif // PADDLE_MODEL_PROTECT_SYSTEM_UTIL_H diff --git a/fastdeploy/encryption/util/src/crypto/aes_gcm.cc b/fastdeploy/encryption/util/src/crypto/aes_gcm.cc new file mode 100755 index 00000000000..37c9a3c4544 --- /dev/null +++ b/fastdeploy/encryption/util/src/crypto/aes_gcm.cc @@ -0,0 +1,193 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
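For context, and again only as a sketch rather than part of the patch, a caller might probe a cipher stream with the SystemUtils and LOGD helpers declared above. The return conventions assumed here (0 for "encrypted" and for "key matches") follow the implementations that appear later in this patch; the stream contents are placeholders.

#include <sstream>
#include <string>

#include "fastdeploy/encryption/util/include/system_utils.h"
#include "fastdeploy/encryption/util/include/log.h"

void CheckCipherDemo(const std::string& key, const std::string& cipher_bytes) {
  std::istringstream cipher(cipher_bytes);

  // 0 means the stream starts with the expected magic number + version header.
  if (fastdeploy::util::SystemUtils::check_file_encrypted(cipher) != 0) {
    LOGD("stream is not an encrypted model");
    return;
  }

  // 0 means the sha256 of the key matches the tag stored after the header.
  if (fastdeploy::util::SystemUtils::check_key_match(key, cipher) != 0) {
    LOGD("key does not match this cipher stream");
  }
}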
+ +#include + +#include "fastdeploy/encryption/util/include/crypto/aes_gcm.h" +namespace fastdeploy { +namespace util { +namespace crypto { + +int AesGcm::aes_gcm_key(const unsigned char* key, const unsigned char* iv, + EVP_CIPHER_CTX* e_ctx, EVP_CIPHER_CTX* d_ctx) { + int ret = 0; + if (e_ctx != NULL) { + ret = EVP_EncryptInit_ex(e_ctx, EVP_aes_256_gcm(), NULL, NULL, NULL); + if (ret != 1) { + return -1; + } + ret = EVP_CIPHER_CTX_ctrl(e_ctx, EVP_CTRL_GCM_SET_IVLEN, AES_GCM_IV_LENGTH, + NULL); + if (ret != 1) { + return -2; + } + ret = EVP_EncryptInit_ex(e_ctx, NULL, NULL, key, iv); + if (ret != 1) { + return -3; + } + } + // initial decrypt ctx + if (d_ctx != NULL) { + ret = EVP_DecryptInit_ex(d_ctx, EVP_aes_256_gcm(), NULL, NULL, NULL); + if (!ret) { + return -1; + } + ret = EVP_CIPHER_CTX_ctrl(d_ctx, EVP_CTRL_GCM_SET_IVLEN, AES_GCM_IV_LENGTH, + NULL); + if (!ret) { + return -2; + } + ret = EVP_DecryptInit_ex(d_ctx, NULL, NULL, key, iv); + if (!ret) { + return -3; + } + } + return 0; +} + +int AesGcm::aes_gcm_key(const std::string& key_hex, const std::string& iv_hex, + EVP_CIPHER_CTX* e_ctx, EVP_CIPHER_CTX* d_ctx) { + // check key_hex and iv_hex length + if (key_hex.length() != AES_GCM_KEY_LENGTH * 2 || + iv_hex.length() != AES_GCM_IV_LENGTH * 2) { + return -4; + } + + unsigned char key[AES_GCM_KEY_LENGTH]; + unsigned char iv[AES_GCM_IV_LENGTH]; + + int ret = Basic::hex_to_byte(key_hex, key); + if (ret < 0) { + return -5; + } + ret = Basic::hex_to_byte(iv_hex, iv); + if (ret < 0) { + return -5; + } + return aes_gcm_key(key, iv, e_ctx, d_ctx); +} + +int AesGcm::encrypt_aes_gcm(const unsigned char* plaintext, const int& len, + const unsigned char* key, const unsigned char* iv, + unsigned char* ciphertext, int& out_len) { + EVP_CIPHER_CTX* ctx = NULL; + int ret = 0; + int update_len = 0; + int ciphertext_len = 0; + unsigned char tag_char[AES_GCM_TAG_LENGTH]; + + if (!(ctx = EVP_CIPHER_CTX_new())) { + return -1; + } + // initial context + ret = aes_gcm_key(key, iv, ctx, NULL); + if (ret) { + EVP_CIPHER_CTX_free(ctx); + return -1; + } + // encryption + ret = EVP_EncryptUpdate(ctx, ciphertext, &update_len, plaintext, len); + if (ret != 1) { + EVP_CIPHER_CTX_free(ctx); + return -2; + } + ciphertext_len = update_len; + + ret = EVP_EncryptFinal_ex(ctx, ciphertext + ciphertext_len, &update_len); + if (1 != ret) { + EVP_CIPHER_CTX_free(ctx); + return -3; + } + ciphertext_len += update_len; + + // Get the tags for authentication + ret = EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, AES_GCM_TAG_LENGTH, + tag_char); + if (1 != ret) { + EVP_CIPHER_CTX_free(ctx); + return -4; + } + + EVP_CIPHER_CTX_free(ctx); + + // append the tags to the end of encryption text + for (int i = 0; i < AES_GCM_TAG_LENGTH; ++i) { + ciphertext[ciphertext_len + i] = tag_char[i]; + } + out_len = ciphertext_len + AES_GCM_TAG_LENGTH; + + return 0; +} + +int AesGcm::decrypt_aes_gcm(const unsigned char* ciphertext, const int& len, + const unsigned char* key, const unsigned char* iv, + unsigned char* plaintext, int& out_len) { + EVP_CIPHER_CTX* ctx = NULL; + int ret = 0; + int update_len = 0; + int cipher_len = 0; + int plaintext_len = 0; + unsigned char tag_char[AES_GCM_TAG_LENGTH]; + + // get the tag at the end of ciphertext + for (int i = 0; i < AES_GCM_TAG_LENGTH; ++i) { + tag_char[i] = ciphertext[len - AES_GCM_TAG_LENGTH + i]; + } + cipher_len = len - AES_GCM_TAG_LENGTH; + + // initial aes context + if (!(ctx = EVP_CIPHER_CTX_new())) { + return -1; + } + + ret = aes_gcm_key(key, iv, NULL, ctx); + if (ret) { + 
EVP_CIPHER_CTX_free(ctx); + return -1; + } + + // decryption + ret = EVP_DecryptUpdate(ctx, plaintext, &update_len, ciphertext, cipher_len); + if (ret != 1) { + EVP_CIPHER_CTX_free(ctx); + return -2; + } + plaintext_len = update_len; + + // check if the tag is equal to the decrption tag + ret = EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_TAG, AES_GCM_TAG_LENGTH, + tag_char); + if (!ret) { + EVP_CIPHER_CTX_free(ctx); + // decrption failed + return -3; + } + + ret = EVP_DecryptFinal_ex(ctx, plaintext + update_len, &update_len); + if (ret <= 0) { + EVP_CIPHER_CTX_free(ctx); + return -4; + } + + plaintext_len += update_len; + + EVP_CIPHER_CTX_free(ctx); + + out_len = plaintext_len; + return 0; +} + +} // namespace crypto +} // namespace util +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/util/src/crypto/base64.cc b/fastdeploy/encryption/util/src/crypto/base64.cc new file mode 100755 index 00000000000..a37f705c784 --- /dev/null +++ b/fastdeploy/encryption/util/src/crypto/base64.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/encryption/util/include/crypto/base64.h" + + +namespace fastdeploy { +namespace baidu { +namespace base { +namespace base64 { +using std::string; +namespace { +const string base64_chars = // NOLINT + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +inline bool is_base64(unsigned char c) { + return isalnum(c) || (c == '+') || (c == '/'); +} + +inline size_t encode_len(size_t input_len) { return (input_len + 2) / 3 * 4; } + +void encode_char_array(unsigned char *encode_block, + const unsigned char *decode_block) { + encode_block[0] = (decode_block[0] & 0xfc) >> 2; + encode_block[1] = + ((decode_block[0] & 0x03) << 4) + ((decode_block[1] & 0xf0) >> 4); + encode_block[2] = + ((decode_block[1] & 0x0f) << 2) + ((decode_block[2] & 0xc0) >> 6); + encode_block[3] = decode_block[2] & 0x3f; +} + +void decode_char_array(unsigned char *encode_block, + unsigned char *decode_block) { + for (int i = 0; i < 4; ++i) { + encode_block[i] = base64_chars.find(encode_block[i]); + } + decode_block[0] = (encode_block[0] << 2) + ((encode_block[1] & 0x30) >> 4); + decode_block[1] = + ((encode_block[1] & 0xf) << 4) + ((encode_block[2] & 0x3c) >> 2); + decode_block[2] = ((encode_block[2] & 0x3) << 6) + encode_block[3]; +} +} // namespace + +string base64_encode(const string &input) { + string output; + size_t i = 0; + unsigned char decode_block[3]; + unsigned char encode_block[4]; + + for (string::size_type len = 0; len != input.size(); ++len) { + decode_block[i++] = input[len]; + if (i == 3) { + encode_char_array(encode_block, decode_block); + for (i = 0; i < 4; ++i) { + output += base64_chars[encode_block[i]]; + } + i = 0; + } + } + + if (i > 0) { + for (size_t j = i; j < 3; ++j) { + decode_block[j] = '\0'; + } + + encode_char_array(encode_block, decode_block); + + for (size_t j = 0; j < i + 1; ++j) { + output 
+= base64_chars[encode_block[j]]; + } + + while (i++ < 3) { + output += '='; + } + } + + return output; +} + +string base64_decode(const string &encoded_string) { + int in_len = encoded_string.size(); + int i = 0; + int len = 0; + unsigned char encode_block[4]; + unsigned char decode_block[3]; + string output; + + while (in_len-- && (encoded_string[len] != '=') && + is_base64(encoded_string[len])) { + encode_block[i++] = encoded_string[len]; + len++; + if (i == 4) { + decode_char_array(encode_block, decode_block); + + for (int j = 0; j < 3; ++j) { + output += decode_block[j]; + } + i = 0; + } + } + + if (i > 0) { + for (int j = i; j < 4; ++j) { + encode_block[j] = 0; + } + + decode_char_array(encode_block, decode_block); + + for (int j = 0; j < i - 1; ++j) { + output += decode_block[j]; + } + } + + return output; +} + +} // namespace base64 +} // namespace base +} // namespace baidu +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/util/src/crypto/basic.cc b/fastdeploy/encryption/util/src/crypto/basic.cc new file mode 100755 index 00000000000..4c919463192 --- /dev/null +++ b/fastdeploy/encryption/util/src/crypto/basic.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "fastdeploy/encryption/util/include/crypto/basic.h" +namespace fastdeploy { +namespace util { +namespace crypto { + +int Basic::byte_to_hex(const unsigned char* in_byte, int len, + std::string& out_hex) { + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (int i = 0; i < len; ++i) { + oss << std::setw(2) << int(in_byte[i]); + } + out_hex = oss.str(); + return 0; +} + +int Basic::hex_to_byte(const std::string& in_hex, unsigned char* out_byte) { + int i = 0; + int j = 0; + int len = in_hex.length() / 2; + const unsigned char* hex; + if (in_hex.length() % 2 != 0 || out_byte == NULL) { + return -1; + } + hex = (unsigned char*)in_hex.c_str(); + + for (; j < len; i += 2, ++j) { + unsigned char high = hex[i]; + unsigned char low = hex[i + 1]; + if (high >= '0' && high <= '9') { + high = high - '0'; + } else if (high >= 'A' && high <= 'F') { + high = high - 'A' + 10; + } else if (high >= 'a' && high <= 'f') { + high = high - 'a' + 10; + } else { + return -2; + } + + if (low >= '0' && low <= '9') { + low = low - '0'; + } else if (low >= 'A' && low <= 'F') { + low = low - 'A' + 10; + } else if (low >= 'a' && low <= 'f') { + low = low - 'a' + 10; + } else { + return -2; + } + out_byte[j] = high << 4 | low; + } + return 0; +} + +int Basic::random(unsigned char* random, int len) { + std::random_device rd; + int i = 0; + if (len <= 0 || random == NULL) { + return -1; + } + for (; i < len; ++i) { + random[i] = rd() % 256; + } + return 0; +} + +} // namespace crypto +} // namespace util +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/util/src/crypto/sha256_utils.cc b/fastdeploy/encryption/util/src/crypto/sha256_utils.cc new file mode 100755 index 00000000000..35e8d904f95 --- /dev/null +++ b/fastdeploy/encryption/util/src/crypto/sha256_utils.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
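A short sketch, not part of this patch, of the Basic helpers implemented above: byte_to_hex() expands each byte into two lowercase hex digits and hex_to_byte() reverses it, so the byte buffer needs half the hex string's length.

#include <string>
#include <vector>

#include "fastdeploy/encryption/util/include/crypto/basic.h"

int HexRoundTripDemo() {
  using fastdeploy::util::crypto::Basic;

  unsigned char raw[4] = {0xde, 0xad, 0xbe, 0xef};
  std::string hex;
  Basic::byte_to_hex(raw, 4, hex);  // hex == "deadbeef"

  std::vector<unsigned char> back(hex.length() / 2);
  // Returns 0 on success, -1 for odd-length input, -2 for non-hex characters.
  return Basic::hex_to_byte(hex, back.data());
}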
+#include +#include +#include +#include + +#include "fastdeploy/encryption/util/include/crypto/sha256_utils.h" +namespace fastdeploy { +namespace util { +namespace crypto { + +void SHA256Utils::sha256(const void* data, size_t len, unsigned char* md) { + SHA256_CTX sha_ctx = {}; + SHA256_Init(&sha_ctx); + SHA256_Update(&sha_ctx, data, len); + SHA256_Final(md, &sha_ctx); +} +std::vector SHA256Utils::sha256(const void* data, size_t len) { + std::vector md(32); + sha256(data, len, &md[0]); + return md; +} +std::vector SHA256Utils::sha256( + const std::vector& data) { + return sha256(&data[0], data.size()); +} +std::string SHA256Utils::sha256_string(const void* data, size_t len) { + std::vector md = sha256(data, len); + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (unsigned char c : md) { + oss << std::setw(2) << int(c); + } + return oss.str(); +} +std::string SHA256Utils::sha256_string(const std::vector& data) { + return sha256_string(&data[0], data.size()); +} +std::string SHA256Utils::sha256_string(const std::string& string) { + return sha256_string(string.c_str(), string.size()); +} +std::string SHA256Utils::sha256_file(const std::string& path) { + FILE* file = fopen(path.c_str(), "rb"); + if (!file) { + return ""; + } + unsigned char hash[SHA256_DIGEST_LENGTH]; + SHA256_CTX sha_ctx = {}; + SHA256_Init(&sha_ctx); + const int size = 32768; + void* buffer = malloc(size); + if (!buffer) { + fclose(file); + return ""; + } + int read = 0; + while ((read = fread(buffer, 1, size, file))) { + SHA256_Update(&sha_ctx, buffer, read); + } + SHA256_Final(hash, &sha_ctx); + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (unsigned char c : hash) { + oss << std::setw(2) << int(c); + } + fclose(file); + free(buffer); + return oss.str(); +} + +} // namespace crypto +} // namespace util +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/util/src/io_utils.cc b/fastdeploy/encryption/util/src/io_utils.cc new file mode 100755 index 00000000000..e152541648d --- /dev/null +++ b/fastdeploy/encryption/util/src/io_utils.cc @@ -0,0 +1,225 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifdef LINUX +#include +#include +#endif +#ifdef WIN32 +#include +#include +#endif + +#include +#include +#include +#include + +#include "fastdeploy/encryption/util/include/io_utils.h" +#include "fastdeploy/encryption/include/model_code.h" +#include "fastdeploy/encryption/util/include/log.h" +namespace fastdeploy { +namespace ioutil { + +int read_file(const char* file_path, unsigned char** dataptr, size_t* sizeptr) { + FILE* fp = NULL; + fp = fopen(file_path, "rb"); + if (fp == NULL) { + LOGD("[M]open file(%s) failed", file_path); + return CODE_OPEN_FAILED; + } + + fseek(fp, 0, SEEK_END); + *sizeptr = ftell(fp); + *dataptr = (unsigned char*)malloc(sizeof(unsigned char) * (*sizeptr)); + + fseek(fp, 0, SEEK_SET); + fread(*dataptr, 1, *sizeptr, fp); + fclose(fp); + + return CODE_OK; +} + +int read_with_pos_and_length(const char* file_path, unsigned char* dataptr, + size_t pos, size_t length) { + if (dataptr == NULL) { + LOGD("Read file pos dataptr = NULL"); + return CODE_READ_FILE_PTR_IS_NULL; + } + + FILE* fp = NULL; + if ((fp = fopen(file_path, "rb")) == NULL) { + LOGD("[M]open file(%s) failed", file_path); + return CODE_OPEN_FAILED; + } + + fseek(fp, pos, SEEK_SET); + fread(dataptr, 1, length, fp); + fclose(fp); + + return CODE_OK; +} + +int read_with_pos(const char* file_path, size_t pos, unsigned char** dataptr, + size_t* sizeptr) { + FILE* fp = NULL; + if ((fp = fopen(file_path, "rb")) == NULL) { + LOGD("[M]open file(%s) failed when read_with_pos", file_path); + return CODE_OPEN_FAILED; + } + + fseek(fp, 0, SEEK_END); + size_t filesize = ftell(fp); + + *sizeptr = filesize - pos; + *dataptr = (unsigned char*)malloc(sizeof(unsigned char) * (filesize - pos)); + fseek(fp, pos, SEEK_SET); + fread(*dataptr, 1, filesize - pos, fp); + fclose(fp); + + return CODE_OK; +} + +int write_file(const char* file_path, const unsigned char* dataptr, + size_t sizeptr) { + FILE* fp = NULL; + if ((fp = fopen(file_path, "wb")) == NULL) { + LOGD("[M]open file(%s) failed", file_path); + return CODE_OPEN_FAILED; + } + + fwrite(dataptr, 1, sizeptr, fp); + + fclose(fp); + return CODE_OK; +} + +int append_file(const char* file_path, const unsigned char* data, size_t len) { + FILE* fp = fopen(file_path, "ab+"); + if (fp == NULL) { + LOGD("[M]open file(%s) failed when append_file", file_path); + return CODE_OPEN_FAILED; + } + fwrite(data, sizeof(char), len, fp); + fclose(fp); + return CODE_OK; +} + +size_t read_file_size(const char* file_path) { + FILE* fp = NULL; + fp = fopen(file_path, "rb"); + if (fp == NULL) { + LOGD("[M]open file(%s) failed when read_file_size", file_path); + return 0; + } + + fseek(fp, 0, SEEK_END); + size_t filesize = ftell(fp); + fclose(fp); + + return filesize; +} + +int read_file_to_file(const char* src_path, const char* dst_path) { + FILE* infp = NULL; + if ((infp = fopen(src_path, "rb")) == NULL) { + LOGD("[M]read src file failed when read_file_to_file"); + return CODE_OPEN_FAILED; + } + + fseek(infp, 0, SEEK_END); + size_t insize = ftell(infp); + char* content = reinterpret_cast(malloc(sizeof(char) * insize)); + + fseek(infp, 0, SEEK_SET); + fread(content, 1, insize, infp); + fclose(infp); + + FILE* outfp = NULL; + if ((outfp = fopen(dst_path, "wb")) == NULL) { + LOGD("[M]open dst file failed when read_file_to_file"); + return CODE_OPEN_FAILED; + } + fwrite(content, 1, insize, outfp); + fclose(outfp); + free(content); + return CODE_OK; +} + +int read_dir_files(const char* dir_path, + std::vector& files) { // NOLINT +#ifdef LINUX + struct dirent* ptr; + DIR* dir = NULL; + dir = 
opendir(dir_path); + if (dir == NULL) { + return -1; // CODE_NOT_EXIST_DIR + } + while ((ptr = readdir(dir)) != NULL) { + if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { + files.push_back(ptr->d_name); + } + } + closedir(dir); +#endif +#ifdef WIN32 + intptr_t handle; + struct _finddata_t fileinfo; + + std::string tmp_dir(dir_path); + std::string::size_type idx = tmp_dir.rfind("\\*"); + if (idx == std::string::npos || idx != tmp_dir.length() - 1) { + tmp_dir.append("\\*"); + } + + handle = _findfirst(tmp_dir.c_str(), &fileinfo); + if (handle == -1) { + return -1; + } + + do { + std::cout << "File name = " << fileinfo.name << std::endl; + if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0) { + files.push_back(fileinfo.name); + } + } while (!_findnext(handle, &fileinfo)); + + std::cout << files.size() << std::endl; + for (size_t i = 0; i < files.size(); i++) { + std::cout << files[i] << std::endl; + } + + _findclose(handle); +#endif + return files.size(); +} + +int dir_exist_or_mkdir(const char* dir) { +#ifdef WIN32 + if (CreateDirectory(dir, NULL)) { + // return CODE_OK; + } else { + return CODE_MKDIR_FAILED; + } +#endif +#ifdef LINUX + if (access(dir, 0) != 0) { + mkdir(dir, S_IRWXU | S_IRWXG | S_IRWXO); + } +#endif + return CODE_OK; +} + +} // namespace ioutil +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/encryption/util/src/system_utils.cc b/fastdeploy/encryption/util/src/system_utils.cc new file mode 100755 index 00000000000..d04bf3144c9 --- /dev/null +++ b/fastdeploy/encryption/util/src/system_utils.cc @@ -0,0 +1,144 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
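A minimal sketch, not part of this patch, of the ioutil free functions implemented above. read_file() allocates the buffer with malloc, so the caller is expected to free it; both paths below are placeholders, and a return of 0 (CODE_OK) signals success.

#include <cstdlib>

#include "fastdeploy/encryption/util/include/io_utils.h"

int CopyFileDemo(const char* src_path) {
  unsigned char* data = nullptr;
  size_t size = 0;
  if (fastdeploy::ioutil::read_file(src_path, &data, &size) != 0) {
    return -1;  // open failed
  }

  // Write the bytes back out under a placeholder name, then release the buffer.
  int ret = fastdeploy::ioutil::write_file("model_copy.bin", data, size);
  std::free(data);
  return ret;
}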
+#include +#include +#include +#include + +#include "fastdeploy/encryption/include/model_code.h" +#include "fastdeploy/encryption/util/include/system_utils.h" +#include "fastdeploy/encryption/util/include/crypto/basic.h" +#include "fastdeploy/encryption/util/include/crypto/sha256_utils.h" +#include "fastdeploy/encryption/util/include/io_utils.h" +#include "fastdeploy/encryption/util/include/log.h" +#include "fastdeploy/encryption/util/include/constant/constant_model.h" + +namespace fastdeploy { +namespace util { + +int SystemUtils::intN(int n) { return rand() % n; } + +std::string SystemUtils::random_key_iv(int len) { + unsigned char* tmp = (unsigned char*)malloc(sizeof(unsigned char) * len); + int ret = util::crypto::Basic::random(tmp, len); + std::string tmp_str(reinterpret_cast(tmp), len); + free(tmp); + return tmp_str; +} + +std::string SystemUtils::random_str(int len) { + unsigned char* tmp = (unsigned char*)malloc(sizeof(unsigned char) * len); + int ret = util::crypto::Basic::random(tmp, len); + std::string tmp_str(reinterpret_cast(tmp), len); + free(tmp); + return tmp_str; +} + +int SystemUtils::check_key_match(const char* key, const char* filepath) { + std::string aes_key_iv(key); + std::string sha256_aes_key_iv = + util::crypto::SHA256Utils::sha256_string(aes_key_iv); + + unsigned char* data_pos = (unsigned char*)malloc(sizeof(unsigned char) * 64); + int ret = ioutil::read_with_pos_and_length( + filepath, data_pos, constant::MAGIC_NUMBER_LEN + constant::VERSION_LEN, + 64); + if (ret != CODE_OK) { + LOGD("[M]read file failed when check key"); + return ret; + } + + std::string check_str(reinterpret_cast(data_pos), 64); + if (strcmp(sha256_aes_key_iv.c_str(), check_str.c_str()) != 0) { + return CODE_KEY_NOT_MATCH; + } + free(data_pos); + return CODE_OK; +} + +int SystemUtils::check_key_match(const std::string& key, + std::istream& cipher_stream) { + cipher_stream.seekg(0, std::ios::beg); + std::string sha256_aes_key_iv = util::crypto::SHA256Utils::sha256_string(key); + int check_len = 64; + + std::string data_pos_str; + cipher_stream.seekg(constant::MAGIC_NUMBER_LEN + constant::VERSION_LEN); + std::copy_n(std::istreambuf_iterator(cipher_stream), check_len, + std::back_inserter(data_pos_str)); + if (data_pos_str.size() != check_len) { + LOGD("[M]read file failed when check key"); + return CODE_OPEN_FAILED; + } + if (data_pos_str == sha256_aes_key_iv) { + return CODE_OK; + } + + return CODE_KEY_NOT_MATCH; +} + +/** + * + * @param filepath + * @return 0 - file encrypted 1 - file unencrypted + */ +int SystemUtils::check_file_encrypted(const char* filepath) { + size_t read_len = constant::MAGIC_NUMBER_LEN + constant::VERSION_LEN; + unsigned char* data_pos = + (unsigned char*)malloc(sizeof(unsigned char) * read_len); + if (ioutil::read_with_pos_and_length(filepath, data_pos, 0, read_len) != + CODE_OK) { + LOGD("check file failed when read %s(file)", filepath); + return CODE_OPEN_FAILED; + } + + std::string tag(constant::MAGIC_NUMBER); + tag.append(constant::VERSION); + std::string check_str(reinterpret_cast(data_pos), read_len); + int ret_cmp = strcmp(tag.c_str(), check_str.c_str()) == 0 ? 
0 : 1; + free(data_pos); + return ret_cmp; +} + +int SystemUtils::check_file_encrypted(std::istream& cipher_stream) { + cipher_stream.seekg(0, std::ios::beg); + size_t read_len = constant::MAGIC_NUMBER_LEN + constant::VERSION_LEN; + std::string data_pos_str; + std::copy_n(std::istreambuf_iterator(cipher_stream), read_len, + std::back_inserter(data_pos_str)); + if (data_pos_str.size() != read_len) { + LOGD("check file failed when read cipher stream"); + return CODE_OPEN_FAILED; + } + + std::string tag(constant::MAGIC_NUMBER); + tag.append(constant::VERSION); + if (data_pos_str == tag) { + return 0; + } + + return 1; +} + +int SystemUtils::check_pattern_exist(const std::vector& vecs, + const std::string& pattern) { + if (std::find(vecs.begin(), vecs.end(), pattern) == vecs.end()) { + return -1; // not exist + } else { + return 0; // exist + } +} + +} // namespace util +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc index 77c1539c382..d0f06a1cac5 100755 --- a/fastdeploy/fastdeploy_model.cc +++ b/fastdeploy/fastdeploy_model.cc @@ -50,6 +50,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() { bool use_gpu = (runtime_option.device == Device::GPU); bool use_ipu = (runtime_option.device == Device::IPU); bool use_rknpu = (runtime_option.device == Device::RKNPU); + bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD); bool use_timvx = (runtime_option.device == Device::TIMVX); bool use_ascend = (runtime_option.device == Device::ASCEND); bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN); @@ -64,6 +65,11 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() { FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl; return false; } + } else if (use_sophgotpu) { + if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) { + FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl; + return false; + } } else if (use_timvx) { if (!IsSupported(valid_timvx_backends, runtime_option.backend)) { FDERROR << "The valid timvx backends of model " << ModelName() << " are " << Str(valid_timvx_backends) << ", " << runtime_option.backend << " is not supported." << std::endl; @@ -118,6 +124,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() { return CreateASCENDBackend(); } else if (runtime_option.device == Device::KUNLUNXIN) { return CreateKunlunXinBackend(); + } else if (runtime_option.device == Device::SOPHGOTPUD) { + return CreateSophgoNPUBackend(); } else if (runtime_option.device == Device::IPU) { #ifdef WITH_IPU return CreateIpuBackend(); @@ -218,6 +226,30 @@ bool FastDeployModel::CreateRKNPUBackend() { return false; } +bool FastDeployModel::CreateSophgoNPUBackend() { + if (valid_sophgonpu_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_sophgonpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_sophgonpu_backends[i])) { + continue; + } + runtime_option.backend = valid_sophgonpu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." 
+ << std::endl; + return false; +} + bool FastDeployModel::CreateTimVXBackend() { if (valid_timvx_backends.size() == 0) { FDERROR << "There's no valid timvx backends for model: " << ModelName() diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h index 6d2f7e5b7b0..9b78c3d3f45 100755 --- a/fastdeploy/fastdeploy_model.h +++ b/fastdeploy/fastdeploy_model.h @@ -54,6 +54,9 @@ class FASTDEPLOY_DECL FastDeployModel { /** Model's valid hardware backends. This member defined all the gpu backends have successfully tested for the model */ std::vector valid_rknpu_backends = {}; + /** Model's valid hardware backends. This member defined all the sophgo npu backends have successfully tested for the model + */ + std::vector valid_sophgonpu_backends = {}; /// Get number of inputs for this model virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); } @@ -148,6 +151,7 @@ class FASTDEPLOY_DECL FastDeployModel { bool CreateGpuBackend(); bool CreateIpuBackend(); bool CreateRKNPUBackend(); + bool CreateSophgoNPUBackend(); bool CreateTimVXBackend(); bool CreateKunlunXinBackend(); bool CreateASCENDBackend(); diff --git a/fastdeploy/pybind/main.cc.in b/fastdeploy/pybind/main.cc.in old mode 100644 new mode 100755 index 1b227edc625..5da3ef9fca6 --- a/fastdeploy/pybind/main.cc.in +++ b/fastdeploy/pybind/main.cc.in @@ -20,6 +20,7 @@ void BindFDTensor(pybind11::module&); void BindRuntime(pybind11::module&); void BindFDModel(pybind11::module&); void BindVision(pybind11::module&); +void BindEncryption(pybind11::module&); void BindText(pybind11::module&); void BindPipeline(pybind11::module&); @@ -166,6 +167,13 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) { m.def_submodule("pipeline", "Pipeline module of FastDeploy."); BindPipeline(pipeline_module); #endif + +#ifdef ENABLE_ENCRYPTION + auto encryption_module = + m.def_submodule("encryption", "Encryption module of FastDeploy."); + BindEncryption(encryption_module); +#endif + #ifdef ENABLE_TEXT auto text_module = m.def_submodule("text", "Text module of FastDeploy."); diff --git a/fastdeploy/pybind/main.h b/fastdeploy/pybind/main.h old mode 100644 new mode 100755 index c0ea4497add..de817bb737f --- a/fastdeploy/pybind/main.h +++ b/fastdeploy/pybind/main.h @@ -32,6 +32,10 @@ #include "fastdeploy/text.h" #endif +#ifdef ENABLE_ENCRYPTION +#include "fastdeploy/encryption.h" +#endif + #include "fastdeploy/core/float16.h" namespace fastdeploy { diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index cbbb7f2e839..3402dd896f0 100755 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) { .def("use_gpu", &RuntimeOption::UseGpu) .def("use_cpu", &RuntimeOption::UseCpu) .def("use_rknpu2", &RuntimeOption::UseRKNPU2) + .def("use_sophgo", &RuntimeOption::UseSophgo) .def("use_ascend", &RuntimeOption::UseAscend) .def("use_kunlunxin", &RuntimeOption::UseKunlunXin) .def("set_external_stream", &RuntimeOption::SetExternalStream) @@ -241,19 +242,22 @@ void BindRuntime(pybind11::module& m) { .value("POROS", Backend::POROS) .value("PDINFER", Backend::PDINFER) .value("RKNPU2", Backend::RKNPU2) + .value("SOPHGOTPU", Backend::SOPHGOTPU) .value("LITE", Backend::LITE); pybind11::enum_(m, "ModelFormat", pybind11::arithmetic(), "ModelFormat for inference.") .value("PADDLE", ModelFormat::PADDLE) .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT) .value("RKNN", ModelFormat::RKNN) + .value("SOPHGO", ModelFormat::SOPHGO) .value("ONNX", ModelFormat::ONNX); pybind11::enum_(m, "Device", 
pybind11::arithmetic(), "Device for inference.") .value("CPU", Device::CPU) .value("GPU", Device::GPU) .value("IPU", Device::IPU) - .value("RKNPU", Device::RKNPU); + .value("RKNPU", Device::RKNPU) + .value("SOPHGOTPU", Device::SOPHGOTPUD); pybind11::enum_(m, "FDDataType", pybind11::arithmetic(), "Data type of FastDeploy.") diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc index 0110b4313be..6c1949ed3ce 100755 --- a/fastdeploy/runtime.cc +++ b/fastdeploy/runtime.cc @@ -45,6 +45,10 @@ #include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h" #endif +#ifdef ENABLE_SOPHGO_BACKEND +#include "fastdeploy/backends/sophgo/sophgo_backend.h" +#endif + namespace fastdeploy { std::vector GetAvailableBackends() { @@ -69,6 +73,9 @@ std::vector GetAvailableBackends() { #endif #ifdef ENABLE_RKNPU2_BACKEND backends.push_back(Backend::RKNPU2); +#endif +#ifdef ENABLE_SOPHGO_BACKEND + backends.push_back(Backend::SOPHGOTPU); #endif return backends; } @@ -94,6 +101,8 @@ std::string Str(const Backend& b) { return "Backend::POROS"; } else if (b == Backend::RKNPU2) { return "Backend::RKNPU2"; + } else if (b == Backend::SOPHGOTPU) { + return "Backend::SOPHGOTPU"; } else if (b == Backend::OPENVINO) { return "Backend::OPENVINO"; } else if (b == Backend::LITE) { @@ -113,6 +122,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) { out << "Backend::OPENVINO"; } else if (backend == Backend::RKNPU2) { out << "Backend::RKNPU2"; + } else if (backend == Backend::SOPHGOTPU) { + out << "Backend::SOPHGOTPU"; } else if (backend == Backend::POROS) { out << "Backend::POROS"; } else if (backend == Backend::LITE) { @@ -158,6 +169,15 @@ bool CheckModelFormat(const std::string& model_file, << model_file << std::endl; return false; } + } else if (model_format == ModelFormat::SOPHGO) { + if (model_file.size() < 7 || + model_file.substr(model_file.size() -7, 7) != ".bmodel") { + FDERROR + << "With model format of ModelFormat::SOPHGO, the model file " + "should ends with `.bmodel`, but now it's " + << model_file << std::endl; + return false; + } } else { FDERROR << "Only support model format with frontend ModelFormat::PADDLE / " @@ -185,6 +205,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) { model_file.substr(model_file.size() - 5, 5) == ".rknn") { FDINFO << "Model Format: RKNN." << std::endl; return ModelFormat::RKNN; + } else if (model_file.size() > 7 && + model_file.substr(model_file.size() - 7, 7) == ".bmodel") { + FDINFO << "Model Format: SOPHGO." 
<< std::endl; + return ModelFormat::SOPHGO; } FDERROR << "Cannot guess which model format you are using, please set " @@ -288,6 +312,11 @@ void RuntimeOption::UseAscend(){ device = Device::ASCEND; } +void RuntimeOption::UseSophgo() { + device = Device::SOPHGOTPUD; + UseSophgoBackend(); +} + void RuntimeOption::SetExternalStream(void* external_stream) { external_stream_ = external_stream; } @@ -323,6 +352,15 @@ void RuntimeOption::UseOrtBackend() { #endif } +// use sophgoruntime backend +void RuntimeOption::UseSophgoBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + backend = Backend::SOPHGOTPU; +#else + FDASSERT(false, "The FastDeploy didn't compile with SophgoBackend."); +#endif +} + // use poros backend void RuntimeOption::UsePorosBackend() { #ifdef ENABLE_POROS_BACKEND @@ -564,6 +602,8 @@ bool Runtime::Init(const RuntimeOption& _option) { option.backend = Backend::OPENVINO; } else if (IsBackendAvailable(Backend::RKNPU2)) { option.backend = Backend::RKNPU2; + } else if (IsBackendAvailable(Backend::SOPHGOTPU)) { + option.backend = Backend::SOPHGOTPU; } else { FDERROR << "Please define backend in RuntimeOption, current it's " "Backend::UNKNOWN." @@ -623,7 +663,15 @@ bool Runtime::Init(const RuntimeOption& _option) { FDINFO << "Runtime initialized with Backend::RKNPU2 in " << Str(option.device) << "." << std::endl; - } else { + } else if (option.backend == Backend::SOPHGOTPU) { + FDASSERT(option.device == Device::SOPHGOTPUD, + "Backend::SOPHGO only supports Device::SOPHGO"); + CreateSophgoNPUBackend(); + + FDINFO << "Runtime initialized with Backend::SOPHGO in " + << Str(option.device) << "." << std::endl; + } + else { FDERROR << "Runtime only support " "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " "backend now." @@ -926,6 +974,21 @@ void Runtime::CreateRKNPU2Backend() { #endif } +void Runtime::CreateSophgoNPUBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + auto sophgo_option = SophgoBackendOption(); + FDASSERT(option.model_format == ModelFormat::SOPHGO, + "SophgoBackend only support model format of ModelFormat::SOPHGO"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, "SophgoBackend is not available, please compiled with " + "ENABLE_SOPHGO_BACKEND=ON."); +#endif +} + Runtime* Runtime::Clone(void* stream, int device_id) { Runtime* runtime = new Runtime(); if (option.backend != Backend::OPENVINO && diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index c889d433740..46532b16b0f 100755 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -43,6 +43,7 @@ enum Backend { OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only + SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only }; FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, @@ -151,6 +152,9 @@ struct FASTDEPLOY_DECL RuntimeOption { bool adaptive_seqlen = false, bool enable_multi_stream = false); + /// Use Sophgo to inference + void UseSophgo(); + void SetExternalStream(void* external_stream); /* @@ -170,6 +174,9 @@ struct FASTDEPLOY_DECL RuntimeOption { /// Set ONNX Runtime as inference backend, support CPU/GPU void UseOrtBackend(); + /// Set SOPHGO Runtime as inference backend, support CPU/GPU + void UseSophgoBackend(); + /// Set TensorRT as 
inference backend, only support GPU void UseTrtBackend(); @@ -576,6 +583,7 @@ struct FASTDEPLOY_DECL Runtime { void CreateOpenVINOBackend(); void CreateLiteBackend(); void CreateRKNPU2Backend(); + void CreateSophgoNPUBackend(); std::unique_ptr backend_; std::vector input_tensors_; std::vector output_tensors_; diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index ef2fc90a635..0714a976630 100644 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -55,6 +55,7 @@ #include "fastdeploy/vision/segmentation/ppseg/model.h" #include "fastdeploy/vision/sr/ppsr/model.h" #include "fastdeploy/vision/tracking/pptracking/model.h" +#include "fastdeploy/vision/generation/contrib/animegan.h" #endif diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc index a9b5b46f010..6868c9c62d4 100755 --- a/fastdeploy/vision/classification/ppcls/model.cc +++ b/fastdeploy/vision/classification/ppcls/model.cc @@ -32,7 +32,10 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, valid_ascend_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; valid_ipu_backends = {Backend::PDINFER}; - } else { + }else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } + else { valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; valid_rknpu_backends = {Backend::RKNPU2}; diff --git a/fastdeploy/vision/classification/ppcls/preprocessor.cc b/fastdeploy/vision/classification/ppcls/preprocessor.cc index ef8d8f20eff..aa4314cf872 100644 --- a/fastdeploy/vision/classification/ppcls/preprocessor.cc +++ b/fastdeploy/vision/classification/ppcls/preprocessor.cc @@ -57,7 +57,7 @@ bool PaddleClasPreprocessor::BuildPreprocessPipelineFromConfig() { int height = op.begin()->second["size"].as(); processors_.push_back(std::make_shared(width, height)); } else if (op_name == "NormalizeImage") { - if (!disable_normalize) { + if (!disable_normalize_) { auto mean = op.begin()->second["mean"].as>(); auto std = op.begin()->second["std"].as>(); auto scale = op.begin()->second["scale"].as(); @@ -67,7 +67,7 @@ bool PaddleClasPreprocessor::BuildPreprocessPipelineFromConfig() { processors_.push_back(std::make_shared(mean, std)); } } else if (op_name == "ToCHWImage") { - if (!disable_permute) { + if (!disable_permute_) { processors_.push_back(std::make_shared()); } } else { @@ -83,14 +83,14 @@ bool PaddleClasPreprocessor::BuildPreprocessPipelineFromConfig() { } void PaddleClasPreprocessor::DisableNormalize() { - this->disable_normalize = true; + this->disable_normalize_ = true; // the DisableNormalize function will be invalid if the configuration file is loaded during preprocessing if (!BuildPreprocessPipelineFromConfig()) { FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; } } void PaddleClasPreprocessor::DisablePermute() { - this->disable_permute = true; + this->disable_permute_ = true; // the DisablePermute function will be invalid if the configuration file is loaded during preprocessing if (!BuildPreprocessPipelineFromConfig()) { FDERROR << "Failed to build preprocess pipeline from configuration file." 
<< std::endl; diff --git a/fastdeploy/vision/classification/ppcls/preprocessor.h b/fastdeploy/vision/classification/ppcls/preprocessor.h index 2162ac0951c..06b586d636d 100644 --- a/fastdeploy/vision/classification/ppcls/preprocessor.h +++ b/fastdeploy/vision/classification/ppcls/preprocessor.h @@ -59,9 +59,9 @@ class FASTDEPLOY_DECL PaddleClasPreprocessor { // GPU device id int device_id_ = -1; // for recording the switch of hwc2chw - bool disable_permute = false; + bool disable_permute_ = false; // for recording the switch of normalize - bool disable_normalize = false; + bool disable_normalize_ = false; // read config file std::string config_file_; }; diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 44e020ffde2..107b1ab9081 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -24,6 +24,8 @@ YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file, if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/detection/ppdet/model.h b/fastdeploy/vision/detection/ppdet/model.h index be13b0b4d58..da69774fa63 100755 --- a/fastdeploy/vision/detection/ppdet/model.h +++ b/fastdeploy/vision/detection/ppdet/model.h @@ -41,6 +41,7 @@ class FASTDEPLOY_DECL PicoDet : public PPDetBase { valid_rknpu_backends = {Backend::RKNPU2}; valid_kunlunxin_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; initialized = Initialize(); } @@ -68,6 +69,7 @@ class FASTDEPLOY_DECL PPYOLOE : public PPDetBase { valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_timvx_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; valid_ascend_backends = {Backend::LITE}; initialized = Initialize(); } @@ -259,6 +261,134 @@ class FASTDEPLOY_DECL RTMDet : public PPDetBase { virtual std::string ModelName() const { return "PaddleDetection/RTMDet"; } }; +class FASTDEPLOY_DECL CascadeRCNN : public PPDetBase { + public: + CascadeRCNN(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/CascadeRCNN"; } +}; + +class FASTDEPLOY_DECL PSSDet : public PPDetBase { + public: + PSSDet(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return 
"PaddleDetection/PSSDet"; } +}; + +class FASTDEPLOY_DECL RetinaNet : public PPDetBase { + public: + RetinaNet(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/RetinaNet"; } +}; + +class FASTDEPLOY_DECL PPYOLOESOD : public PPDetBase { + public: + PPYOLOESOD(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/PPYOLOESOD"; } +}; + +class FASTDEPLOY_DECL FCOS : public PPDetBase { + public: + FCOS(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/FCOS"; } +}; + +class FASTDEPLOY_DECL TTFNet : public PPDetBase { + public: + TTFNet(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/TTFNet"; } +}; + +class FASTDEPLOY_DECL TOOD : public PPDetBase { + public: + TOOD(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/TOOD"; } +}; + +class FASTDEPLOY_DECL GFL : public PPDetBase { + public: + GFL(const std::string& model_file, const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) + : PPDetBase(model_file, params_file, config_file, custom_option, + model_format) { + valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; + valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; + initialized = Initialize(); + } + + virtual std::string ModelName() const { return "PaddleDetection/GFL"; } +}; + } // namespace 
detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/backends/common/multiclass_nms.cc b/fastdeploy/vision/detection/ppdet/multiclass_nms.cc similarity index 95% rename from fastdeploy/backends/common/multiclass_nms.cc rename to fastdeploy/vision/detection/ppdet/multiclass_nms.cc index db98e8c7e15..f4c081036fb 100644 --- a/fastdeploy/backends/common/multiclass_nms.cc +++ b/fastdeploy/vision/detection/ppdet/multiclass_nms.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/backends/common/multiclass_nms.h" +#include "fastdeploy/vision/detection/ppdet/multiclass_nms.h" #include #include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/utils/utils.h" namespace fastdeploy { -namespace backend { +namespace vision { +namespace detection { template bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2) { @@ -79,7 +80,7 @@ float JaccardOverlap(const float* box1, const float* box2, } } -void MultiClassNMS::FastNMS(const float* boxes, const float* scores, +void PaddleMultiClassNMS::FastNMS(const float* boxes, const float* scores, const int& num_boxes, std::vector* keep_indices) { std::vector> sorted_indices; @@ -109,7 +110,7 @@ void MultiClassNMS::FastNMS(const float* boxes, const float* scores, } } -int MultiClassNMS::NMSForEachSample( +int PaddleMultiClassNMS::NMSForEachSample( const float* boxes, const float* scores, int num_boxes, int num_classes, std::map>* keep_indices) { for (int i = 0; i < num_classes; ++i) { @@ -152,7 +153,7 @@ int MultiClassNMS::NMSForEachSample( return num_det; } -void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data, +void PaddleMultiClassNMS::Compute(const float* boxes_data, const float* scores_data, const std::vector& boxes_dim, const std::vector& scores_dim) { int score_size = scores_dim.size(); @@ -220,5 +221,6 @@ void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data, } } } -} // namespace backend +} // namespace detection +} // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/backends/common/multiclass_nms.h b/fastdeploy/vision/detection/ppdet/multiclass_nms.h similarity index 92% rename from fastdeploy/backends/common/multiclass_nms.h rename to fastdeploy/vision/detection/ppdet/multiclass_nms.h index 48a3d9336de..e7e66cae321 100644 --- a/fastdeploy/backends/common/multiclass_nms.h +++ b/fastdeploy/vision/detection/ppdet/multiclass_nms.h @@ -18,8 +18,9 @@ #include namespace fastdeploy { -namespace backend { -struct MultiClassNMS { +namespace vision { +namespace detection { +struct PaddleMultiClassNMS { int64_t background_label = -1; int64_t keep_top_k = -1; float nms_eta; @@ -40,6 +41,6 @@ struct MultiClassNMS { const std::vector& boxes_dim, const std::vector& scores_dim); }; -} // namespace backend - +} // namespace detection +} // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/ppdet/postprocessor.cc b/fastdeploy/vision/detection/ppdet/postprocessor.cc index 7b72e24bab3..f09a21556ab 100644 --- a/fastdeploy/vision/detection/ppdet/postprocessor.cc +++ b/fastdeploy/vision/detection/ppdet/postprocessor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "fastdeploy/vision/detection/ppdet/postprocessor.h" +#include "fastdeploy/vision/detection/ppdet/multiclass_nms.h" #include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { @@ -176,7 +177,7 @@ bool PaddleDetPostprocessor::ProcessUnDecodeResults( return false; } - backend::MultiClassNMS nms; + PaddleMultiClassNMS nms; nms.background_label = -1; nms.keep_top_k = 100; nms.nms_eta = 1.0; diff --git a/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc b/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc index 09c89dfcef9..800d656b37d 100644 --- a/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc +++ b/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc @@ -31,7 +31,13 @@ void BindPPDet(pybind11::module& m) { outputs[i].StopSharing(); } return outputs; - }); + }) + .def("disable_normalize", [](vision::detection::PaddleDetPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::detection::PaddleDetPreprocessor& self) { + self.DisablePermute(); + });; pybind11::class_( m, "PaddleDetPostprocessor") @@ -130,6 +136,30 @@ void BindPPDet(pybind11::module& m) { pybind11::class_(m, "RTMDet") .def(pybind11::init()); + ModelFormat>()); + pybind11::class_(m, "CascadeRCNN") + .def(pybind11::init()); + pybind11::class_(m, "PSSDet") + .def(pybind11::init()); + pybind11::class_(m, "RetinaNet") + .def(pybind11::init()); + pybind11::class_(m, "PPYOLOESOD") + .def(pybind11::init()); + pybind11::class_(m, "FCOS") + .def(pybind11::init()); + pybind11::class_(m, "TTFNet") + .def(pybind11::init()); + pybind11::class_(m, "TOOD") + .def(pybind11::init()); + pybind11::class_(m, "GFL") + .def(pybind11::init()); } } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/ppdet/preprocessor.cc b/fastdeploy/vision/detection/ppdet/preprocessor.cc index bb38c67ec6a..a18d43b708c 100644 --- a/fastdeploy/vision/detection/ppdet/preprocessor.cc +++ b/fastdeploy/vision/detection/ppdet/preprocessor.cc @@ -22,19 +22,19 @@ namespace vision { namespace detection { PaddleDetPreprocessor::PaddleDetPreprocessor(const std::string& config_file) { - FDASSERT(BuildPreprocessPipelineFromConfig(config_file), + this->config_file_ = config_file; + FDASSERT(BuildPreprocessPipelineFromConfig(), "Failed to create PaddleDetPreprocessor."); initialized_ = true; } -bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig( - const std::string& config_file) { +bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig() { processors_.clear(); YAML::Node cfg; try { - cfg = YAML::LoadFile(config_file); + cfg = YAML::LoadFile(config_file_); } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file + FDERROR << "Failed to load yaml file " << config_file_ << ", maybe you should check this file." 
<< std::endl; return false; } @@ -45,21 +45,23 @@ bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig( for (const auto& op : cfg["Preprocess"]) { std::string op_name = op["type"].as(); if (op_name == "NormalizeImage") { - auto mean = op["mean"].as>(); - auto std = op["std"].as>(); - bool is_scale = true; - if (op["is_scale"]) { - is_scale = op["is_scale"].as(); - } - std::string norm_type = "mean_std"; - if (op["norm_type"]) { - norm_type = op["norm_type"].as(); - } - if (norm_type != "mean_std") { - std::fill(mean.begin(), mean.end(), 0.0); - std::fill(std.begin(), std.end(), 1.0); + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = true; + if (op["is_scale"]) { + is_scale = op["is_scale"].as(); + } + std::string norm_type = "mean_std"; + if (op["norm_type"]) { + norm_type = op["norm_type"].as(); + } + if (norm_type != "mean_std") { + std::fill(mean.begin(), mean.end(), 0.0); + std::fill(std.begin(), std.end(), 1.0); + } + processors_.push_back(std::make_shared(mean, std, is_scale)); } - processors_.push_back(std::make_shared(mean, std, is_scale)); } else if (op_name == "Resize") { bool keep_ratio = op["keep_ratio"].as(); auto target_size = op["target_size"].as>(); @@ -104,10 +106,12 @@ bool PaddleDetPreprocessor::BuildPreprocessPipelineFromConfig( return false; } } - if (has_permute) { - // permute = cast + HWC2CHW - processors_.push_back(std::make_shared("float")); - processors_.push_back(std::make_shared()); + if (!disable_permute_) { + if (has_permute) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } } // Fusion will improve performance @@ -202,7 +206,20 @@ bool PaddleDetPreprocessor::Run(std::vector* images, return true; } - +void PaddleDetPreprocessor::DisableNormalize() { + this->disable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; + } +} +void PaddleDetPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; + } +} } // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/ppdet/preprocessor.h b/fastdeploy/vision/detection/ppdet/preprocessor.h index 2733c450e8a..8371afb696f 100644 --- a/fastdeploy/vision/detection/ppdet/preprocessor.h +++ b/fastdeploy/vision/detection/ppdet/preprocessor.h @@ -39,10 +39,21 @@ class FASTDEPLOY_DECL PaddleDetPreprocessor { */ bool Run(std::vector* images, std::vector* outputs); + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute(); + private: - bool BuildPreprocessPipelineFromConfig(const std::string& config_file); + bool BuildPreprocessPipelineFromConfig(); std::vector> processors_; bool initialized_ = false; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + // read config file + std::string config_file_; }; } // namespace detection diff --git a/fastdeploy/vision/generation/contrib/animegan.cc b/fastdeploy/vision/generation/contrib/animegan.cc new file mode 100644 index 00000000000..22962daa1cb --- /dev/null +++ b/fastdeploy/vision/generation/contrib/animegan.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/generation/contrib/animegan.h" +#include "fastdeploy/function/functions.h" + +namespace fastdeploy { +namespace vision { +namespace generation { + +AnimeGAN::AnimeGAN(const std::string& model_file, const std::string& params_file, + const RuntimeOption& custom_option, + const ModelFormat& model_format) { + + valid_cpu_backends = {Backend::PDINFER}; + valid_gpu_backends = {Backend::PDINFER}; + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + + initialized = Initialize(); +} + +bool AnimeGAN::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize fastdeploy backend." << std::endl; + return false; + } + return true; +} + + +bool AnimeGAN::Predict(cv::Mat& img, cv::Mat* result) { + std::vector results; + if (!BatchPredict({img}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} + +bool AnimeGAN::BatchPredict(const std::vector& images, std::vector* results) { + std::vector fd_images = WrapMat(images); + std::vector processed_data(1); + if (!preprocessor_.Run(fd_images, &(processed_data))) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + std::vector infer_result(1); + processed_data[0].name = InputInfoOfRuntime(0).name; + + if (!Infer(processed_data, &infer_result)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + if (!postprocessor_.Run(infer_result, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/vision/generation/contrib/animegan.h b/fastdeploy/vision/generation/contrib/animegan.h new file mode 100644 index 00000000000..9d1f9aa2789 --- /dev/null +++ b/fastdeploy/vision/generation/contrib/animegan.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/vision/generation/contrib/preprocessor.h" +#include "fastdeploy/vision/generation/contrib/postprocessor.h" + +namespace fastdeploy { + +namespace vision { + +namespace generation { +/*! @brief AnimeGAN model object is used when load a AnimeGAN model. + */ +class FASTDEPLOY_DECL AnimeGAN : public FastDeployModel { + public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./model.pdmodel + * \param[in] params_file Path of parameter file, e.g ./model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" + * \param[in] model_format Model format of the loaded model, default is PADDLE format + */ + AnimeGAN(const std::string& model_file, const std::string& params_file = "", + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE); + + std::string ModelName() const { return "styletransfer/animegan"; } + + /** \brief Predict the style transfer result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] result The output style transfer result will be writen to this structure + * \return true if the prediction successed, otherwise false + */ + bool Predict(cv::Mat& img, cv::Mat* result); + + /** \brief Predict the style transfer result for a batch of input images + * + * \param[in] images The list of input images, each element comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] results The list of output style transfer results will be writen to this structure + * \return true if the batch prediction successed, otherwise false + */ + bool BatchPredict(const std::vector& images, + std::vector* results); + + // Get preprocessor reference of AnimeGAN + AnimeGANPreprocessor& GetPreprocessor() { + return preprocessor_; + } + + // Get postprocessor reference of AnimeGAN + AnimeGANPostprocessor& GetPostprocessor() { + return postprocessor_; + } + + private: + bool Initialize(); + + AnimeGANPreprocessor preprocessor_; + AnimeGANPostprocessor postprocessor_; +}; + +} // namespace generation +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/generation/contrib/animegan_pybind.cc b/fastdeploy/vision/generation/contrib/animegan_pybind.cc new file mode 100644 index 00000000000..853069d71bc --- /dev/null +++ b/fastdeploy/vision/generation/contrib/animegan_pybind.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { +void BindAnimeGAN(pybind11::module& m) { + pybind11::class_(m, "AnimeGAN") + .def(pybind11::init()) + .def("predict", + [](vision::generation::AnimeGAN& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + cv::Mat res; + self.Predict(mat, &res); + auto ret = pybind11::array_t( + {res.rows, res.cols, res.channels()}, res.data); + return ret; + }) + .def("batch_predict", + [](vision::generation::AnimeGAN& self, std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + std::vector> ret; + for(size_t i = 0; i < results.size(); ++i){ + ret.push_back(pybind11::array_t( + {results[i].rows, results[i].cols, results[i].channels()}, results[i].data)); + } + return ret; + }) + .def_property_readonly("preprocessor", &vision::generation::AnimeGAN::GetPreprocessor) + .def_property_readonly("postprocessor", &vision::generation::AnimeGAN::GetPostprocessor); + + pybind11::class_( + m, "AnimeGANPreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::generation::AnimeGANPreprocessor& self, std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(images, &outputs)) { + throw std::runtime_error("Failed to preprocess the input data in AnimeGANPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }); + pybind11::class_( + m, "AnimeGANPostprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::generation::AnimeGANPostprocessor& self, std::vector& inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error("Failed to postprocess the runtime result in AnimeGANPostprocessor."); + } + return results; + }); + +} +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/vision/generation/contrib/postprocessor.cc b/fastdeploy/vision/generation/contrib/postprocessor.cc new file mode 100644 index 00000000000..68dbaf8f357 --- /dev/null +++ b/fastdeploy/vision/generation/contrib/postprocessor.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
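+// This postprocessor maps the runtime output back to displayable images:
+// the FP32 NHWC output tensor is de-normalized with mean/std 127.5, cast to
+// 8-bit, converted from RGB back to BGR, and one cv::Mat is written per
+// batch element into the results vector.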
+ +#include "fastdeploy/vision/generation/contrib/postprocessor.h" + +namespace fastdeploy { +namespace vision { +namespace generation { + +bool AnimeGANPostprocessor::Run(std::vector& infer_results, + std::vector* results) { + // 1. Reverse normalization + // 2. RGB2BGR + FDTensor& output_tensor = infer_results.at(0); + std::vector shape = output_tensor.Shape(); // n, h, w, c + int size = shape[1] * shape[2] * shape[3]; + results->resize(shape[0]); + float* infer_result_data = reinterpret_cast(output_tensor.Data()); + for(size_t i = 0; i < results->size(); ++i){ + Mat result_mat = Mat::Create(shape[1], shape[2], 3, FDDataType::FP32, infer_result_data+i*size); + std::vector mean{127.5f, 127.5f, 127.5f}; + std::vector std{127.5f, 127.5f, 127.5f}; + Convert::Run(&result_mat, mean, std); + // tmp data type is float[0-1.0],convert to uint type + auto temp = result_mat.GetOpenCVMat(); + cv::Mat res = cv::Mat::zeros(temp->size(), CV_8UC3); + temp->convertTo(res, CV_8UC3, 1); + Mat fd_image = WrapMat(res); + BGR2RGB::Run(&fd_image); + res = *(fd_image.GetOpenCVMat()); + res.copyTo(results->at(i)); + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/generation/contrib/postprocessor.h b/fastdeploy/vision/generation/contrib/postprocessor.h new file mode 100644 index 00000000000..3f3a7728bc1 --- /dev/null +++ b/fastdeploy/vision/generation/contrib/postprocessor.h @@ -0,0 +1,43 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/function/functions.h" + +namespace fastdeploy { +namespace vision { + +namespace generation { +/*! @brief Postprocessor object for AnimeGAN serials model. + */ +class FASTDEPLOY_DECL AnimeGANPostprocessor { + public: + /** \brief Create a postprocessor instance for AnimeGAN serials model + */ + AnimeGANPostprocessor() {} + + /** \brief Process the result of runtime + * + * \param[in] infer_results The inference results from runtime + * \param[in] results The output results of style transfer + * \return true if the postprocess successed, otherwise false + */ + bool Run(std::vector& infer_results, + std::vector* results); +}; + +} // namespace generation +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/generation/contrib/preprocessor.cc b/fastdeploy/vision/generation/contrib/preprocessor.cc new file mode 100644 index 00000000000..24e75fdc3b0 --- /dev/null +++ b/fastdeploy/vision/generation/contrib/preprocessor.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/generation/contrib/preprocessor.h" + +namespace fastdeploy { +namespace vision { +namespace generation { + +bool AnimeGANPreprocessor::Run(std::vector& images, std::vector* outputs) { + // 1. BGR2RGB + // 2. Convert(opencv style) or Normalize + for (size_t i = 0; i < images.size(); ++i) { + auto ret = BGR2RGB::Run(&images[i]); + if (!ret) { + FDERROR << "Failed to process image:" << i << " in " + << "BGR2RGB" << "." << std::endl; + return false; + } + ret = Cast::Run(&images[i], "float"); + if (!ret) { + FDERROR << "Failed to process image:" << i << " in " + << "Cast" << "." << std::endl; + return false; + } + std::vector mean{1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; + std::vector std {-1.f, -1.f, -1.f}; + ret = Convert::Run(&images[i], mean, std); + if (!ret) { + FDERROR << "Failed to process image:" << i << " in " + << "Convert" << "." << std::endl; + return false; + } + } + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images.size()); + for (size_t i = 0; i < images.size(); ++i) { + images[i].ShareWithTensor(&(tensors[i])); + tensors[i].ExpandDim(0); + } + if (tensors.size() == 1) { + (*outputs)[0] = std::move(tensors[0]); + } else { + function::Concat(tensors, &((*outputs)[0]), 0); + } + return true; +} + +} // namespace generation +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/generation/contrib/preprocessor.h b/fastdeploy/vision/generation/contrib/preprocessor.h new file mode 100644 index 00000000000..4fcf94a15b1 --- /dev/null +++ b/fastdeploy/vision/generation/contrib/preprocessor.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/function/functions.h" + +namespace fastdeploy { +namespace vision { + +namespace generation { +/*! @brief Preprocessor object for AnimeGAN series models. + */ +class FASTDEPLOY_DECL AnimeGANPreprocessor { + public: + /** \brief Create a preprocessor instance for AnimeGAN series models + */ + AnimeGANPreprocessor() {} + + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input image data list, all the elements are returned wrapped by FDMat.
+ * \param[in] output The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector& images, std::vector* output); +}; + +} // namespace generation +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/generation/generation_pybind.cc b/fastdeploy/vision/generation/generation_pybind.cc new file mode 100644 index 00000000000..d4f02612e1a --- /dev/null +++ b/fastdeploy/vision/generation/generation_pybind.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { + +void BindAnimeGAN(pybind11::module& m); + +void BindGeneration(pybind11::module& m) { + auto generation_module = m.def_submodule("generation", "image generation submodule"); + BindAnimeGAN(generation_module); +} +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/classifier.h b/fastdeploy/vision/ocr/ppocr/classifier.h index cd035e26939..824d9c3be1b 100755 --- a/fastdeploy/vision/ocr/ppocr/classifier.h +++ b/fastdeploy/vision/ocr/ppocr/classifier.h @@ -68,11 +68,20 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel { std::vector* cls_scores, size_t start_index, size_t end_index); - ClassifierPreprocessor preprocessor_; - ClassifierPostprocessor postprocessor_; + /// Get preprocessor reference of ClassifierPreprocessor + virtual ClassifierPreprocessor& GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of ClassifierPostprocessor + virtual ClassifierPostprocessor& GetPostprocessor() { + return postprocessor_; + } private: bool Initialize(); + ClassifierPreprocessor preprocessor_; + ClassifierPostprocessor postprocessor_; }; } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h index d9702e1a153..e596db71d2e 100644 --- a/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h @@ -39,6 +39,12 @@ class FASTDEPLOY_DECL ClassifierPostprocessor { std::vector* cls_labels, std::vector* cls_scores, size_t start_index, size_t total_size); + /// Set threshold for the classification postprocess, default is 0.9 + void SetClsThresh(float cls_thresh) { cls_thresh_ = cls_thresh; } + + /// Get threshold value of the classification postprocess. 
+ float GetClsThresh() const { return cls_thresh_; } + float cls_thresh_ = 0.9; }; diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h index 8c1c8161114..8d42f3d31d4 100644 --- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h @@ -34,6 +34,27 @@ class FASTDEPLOY_DECL ClassifierPreprocessor { bool Run(std::vector* images, std::vector* outputs, size_t start_index, size_t end_index); + /// Set mean value for the image normalization in classification preprocess + void SetMean(std::vector mean) { mean_ = mean; } + /// Get mean value of the image normalization in classification preprocess + std::vector GetMean() const { return mean_; } + + /// Set scale value for the image normalization in classification preprocess + void SetScale(std::vector scale) { scale_ = scale; } + /// Get scale value of the image normalization in classification preprocess + std::vector GetScale() const { return scale_; } + + /// Set is_scale for the image normalization in classification preprocess + void SetIsScale(bool is_scale) { is_scale_ = is_scale; } + /// Get is_scale of the image normalization in classification preprocess + bool GetIsScale() const { return is_scale_; } + + /// Set cls_image_shape for the classification preprocess + void SetClsImageShape(std::vector cls_image_shape) + { cls_image_shape_ = cls_image_shape; } + /// Get cls_image_shape for the classification preprocess + std::vector GetClsImageShape() const { return cls_image_shape_; } + std::vector mean_ = {0.5f, 0.5f, 0.5f}; std::vector scale_ = {0.5f, 0.5f, 0.5f}; bool is_scale_ = true; diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.h b/fastdeploy/vision/ocr/ppocr/dbdetector.h index d2305abd7c8..ec1ef028dfd 100755 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.h +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.h @@ -61,11 +61,20 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { virtual bool BatchPredict(const std::vector& images, std::vector>>* det_results); - DBDetectorPreprocessor preprocessor_; - DBDetectorPostprocessor postprocessor_; + /// Get preprocessor reference of DBDetectorPreprocessor + virtual DBDetectorPreprocessor& GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of DBDetectorPostprocessor + virtual DBDetectorPostprocessor& GetPostprocessor() { + return postprocessor_; + } private: bool Initialize(); + DBDetectorPreprocessor preprocessor_; + DBDetectorPostprocessor postprocessor_; }; } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/det_postprocessor.h b/fastdeploy/vision/ocr/ppocr/det_postprocessor.h index 1152288439d..129ca62581a 100644 --- a/fastdeploy/vision/ocr/ppocr/det_postprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/det_postprocessor.h @@ -36,6 +36,34 @@ class FASTDEPLOY_DECL DBDetectorPostprocessor { std::vector>>* results, const std::vector>& batch_det_img_info); + /// Set det_db_thresh for the detection postprocess, default is 0.3 + void SetDetDBThresh(double det_db_thresh) { det_db_thresh_ = det_db_thresh; } + /// Get det_db_thresh of the detection postprocess + double GetDetDBThresh() const { return det_db_thresh_; } + + /// Set det_db_box_thresh for the detection postprocess, default is 0.6 + void SetDetDBBoxThresh(double det_db_box_thresh) + { det_db_box_thresh_ = det_db_box_thresh; } + /// Get det_db_box_thresh of the detection postprocess + double GetDetDBBoxThresh() const { return det_db_box_thresh_; } + + /// Set det_db_unclip_ratio for the detection 
postprocess, default is 1.5 + void SetDetDBUnclipRatio(double det_db_unclip_ratio) + { det_db_unclip_ratio_ = det_db_unclip_ratio; } + /// Get det_db_unclip_ratio_ of the detection postprocess + double GetDetDBUnclipRatio() const { return det_db_unclip_ratio_; } + + /// Set det_db_score_mode for the detection postprocess, default is 'slow' + void SetDetDBScoreMode(std::string det_db_score_mode) + { det_db_score_mode_ = det_db_score_mode; } + /// Get det_db_score_mode_ of the detection postprocess + std::string GetDetDBScoreMode() const { return det_db_score_mode_; } + + /// Set use_dilation for the detection postprocess, default is fasle + void SetUseDilation(int use_dilation) { use_dilation_ = use_dilation; } + /// Get use_dilation of the detection postprocess + int GetUseDilation() const { return use_dilation_; } + double det_db_thresh_ = 0.3; double det_db_box_thresh_ = 0.6; double det_db_unclip_ratio_ = 1.5; diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h index 705f19c7bdb..bf496079f23 100644 --- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h @@ -35,6 +35,26 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor { std::vector* outputs, std::vector>* batch_det_img_info_ptr); + /// Set max_side_len for the detection preprocess, default is 960 + void SetMaxSideLen(int max_side_len) { max_side_len_ = max_side_len; } + /// Get max_side_len of the detection preprocess + int GetMaxSideLen() const { return max_side_len_; } + + /// Set mean value for the image normalization in detection preprocess + void SetMean(std::vector mean) { mean_ = mean; } + /// Get mean value of the image normalization in detection preprocess + std::vector GetMean() const { return mean_; } + + /// Set scale value for the image normalization in detection preprocess + void SetScale(std::vector scale) { scale_ = scale; } + /// Get scale value of the image normalization in detection preprocess + std::vector GetScale() const { return scale_; } + + /// Set is_scale for the image normalization in detection preprocess + void SetIsScale(bool is_scale) { is_scale_ = is_scale; } + /// Get is_scale of the image normalization in detection preprocess + bool GetIsScale() const { return is_scale_; } + int max_side_len_ = 960; std::vector mean_ = {0.485f, 0.456f, 0.406f}; std::vector scale_ = {0.229f, 0.224f, 0.225f}; diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc index acc73c57d4b..2bcb697a80f 100755 --- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -24,10 +24,10 @@ void BindPPOCRModel(pybind11::module& m) { // DBDetector pybind11::class_(m, "DBDetectorPreprocessor") .def(pybind11::init<>()) - .def_readwrite("max_side_len", &vision::ocr::DBDetectorPreprocessor::max_side_len_) - .def_readwrite("mean", &vision::ocr::DBDetectorPreprocessor::mean_) - .def_readwrite("scale", &vision::ocr::DBDetectorPreprocessor::scale_) - .def_readwrite("is_scale", &vision::ocr::DBDetectorPreprocessor::is_scale_) + .def_property("max_side_len", &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) + .def_property("mean", &vision::ocr::DBDetectorPreprocessor::GetMean, &vision::ocr::DBDetectorPreprocessor::SetMean) + .def_property("scale", &vision::ocr::DBDetectorPreprocessor::GetScale, &vision::ocr::DBDetectorPreprocessor::SetScale) + .def_property("is_scale", 
&vision::ocr::DBDetectorPreprocessor::GetIsScale, &vision::ocr::DBDetectorPreprocessor::SetIsScale) .def("run", [](vision::ocr::DBDetectorPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { @@ -44,11 +44,12 @@ void BindPPOCRModel(pybind11::module& m) { pybind11::class_(m, "DBDetectorPostprocessor") .def(pybind11::init<>()) - .def_readwrite("det_db_thresh", &vision::ocr::DBDetectorPostprocessor::det_db_thresh_) - .def_readwrite("det_db_box_thresh", &vision::ocr::DBDetectorPostprocessor::det_db_box_thresh_) - .def_readwrite("det_db_unclip_ratio", &vision::ocr::DBDetectorPostprocessor::det_db_unclip_ratio_) - .def_readwrite("det_db_score_mode", &vision::ocr::DBDetectorPostprocessor::det_db_score_mode_) - .def_readwrite("use_dilation", &vision::ocr::DBDetectorPostprocessor::use_dilation_) + .def_property("det_db_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) + .def_property("det_db_box_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) + .def_property("det_db_unclip_ratio", &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) + .def_property("det_db_score_mode", &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) + .def_property("use_dilation", &vision::ocr::DBDetectorPostprocessor::GetUseDilation, &vision::ocr::DBDetectorPostprocessor::SetUseDilation) + .def("run", [](vision::ocr::DBDetectorPostprocessor& self, std::vector& inputs, const std::vector>& batch_det_img_info) { @@ -75,8 +76,8 @@ void BindPPOCRModel(pybind11::module& m) { .def(pybind11::init()) .def(pybind11::init<>()) - .def_readwrite("preprocessor", &vision::ocr::DBDetector::preprocessor_) - .def_readwrite("postprocessor", &vision::ocr::DBDetector::postprocessor_) + .def_property_readonly("preprocessor", &vision::ocr::DBDetector::GetPreprocessor) + .def_property_readonly("postprocessor", &vision::ocr::DBDetector::GetPostprocessor) .def("predict", [](vision::ocr::DBDetector& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); @@ -97,10 +98,10 @@ void BindPPOCRModel(pybind11::module& m) { // Classifier pybind11::class_(m, "ClassifierPreprocessor") .def(pybind11::init<>()) - .def_readwrite("cls_image_shape", &vision::ocr::ClassifierPreprocessor::cls_image_shape_) - .def_readwrite("mean", &vision::ocr::ClassifierPreprocessor::mean_) - .def_readwrite("scale", &vision::ocr::ClassifierPreprocessor::scale_) - .def_readwrite("is_scale", &vision::ocr::ClassifierPreprocessor::is_scale_) + .def_property("cls_image_shape", &vision::ocr::ClassifierPreprocessor::GetClsImageShape, &vision::ocr::ClassifierPreprocessor::SetClsImageShape) + .def_property("mean", &vision::ocr::ClassifierPreprocessor::GetMean, &vision::ocr::ClassifierPreprocessor::SetMean) + .def_property("scale", &vision::ocr::ClassifierPreprocessor::GetScale, &vision::ocr::ClassifierPreprocessor::SetScale) + .def_property("is_scale", &vision::ocr::ClassifierPreprocessor::GetIsScale, &vision::ocr::ClassifierPreprocessor::SetIsScale) .def("run", [](vision::ocr::ClassifierPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { @@ -118,7 +119,7 @@ void BindPPOCRModel(pybind11::module& m) { pybind11::class_(m, "ClassifierPostprocessor") .def(pybind11::init<>()) - .def_readwrite("cls_thresh", 
&vision::ocr::ClassifierPostprocessor::cls_thresh_) + .def_property("cls_thresh", &vision::ocr::ClassifierPostprocessor::GetClsThresh, &vision::ocr::ClassifierPostprocessor::SetClsThresh) .def("run", [](vision::ocr::ClassifierPostprocessor& self, std::vector& inputs) { std::vector cls_labels; @@ -144,8 +145,8 @@ void BindPPOCRModel(pybind11::module& m) { .def(pybind11::init()) .def(pybind11::init<>()) - .def_readwrite("preprocessor", &vision::ocr::Classifier::preprocessor_) - .def_readwrite("postprocessor", &vision::ocr::Classifier::postprocessor_) + .def_property_readonly("preprocessor", &vision::ocr::Classifier::GetPreprocessor) + .def_property_readonly("postprocessor", &vision::ocr::Classifier::GetPostprocessor) .def("predict", [](vision::ocr::Classifier& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); @@ -168,11 +169,11 @@ void BindPPOCRModel(pybind11::module& m) { // Recognizer pybind11::class_(m, "RecognizerPreprocessor") .def(pybind11::init<>()) - .def_readwrite("rec_image_shape", &vision::ocr::RecognizerPreprocessor::rec_image_shape_) - .def_readwrite("mean", &vision::ocr::RecognizerPreprocessor::mean_) - .def_readwrite("scale", &vision::ocr::RecognizerPreprocessor::scale_) - .def_readwrite("is_scale", &vision::ocr::RecognizerPreprocessor::is_scale_) - .def_readwrite("static_shape", &vision::ocr::RecognizerPreprocessor::static_shape_) + .def_property("static_shape_infer", &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) + .def_property("rec_image_shape", &vision::ocr::RecognizerPreprocessor::GetRecImageShape, &vision::ocr::RecognizerPreprocessor::SetRecImageShape) + .def_property("mean", &vision::ocr::RecognizerPreprocessor::GetMean, &vision::ocr::RecognizerPreprocessor::SetMean) + .def_property("scale", &vision::ocr::RecognizerPreprocessor::GetScale, &vision::ocr::RecognizerPreprocessor::SetScale) + .def_property("is_scale", &vision::ocr::RecognizerPreprocessor::GetIsScale, &vision::ocr::RecognizerPreprocessor::SetIsScale) .def("run", [](vision::ocr::RecognizerPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { @@ -215,8 +216,8 @@ void BindPPOCRModel(pybind11::module& m) { .def(pybind11::init()) .def(pybind11::init<>()) - .def_readwrite("preprocessor", &vision::ocr::Recognizer::preprocessor_) - .def_readwrite("postprocessor", &vision::ocr::Recognizer::postprocessor_) + .def_property_readonly("preprocessor", &vision::ocr::Recognizer::GetPreprocessor) + .def_property_readonly("postprocessor", &vision::ocr::Recognizer::GetPostprocessor) .def("predict", [](vision::ocr::Recognizer& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc b/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc index 756604dde8e..622fe41c086 100755 --- a/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc +++ b/fastdeploy/vision/ocr/ppocr/ppocr_v2.cc @@ -23,14 +23,14 @@ PPOCRv2::PPOCRv2(fastdeploy::vision::ocr::DBDetector* det_model, fastdeploy::vision::ocr::Recognizer* rec_model) : detector_(det_model), classifier_(cls_model), recognizer_(rec_model) { Initialized(); - recognizer_->preprocessor_.rec_image_shape_[1] = 32; + recognizer_->GetPreprocessor().rec_image_shape_[1] = 32; } PPOCRv2::PPOCRv2(fastdeploy::vision::ocr::DBDetector* det_model, fastdeploy::vision::ocr::Recognizer* rec_model) : detector_(det_model), recognizer_(rec_model) { Initialized(); - recognizer_->preprocessor_.rec_image_shape_[1] = 32; + 
recognizer_->GetPreprocessor().rec_image_shape_[1] = 32; } bool PPOCRv2::SetClsBatchSize(int cls_batch_size) { @@ -134,7 +134,7 @@ bool PPOCRv2::BatchPredict(const std::vector& images, return false; }else{ for (size_t i_img = start_index; i_img < end_index; ++i_img) { - if(cls_labels_ptr->at(i_img) % 2 == 1 && cls_scores_ptr->at(i_img) > classifier_->postprocessor_.cls_thresh_) { + if(cls_labels_ptr->at(i_img) % 2 == 1 && cls_scores_ptr->at(i_img) > classifier_->GetPostprocessor().cls_thresh_) { cv::rotate(image_list[i_img], image_list[i_img], 1); } } diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_v3.h b/fastdeploy/vision/ocr/ppocr/ppocr_v3.h index ed9177d9232..fa46fdb2c64 100755 --- a/fastdeploy/vision/ocr/ppocr/ppocr_v3.h +++ b/fastdeploy/vision/ocr/ppocr/ppocr_v3.h @@ -36,7 +36,7 @@ class FASTDEPLOY_DECL PPOCRv3 : public PPOCRv2 { fastdeploy::vision::ocr::Recognizer* rec_model) : PPOCRv2(det_model, cls_model, rec_model) { // The only difference between v2 and v3 - recognizer_->preprocessor_.rec_image_shape_[1] = 48; + recognizer_->GetPreprocessor().rec_image_shape_[1] = 48; } /** \brief Classification model is optional, so this function is set up the detection model path and recognition model path respectively. * @@ -47,7 +47,7 @@ class FASTDEPLOY_DECL PPOCRv3 : public PPOCRv2 { fastdeploy::vision::ocr::Recognizer* rec_model) : PPOCRv2(det_model, rec_model) { // The only difference between v2 and v3 - recognizer_->preprocessor_.rec_image_shape_[1] = 48; + recognizer_->GetPreprocessor().rec_image_shape_[1] = 48; } }; diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc index 8ed4e0c53f2..ad049fdceca 100644 --- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc @@ -22,12 +22,12 @@ namespace vision { namespace ocr { void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, - const std::vector& rec_image_shape, bool static_shape) { + const std::vector& rec_image_shape, bool static_shape_infer) { int img_h, img_w; img_h = rec_image_shape[1]; img_w = rec_image_shape[2]; - if (!static_shape) { + if (!static_shape_infer) { img_w = int(img_h * max_wh_ratio); float ratio = float(mat->Width()) / float(mat->Height()); @@ -52,23 +52,6 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, } } -void OcrRecognizerResizeImageOnAscend(FDMat* mat, - const std::vector& rec_image_shape) { - - int img_h, img_w; - img_h = rec_image_shape[1]; - img_w = rec_image_shape[2]; - - if (mat->Width() >= img_w) { - Resize::Run(mat, img_w, img_h); // Reszie W to 320 - } else { - Resize::Run(mat, mat->Width(), img_h); - Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {0,0,0}); - // Pad to 320 - } -} - - bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs) { return Run(images, outputs, 0, images->size(), {}); } @@ -101,7 +84,7 @@ bool RecognizerPreprocessor::Run(std::vector* images, std::vectorat(real_index)); - OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_); + OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_infer_); NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); } // Only have 1 output Tensor. 
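A minimal usage sketch of the re-bound OCR options above (illustrative only: the model paths are placeholders and the entry points are assumed to be the existing fastdeploy.vision.ocr Python wrappers). Because the fields moved from def_readwrite to def_property, they still read and write like plain attributes from Python, but now route through the new Get*/Set* accessors:

import fastdeploy as fd

det = fd.vision.ocr.DBDetector("det.pdmodel", "det.pdiparams")
cls = fd.vision.ocr.Classifier("cls.pdmodel", "cls.pdiparams")
rec = fd.vision.ocr.Recognizer("rec.pdmodel", "rec.pdiparams", "dict.txt")

det.preprocessor.max_side_len = 960          # DBDetectorPreprocessor::SetMaxSideLen
det.postprocessor.det_db_box_thresh = 0.6    # DBDetectorPostprocessor::SetDetDBBoxThresh
cls.postprocessor.cls_thresh = 0.9           # ClassifierPostprocessor::SetClsThresh
rec.preprocessor.static_shape_infer = True   # renamed from static_shape; for hardware without good dynamic-shape support

The preprocessor and postprocessor members themselves are now private in C++ and exposed through read-only properties backed by GetPreprocessor()/GetPostprocessor().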
diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h index ee21c73625b..c50711588d1 100644 --- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h +++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h @@ -35,11 +35,40 @@ class FASTDEPLOY_DECL RecognizerPreprocessor { size_t start_index, size_t end_index, const std::vector& indices); + /// Set static_shape_infer is true or not. When deploy PP-OCR + /// on hardware which can not support dynamic input shape very well, + /// like Huawei Ascned, static_shape_infer needs to to be true. + void SetStaticShapeInfer(bool static_shape_infer) + { static_shape_infer_ = static_shape_infer; } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + + /// Set mean value for the image normalization in recognition preprocess + void SetMean(std::vector mean) { mean_ = mean; } + /// Get mean value of the image normalization in recognition preprocess + std::vector GetMean() const { return mean_; } + + /// Set scale value for the image normalization in recognition preprocess + void SetScale(std::vector scale) { scale_ = scale; } + /// Get scale value of the image normalization in recognition preprocess + std::vector GetScale() const { return scale_; } + + /// Set is_scale for the image normalization in recognition preprocess + void SetIsScale(bool is_scale) { is_scale_ = is_scale; } + /// Get is_scale of the image normalization in recognition preprocess + bool GetIsScale() const { return is_scale_; } + + /// Set rec_image_shape for the recognition preprocess + void SetRecImageShape(std::vector rec_image_shape) + { rec_image_shape_ = rec_image_shape; } + /// Get rec_image_shape for the recognition preprocess + std::vector GetRecImageShape() const { return rec_image_shape_; } + std::vector rec_image_shape_ = {3, 48, 320}; std::vector mean_ = {0.5f, 0.5f, 0.5f}; std::vector scale_ = {0.5f, 0.5f, 0.5f}; bool is_scale_ = true; - bool static_shape_ = false; + bool static_shape_infer_ = false; }; } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.h b/fastdeploy/vision/ocr/ppocr/recognizer.h index bba8a444779..60ffdcd10de 100755 --- a/fastdeploy/vision/ocr/ppocr/recognizer.h +++ b/fastdeploy/vision/ocr/ppocr/recognizer.h @@ -67,11 +67,20 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel { size_t start_index, size_t end_index, const std::vector& indices); - RecognizerPreprocessor preprocessor_; - RecognizerPostprocessor postprocessor_; + /// Get preprocessor reference of DBDetectorPreprocessor + virtual RecognizerPreprocessor& GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of DBDetectorPostprocessor + virtual RecognizerPostprocessor& GetPostprocessor() { + return postprocessor_; + } private: bool Initialize(); + RecognizerPreprocessor preprocessor_; + RecognizerPostprocessor postprocessor_; }; } // namespace ocr diff --git a/fastdeploy/vision/segmentation/ppseg/model.cc b/fastdeploy/vision/segmentation/ppseg/model.cc index 7baa7ac7ffa..54f978828cd 100755 --- a/fastdeploy/vision/segmentation/ppseg/model.cc +++ b/fastdeploy/vision/segmentation/ppseg/model.cc @@ -25,12 +25,18 @@ PaddleSegModel::PaddleSegModel(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) : preprocessor_(config_file), postprocessor_(config_file) { - valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE}; - valid_gpu_backends = 
{Backend::PDINFER, Backend::ORT, Backend::TRT}; + if(model_format == ModelFormat::SOPHGO) { + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + } + else{ + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } valid_rknpu_backends = {Backend::RKNPU2}; valid_timvx_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE}; + runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; diff --git a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc index e687d3cc413..78c7c9ccc3b 100644 --- a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc +++ b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc @@ -36,9 +36,12 @@ void BindPPSeg(pybind11::module& m) { } return make_pair(outputs, imgs_info);; }) - .def("disable_normalize_and_permute", - &vision::segmentation::PaddleSegPreprocessor::DisableNormalizeAndPermute) - + .def("disable_normalize", [](vision::segmentation::PaddleSegPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::segmentation::PaddleSegPreprocessor& self) { + self.DisablePermute(); + }) .def_property("is_vertical_screen", &vision::segmentation::PaddleSegPreprocessor::GetIsVerticalScreen, &vision::segmentation::PaddleSegPreprocessor::SetIsVerticalScreen); diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.cc b/fastdeploy/vision/segmentation/ppseg/preprocessor.cc index 027309aad0f..92b0378955f 100644 --- a/fastdeploy/vision/segmentation/ppseg/preprocessor.cc +++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.cc @@ -43,7 +43,7 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { FDASSERT(op.IsMap(), "Require the transform information in yaml be Map type."); if (op["type"].as() == "Normalize") { - if (!disable_normalize_and_permute_) { + if (!disable_normalize_) { std::vector mean = {0.5, 0.5, 0.5}; std::vector std = {0.5, 0.5, 0.5}; if (op["mean"]) { @@ -55,7 +55,7 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { processors_.push_back(std::make_shared(mean, std)); } } else if (op["type"].as() == "Resize") { - is_contain_resize_op = true; + is_contain_resize_op_ = true; const auto& target_size = op["target_size"]; int resize_width = target_size[0].as(); int resize_height = target_size[1].as(); @@ -73,13 +73,13 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() { auto input_shape = cfg["Deploy"]["input_shape"]; int input_height = input_shape[2].as(); int input_width = input_shape[3].as(); - if (input_height != -1 && input_width != -1 && !is_contain_resize_op) { - is_contain_resize_op = true; + if (input_height != -1 && input_width != -1 && !is_contain_resize_op_) { + is_contain_resize_op_ = true; processors_.insert(processors_.begin(), std::make_shared(input_width, input_height)); } } - if (!disable_normalize_and_permute_) { + if (!disable_permute_) { processors_.push_back(std::make_shared()); } @@ -121,7 +121,7 @@ bool PaddleSegPreprocessor::Run(std::vector* images, std::vectorsize(); // Batch preprocess : resize all images to the largest image shape in batch - if (!is_contain_resize_op && img_num > 1) { + if (!is_contain_resize_op_ && img_num > 1) { int max_width = 0; int max_height = 0; for (size_t i = 0; i < img_num; ++i) { @@ -156,14 +156,20 @@ bool PaddleSegPreprocessor::Run(std::vector* images, 
std::vectordisable_normalize_ = true; + // the DisableNormalize function will be invalid if the configuration file is loaded during preprocessing + if (!BuildPreprocessPipelineFromConfig()) { + FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; + } +} +void PaddleSegPreprocessor::DisablePermute() { + this->disable_permute_ = true; + // the DisablePermute function will be invalid if the configuration file is loaded during preprocessing if (!BuildPreprocessPipelineFromConfig()) { FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl; } } - } // namespace segmentation } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.h b/fastdeploy/vision/segmentation/ppseg/preprocessor.h index faa7fb8de59..6452e8e0e22 100644 --- a/fastdeploy/vision/segmentation/ppseg/preprocessor.h +++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.h @@ -49,8 +49,10 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor { is_vertical_screen_ = value; } - // This function will disable normalize and hwc2chw in preprocessing step. - void DisableNormalizeAndPermute(); + /// This function will disable normalize in preprocessing step. + void DisableNormalize(); + /// This function will disable hwc2chw in preprocessing step. + void DisablePermute(); private: virtual bool BuildPreprocessPipelineFromConfig(); @@ -61,10 +63,12 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor { */ bool is_vertical_screen_ = false; - // for recording the switch of normalize and hwc2chw - bool disable_normalize_and_permute_ = false; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; - bool is_contain_resize_op = false; + bool is_contain_resize_op_ = false; bool initialized_ = false; }; diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index cecd4f7c37d..aa387b4305d 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -28,6 +28,7 @@ void BindTracking(pybind11::module& m); void BindKeyPointDetection(pybind11::module& m); void BindHeadPose(pybind11::module& m); void BindSR(pybind11::module& m); +void BindGeneration(pybind11::module& m); #ifdef ENABLE_VISION_VISUALIZE void BindVisualize(pybind11::module& m); #endif @@ -213,6 +214,7 @@ void BindVision(pybind11::module& m) { BindKeyPointDetection(m); BindHeadPose(m); BindSR(m); + BindGeneration(m); #ifdef ENABLE_VISION_VISUALIZE BindVisualize(m); #endif diff --git a/python/fastdeploy/__init__.py b/python/fastdeploy/__init__.py old mode 100644 new mode 100755 index 42db5c281cc..730d98a734f --- a/python/fastdeploy/__init__.py +++ b/python/fastdeploy/__init__.py @@ -36,5 +36,6 @@ from . import vision from . import pipeline from . import text -from .download import download, download_and_decompress, download_model +from . import encryption +from .download import download, download_and_decompress, download_model, get_model_list from . import serving diff --git a/python/fastdeploy/download.py b/python/fastdeploy/download.py index 0b14ccf8e54..7af6042a8d2 100644 --- a/python/fastdeploy/download.py +++ b/python/fastdeploy/download.py @@ -213,6 +213,30 @@ def download_and_decompress(url, path='.', rename=None): return +def get_model_list(category: str=None): + ''' + Get all pre-trained models information supported by fd.download_model. + Args: + category(str): model category, if None, list all models in all categories. 
+ Returns: + results(dict): a dictionary, key is category, value is a list which contains models information. + ''' + result = model_server.get_model_list() + if result['status'] != 0: + raise ValueError( + 'Failed to get pretrained models information from hub model server.' + ) + result = result['data'] + if category is None: + return result + elif category in result: + return {category: result[category]} + else: + raise ValueError( + 'No pretrained model in category {} can be downloaded now.'.format( + category)) + + def download_model(name: str, path: str=None, format: str=None, @@ -237,11 +261,13 @@ def download_model(name: str, if format == 'paddle': if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count( "zip") > 0: + archive_path = fullpath fullpath = decompress(fullpath) try: os.rename(fullpath, os.path.join(os.path.dirname(fullpath), name)) fullpath = os.path.join(os.path.dirname(fullpath), name) + os.remove(archive_path) except FileExistsError: pass print('Successfully download model at path: {}'.format(fullpath)) diff --git a/python/fastdeploy/encryption/__init__.py b/python/fastdeploy/encryption/__init__.py new file mode 100755 index 00000000000..489103ff8ec --- /dev/null +++ b/python/fastdeploy/encryption/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from .encryption import * diff --git a/python/fastdeploy/encryption/encryption.py b/python/fastdeploy/encryption/encryption.py new file mode 100755 index 00000000000..843064258d4 --- /dev/null +++ b/python/fastdeploy/encryption/encryption.py @@ -0,0 +1,41 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .. import c_lib_wrap as C + + +def generate_key(): + """generate a key for encryption + :return: key(str) + """ + return C.encryption.generate_key() + + +def encrypt(input, key=None): + """Encrypt a input string with key. + :param: input: (str) The input str for encryption + :param: key: (str,optional) The key for encryption(if not given, generate automatically.) + :return: pair(str, str) [encrypted string, key] + """ + if key is None: + key = generate_key() + return C.encryption.encrypt(input, key) + + +def decrypt(cipher, key): + """Decrypt a input cipher with key. 
+ :param: cipher: (str) The input str for decryption + :param: key: (str) The key for decryption + :return: str(The decrypted str) + """ + return C.encryption.decrypt(cipher, key) diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index 23cf697bf3e..d7035bd1b2a 100755 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -301,6 +301,11 @@ def use_rknpu2(self, rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0): return self._option.use_rknpu2(rknpu2_name, rknpu2_core) + def use_sophgo(self): + """Inference with SOPHGO TPU + """ + return self._option.use_sophgo() + def use_ascend(self): """Inference with Huawei Ascend NPU """ diff --git a/python/fastdeploy/utils/hub_model_server.py b/python/fastdeploy/utils/hub_model_server.py index 849763b9f61..3eb891e64c8 100644 --- a/python/fastdeploy/utils/hub_model_server.py +++ b/python/fastdeploy/utils/hub_model_server.py @@ -98,6 +98,20 @@ def request(self, path: str, params: dict) -> dict: except requests.exceptions.ConnectionError as e: raise ServerConnectionError(self._url) + def get_model_list(self): + ''' + Get all pre-trained models information in dataset. + Return: + result(dict): key is category name, value is a list which contains models \ + information such as name, format and version. + ''' + api = '{}/{}'.format(self._url, 'fastdeploy_listmodels') + try: + result = requests.get(api, timeout=self._timeout) + return result.json() + except requests.exceptions.ConnectionError as e: + raise ServerConnectionError(self._url) + def is_connected(self): return self.check(self._url) diff --git a/python/fastdeploy/vision/__init__.py b/python/fastdeploy/vision/__init__.py index a5531a8a908..ba9a2d0ca99 100755 --- a/python/fastdeploy/vision/__init__.py +++ b/python/fastdeploy/vision/__init__.py @@ -26,6 +26,7 @@ from . import headpose from . import sr from . import evaluation +from . import generation from .utils import fd_result_to_json from .visualize import * from .. import C diff --git a/python/fastdeploy/vision/detection/ppdet/__init__.py b/python/fastdeploy/vision/detection/ppdet/__init__.py index 45734eef09e..9f4ad75bc41 100644 --- a/python/fastdeploy/vision/detection/ppdet/__init__.py +++ b/python/fastdeploy/vision/detection/ppdet/__init__.py @@ -36,6 +36,18 @@ def run(self, input_ims): """ return self._preprocessor.run(input_ims) + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class PaddleDetPostprocessor: def __init__(self): @@ -500,4 +512,204 @@ def __init__(self, self._model = C.vision.detection.RTMDet( model_file, params_file, config_file, self._runtime_option, model_format) - assert self.initialized, "RTMDet model initialize failed." \ No newline at end of file + assert self.initialized, "RTMDet model initialize failed." + + +class CascadeRCNN(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a CascadeRCNN model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g cascadercnn/model.pdmodel + :param params_file: (str)Path of parameters file, e.g cascadercnn/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "CascadeRCNN model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.CascadeRCNN( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "CascadeRCNN model initialize failed." + + +class PSSDet(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a PSSDet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g pssdet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g pssdet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "PSSDet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PSSDet( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "PSSDet model initialize failed." + + +class RetinaNet(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a RetinaNet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g retinanet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g retinanet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "RetinaNet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.RetinaNet( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "RetinaNet model initialize failed." + + +class PPYOLOESOD(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a PPYOLOESOD model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g ppyoloesod/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ppyoloesod/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "PPYOLOESOD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.PPYOLOESOD( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "PPYOLOESOD model initialize failed." + + +class FCOS(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a FCOS model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g fcos/model.pdmodel + :param params_file: (str)Path of parameters file, e.g fcos/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "FCOS model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.FCOS( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "FCOS model initialize failed." + + +class TTFNet(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a TTFNet model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g ttfnet/model.pdmodel + :param params_file: (str)Path of parameters file, e.g ttfnet/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "TTFNet model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.TTFNet( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "TTFNet model initialize failed." + + +class TOOD(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a TOOD model exported by PaddleDetection. 
+ + :param model_file: (str)Path of model file, e.g tood/model.pdmodel + :param params_file: (str)Path of parameters file, e.g tood/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "TOOD model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.TOOD( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "TOOD model initialize failed." + + +class GFL(PPYOLOE): + def __init__(self, + model_file, + params_file, + config_file, + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a GFL model exported by PaddleDetection. + + :param model_file: (str)Path of model file, e.g gfl/model.pdmodel + :param params_file: (str)Path of parameters file, e.g gfl/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param config_file: (str)Path of configuration file for deployment, e.g ppyoloe/infer_cfg.yml + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + + super(PPYOLOE, self).__init__(runtime_option) + + assert model_format == ModelFormat.PADDLE, "GFL model only support model format of ModelFormat.Paddle now." + self._model = C.vision.detection.GFL( + model_file, params_file, config_file, self._runtime_option, + model_format) + assert self.initialized, "GFL model initialize failed." \ No newline at end of file diff --git a/python/fastdeploy/vision/facedet/contrib/yolov7face.py b/python/fastdeploy/vision/facedet/contrib/yolov7face.py index 262533b3a4f..9639654413b 100644 --- a/python/fastdeploy/vision/facedet/contrib/yolov7face.py +++ b/python/fastdeploy/vision/facedet/contrib/yolov7face.py @@ -140,6 +140,13 @@ def __init__(self, assert self.initialized, "YOLOv7Face initialize failed." + def predict(self, input_image): + """Detect the location and key points of human faces from an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: FaceDetectionResult + """ + return self._model.predict(input_image) + def batch_predict(self, images): """Classify a batch of input image diff --git a/python/fastdeploy/vision/generation/__init__.py b/python/fastdeploy/vision/generation/__init__.py new file mode 100644 index 00000000000..f568ed84d88 --- /dev/null +++ b/python/fastdeploy/vision/generation/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from .contrib.anemigan import AnimeGAN diff --git a/python/fastdeploy/vision/generation/contrib/__init__.py b/python/fastdeploy/vision/generation/contrib/__init__.py new file mode 100644 index 00000000000..8034e10bfc5 --- /dev/null +++ b/python/fastdeploy/vision/generation/contrib/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import diff --git a/python/fastdeploy/vision/generation/contrib/anemigan.py b/python/fastdeploy/vision/generation/contrib/anemigan.py new file mode 100644 index 00000000000..eaed21c5e09 --- /dev/null +++ b/python/fastdeploy/vision/generation/contrib/anemigan.py @@ -0,0 +1,102 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import logging +from .... import FastDeployModel, ModelFormat +from .... import c_lib_wrap as C + + +class AnimeGANPreprocessor: + def __init__(self, config_file): + """Create a preprocessor for AnimeGAN. + """ + self._preprocessor = C.vision.generation.AnimeGANPreprocessor() + + def run(self, input_ims): + """Preprocess input images for AnimeGAN. + + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class AnimeGANPostprocessor: + def __init__(self): + """Create a postprocessor for AnimeGAN. + """ + self._postprocessor = C.vision.generation.AnimeGANPostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for AnimeGAN + + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :return: results: (list) Final results + """ + return self._postprocessor.run(runtime_results) + + +class AnimeGAN(FastDeployModel): + def __init__(self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load a AnimeGAN model. 
+ + :param model_file: (str)Path of model file, e.g ./model.pdmodel + :param params_file: (str)Path of parameters file, e.g ./model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU + :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model + """ + # call super constructor to initialize self._runtime_option + super(AnimeGAN, self).__init__(runtime_option) + + self._model = C.vision.generation.AnimeGAN( + model_file, params_file, self._runtime_option, model_format) + # assert self.initialized to confirm initialization successfully. + assert self.initialized, "AnimeGAN initialize failed." + + def predict(self, input_image): + """ Predict the style transfer result for an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: style transfer result + """ + return self._model.predict(input_image) + + def batch_predict(self, input_images): + """ Predict the style transfer result for multiple input images + + :param input_images: (list of numpy.ndarray)The list of input image data, each image is a 3-D array with layout HWC, BGR format + :return: a list of style transfer results + """ + return self._model.batch_predict(input_images) + + @property + def preprocessor(self): + """Get AnimeGANPreprocessor object of the loaded model + + :return AnimeGANPreprocessor + """ + return self._model.preprocessor + + @property + def postprocessor(self): + """Get AnimeGANPostprocessor object of the loaded model + + :return AnimeGANPostprocessor + """ + return self._model.postprocessor diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py index a357547fde9..41bb279a590 100755 --- a/python/fastdeploy/vision/ocr/ppocr/__init__.py +++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py @@ -509,15 +509,15 @@ def run(self, input_ims): return self._preprocessor.run(input_ims) @property - def static_shape(self): - return self._preprocessor.static_shape + def static_shape_infer(self): + return self._preprocessor.static_shape_infer - @static_shape.setter - def static_shape(self, value): + @static_shape_infer.setter + def static_shape_infer(self, value): assert isinstance( value, - bool), "The value to set `static_shape` must be type of bool." - self._preprocessor.static_shape = value + bool), "The value to set `static_shape_infer` must be type of bool." + self._preprocessor.static_shape_infer = value @property def is_scale(self): @@ -638,15 +638,15 @@ def postprocessor(self, value): self._model.postprocessor = value @property - def static_shape(self): - return self._model.preprocessor.static_shape + def static_shape_infer(self): + return self._model.preprocessor.static_shape_infer - @static_shape.setter - def static_shape(self, value): + @static_shape_infer.setter + def static_shape_infer(self, value): assert isinstance( value, - bool), "The value to set `static_shape` must be type of bool." - self._model.preprocessor.static_shape = value + bool), "The value to set `static_shape_infer` must be type of bool." 
+ self._model.preprocessor.static_shape_infer = value @property def is_scale(self): diff --git a/python/fastdeploy/vision/segmentation/ppseg/__init__.py b/python/fastdeploy/vision/segmentation/ppseg/__init__.py index 455785686bc..f0106a39a27 100644 --- a/python/fastdeploy/vision/segmentation/ppseg/__init__.py +++ b/python/fastdeploy/vision/segmentation/ppseg/__init__.py @@ -104,10 +104,17 @@ def run(self, input_ims): """ return self._preprocessor.run(input_ims) - def disable_normalize_and_permute(self): - """To disable normalize and hwc2chw in preprocessing step. + def disable_normalize(self): """ - return self._preprocessor.disable_normalize_and_permute() + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() @property def is_vertical_screen(self): diff --git a/python/requirements.txt b/python/requirements.txt index 3c463f4bb34..f50f053a906 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -3,6 +3,6 @@ requests tqdm numpy opencv-python -fastdeploy-tools>=0.0.1 +fastdeploy-tools>=0.0.2 pyyaml fastapi diff --git a/python/setup.py b/python/setup.py index 108c3db94ae..d1b02254ec1 100755 --- a/python/setup.py +++ b/python/setup.py @@ -56,6 +56,8 @@ setup_configs = dict() setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND", "OFF") +setup_configs["ENABLE_SOPHGO_BACKEND"] = os.getenv("ENABLE_SOPHGO_BACKEND", + "OFF") setup_configs["WITH_ASCEND"] = os.getenv("WITH_ASCEND", "OFF") setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF") setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND", @@ -68,6 +70,7 @@ setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF") setup_configs["PADDLELITE_URL"] = os.getenv("PADDLELITE_URL", "OFF") setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF") +setup_configs["ENABLE_ENCRYPTION"] = os.getenv("ENABLE_ENCRYPTION", "OFF") setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF") setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF") setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF") diff --git a/scripts/patch_paddle_lite.py b/scripts/patch_paddle_lite.py new file mode 100644 index 00000000000..96dea4bf173 --- /dev/null +++ b/scripts/patch_paddle_lite.py @@ -0,0 +1,43 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import shutil +import subprocess +import platform +import sys + + +def process_paddle_lite(paddle_lite_so_path): + if platform.system().lower() != "linux": + return + rpaths = ["$ORIGIN"] + patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf") + + for paddle_lite_so_file in os.listdir(paddle_lite_so_path): + paddle_lite_so_file = os.path.join(paddle_lite_so_path, + paddle_lite_so_file) + if '.so' in paddle_lite_so_file: + command = "{} --set-rpath '{}' {}".format( + patchelf_exe, ":".join(rpaths), paddle_lite_so_file) + if platform.machine() != 'sw_64' and platform.machine( + ) != 'mips64': + assert os.system( + command) == 0, "patchelf {} failed, the command: {}".format( + paddle_lite_so_file, command) + + +if __name__ == "__main__": + process_paddle_lite(sys.argv[1]) diff --git a/serving/Dockerfile b/serving/Dockerfile index d03611f7285..70a595caeb0 100644 --- a/serving/Dockerfile +++ b/serving/Dockerfile @@ -41,7 +41,12 @@ RUN apt-get update \ RUN apt-get update \ && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \ && python3 -m pip install -U pip \ - && python3 -m pip install paddlepaddle-gpu paddlenlp fast-tokenizer-python + && python3 -m pip install paddlenlp fast-tokenizer-python + +# unset proxy +ENV http_proxy= +ENV https_proxy= +python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html COPY python/dist/*.whl /opt/fastdeploy/ RUN python3 -m pip install /opt/fastdeploy/*.whl \ @@ -52,6 +57,3 @@ COPY build/fastdeploy_install /opt/fastdeploy/ ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" ENV PATH="/opt/tritonserver/bin:$PATH" -# unset proxy -ENV http_proxy= -ENV https_proxy= diff --git a/serving/README.md b/serving/README.md index 51b136045c1..4f7a74da956 100644 --- a/serving/README.md +++ b/serving/README.md @@ -20,7 +20,7 @@ FastDeploy builds an end-to-end serving deployment based on [Triton Inference Se CPU images only support Paddle/ONNX models for serving deployment on CPUs, and supported inference backends include OpenVINO, Paddle Inference, and ONNX Runtime ```shell -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-cpu-only-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-cpu-only-21.10 ``` #### GPU Image @@ -28,7 +28,7 @@ docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-cpu-only-21.10 GPU images support Paddle/ONNX models for serving deployment on GPU and CPU, and supported inference backends including OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime ``` -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-gpu-cuda11.4-trt8.4-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-gpu-cuda11.4-trt8.4-21.10 ``` Users can also compile the image by themselves according to their own needs, referring to the following documents: diff --git a/serving/README_CN.md b/serving/README_CN.md index 801be56ea7f..f3f436f641c 100644 --- a/serving/README_CN.md +++ b/serving/README_CN.md 
@@ -17,13 +17,13 @@ FastDeploy基于[Triton Inference Server](https://github.com/triton-inference-se #### CPU镜像 CPU镜像仅支持Paddle/ONNX模型在CPU上进行服务化部署,支持的推理后端包括OpenVINO、Paddle Inference和ONNX Runtime ``` shell -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-cpu-only-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-cpu-only-21.10 ``` #### GPU镜像 GPU镜像支持Paddle/ONNX模型在GPU/CPU上进行服务化部署,支持的推理后端包括OpenVINO、TensorRT、Paddle Inference和ONNX Runtime ``` -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.1-gpu-cuda11.4-trt8.4-21.10 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:1.0.2-gpu-cuda11.4-trt8.4-21.10 ``` 用户也可根据自身需求,参考如下文档自行编译镜像 diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md index 298603e50ff..af59854f908 100644 --- a/serving/docs/EN/compile-en.md +++ b/serving/docs/EN/compile-en.md @@ -1,7 +1,7 @@ English | [中文](../zh_CN/compile.md) # FastDeploy Serving Deployment Image Compilation -This article is about how to create a FastDploy image. +This document is about how to create a FastDploy image. ## GPU Image diff --git a/serving/docs/EN/demo-en.md b/serving/docs/EN/demo-en.md index 0f47d8fa5ad..786af289715 100644 --- a/serving/docs/EN/demo-en.md +++ b/serving/docs/EN/demo-en.md @@ -1,6 +1,6 @@ English | [中文](../zh_CN/demo.md) # Service-oriented Deployment Demo -We take the YOLOv5 model as an simple example, and introduce how to execute a service-oriented deployment. For the detailed code, please refer to [Service-oriented Deployment of YOLOv5](../../../examples/vision/detection/yolov5/serving). It is recommend that you read the following documents before reading this article. +We take the YOLOv5 model as an simple example, and introduce how to execute a service-oriented deployment. For the detailed code, please refer to [Service-oriented Deployment of YOLOv5](../../../examples/vision/detection/yolov5/serving). It is recommend that you read the following documents before reading this document. - [Service-oriented Model Catalog Description](model_repository-en.md) (how to prepare the model catalog) - [Service-oriented Deployment Configuration Description](model_configuration-en.md) (the configuration option for runtime) diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh index cfcb68b3745..fa7c0aacba6 100644 --- a/serving/scripts/build.sh +++ b/serving/scripts/build.sh @@ -12,7 +12,41 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -WITH_GPU=${1:-ON} + +ARGS=`getopt -a -o w:n:h:hs -l WITH_GPU:,docker_name:,http_proxy:,https_proxy: -- "$@"` + +eval set -- "${ARGS}" +echo "parse start" + +while true +do + case "$1" in + -w|--WITH_GPU) + WITH_GPU="$2" + shift;; + -n|--docker_name) + docker_name="$2" + shift;; + -h|--http_proxy) + http_proxy="$2" + shift;; + -hs|--https_proxy) + https_proxy="$2" + shift;; + --) + shift + break;; + esac +shift +done + +if [ -z $WITH_GPU ];then + WITH_GPU="ON" +fi + +if [ -z $docker_name ];then + docker_name="build_fd" +fi if [ $WITH_GPU == "ON" ]; then @@ -30,7 +64,7 @@ if [ ! 
-d "./TensorRT-8.4.1.5/" ]; then rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz fi -nvidia-docker run -i --rm --name build_fd \ +nvidia-docker run -i --rm --name ${docker_name} \ -v`pwd`/..:/workspace/fastdeploy \ -e "http_proxy=${http_proxy}" \ -e "https_proxy=${https_proxy}" \ @@ -68,7 +102,7 @@ else echo "start build FD CPU library" -docker run -i --rm --name build_fd \ +docker run -i --rm --name ${docker_name} \ -v`pwd`/..:/workspace/fastdeploy \ -e "http_proxy=${http_proxy}" \ -e "https_proxy=${https_proxy}" \ diff --git a/tests/acc_eval/README.md b/tests/acc_eval/README.md new file mode 100644 index 00000000000..5b62577f719 --- /dev/null +++ b/tests/acc_eval/README.md @@ -0,0 +1,29 @@ +# 模型精度批量验证脚本 + +本目录下的Python脚本可以在CPU/GPU/昆仑芯/昇腾,以及后续的新增硬件上, 完成对高优模型的精度批量验证. +各模型的精度测试代码是基于Python部署demo修改而成, 当后续有新增硬件或者新增模型时,用户可以通过同样的方式(新增option和模型),添加新的Python代码来完成精度验证. + + +## 用法 + +### 1.准备数据集 +- 分类模型需要ImageNet验证集以及标签 +- 检测模型需要COCO2017验证集以及标签 +- 分割模型需要Cityscape验证集以及标签 +- PP-OCRv2/v3的数据集在准备脚本中会自行下载. + +请将准备好的数据集解压至dataset目录中使用 + +### 2.精度验证 +分类/检测/分割/OCR四个场景的精度验证启用方式是一样的. +其中分类, 检测和分割模型会返回预测精度, OCR模型会返回与GPU预测结果的差异. + +```bash +# 进入分类模型目录下 +cd classification +# 执行prepare.sh脚本,自动下载并解压模型至models文件夹下 +bash prepare.sh +# 首先修改run.sh中的TARGET_DEVICE为想测试的硬件,之后执行run.sh脚本 +bash run.sh +# 验证完毕的输出以及精度数据,会保存至log文件夹下,用户自行查看 +``` diff --git a/tests/acc_eval/classification/eval.py b/tests/acc_eval/classification/eval.py new file mode 100755 index 00000000000..b6a452855c8 --- /dev/null +++ b/tests/acc_eval/classification/eval.py @@ -0,0 +1,66 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="Path of PaddleClas model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--topk", type=int, default=1, help="Return topk results.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu' or 'ipu' or 'kunlunxin' or 'ascend' ." 
+ ) + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "ipu": + option.use_ipu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) + +model_file = os.path.join(args.model, "inference.pdmodel") +params_file = os.path.join(args.model, "inference.pdiparams") +config_file = os.path.join(args.model, "inference_cls.yaml") +model = fd.vision.classification.PaddleClasModel( + model_file, params_file, config_file, runtime_option=runtime_option) + +res = fd.vision.evaluation.eval_classify( + model=model, + image_file_path="../dataset/imagenet/", + label_file_path="../dataset/imagenet/val_list.txt", + topk=1) +print(res) diff --git a/tests/acc_eval/classification/prepare.sh b/tests/acc_eval/classification/prepare.sh new file mode 100644 index 00000000000..b6d3ba34430 --- /dev/null +++ b/tests/acc_eval/classification/prepare.sh @@ -0,0 +1,28 @@ +mkdir models +cd models + +wget https://bj.bcebos.com/paddlehub/fastdeploy/PPLCNet_x1_0_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/PPLCNetV2_base_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/EfficientNetB7_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/EfficientNetB0_small_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/GhostNet_x1_3_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/GhostNet_x0_5_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV1_x0_25_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV1_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV2_x0_25_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV2_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV3_small_x0_35_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV3_large_x1_0_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/ShuffleNetV2_x0_25_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/ShuffleNetV2_x2_0_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/SqueezeNet1_1_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/InceptionV3_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/PPHGNet_tiny_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/PPHGNet_base_ssld_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz + +ls *.tgz | xargs -n1 tar xzvf + +rm -rf *.tgz + +cd .. 
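(Reference sketch, not part of the diff: the accuracy-evaluation flow wired up by eval.py above can also be run by hand for a single model. The snippet below assumes the ImageNet validation set is unpacked under ../dataset/imagenet and that one of the models fetched by prepare.sh, e.g. ResNet50_vd_infer, sits under ./models; the model name and paths are illustrative, and only calls already shown in eval.py are used.)

import os
import fastdeploy as fd

# Pick one model downloaded by prepare.sh (example name, adjust as needed).
model_dir = "./models/ResNet50_vd_infer"
# Default CPU backend; call option.use_ascend()/option.use_kunlunxin() for other hardware.
option = fd.RuntimeOption()
model = fd.vision.classification.PaddleClasModel(
    os.path.join(model_dir, "inference.pdmodel"),
    os.path.join(model_dir, "inference.pdiparams"),
    os.path.join(model_dir, "inference_cls.yaml"),
    runtime_option=option)
# Top-1 accuracy over the ImageNet validation list, same call as in eval.py.
res = fd.vision.evaluation.eval_classify(
    model=model,
    image_file_path="../dataset/imagenet/",
    label_file_path="../dataset/imagenet/val_list.txt",
    topk=1)
print(res)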
diff --git a/tests/acc_eval/classification/run.sh b/tests/acc_eval/classification/run.sh new file mode 100644 index 00000000000..16c1b2bb9f9 --- /dev/null +++ b/tests/acc_eval/classification/run.sh @@ -0,0 +1,8 @@ +TARGET_DEVICE=ascend + +model_dir=`ls ./models/` + +for MODEL_NAME in $model_dir +do + python infer.py --model ./models/$MODEL_NAME --image None --device $TARGET_DEVICE 2>&1 | tee ./log/${MODEL_NAME}_acc.log +done diff --git a/tests/acc_eval/detection/eval_faster_rcnn.py b/tests/acc_eval/detection/eval_faster_rcnn.py new file mode 100644 index 00000000000..354ca6c0f1d --- /dev/null +++ b/tests/acc_eval/detection/eval_faster_rcnn.py @@ -0,0 +1,69 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("image", [1, 3, 640, 640]) + option.set_trt_input_shape("scale_factor", [1, 2]) + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='faster_rcnn_r50_vd_fpn_2x_coco') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.FasterRCNN( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_mask_rcnn.py b/tests/acc_eval/detection/eval_mask_rcnn.py new file mode 100644 index 00000000000..5ee53097cb8 --- /dev/null +++ b/tests/acc_eval/detection/eval_mask_rcnn.py @@ -0,0 +1,76 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + # option.use_gpu() + print( + """GPU inference with Backend::Paddle in python has not been supported yet. 
\ + \nWill ignore this option.""") + + if args.use_trt: + # TODO(qiuyanjun): may remove TRT option + # Backend::TRT has not been supported yet. + print( + """Backend::TRT has not been supported yet, will ignore this option.\ + \nPaddleDetection/MaskRCNN has only support Backend::Paddle now.""" + ) + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='mask_rcnn_r50_1x_coco') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.MaskRCNN( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_picodet.py b/tests/acc_eval/detection/eval_picodet.py new file mode 100644 index 00000000000..c2120282ae7 --- /dev/null +++ b/tests/acc_eval/detection/eval_picodet.py @@ -0,0 +1,67 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='picodet_l_320_coco_lcnet') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.PicoDet( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_ppyolo.py b/tests/acc_eval/detection/eval_ppyolo.py new file mode 100644 index 00000000000..f5aa98d1176 --- /dev/null +++ b/tests/acc_eval/detection/eval_ppyolo.py @@ -0,0 +1,69 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", 
default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("image", [1, 3, 640, 640]) + option.set_trt_input_shape("scale_factor", [1, 2]) + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='ppyolo_r50vd_dcn_1x_coco') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.PPYOLO( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_ppyoloe.py b/tests/acc_eval/detection/eval_ppyoloe.py new file mode 100644 index 00000000000..1fea5d42507 --- /dev/null +++ b/tests/acc_eval/detection/eval_ppyoloe.py @@ -0,0 +1,68 @@ +import cv2 +import os + +import fastdeploy as fd + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='ppyoloe_crn_l_300e_coco') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.PPYOLOE( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_ssd.py b/tests/acc_eval/detection/eval_ssd.py new file mode 100644 index 00000000000..1744a8f2f27 --- /dev/null +++ b/tests/acc_eval/detection/eval_ssd.py @@ -0,0 +1,56 @@ +import 
fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + required=True, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + return option + + +args = parse_arguments() + +model_file = os.path.join(args.model_dir, "model.pdmodel") +params_file = os.path.join(args.model_dir, "model.pdiparams") +config_file = os.path.join(args.model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.SSD(model_file, + params_file, + config_file, + runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_yolov3.py b/tests/acc_eval/detection/eval_yolov3.py new file mode 100644 index 00000000000..7023676fb24 --- /dev/null +++ b/tests/acc_eval/detection/eval_yolov3.py @@ -0,0 +1,67 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='yolov3_darknet53_270e_coco') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.YOLOv3( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/eval_yolov5.py b/tests/acc_eval/detection/eval_yolov5.py new file mode 100755 index 00000000000..3d950b26a84 --- /dev/null +++ b/tests/acc_eval/detection/eval_yolov5.py @@ -0,0 +1,60 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + 
parser = argparse.ArgumentParser() + parser.add_argument("--model", default=None, help="Path of yolov5 model.") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu' or 'kunlunxin'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("images", [1, 3, 640, 640]) + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +model = fd.vision.detection.YOLOv5( + model_file, + params_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.PADDLE) + +image_file_path = "/xieyunyao/Project/coco/val2017" +annotation_file_path = "/xieyunyao/Project/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path, 0.001, 0.65) +print(res) diff --git a/tests/acc_eval/detection/eval_yolov6.py b/tests/acc_eval/detection/eval_yolov6.py new file mode 100755 index 00000000000..3641194ca6e --- /dev/null +++ b/tests/acc_eval/detection/eval_yolov6.py @@ -0,0 +1,60 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument("--model", default=None, help="Path of yolov5 model.") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu' or 'kunlunxin'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("images", [1, 3, 640, 640]) + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +model = fd.vision.detection.YOLOv6( + model_file, + params_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.PADDLE) + +image_file_path = "/xieyunyao/Project/coco/val2017" +annotation_file_path = "/xieyunyao/Project/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path, 0.001, 0.65) +print(res) diff --git a/tests/acc_eval/detection/eval_yolov7.py b/tests/acc_eval/detection/eval_yolov7.py new file mode 100755 index 00000000000..3641194ca6e --- /dev/null +++ b/tests/acc_eval/detection/eval_yolov7.py @@ -0,0 +1,60 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = 
argparse.ArgumentParser() + parser.add_argument("--model", default=None, help="Path of yolov5 model.") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu' or 'kunlunxin'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("images", [1, 3, 640, 640]) + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +model = fd.vision.detection.YOLOv6( + model_file, + params_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.PADDLE) + +image_file_path = "/xieyunyao/Project/coco/val2017" +annotation_file_path = "/xieyunyao/Project/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path, 0.001, 0.65) +print(res) diff --git a/tests/acc_eval/detection/eval_yolox.py b/tests/acc_eval/detection/eval_yolox.py new file mode 100644 index 00000000000..2885cc772af --- /dev/null +++ b/tests/acc_eval/detection/eval_yolox.py @@ -0,0 +1,67 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_dir", + default=None, + help="Path of PaddleDetection model directory") + parser.add_argument( + "--image", default=None, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + + if args.device.lower() == "ascend": + option.use_ascend() + + if args.use_trt: + option.use_trt_backend() + return option + + +args = parse_arguments() + +if args.model_dir is None: + model_dir = fd.download_model(name='yolox_s_300e_coco') +else: + model_dir = args.model_dir + +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "infer_cfg.yml") + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model = fd.vision.detection.PaddleYOLOX( + model_file, params_file, config_file, runtime_option=runtime_option) + +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" + +res = fd.vision.evaluation.eval_detection(model, image_file_path, + annotation_file_path) +print(res) diff --git a/tests/acc_eval/detection/prepare.sh b/tests/acc_eval/detection/prepare.sh new file mode 100644 index 00000000000..2e36440a2da --- /dev/null +++ b/tests/acc_eval/detection/prepare.sh @@ -0,0 +1,25 @@ +mkdir models +cd models + +wget 
+wget https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s_infer.tar
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_infer.tar
+
+ls *.tgz | xargs -n1 tar xzvf
+ls *.tar | xargs -n1 tar xvf
+rm -rf *.tgz
+rm -rf *.tar
+
+cd ..
diff --git a/tests/acc_eval/detection/run.sh b/tests/acc_eval/detection/run.sh
new file mode 100644
index 00000000000..59dff2e9b26
--- /dev/null
+++ b/tests/acc_eval/detection/run.sh
@@ -0,0 +1,17 @@
+TARGET_DEVICE=ascend
+
+python eval_picodet.py --model_dir ./models/picodet_l_320_coco_lcnet --image None --device $TARGET_DEVICE 2>&1 | tee ./log/picodet_l_320_coco_lcnet.log
+python eval_ppyolo.py --model_dir ./models/ppyolov2_r101vd_dcn_365e_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ppyolov2_r101vd_dcn_365e_coco.log
+python eval_ppyolo.py --model_dir ./models/ppyolo_r50vd_dcn_1x_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ppyolo_r50vd_dcn_1x_coco.log
+python eval_ppyoloe.py --model_dir ./models/ppyoloe_crn_l_300e_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ppyoloe_crn_l_300e_coco.log
+python eval_ppyoloe.py --model_dir ./models/ppyoloe_plus_crn_m_80e_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ppyoloe_plus_crn_m_80e_coco.log
+python eval_ssd.py --model_dir ./models/ssd_vgg16_300_240e_voc --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ssd_vgg16_300_240e_voc.log
+python eval_ssd.py --model_dir ./models/ssdlite_mobilenet_v1_300_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ssdlite_mobilenet_v1_300_coco.log
+python eval_ssd.py --model_dir ./models/ssd_mobilenet_v1_300_120e_voc --image None --device $TARGET_DEVICE 2>&1 | tee ./log/ssd_mobilenet_v1_300_120e_voc.log
+python eval_yolov3.py --model_dir ./models/yolov3_darknet53_270e_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov3_darknet53_270e_coco.log
+python eval_yolox.py --model_dir ./models/yolox_s_300e_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolox_s_300e_coco.log
+python eval_faster_rcnn.py --model_dir ./models/faster_rcnn_r50_vd_fpn_2x_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/faster_rcnn_r50_vd_fpn_2x_coco.log
+python eval_mask_rcnn.py --model_dir ./models/mask_rcnn_r50_1x_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/mask_rcnn_r50_1x_coco.log
+python eval_yolov5.py --model ./models/yolov5s_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov5s_infer.log
+python eval_yolov6.py --model ./models/yolov6s_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov6s_infer.log
+python eval_yolov7.py --model ./models/yolov7_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov7_infer.log
diff --git a/tests/acc_eval/ppocr/eval_ppocrv2.py b/tests/acc_eval/ppocr/eval_ppocrv2.py
new file mode 100644
index 00000000000..f4742df6661
--- /dev/null
+++ b/tests/acc_eval/ppocr/eval_ppocrv2.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--det_model", required=True, help="Path of Detection model of PPOCR.")
+    parser.add_argument(
+        "--cls_model",
+        required=True,
+        help="Path of Classification model of PPOCR.")
+    parser.add_argument(
+        "--rec_model",
+        required=True,
+        help="Path of Recognition model of PPOCR.")
+    parser.add_argument(
+        "--rec_label_file",
+        required=True,
+        help="Path of Recognition label file of PPOCR.")
+    parser.add_argument(
+        "--image", type=str, required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default="default",
+        help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
+    )
+    parser.add_argument(
+        "--device_id",
+        type=int,
+        default=0,
+        help="Define which GPU card used to run model.")
+    parser.add_argument(
+        "--cpu_thread_num",
+        type=int,
+        default=9,
+        help="Number of threads while inference on CPU.")
+    return parser.parse_args()
+
+
+def build_option(args):
+    option = fd.RuntimeOption()
+    if args.device.lower() == "kunlunxin":
+        option.use_kunlunxin()
+
+    if args.device.lower() == "ascend":
+        option.use_ascend()
+
+    if args.device.lower() == "gpu":
+        option.use_gpu()
+
+    return option
+
+
+args = parse_arguments()
+
+# Detection model, detects text boxes
+det_model_file = os.path.join(args.det_model, "inference.pdmodel")
+det_params_file = os.path.join(args.det_model, "inference.pdiparams")
+# Classification model, classifies text orientation, optional
+cls_model_file = os.path.join(args.cls_model, "inference.pdmodel")
+cls_params_file = os.path.join(args.cls_model, "inference.pdiparams")
+# Recognition model, recognizes text
+rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
+rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
+rec_label_file = args.rec_label_file
+
+# The same deployment configuration is used for all three models
+# Users can also configure them separately as needed
+runtime_option = build_option(args)
+
+det_model = fd.vision.ocr.DBDetector(
+    det_model_file, det_params_file, runtime_option=runtime_option)
+cls_model = fd.vision.ocr.Classifier(
+    cls_model_file, cls_params_file, runtime_option=runtime_option)
+rec_model = fd.vision.ocr.Recognizer(
+    rec_model_file,
+    rec_params_file,
+    rec_label_file,
+    runtime_option=runtime_option)
+
+# Enable static shape inference for the PPOCR Rec model; comment this out if the target hardware does not need it.
+rec_model.preprocessor.static_shape = True
+
+# Create PP-OCR by chaining the 3 models; cls_model is optional and can be set to None if not needed
+ppocr_v2 = fd.vision.ocr.PPOCRv2(
+    det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+
+#####
+# Prepare the input image data
+img_dir = args.image
+imgs_file_lists = []
+if os.path.isdir(img_dir):
+    for single_file in os.listdir(img_dir):
+        if 'jpg' in single_file:
+            file_path = os.path.join(img_dir, single_file)
+            if os.path.isfile(file_path):
+                imgs_file_lists.append(file_path)
+
+imgs_file_lists.sort()
+
+fd_result = []
+for idx, image in enumerate(imgs_file_lists):
+    img = cv2.imread(image)
+    result = ppocr_v2.predict(img)
+    for i in range(len(result.boxes)):
+        one_res = result.boxes[i] + [
+            result.rec_scores[i]
+        ] + [result.cls_labels[i]] + [result.cls_scores[i]]
+        fd_result.append(one_res)
+
+local_result = []
+with open('PPOCRv2_ICDAR10_BS116_1221.txt', 'r') as f:
+    for line in f:
+        local_result.append(list(map(float, line.split(','))))
+
+print("==== Begin to check OCR diff ====")
+for list_local, list_fd in zip(local_result, fd_result):
+
+    for i in range(len(list_local)):
+
+        if (i < 8):
+            # Det
+            diff = list_local[i] - list_fd[i]
+            assert (
+                abs(diff) < 1
+            ), "Diff exists in Det box result: {} vs {}.".format(
+                list_local, list_fd)
+        elif (i == 8):
+            # Rec
+            diff = round(list_local[i], 6) - round(list_fd[i], 6)
+            assert (
+                abs(diff) < 0.001
+            ), "Diff exists in rec scores result: {} vs {}.".format(
+                list_local, list_fd)
+        elif (i == 9):
+            diff = list_local[i] - list_fd[i]
+            assert (
+                abs(diff) != 1
+            ), "Diff exists in cls label result: {} vs {}.".format(
+                list_local, list_fd)
+        else:
+            diff = round(list_local[i], 6) - round(list_fd[i], 6)
+            assert (
+                abs(diff) < 0.001
+            ), "Diff exists in cls score result: {} vs {}.".format(
+                list_local, list_fd)
diff --git a/tests/acc_eval/ppocr/eval_ppocrv3.py b/tests/acc_eval/ppocr/eval_ppocrv3.py
new file mode 100644
index 00000000000..b6f4dccedfd
--- /dev/null
+++ b/tests/acc_eval/ppocr/eval_ppocrv3.py
@@ -0,0 +1,174 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--det_model", required=True, help="Path of Detection model of PPOCR.")
+    parser.add_argument(
+        "--cls_model",
+        required=True,
+        help="Path of Classification model of PPOCR.")
+    parser.add_argument(
+        "--rec_model",
+        required=True,
+        help="Path of Recognition model of PPOCR.")
+    parser.add_argument(
+        "--rec_label_file",
+        required=True,
+        help="Path of Recognition label file of PPOCR.")
+    parser.add_argument(
+        "--image", type=str, required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default="default",
+        help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
+    )
+    parser.add_argument(
+        "--device_id",
+        type=int,
+        default=0,
+        help="Define which GPU card used to run model.")
+    parser.add_argument(
+        "--cpu_thread_num",
+        type=int,
+        default=9,
+        help="Number of threads while inference on CPU.")
+    return parser.parse_args()
+
+
+def build_option(args):
+    option = fd.RuntimeOption()
+    if args.device.lower() == "kunlunxin":
+        option.use_kunlunxin()
+
+    if args.device.lower() == "ascend":
+        option.use_ascend()
+
+    if args.device.lower() == "gpu":
+        option.use_gpu()
+
+    return option
+
+
+args = parse_arguments()
+
+# Detection model, detects text boxes
+det_model_file = os.path.join(args.det_model, "inference.pdmodel")
+det_params_file = os.path.join(args.det_model, "inference.pdiparams")
+# Classification model, classifies text orientation, optional
+cls_model_file = os.path.join(args.cls_model, "inference.pdmodel")
+cls_params_file = os.path.join(args.cls_model, "inference.pdiparams")
+# Recognition model, recognizes text
+rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
+rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
+rec_label_file = args.rec_label_file
+
+# The same deployment configuration is used for all three models
+# Users can also configure them separately as needed
+runtime_option = build_option(args)
+
+det_model = fd.vision.ocr.DBDetector(
+    det_model_file, det_params_file, runtime_option=runtime_option)
+cls_model = fd.vision.ocr.Classifier(
+    cls_model_file, cls_params_file, runtime_option=runtime_option)
+rec_model = fd.vision.ocr.Recognizer(
+    rec_model_file,
+    rec_params_file,
+    rec_label_file,
+    runtime_option=runtime_option)
+
+# Enable static shape inference for the PPOCR Rec model; comment this out if the target hardware does not need it.
+rec_model.preprocessor.static_shape = True
+
+# Create PP-OCR by chaining the 3 models; cls_model is optional and can be set to None if not needed
+ppocr_v3 = fd.vision.ocr.PPOCRv3(
+    det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+
+#####
+# Prepare the input image data
+img_dir = args.image
+imgs_file_lists = []
+if os.path.isdir(img_dir):
+    for single_file in os.listdir(img_dir):
+        if 'jpg' in single_file:
+            file_path = os.path.join(img_dir, single_file)
+            if os.path.isfile(file_path):
+                imgs_file_lists.append(file_path)
+
+imgs_file_lists.sort()
+
+fd_result = []
+for idx, image in enumerate(imgs_file_lists):
+    img = cv2.imread(image)
+    result = ppocr_v3.predict(img)
+    for i in range(len(result.boxes)):
+        one_res = result.boxes[i] + [
+            result.rec_scores[i]
+        ] + [result.cls_labels[i]] + [result.cls_scores[i]]
+        fd_result.append(one_res)
+
+local_result = []
+with open('PPOCRv3_ICDAR10_BS116_1221.txt', 'r') as f:
+    for line in f:
+        local_result.append(list(map(float, line.split(','))))
+
+# Begin the diff comparison
+total_num_res = len(local_result) * 11
+total_diff_num = 0
+
+print("==== Begin to check OCR diff ====")
+for list_local, list_fd in zip(local_result, fd_result):
+
+    for i in range(len(list_local)):
+
+        if (i < 8):
+            # Det
+            diff = list_local[i] - list_fd[i]
+            assert (
+                abs(diff) < 1
+            ), "Diff exists in Det box result: {} vs {}.".format(
+                list_local, list_fd)
+        elif (i == 8):
+            # Rec
+            diff = round(list_local[i], 6) - round(list_fd[i], 6)
+            assert (
+                abs(diff) < 0.001
+            ), "Diff exists in rec scores result: {} vs {}.".format(
+                list_local, list_fd)
+        elif (i == 9):
+            diff = list_local[i] - list_fd[i]
+            assert (
+                abs(diff) != 1
+            ), "Diff exists in cls label result: {} vs {}.".format(
+                list_local, list_fd)
+        else:
+            diff = round(list_local[i], 6) - round(list_fd[i], 6)
+            assert (
+                abs(diff) < 0.001
+            ), "Diff exists in cls score result: {} vs {}.".format(
+                list_local, list_fd)
diff --git a/tests/acc_eval/ppocr/prepare.sh b/tests/acc_eval/ppocr/prepare.sh
new file mode 100644
index 00000000000..8417ce879d9
--- /dev/null
+++ b/tests/acc_eval/ppocr/prepare.sh
@@ -0,0 +1,22 @@
+mkdir models
+cd models
+
+# Download models
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# Download the reference GPU prediction results
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PPOCRv3_ICDAR10_BS116_1221.txt
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PPOCRv2_ICDAR10_BS116_1221.txt
+
+# Download the first 10 images of the ICDAR2017 dataset
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ICDAR2017_10.tar
+
+ls *.tar | xargs -n1 tar xvf
+rm -rf *.tar
+
+cd ..
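Editor's note: the two PP-OCR evaluation scripts above compare each FastDeploy output row against a row from the downloaded GPU reference file, with a different tolerance per field. A condensed sketch of that rule is shown below, assuming the row layout the scripts build (8 box coordinates, one rec score, one cls label, one cls score); the helper name is illustrative only and is not part of the diff.

def rows_match(ref_row, fd_row):
    """Return True when a reference row and a FastDeploy row agree within
    the tolerances used by eval_ppocrv2.py / eval_ppocrv3.py."""
    for i, (ref, fd_val) in enumerate(zip(ref_row, fd_row)):
        if i < 8:  # detection box coordinates: allow less than 1 pixel difference
            if abs(ref - fd_val) >= 1:
                return False
        elif i == 8:  # recognition score: allow less than 0.001 difference
            if abs(round(ref, 6) - round(fd_val, 6)) >= 0.001:
                return False
        elif i == 9:  # classification label: must not flip between 0 and 1
            if abs(ref - fd_val) == 1:
                return False
        else:  # classification score: allow less than 0.001 difference
            if abs(round(ref, 6) - round(fd_val, 6)) >= 0.001:
                return False
    return True


# Example: an identical row passes, a flipped cls label fails.
row = [10.0, 20.0, 110.0, 20.0, 110.0, 40.0, 10.0, 40.0, 0.98, 0.0, 0.99]
assert rows_match(row, row)
assert not rows_match(row, row[:9] + [1.0, 0.99])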
diff --git a/tests/acc_eval/ppocr/run.sh b/tests/acc_eval/ppocr/run.sh
new file mode 100644
index 00000000000..59deabd1963
--- /dev/null
+++ b/tests/acc_eval/ppocr/run.sh
@@ -0,0 +1,7 @@
+TARGET_DEVICE=ascend
+
+python eval_ppocrv3.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt \
+    --image ../ICDAR2017_10 --device $TARGET_DEVICE 2>&1 | tee ./log/ppocrv3_diff.log
+
+python eval_ppocrv2.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt \
+    --image ../ICDAR2017_10 --device $TARGET_DEVICE 2>&1 | tee ./log/ppocrv2_diff.log
diff --git a/tests/acc_eval/segmentation/eval.py b/tests/acc_eval/segmentation/eval.py
new file mode 100644
index 00000000000..b77a6951942
--- /dev/null
+++ b/tests/acc_eval/segmentation/eval.py
@@ -0,0 +1,58 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model", required=True, help="Path of PaddleSeg model.")
+    parser.add_argument(
+        "--image", type=str, required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--use_trt",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to use TensorRT.")
+    return parser.parse_args()
+
+
+def build_option(args):
+    option = fd.RuntimeOption()
+
+    if args.device.lower() == "gpu":
+        option.use_gpu()
+
+    if args.device.lower() == "kunlunxin":
+        option.use_kunlunxin()
+
+    if args.device.lower() == "ascend":
+        option.use_ascend()
+
+    if args.use_trt:
+        option.use_trt_backend()
+        option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024],
+                                   [1, 3, 2048, 2048])
+    return option
+
+
+args = parse_arguments()
+
+# Configure the runtime and load the model
+runtime_option = build_option(args)
+model_file = os.path.join(args.model, "model.pdmodel")
+params_file = os.path.join(args.model, "model.pdiparams")
+config_file = os.path.join(args.model, "deploy.yaml")
+model = fd.vision.segmentation.PaddleSegModel(
+    model_file, params_file, config_file, runtime_option=runtime_option)
+
+res = fd.vision.evaluation.eval_segmentation(
+    model=model, data_dir="../dataset/FD_dataset/data/cityscapes")
+print(res)
diff --git a/tests/acc_eval/segmentation/prepare.sh b/tests/acc_eval/segmentation/prepare.sh
new file mode 100644
index 00000000000..465eed00aa3
--- /dev/null
+++ b/tests/acc_eval/segmentation/prepare.sh
@@ -0,0 +1,30 @@
+mkdir models
+cd models
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz
+
+
+ls *.tgz | xargs -n1 tar xzvf
+rm -rf *.tgz
diff --git a/tests/acc_eval/segmentation/run.sh b/tests/acc_eval/segmentation/run.sh
new file mode 100644
index 00000000000..73fe957d439
--- /dev/null
+++ b/tests/acc_eval/segmentation/run.sh
@@ -0,0 +1,8 @@
+TARGET_DEVICE=ascend
+
+model_dir=`ls ./models/`
+
+for MODEL_NAME in $model_dir
+do
+    python eval.py --model ./models/$MODEL_NAME --image None --device $TARGET_DEVICE 2>&1 | tee ./log/${MODEL_NAME}_acc.log
+done
diff --git a/tests/models/test_animegan.py b/tests/models/test_animegan.py
new file mode 100644
index 00000000000..d698b05a854
--- /dev/null
+++ b/tests/models/test_animegan.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+import numpy as np
+
+
+def test_animegan():
+    model_name = 'animegan_v1_hayao_60'
+    model_path = fd.download_model(
+        name=model_name, path='./resources', format='paddle')
+    test_img = 'https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_testimg.jpg'
+    label_img = 'https://bj.bcebos.com/paddlehub/fastdeploy/style_transfer_result.png'
+    fd.download(test_img, "./resources")
+    fd.download(label_img, "./resources")
+    # use default backend
+    runtime_option = fd.RuntimeOption()
+    runtime_option.set_paddle_mkldnn(False)
+    model_file = os.path.join(model_path, "model.pdmodel")
+    params_file = os.path.join(model_path, "model.pdiparams")
+    animegan = fd.vision.generation.AnimeGAN(
+        model_file, params_file, runtime_option=runtime_option)
+
+    src_img = cv2.imread("./resources/style_transfer_testimg.jpg")
+    label_img = cv2.imread("./resources/style_transfer_result.png")
+    res = animegan.predict(src_img)
+
+    diff = np.fabs(res.astype(np.float32) - label_img.astype(np.float32)) / 255
+    assert diff.max() < 1e-04, "There's diff in prediction."
+
+
+if __name__ == "__main__":
+    test_animegan()
diff --git a/tests/models/test_basicvsr.py b/tests/models/test_basicvsr.py
index 479343444e3..9aeabc50907 100644
--- a/tests/models/test_basicvsr.py
+++ b/tests/models/test_basicvsr.py
@@ -69,3 +69,7 @@ def test_basicvsr():
         if t >= 10:
             break
     capture.release()
+
+
+if __name__ == "__main__":
+    test_basicvsr()
diff --git a/tests/models/test_edvr.py b/tests/models/test_edvr.py
index a9f9517e7d6..a874c7d3b59 100644
--- a/tests/models/test_edvr.py
+++ b/tests/models/test_edvr.py
@@ -1,4 +1,4 @@
-test_pptracking.py # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -74,3 +74,7 @@ def test_edvr():
         if t >= 10:
             break
     capture.release()
+
+
+if __name__ == "__main__":
+    test_edvr()
diff --git a/tests/models/test_encryption.py b/tests/models/test_encryption.py
new file mode 100755
index 00000000000..10670f87fec
--- /dev/null
+++ b/tests/models/test_encryption.py
@@ -0,0 +1,9 @@
+import fastdeploy as fd
+import os
+
+if __name__ == "__main__":
+    input = "Hello"
+    cipher, key = fd.encryption.encrypt(input)
+    output = fd.encryption.decrypt(cipher, key)
+    assert input == output, "test encryption failed"
+    print("test encryption success")
diff --git a/tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml b/tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml
index d47075090e9..72c27a18a7d 100644
--- a/tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml
+++ b/tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml
@@ -1,10 +1,15 @@
-model_path: ./ResNet50_vd_infer.onnx
-output_folder: ./
-target_platform: RK3588
-normalize:
-  mean: [[0, 0, 0]]
-  std: [[1, 1, 1]]
-outputs: []
-outputs_nodes: []
+model_path: ./ResNet50_vd_infer/ResNet50_vd_infer.onnx
+output_folder: ./ResNet50_vd_infer
+mean:
+  -
+    - 123.675
+    - 116.28
+    - 103.53
+std:
+  -
+    - 58.395
+    - 57.12
+    - 57.375
+outputs_nodes:
 do_quantization: False
-dataset:
\ No newline at end of file
+dataset: "./ResNet50_vd_infer/dataset.txt"
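Editor's note: the reworked RKNPU2 config above moves normalization out of the old normalize block into top-level nested mean/std lists (one inner list per model input) and points dataset at a calibration file list that is only needed when do_quantization is enabled. A minimal sketch, assuming nothing beyond standard PyYAML, for sanity-checking such a config before running the converter; the checks and the file path are illustrative, not part of the project's tooling.

import yaml  # PyYAML

with open("tools/rknpu2/config/ResNet50_vd_infer_rknn.yaml") as f:
    cfg = yaml.safe_load(f)

# mean and std are lists of per-channel lists, one entry per input.
assert len(cfg["mean"]) == len(cfg["std"])
for mean, std in zip(cfg["mean"], cfg["std"]):
    assert len(mean) == len(std) == 3  # RGB channels for this model
    assert all(s > 0 for s in std)

# Quantization needs a calibration dataset list; float conversion does not.
if cfg["do_quantization"]:
    assert cfg["dataset"], "do_quantization requires a dataset file list"

print(cfg["model_path"], "->", cfg["output_folder"])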