From 96827a00ddb02c93169427c31f0ab43f2857ad6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 02:17:03 +0000 Subject: [PATCH 01/16] add a TTS demo for ARM Linux --- demos/TTSArmLinux/.gitignore | 4 + demos/TTSArmLinux/README.md | 43 ++++++ demos/TTSArmLinux/build.sh | 20 +++ demos/TTSArmLinux/config.sh | 14 ++ demos/TTSArmLinux/download.sh | 27 ++++ demos/TTSArmLinux/run.sh | 18 +++ demos/TTSArmLinux/src/CMakeLists.txt | 58 ++++++++ demos/TTSArmLinux/src/Predictor.hpp | 208 +++++++++++++++++++++++++++ demos/TTSArmLinux/src/main.cc | 71 +++++++++ 9 files changed, 463 insertions(+) create mode 100644 demos/TTSArmLinux/.gitignore create mode 100644 demos/TTSArmLinux/README.md create mode 100755 demos/TTSArmLinux/build.sh create mode 100644 demos/TTSArmLinux/config.sh create mode 100755 demos/TTSArmLinux/download.sh create mode 100755 demos/TTSArmLinux/run.sh create mode 100644 demos/TTSArmLinux/src/CMakeLists.txt create mode 100644 demos/TTSArmLinux/src/Predictor.hpp create mode 100644 demos/TTSArmLinux/src/main.cc diff --git a/demos/TTSArmLinux/.gitignore b/demos/TTSArmLinux/.gitignore new file mode 100644 index 00000000000..13135e37624 --- /dev/null +++ b/demos/TTSArmLinux/.gitignore @@ -0,0 +1,4 @@ +build/ +output/ +libs/ +models/ diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md new file mode 100644 index 00000000000..6fe66c5b6a9 --- /dev/null +++ b/demos/TTSArmLinux/README.md @@ -0,0 +1,43 @@ +# PaddleSpeech TTS 文本到语音 ARM Linux Demo + +修改自[demos/TTSAndroid](../TTSAndroid),模型也来自该安卓Demo。 + +使用与安卓Demo版本相同的[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)推理库([Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35)), +该库兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 + +该Demo自带的模型与[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12)不兼容,运行会崩溃,需要使用更新的版本。 +不过如果换成用 Paddle-Lite 2.12 opt 工具优化的模型,应该可以兼容。 + +### 配置 + +打开 
[config.sh](config.sh) 按需修改配置。 + +默认编译64位版本,如果要编译32位版本,把`ARM_ABI=armv8`改成`ARM_ABI=armv7hf`。 + +### 下载Paddle Lite库文件和模型文件 + +``` +./download.sh +``` + +### 安装依赖 + +以 Ubuntu 18.04 为例: + +``` +sudo apt install build-essential cmake libopencv-dev +``` + +### 编译 + +``` +./build.sh +``` + +### 运行 + +``` +./run.sh +``` + +将把[src/main.cc](src/main.cc)里定义在`sentencesToChoose`数组中的十句话转换为`wav`文件,保存在`output`文件夹中。 diff --git a/demos/TTSArmLinux/build.sh b/demos/TTSArmLinux/build.sh new file mode 100755 index 00000000000..c872e57494f --- /dev/null +++ b/demos/TTSArmLinux/build.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -e + +cd "$(dirname "$(realpath "$0")")" + +# load configure +. ./config.sh + +# build +echo "ARM_ABI is ${ARM_ABI}" +echo "PADDLE_LITE_DIR is ${PADDLE_LITE_DIR}" + +rm -rf build +mkdir -p build +cd build + +cmake -DPADDLE_LITE_DIR="${PADDLE_LITE_DIR}" -DARM_ABI="${ARM_ABI}" ../src +make + +echo "make successful!" diff --git a/demos/TTSArmLinux/config.sh b/demos/TTSArmLinux/config.sh new file mode 100644 index 00000000000..9b895aa7543 --- /dev/null +++ b/demos/TTSArmLinux/config.sh @@ -0,0 +1,14 @@ +# configuration + +ARM_ABI=armv8 +#ARM_ABI=armv7hf + +MODELS_DIR="${PWD}/models" +LIBS_DIR="${PWD}/libs" + +PADDLE_LITE_DOWNLOAD_URL="https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv.tar.gz" +PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx" + +MODEL_DOWNLOAD_URL="https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz" +AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb" +VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb" diff --git a/demos/TTSArmLinux/download.sh b/demos/TTSArmLinux/download.sh new file mode 100755 index 00000000000..6114dd7a61e --- /dev/null +++ b/demos/TTSArmLinux/download.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +cd "$(dirname "$(realpath "$0")")" +basedir="$PWD" + +mkdir -p 
./libs ./models + +download() { + file="$1" + url="$2" + dir="$3" + + cd "$dir" + echo "Downloading $file..." + wget -O "$file" "$url" + echo "Extracting $file..." + tar -vxf "$file" +} + +download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ + 'https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ + "$basedir/libs" + +download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ + 'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ + "$basedir/models" diff --git a/demos/TTSArmLinux/run.sh b/demos/TTSArmLinux/run.sh new file mode 100755 index 00000000000..69a9a1b22ec --- /dev/null +++ b/demos/TTSArmLinux/run.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -e + +cd "$(dirname "$(realpath "$0")")" + +# load configure +. ./config.sh + +# create dir +rm -rf ./output +mkdir -p ./output + +# run +for i in {1..10}; do + (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i ./output/$i.wav) +done + +ls -lh "$PWD"/output/*.wav diff --git a/demos/TTSArmLinux/src/CMakeLists.txt b/demos/TTSArmLinux/src/CMakeLists.txt new file mode 100644 index 00000000000..b15d899346b --- /dev/null +++ b/demos/TTSArmLinux/src/CMakeLists.txt @@ -0,0 +1,58 @@ +cmake_minimum_required(VERSION 3.10) +set(CMAKE_SYSTEM_NAME Linux) +if(ARM_ABI STREQUAL "armv8") + set(CMAKE_SYSTEM_PROCESSOR aarch64) + set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") + set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") +elseif(ARM_ABI STREQUAL "armv7hf") + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") + set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") +else() + message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.") + return() +endif() + +project(paddlespeech_tts_demo) +message(STATUS "TARGET ARCH ABI: ${ARM_ABI}") +message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}") + +include_directories(${PADDLE_LITE_DIR}/include) 
+link_directories(${PADDLE_LITE_DIR}/libs/${ARM_ABI}) +link_directories(${PADDLE_LITE_DIR}/lib) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +if(ARM_ABI STREQUAL "armv8") + set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}") +elseif(ARM_ABI STREQUAL "armv7hf") + set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" ) +endif() + +find_package(OpenMP REQUIRED) +if(OpenMP_FOUND OR OpenMP_CXX_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}") + message(STATUS "OpenMP C flags: ${OpenMP_C_FLAGS}") + message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") + message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}") + message(STATUS "OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}") +else() + message(FATAL_ERROR "Could not found OpenMP!") + return() +endif() +find_package(OpenCV REQUIRED) +if(OpenCV_FOUND OR OpenCV_CXX_FOUND) + include_directories(${OpenCV_INCLUDE_DIRS}) + message(STATUS "OpenCV library status:") + message(STATUS " version: ${OpenCV_VERSION}") + message(STATUS " libraries: ${OpenCV_LIBS}") + message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") +else() + message(FATAL_ERROR "Could not found OpenCV!") + return() +endif() + +add_executable(paddlespeech_tts_demo main.cc) +target_link_libraries(paddlespeech_tts_demo paddle_light_api_shared) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp new file mode 100644 index 00000000000..8c4f4655d86 --- /dev/null +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include +#include +#include +#include "paddle_api.h" + +using namespace paddle::lite_api; + +class Predictor { 
+private: + float inferenceTime = 0; + std::shared_ptr AMPredictor = nullptr; + std::shared_ptr VOCPredictor = nullptr; + std::vector wav; + +public: + bool init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) { + // Release model if exists + releaseModel(); + + AMPredictor = loadModel(AMModelPath, cpuThreadNum, cpuPowerMode); + if (AMPredictor == nullptr) { + return false; + } + VOCPredictor = loadModel(VOCModelPath, cpuThreadNum, cpuPowerMode); + if (VOCPredictor == nullptr) { + return false; + } + + return true; + } + + ~Predictor() { + releaseModel(); + releaseWav(); + } + + std::shared_ptr loadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) { + if (modelPath.empty()) { + return nullptr; + } + + // 设置MobileConfig + MobileConfig config; + config.set_model_from_file(modelPath); + config.set_threads(cpuThreadNum); + + if (cpuPowerMode == "LITE_POWER_HIGH") { + config.set_power_mode(PowerMode::LITE_POWER_HIGH); + } else if (cpuPowerMode == "LITE_POWER_LOW") { + config.set_power_mode(PowerMode::LITE_POWER_LOW); + } else if (cpuPowerMode == "LITE_POWER_FULL") { + config.set_power_mode(PowerMode::LITE_POWER_FULL); + } else if (cpuPowerMode == "LITE_POWER_NO_BIND") { + config.set_power_mode(PowerMode::LITE_POWER_NO_BIND); + } else if (cpuPowerMode == "LITE_POWER_RAND_HIGH") { + config.set_power_mode(PowerMode::LITE_POWER_RAND_HIGH); + } else if (cpuPowerMode == "LITE_POWER_RAND_LOW") { + config.set_power_mode(PowerMode::LITE_POWER_RAND_LOW); + } else { + std::cerr << "Unknown cpu power mode!" 
<< std::endl; + return nullptr; + } + + return CreatePaddlePredictor(config); + } + + void releaseModel() { + AMPredictor = nullptr; + VOCPredictor = nullptr; + } + + bool runModel(const std::vector &phones) { + if (!isLoaded()) { + return false; + } + + // 计时开始 + auto start = std::chrono::system_clock::now(); + + // 执行推理 + VOCOutputToWav(getAMOutput(phones)); + + // 计时结束 + auto end = std::chrono::system_clock::now(); + + // 计算用时 + std::chrono::duration duration = end - start; + inferenceTime = duration.count() * 1000; // 单位:毫秒 + + return true; + } + + std::unique_ptr getAMOutput(const std::vector &phones) { + auto phones_handle = AMPredictor->GetInput(0); + phones_handle->Resize({static_cast(phones.size())}); + phones_handle->CopyFromCpu(phones.data()); + AMPredictor->Run(); + + // 获取输出Tensor + auto am_output_handle = AMPredictor->GetOutput(0); + // 打印输出Tensor的shape + std::cout << "AM Output shape: "; + auto shape = am_output_handle->shape(); + for (auto s : shape) { + std::cout << s << ", "; + } + std::cout << std::endl; + + // 获取输出Tensor的数据 + auto am_output_data = am_output_handle->mutable_data(); + return am_output_handle; + } + + void VOCOutputToWav(std::unique_ptr &&input) { + auto mel_handle = VOCPredictor->GetInput(0); + // [?, 80] + auto dims = input->shape(); + mel_handle->Resize(dims); + auto am_output_data = input->mutable_data(); + mel_handle->CopyFromCpu(am_output_data); + VOCPredictor->Run(); + + // 获取输出Tensor + auto voc_output_handle = VOCPredictor->GetOutput(0); + // 打印输出Tensor的shape + std::cout << "VOC Output shape: "; + auto shape = voc_output_handle->shape(); + for (auto s : shape) { + std::cout << s << ", "; + } + std::cout << std::endl; + + // 获取输出Tensor的数据 + int64_t output_size = 1; + for (auto dim : voc_output_handle->shape()) { + output_size *= dim; + } + wav.resize(output_size); + auto output_data = voc_output_handle->mutable_data(); + std::copy_n(output_data, output_size, wav.data()); + } + + bool isLoaded() { + return AMPredictor != 
nullptr && VOCPredictor != nullptr; + } + + float getInferenceTime() { + return inferenceTime; + } + + const std::vector & getWav() { + return wav; + } + + void releaseWav() { + wav.clear(); + } + + struct WavHeader { + // RIFF 头 + char riff[4] = {'R', 'I', 'F', 'F'}; + uint32_t size = 0; + char wave[4] = {'W', 'A', 'V', 'E'}; + + // FMT 头 + char fmt[4] = {'f', 'm', 't', ' '}; + uint32_t fmt_size = 16; + uint16_t audio_format = 3; + uint16_t num_channels = 1; + + // 如果播放速度和音调异常,请修改采样率 + // 常见采样率:16000, 24000, 32000, 44100, 48000, 96000 + uint32_t sample_rate = 24000; + + uint32_t byte_rate = 64000; + uint16_t block_align = 4; + uint16_t bits_per_sample = 32; + + // DATA 头 + char data[4] = {'d', 'a', 't', 'a'}; + uint32_t data_size = 0; + }; + + bool writeWavToFile(const std::string &wavPath) { + std::ofstream fout(wavPath, std::ios::binary); + if (!fout.is_open()) { + return false; + } + + // 写入头信息 + WavHeader header; + header.size = sizeof(header) - 8; + header.data_size = wav.size() * sizeof(float); + header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8; + header.block_align = header.num_channels * header.bits_per_sample / 8; + fout.write(reinterpret_cast(&header), sizeof(header)); + + // 写入wav数据 + fout.write(reinterpret_cast(wav.data()), header.data_size); + + fout.close(); + return true; + } +}; diff --git a/demos/TTSArmLinux/src/main.cc b/demos/TTSArmLinux/src/main.cc new file mode 100644 index 00000000000..64aeaa8578c --- /dev/null +++ b/demos/TTSArmLinux/src/main.cc @@ -0,0 +1,71 @@ +#include +#include +#include +#include "paddle_api.h" +#include "Predictor.hpp" + +using namespace paddle::lite_api; + +std::vector> sentencesToChoose = { + // 009901 昨日,这名“伤者”与医生全部被警方依法刑事拘留。 + {261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141}, + // 009902 钱伟长想到上海来办学校是经过深思熟虑的。 + {174, 83, 213, 39, 20, 260, 89, 40, 
30, 177, 22, 71, 9, 153, 8, 37, 17, 260, 251, 260, 99, 179, 177, 116, 151, 125, 70, 233, 177, 51, 176, 108, 177, 184, 153, 242, 40, 45}, + // 009903 她见我一进门就骂,吃饭时也骂,骂得我抬不起头。 + {182, 2, 151, 85, 232, 73, 151, 123, 154, 52, 151, 143, 154, 5, 179, 39, 113, 69, 17, 177, 114, 105, 154, 5, 179, 154, 5, 40, 45, 232, 182, 8, 37, 186, 174, 74, 182, 168}, + // 009904 李述德在离开之前,只说了一句“柱驼杀父亲了”。 + {153, 74, 177, 186, 40, 42, 261, 10, 153, 73, 152, 7, 262, 113, 174, 83, 179, 262, 115, 177, 230, 153, 45, 73, 151, 242, 180, 262, 186, 182, 231, 177, 2, 69, 186, 174, 124, 153, 45}, + // 009905 这种车票和保险单捆绑出售属于重复性购买。 + {262, 44, 262, 163, 39, 41, 173, 99, 71, 42, 37, 28, 260, 84, 40, 14, 179, 152, 220, 37, 21, 39, 183, 177, 170, 179, 177, 185, 240, 39, 162, 69, 186, 260, 128, 70, 170, 154, 9}, + // 009906 戴佩妮的男友西米露接唱情歌,让她非常开心。 + {40, 10, 173, 49, 155, 72, 40, 45, 155, 15, 142, 260, 72, 154, 74, 153, 186, 179, 151, 103, 39, 22, 174, 126, 70, 41, 179, 175, 22, 182, 2, 69, 46, 39, 20, 152, 7, 260, 120}, + // 009907 观大势、谋大局、出大策始终是该院的办院方针。 + {70, 199, 40, 5, 177, 116, 154, 168, 40, 5, 151, 240, 179, 39, 183, 40, 5, 38, 44, 179, 177, 115, 262, 161, 177, 116, 70, 7, 247, 40, 45, 37, 17, 247, 69, 19, 262, 51}, + // 009908 他们骑着摩托回家,正好为农忙时的父母帮忙。 + {182, 2, 154, 55, 174, 73, 262, 45, 154, 157, 182, 230, 71, 212, 151, 77, 180, 262, 59, 71, 29, 214, 155, 162, 154, 20, 177, 114, 40, 45, 69, 186, 154, 185, 37, 19, 154, 20}, + // 009909 但是因为还没到退休年龄,只能掰着指头捱日子。 + {40, 17, 177, 116, 120, 214, 71, 8, 154, 47, 40, 30, 182, 214, 260, 140, 155, 83, 153, 126, 180, 262, 115, 155, 57, 37, 7, 262, 45, 262, 115, 182, 171, 8, 175, 116, 261, 112}, + // 009910 这几天雨水不断,人们恨不得待在家里不出门。 + {262, 44, 151, 74, 182, 82, 240, 177, 213, 37, 184, 40, 202, 180, 175, 52, 154, 55, 71, 54, 37, 186, 40, 42, 40, 7, 261, 10, 151, 77, 153, 74, 37, 186, 39, 183, 154, 52}, +}; + +void usage(const char *binName) { + std::cerr << "Usage:" << std::endl + << "\t" << binName << " " << std::endl; +} + +int main(int argc, char *argv[]) { + if 
(argc < 5) { + usage(argv[0]); + return -1; + } + const char *AMModelPath = argv[1]; + const char *VOCModelPath = argv[2]; + int sentencesIndex = atoi(argv[3]) - 1; + const char *outputWavPath = argv[4]; + + if (sentencesIndex < 0 || sentencesIndex >= sentencesToChoose.size()) { + std::cerr << "sentences-index out of range" << std::endl; + return -1; + } + + Predictor predictor; + if (!predictor.init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) { + std::cerr << "predictor init failed" << std::endl; + return -1; + } + + if (!predictor.runModel(sentencesToChoose[sentencesIndex])) { + std::cerr << "predictor run model failed" << std::endl; + return -1; + } + + std::cout << "Inference time: " << predictor.getInferenceTime() << "ms, WAV size: " << predictor.getWav().size() << std::endl; + + if (!predictor.writeWavToFile(outputWavPath)) { + std::cerr << "write wav file failed" << std::endl; + return -1; + } + + return 0; +} From 0f62ccc0d2c0c665344a7e5efd47fd57c7001c7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 03:45:39 +0000 Subject: [PATCH 02/16] demos/TTSArmLinux: Clarify compatibility issues with Paddle-Lite 2.12 --- demos/TTSArmLinux/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md index 6fe66c5b6a9..fe6e7e381d2 100644 --- a/demos/TTSArmLinux/README.md +++ b/demos/TTSArmLinux/README.md @@ -5,8 +5,7 @@ 使用与安卓Demo版本相同的[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)推理库([Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35)), 该库兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 -该Demo自带的模型与[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12)不兼容,运行会崩溃,需要使用更新的版本。 -不过如果换成用 Paddle-Lite 2.12 opt 工具优化的模型,应该可以兼容。 +注意 [Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容,无法导出或运行TTS模型,需要使用更新的版本(比如`develop`分支中的代码)。 ### 配置 From 
447a68eaab25c54731a3c284b4da7f31638cd9f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 04:08:18 +0000 Subject: [PATCH 03/16] demos/TTSArmLinux: adjust the code style to match the project --- demos/TTSArmLinux/src/Predictor.hpp | 84 ++++++++++++++--------------- demos/TTSArmLinux/src/main.cc | 8 +-- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp index 8c4f4655d86..765d859bea7 100644 --- a/demos/TTSArmLinux/src/Predictor.hpp +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -10,23 +10,17 @@ using namespace paddle::lite_api; class Predictor { -private: - float inferenceTime = 0; - std::shared_ptr AMPredictor = nullptr; - std::shared_ptr VOCPredictor = nullptr; - std::vector wav; - public: - bool init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) { + bool Init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) { // Release model if exists - releaseModel(); + ReleaseModel(); - AMPredictor = loadModel(AMModelPath, cpuThreadNum, cpuPowerMode); - if (AMPredictor == nullptr) { + AM_predictor_ = LoadModel(AMModelPath, cpuThreadNum, cpuPowerMode); + if (AM_predictor_ == nullptr) { return false; } - VOCPredictor = loadModel(VOCModelPath, cpuThreadNum, cpuPowerMode); - if (VOCPredictor == nullptr) { + VOC_predictor_ = LoadModel(VOCModelPath, cpuThreadNum, cpuPowerMode); + if (VOC_predictor_ == nullptr) { return false; } @@ -34,11 +28,11 @@ class Predictor { } ~Predictor() { - releaseModel(); - releaseWav(); + ReleaseModel(); + ReleaseWav(); } - std::shared_ptr loadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) { + std::shared_ptr LoadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) { if (modelPath.empty()) { return nullptr; } @@ -68,13 
+62,13 @@ class Predictor { return CreatePaddlePredictor(config); } - void releaseModel() { - AMPredictor = nullptr; - VOCPredictor = nullptr; + void ReleaseModel() { + AM_predictor_ = nullptr; + VOC_predictor_ = nullptr; } - bool runModel(const std::vector &phones) { - if (!isLoaded()) { + bool RunModel(const std::vector &phones) { + if (!IsLoaded()) { return false; } @@ -82,26 +76,26 @@ class Predictor { auto start = std::chrono::system_clock::now(); // 执行推理 - VOCOutputToWav(getAMOutput(phones)); + VOCOutputToWav(GetAMOutput(phones)); // 计时结束 auto end = std::chrono::system_clock::now(); // 计算用时 std::chrono::duration duration = end - start; - inferenceTime = duration.count() * 1000; // 单位:毫秒 + inference_time_ = duration.count() * 1000; // 单位:毫秒 return true; } - std::unique_ptr getAMOutput(const std::vector &phones) { - auto phones_handle = AMPredictor->GetInput(0); + std::unique_ptr GetAMOutput(const std::vector &phones) { + auto phones_handle = AM_predictor_->GetInput(0); phones_handle->Resize({static_cast(phones.size())}); phones_handle->CopyFromCpu(phones.data()); - AMPredictor->Run(); + AM_predictor_->Run(); // 获取输出Tensor - auto am_output_handle = AMPredictor->GetOutput(0); + auto am_output_handle = AM_predictor_->GetOutput(0); // 打印输出Tensor的shape std::cout << "AM Output shape: "; auto shape = am_output_handle->shape(); @@ -116,16 +110,16 @@ class Predictor { } void VOCOutputToWav(std::unique_ptr &&input) { - auto mel_handle = VOCPredictor->GetInput(0); + auto mel_handle = VOC_predictor_->GetInput(0); // [?, 80] auto dims = input->shape(); mel_handle->Resize(dims); auto am_output_data = input->mutable_data(); mel_handle->CopyFromCpu(am_output_data); - VOCPredictor->Run(); + VOC_predictor_->Run(); // 获取输出Tensor - auto voc_output_handle = VOCPredictor->GetOutput(0); + auto voc_output_handle = VOC_predictor_->GetOutput(0); // 打印输出Tensor的shape std::cout << "VOC Output shape: "; auto shape = voc_output_handle->shape(); @@ -139,25 +133,25 @@ class Predictor { for 
(auto dim : voc_output_handle->shape()) { output_size *= dim; } - wav.resize(output_size); + wav_.resize(output_size); auto output_data = voc_output_handle->mutable_data(); - std::copy_n(output_data, output_size, wav.data()); + std::copy_n(output_data, output_size, wav_.data()); } - bool isLoaded() { - return AMPredictor != nullptr && VOCPredictor != nullptr; + bool IsLoaded() { + return AM_predictor_ != nullptr && VOC_predictor_ != nullptr; } - float getInferenceTime() { - return inferenceTime; + float GetInferenceTime() { + return inference_time_; } - const std::vector & getWav() { - return wav; + const std::vector & GetWav() { + return wav_; } - void releaseWav() { - wav.clear(); + void ReleaseWav() { + wav_.clear(); } struct WavHeader { @@ -185,7 +179,7 @@ class Predictor { uint32_t data_size = 0; }; - bool writeWavToFile(const std::string &wavPath) { + bool WriteWavToFile(const std::string &wavPath) { std::ofstream fout(wavPath, std::ios::binary); if (!fout.is_open()) { return false; @@ -194,15 +188,21 @@ class Predictor { // 写入头信息 WavHeader header; header.size = sizeof(header) - 8; - header.data_size = wav.size() * sizeof(float); + header.data_size = wav_.size() * sizeof(float); header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8; header.block_align = header.num_channels * header.bits_per_sample / 8; fout.write(reinterpret_cast(&header), sizeof(header)); // 写入wav数据 - fout.write(reinterpret_cast(wav.data()), header.data_size); + fout.write(reinterpret_cast(wav_.data()), header.data_size); fout.close(); return true; } + +private: + float inference_time_ = 0; + std::shared_ptr AM_predictor_ = nullptr; + std::shared_ptr VOC_predictor_ = nullptr; + std::vector wav_; }; diff --git a/demos/TTSArmLinux/src/main.cc b/demos/TTSArmLinux/src/main.cc index 64aeaa8578c..4068cb0b9ab 100644 --- a/demos/TTSArmLinux/src/main.cc +++ b/demos/TTSArmLinux/src/main.cc @@ -50,19 +50,19 @@ int main(int argc, char *argv[]) { } Predictor predictor; 
- if (!predictor.init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) { + if (!predictor.Init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) { std::cerr << "predictor init failed" << std::endl; return -1; } - if (!predictor.runModel(sentencesToChoose[sentencesIndex])) { + if (!predictor.RunModel(sentencesToChoose[sentencesIndex])) { std::cerr << "predictor run model failed" << std::endl; return -1; } - std::cout << "Inference time: " << predictor.getInferenceTime() << "ms, WAV size: " << predictor.getWav().size() << std::endl; + std::cout << "Inference time: " << predictor.GetInferenceTime() << "ms, WAV size: " << predictor.GetWav().size() << std::endl; - if (!predictor.writeWavToFile(outputWavPath)) { + if (!predictor.WriteWavToFile(outputWavPath)) { std::cerr << "write wav file failed" << std::endl; return -1; } From 9eb03eef7117ad783f901e9049f4a4fa403028b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 04:38:18 +0000 Subject: [PATCH 04/16] demos/TTSArmLinux: fix WAV size display --- demos/TTSArmLinux/src/Predictor.hpp | 6 +++++- demos/TTSArmLinux/src/main.cc | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp index 765d859bea7..6d4926fff72 100644 --- a/demos/TTSArmLinux/src/Predictor.hpp +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -150,6 +150,10 @@ class Predictor { return wav_; } + int GetWavSize() { + return wav_.size() * sizeof(float); + } + void ReleaseWav() { wav_.clear(); } @@ -188,7 +192,7 @@ class Predictor { // 写入头信息 WavHeader header; header.size = sizeof(header) - 8; - header.data_size = wav_.size() * sizeof(float); + header.data_size = GetWavSize(); header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8; header.block_align = header.num_channels * header.bits_per_sample / 8; fout.write(reinterpret_cast(&header), sizeof(header)); diff --git a/demos/TTSArmLinux/src/main.cc 
b/demos/TTSArmLinux/src/main.cc index 4068cb0b9ab..ceb76358fe9 100644 --- a/demos/TTSArmLinux/src/main.cc +++ b/demos/TTSArmLinux/src/main.cc @@ -60,7 +60,8 @@ int main(int argc, char *argv[]) { return -1; } - std::cout << "Inference time: " << predictor.GetInferenceTime() << "ms, WAV size: " << predictor.GetWav().size() << std::endl; + std::cout << "Inference time: " << predictor.GetInferenceTime() << " ms, " + << "WAV size (without header): " << predictor.GetWavSize() << " bytes" << std::endl; if (!predictor.WriteWavToFile(outputWavPath)) { std::cerr << "write wav file failed" << std::endl; From 3920960fbfecf1ab0de5a4040c6a8728df96127b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 04:50:12 +0000 Subject: [PATCH 05/16] demos/TTSArmLinux: Eliminate warnings about datatype mismatches > Paddle-Lite/lite/api/light_api.cc:462 CheckInputValid] Error input tensor precision type. Input index (0) Tensor name (text) Require precision type (int64_t) Input precision type (float). 
--- demos/TTSArmLinux/src/Predictor.hpp | 4 ++-- demos/TTSArmLinux/src/main.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp index 6d4926fff72..d32ed1fae08 100644 --- a/demos/TTSArmLinux/src/Predictor.hpp +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -67,7 +67,7 @@ class Predictor { VOC_predictor_ = nullptr; } - bool RunModel(const std::vector &phones) { + bool RunModel(const std::vector &phones) { if (!IsLoaded()) { return false; } @@ -88,7 +88,7 @@ class Predictor { return true; } - std::unique_ptr GetAMOutput(const std::vector &phones) { + std::unique_ptr GetAMOutput(const std::vector &phones) { auto phones_handle = AM_predictor_->GetInput(0); phones_handle->Resize({static_cast(phones.size())}); phones_handle->CopyFromCpu(phones.data()); diff --git a/demos/TTSArmLinux/src/main.cc b/demos/TTSArmLinux/src/main.cc index ceb76358fe9..0bf78a7de4c 100644 --- a/demos/TTSArmLinux/src/main.cc +++ b/demos/TTSArmLinux/src/main.cc @@ -6,7 +6,7 @@ using namespace paddle::lite_api; -std::vector> sentencesToChoose = { +std::vector> sentencesToChoose = { // 009901 昨日,这名“伤者”与医生全部被警方依法刑事拘留。 {261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141}, // 009902 钱伟长想到上海来办学校是经过深思熟虑的。 @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { std::cerr << "predictor init failed" << std::endl; return -1; } - + if (!predictor.RunModel(sentencesToChoose[sentencesIndex])) { std::cerr << "predictor run model failed" << std::endl; return -1; From 93819cdd488ab1a0c237ecf86c270e0a0f14aa97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 05:49:13 +0000 Subject: [PATCH 06/16] demos/TTSArmLinux: download.sh add armv7hf library; add MD5 verify --- demos/TTSArmLinux/download.sh | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 
insertions(+), 3 deletions(-) diff --git a/demos/TTSArmLinux/download.sh b/demos/TTSArmLinux/download.sh index 6114dd7a61e..e311590491b 100755 --- a/demos/TTSArmLinux/download.sh +++ b/demos/TTSArmLinux/download.sh @@ -9,19 +9,46 @@ mkdir -p ./libs ./models download() { file="$1" url="$2" - dir="$3" + md5="$3" + dir="$4" cd "$dir" - echo "Downloading $file..." - wget -O "$file" "$url" + + if [ -f "$file" ] && [ "$(md5sum "$file" | awk '{ print $1 }')" = "$md5" ]; then + echo "File $file (MD5: $md5) has been downloaded." + else + echo "Downloading $file..." + wget -O "$file" "$url" + + # MD5 verify + fileMd5="$(md5sum "$file" | awk '{ print $1 }')" + if [ "$fileMd5" == "$md5" ]; then + echo "File $file (MD5: $md5) has been downloaded." + else + echo "MD5 mismatch, file may be corrupt" + echo "$file MD5: $fileMd5, it should be $md5" + fi + fi + echo "Extracting $file..." + echo '-----------------------' tar -vxf "$file" + echo '=======================' } download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ 'https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ + '39e0c6604f97c70f5d13c573d7e709b9' \ + "$basedir/libs" + +download 'inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ + 'https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ + 'f5ceb509f0b610dafb8379889c5f36f8' \ "$basedir/libs" download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ 'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ + '93ef17d44b498aff3bea93e2c5c09a1e' \ "$basedir/models" + +echo "Done." 
From 33b338943fd32463219c730be5c5395969e54443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 07:49:28 +0000 Subject: [PATCH 07/16] demos/TTSArmLinux: Update the download links The prebuilt libraries have been placed on a BCE BOS server. --- demos/TTSArmLinux/README.md | 4 ++-- demos/TTSArmLinux/download.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md index fe6e7e381d2..972b5675a95 100644 --- a/demos/TTSArmLinux/README.md +++ b/demos/TTSArmLinux/README.md @@ -2,8 +2,8 @@ 修改自[demos/TTSAndroid](../TTSAndroid),模型也来自该安卓Demo。 -使用与安卓Demo版本相同的[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)推理库([Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35)), -该库兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 +使用与安卓Demo版本相同的Paddle-Lite推理库:[Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd35) +预编译的二进制兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 注意 [Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容,无法导出或运行TTS模型,需要使用更新的版本(比如`develop`分支中的代码)。 diff --git a/demos/TTSArmLinux/download.sh b/demos/TTSArmLinux/download.sh index e311590491b..e362bb60196 100755 --- a/demos/TTSArmLinux/download.sh +++ b/demos/TTSArmLinux/download.sh @@ -37,12 +37,12 @@ download() { } download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ - 'https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ + 'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ '39e0c6604f97c70f5d13c573d7e709b9' \ "$basedir/libs" download 'inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ - 
'https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ + 'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ 'f5ceb509f0b610dafb8379889c5f36f8' \ "$basedir/libs" From 18375aa19f2eaf3c091b95c4c4f64aa6884c7364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 07:58:03 +0000 Subject: [PATCH 08/16] demos/TTSArmLinux: all scripts use the paths defined in config.sh --- demos/TTSArmLinux/config.sh | 3 +-- demos/TTSArmLinux/download.sh | 12 +++++++----- demos/TTSArmLinux/run.sh | 8 ++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/demos/TTSArmLinux/config.sh b/demos/TTSArmLinux/config.sh index 9b895aa7543..8053414d51b 100644 --- a/demos/TTSArmLinux/config.sh +++ b/demos/TTSArmLinux/config.sh @@ -5,10 +5,9 @@ ARM_ABI=armv8 MODELS_DIR="${PWD}/models" LIBS_DIR="${PWD}/libs" +OUTPUT_DIR="${PWD}/output" -PADDLE_LITE_DOWNLOAD_URL="https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv.tar.gz" PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx" -MODEL_DOWNLOAD_URL="https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz" AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb" VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb" diff --git a/demos/TTSArmLinux/download.sh b/demos/TTSArmLinux/download.sh index e362bb60196..560374bc94d 100755 --- a/demos/TTSArmLinux/download.sh +++ b/demos/TTSArmLinux/download.sh @@ -2,9 +2,11 @@ set -e cd "$(dirname "$(realpath "$0")")" -basedir="$PWD" -mkdir -p ./libs ./models +# load configure +. 
./config.sh + +mkdir -p "$LIBS_DIR" "$MODELS_DIR" download() { file="$1" @@ -39,16 +41,16 @@ download() { download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ 'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ '39e0c6604f97c70f5d13c573d7e709b9' \ - "$basedir/libs" + "$LIBS_DIR" download 'inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ 'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ 'f5ceb509f0b610dafb8379889c5f36f8' \ - "$basedir/libs" + "$LIBS_DIR" download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ 'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ '93ef17d44b498aff3bea93e2c5c09a1e' \ - "$basedir/models" + "$MODELS_DIR" echo "Done." diff --git a/demos/TTSArmLinux/run.sh b/demos/TTSArmLinux/run.sh index 69a9a1b22ec..efcb61b5b96 100755 --- a/demos/TTSArmLinux/run.sh +++ b/demos/TTSArmLinux/run.sh @@ -7,12 +7,12 @@ cd "$(dirname "$(realpath "$0")")" . ./config.sh # create dir -rm -rf ./output -mkdir -p ./output +rm -rf "$OUTPUT_DIR" +mkdir -p "$OUTPUT_DIR" # run for i in {1..10}; do - (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i ./output/$i.wav) + (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i "$OUTPUT_DIR/$i.wav") done -ls -lh "$PWD"/output/*.wav +ls -lh "$OUTPUT_DIR"/*.wav From 5e0126ec22f7c254ae10e8ef097e11dd8f7ab0e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 09:51:44 +0000 Subject: [PATCH 09/16] demos/TTSArmLinux: Remove unnecessary opencv dependencies Demo does not use OpenCV functions. Even if the Paddle-Lite library enables OpenCV, it does not need to be installed when building the demo. 
--- demos/TTSArmLinux/src/CMakeLists.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/demos/TTSArmLinux/src/CMakeLists.txt b/demos/TTSArmLinux/src/CMakeLists.txt index b15d899346b..0fa27444f5c 100644 --- a/demos/TTSArmLinux/src/CMakeLists.txt +++ b/demos/TTSArmLinux/src/CMakeLists.txt @@ -42,17 +42,6 @@ else() message(FATAL_ERROR "Could not found OpenMP!") return() endif() -find_package(OpenCV REQUIRED) -if(OpenCV_FOUND OR OpenCV_CXX_FOUND) - include_directories(${OpenCV_INCLUDE_DIRS}) - message(STATUS "OpenCV library status:") - message(STATUS " version: ${OpenCV_VERSION}") - message(STATUS " libraries: ${OpenCV_LIBS}") - message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") -else() - message(FATAL_ERROR "Could not found OpenCV!") - return() -endif() add_executable(paddlespeech_tts_demo main.cc) target_link_libraries(paddlespeech_tts_demo paddle_light_api_shared) From 8b32f45146dc42880c14495431334396b543d101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 10:23:37 +0000 Subject: [PATCH 10/16] demos/TTSArmLinux: add a clean script --- demos/TTSArmLinux/clean.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100755 demos/TTSArmLinux/clean.sh diff --git a/demos/TTSArmLinux/clean.sh b/demos/TTSArmLinux/clean.sh new file mode 100755 index 00000000000..1ea36556674 --- /dev/null +++ b/demos/TTSArmLinux/clean.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +cd "$(dirname "$(realpath "$0")")" + +# load configure +. ./config.sh + +# remove dirs +set -x + +rm -rf "$OUTPUT_DIR" +rm -rf "$LIBS_DIR" +rm -rf "$MODELS_DIR" From 035d2c07134cc7e732cb42e3aa4f3d0fade51134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 10:26:46 +0000 Subject: [PATCH 11/16] demos/TTSArmLinux: update building docs add steps for building paddle-lite library. 
--- demos/TTSArmLinux/README.md | 63 ++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md index 972b5675a95..9699d65ddc4 100644 --- a/demos/TTSArmLinux/README.md +++ b/demos/TTSArmLinux/README.md @@ -2,37 +2,43 @@ 修改自[demos/TTSAndroid](../TTSAndroid),模型也来自该安卓Demo。 -使用与安卓Demo版本相同的Paddle-Lite推理库:[Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd35) -预编译的二进制兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 - -注意 [Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容,无法导出或运行TTS模型,需要使用更新的版本(比如`develop`分支中的代码)。 - -### 配置 +### 配置编译选项 打开 [config.sh](config.sh) 按需修改配置。 默认编译64位版本,如果要编译32位版本,把`ARM_ABI=armv8`改成`ARM_ABI=armv7hf`。 -### 下载Paddle Lite库文件和模型文件 +### 安装依赖 ``` -./download.sh +# Ubuntu +sudo apt install build-essential cmake wget tar unzip + +# CentOS +sudo yum groupinstall "Development Tools" +sudo yum install cmake wget tar unzip ``` -### 安装依赖 +### 下载Paddle Lite库文件和模型文件 -以 Ubuntu 18.04 为例: +预编译的二进制使用与安卓Demo版本相同的Paddle-Lite推理库([Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd35))和模型([fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz))。 + +可用以下命令下载: ``` -sudo apt install build-essential cmake libopencv-dev +git clone https://github.com/PaddlePaddle/PaddleSpeech.git +cd PaddleSpeech/demos/TTSArmLinux +./download.sh ``` -### 编译 +### 编译Demo ``` ./build.sh ``` +如果编译或链接失败,请尝试手动编译Paddle Lite库,具体步骤在最下面。 + ### 运行 ``` @@ -40,3 +46,36 @@ sudo apt install build-essential cmake libopencv-dev ``` 将把[src/main.cpp](src/main.cpp)里定义在`sentencesToChoose`数组中的十句话转换为`wav`文件,保存在`output`文件夹中。 + +----- + +### 手动编译Paddle Lite库 + +预编译的二进制兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 + +注意 [Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容,无法导出或运行TTS模型,需要使用更新的版本(比如`develop`分支中的代码)。 + +#### 
安装Paddle Lite的编译依赖 + +``` +# Ubuntu +sudo apt install build-essential cmake git python + +# CentOS +sudo yum groupinstall "Development Tools" +sudo yum install cmake git python +``` + +#### 编译Paddle Lite + +``` +git clone -b develop https://github.com/PaddlePaddle/Paddle-Lite.git +cd Paddle-Lite +./lite/tools/build_linux.sh --with_extra=ON +``` + +编译完成后,打开Demo的 [config.sh](config.sh),把 `PADDLE_LITE_DIR` 改成以下值即可(注意替换`/path/to/`为实际目录): + +``` +PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx" +``` From a7b309b50e90670b75b70991408d3ea4a0f5d33e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 10:45:58 +0000 Subject: [PATCH 12/16] demos/TTSArmLinux: fix missing aarch64-linux-gnu-g++ in CentOS 7 In CentOS 7, it's aarch64-redhat-linux-g++. It might be a good idea not to give a compiler name and let cmake find it by itself. --- demos/TTSArmLinux/src/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/demos/TTSArmLinux/src/CMakeLists.txt b/demos/TTSArmLinux/src/CMakeLists.txt index 0fa27444f5c..e1076af92bb 100644 --- a/demos/TTSArmLinux/src/CMakeLists.txt +++ b/demos/TTSArmLinux/src/CMakeLists.txt @@ -2,12 +2,12 @@ cmake_minimum_required(VERSION 3.10) set(CMAKE_SYSTEM_NAME Linux) if(ARM_ABI STREQUAL "armv8") set(CMAKE_SYSTEM_PROCESSOR aarch64) - set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") - set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") + #set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") + #set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") elseif(ARM_ABI STREQUAL "armv7hf") set(CMAKE_SYSTEM_PROCESSOR arm) - set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") - set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") + #set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") + #set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") else() message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.") return() From
831da38aff0427638005d867233ab491b47f9ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 10:51:26 +0000 Subject: [PATCH 13/16] demos/TTSArmLinux: minor modification for docs --- demos/TTSArmLinux/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md index 9699d65ddc4..7b93bab6ea5 100644 --- a/demos/TTSArmLinux/README.md +++ b/demos/TTSArmLinux/README.md @@ -37,7 +37,9 @@ cd PaddleSpeech/demos/TTSArmLinux ./build.sh ``` -如果编译或链接失败,请尝试手动编译Paddle Lite库,具体步骤在最下面。 +预编译的二进制兼容 Ubuntu 16.04 到 20.04。 + +如果编译或链接失败,说明发行版与预编译库不兼容,请尝试手动编译Paddle Lite库,具体步骤在最下面。 ### 运行 From 2b06fde6e458278d99e7e1dd5e60273433da5d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Mon, 6 Mar 2023 11:23:34 +0000 Subject: [PATCH 14/16] demos/TTSArmLinux: clarify compatibility with Paddle-Lite versions --- demos/TTSArmLinux/README.md | 15 ++++++++++----- demos/TTSArmLinux/config.sh | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md index 7b93bab6ea5..100622619aa 100644 --- a/demos/TTSArmLinux/README.md +++ b/demos/TTSArmLinux/README.md @@ -21,7 +21,7 @@ sudo yum install cmake wget tar unzip ### 下载Paddle Lite库文件和模型文件 -预编译的二进制使用与安卓Demo版本相同的Paddle-Lite推理库([Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd35))和模型([fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz))。 +预编译的二进制使用与安卓Demo版本相同的Paddle-Lite推理库([Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449))和模型([fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz))。 可用以下命令下载: @@ -55,7 +55,11 @@ cd PaddleSpeech/demos/TTSArmLinux 预编译的二进制兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 -注意 [Paddle-Lite 
2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容,无法导出或运行TTS模型,需要使用更新的版本(比如`develop`分支中的代码)。 +注意,我们只能保证 [Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449) 与通过`download.sh`下载的模型兼容。 +如果使用其他版本的Paddle Lite库,可能需要用对应版本的opt工具重新导出模型。 + +此外,[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容,无法导出或运行TTS模型,需要使用更新的版本(比如`develop`分支中的代码)。 +但`develop`分支中的代码可能与通过`download.sh`下载的模型不兼容,Demo运行起来可能会崩溃。 #### 安装Paddle Lite的编译依赖 @@ -68,15 +72,16 @@ sudo yum groupinstall "Development Tools" sudo yum install cmake git python ``` -#### 编译Paddle Lite +#### 编译Paddle Lite 68b66fd35 ``` -git clone -b develop https://github.com/PaddlePaddle/Paddle-Lite.git +git clone https://github.com/PaddlePaddle/Paddle-Lite.git cd Paddle-Lite +git checkout 68b66fd356c875c92167d311ad458e6093078449 ./lite/tools/build_linux.sh --with_extra=ON ``` -编译完成后,打开Demo的 [config.sh](config.sh),把 `PADDLE_LITE_DIR` 改成以下值即可(注意替换`/path/to/`为实际目录): +编译完成后,打开Demo的[config.sh](config.sh),把 `PADDLE_LITE_DIR` 改成以下值即可(注意替换`/path/to/`为实际目录): ``` PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx" diff --git a/demos/TTSArmLinux/config.sh b/demos/TTSArmLinux/config.sh index 8053414d51b..0a04f18eeda 100644 --- a/demos/TTSArmLinux/config.sh +++ b/demos/TTSArmLinux/config.sh @@ -8,6 +8,7 @@ LIBS_DIR="${PWD}/libs" OUTPUT_DIR="${PWD}/output" PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx" +#PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx" AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb" VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb" From ac47ceeeb1d8321b327f69982b433111175df857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Tue, 7 Mar 2023 02:02:41 +0000 Subject: [PATCH 15/16] 
demos/TTSArmLinux: save wav in 16-bit PCM format to reduce compatibility issues --- demos/TTSArmLinux/src/Predictor.hpp | 40 +++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp index d32ed1fae08..268d98c1074 100644 --- a/demos/TTSArmLinux/src/Predictor.hpp +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -9,6 +9,8 @@ using namespace paddle::lite_api; +typedef int16_t WavDataType; + class Predictor { public: bool Init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) { @@ -104,8 +106,6 @@ class Predictor { } std::cout << std::endl; - // 获取输出Tensor的数据 - auto am_output_data = am_output_handle->mutable_data(); return am_output_handle; } @@ -133,9 +133,29 @@ class Predictor { for (auto dim : voc_output_handle->shape()) { output_size *= dim; } - wav_.resize(output_size); auto output_data = voc_output_handle->mutable_data(); - std::copy_n(output_data, output_size, wav_.data()); + + SaveFloatWav(output_data, output_size); + } + + inline float Abs(float number) { + return (number < 0) ? 
-number : number; + } + + void SaveFloatWav(float *floatWav, int64_t size) { + wav_.resize(size); + float maxSample = 0.01; + // 寻找最大采样值 + for (int64_t i=0; i maxSample) { + maxSample = sample; + } + } + // 把采样值缩放到 int_16 范围 + for (int64_t i=0; i & GetWav() { + const std::vector & GetWav() { return wav_; } int GetWavSize() { - return wav_.size() * sizeof(float); + return wav_.size() * sizeof(WavDataType); } void ReleaseWav() { @@ -167,7 +187,7 @@ class Predictor { // FMT 头 char fmt[4] = {'f', 'm', 't', ' '}; uint32_t fmt_size = 16; - uint16_t audio_format = 3; + uint16_t audio_format = 1; // 1为整数编码,3为浮点编码 uint16_t num_channels = 1; // 如果播放速度和音调异常,请修改采样率 @@ -175,8 +195,8 @@ class Predictor { uint32_t sample_rate = 24000; uint32_t byte_rate = 64000; - uint16_t block_align = 4; - uint16_t bits_per_sample = 32; + uint16_t block_align = 2; + uint16_t bits_per_sample = sizeof(WavDataType) * 8; // DATA 头 char data[4] = {'d', 'a', 't', 'a'}; @@ -208,5 +228,5 @@ class Predictor { float inference_time_ = 0; std::shared_ptr AM_predictor_ = nullptr; std::shared_ptr VOC_predictor_ = nullptr; - std::vector wav_; + std::vector wav_; }; From 85147e2e65a8294a9607c363ff895ab2f3901743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Tue, 7 Mar 2023 03:09:33 +0000 Subject: [PATCH 16/16] demos/TTSArmLinux: Fix wav length field value error --- demos/TTSArmLinux/src/Predictor.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp index 268d98c1074..221d51fc193 100644 --- a/demos/TTSArmLinux/src/Predictor.hpp +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -211,8 +211,8 @@ class Predictor { // 写入头信息 WavHeader header; - header.size = sizeof(header) - 8; header.data_size = GetWavSize(); + header.size = sizeof(header) - 8 + header.data_size; header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8; header.block_align = header.num_channels * 
header.bits_per_sample / 8; fout.write(reinterpret_cast(&header), sizeof(header));