[TTS][Paddle-Lite]add a TTS demo for ARM Linux #2991

Merged
merged 16 commits into from Mar 7, 2023
4 changes: 4 additions & 0 deletions demos/TTSArmLinux/.gitignore
@@ -0,0 +1,4 @@
build/
output/
libs/
models/
43 changes: 43 additions & 0 deletions demos/TTSArmLinux/README.md
@@ -0,0 +1,43 @@
# PaddleSpeech TTS Text-to-Speech ARM Linux Demo

Adapted from [demos/TTSAndroid](../TTSAndroid); the models also come from that Android demo.

It uses the same [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) inference library as the Android demo ([Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35)).
That prebuilt library is compatible with Ubuntu 16.04 through 20.04; if your distribution is not compatible, you can build it from source yourself.

The models bundled with this demo are not compatible with [Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) and will crash at runtime, so a newer library version is required.
However, models re-optimized with the Paddle-Lite 2.12 opt tool should work with that release.

### Configuration

Open [config.sh](config.sh) and adjust the settings as needed.

A 64-bit binary is built by default; to build a 32-bit version, change `ARM_ABI=armv8` to `ARM_ABI=armv7hf`.

### Download the Paddle-Lite library and model files

```
./download.sh
```

### Install dependencies

Using Ubuntu 18.04 as an example:

```
sudo apt install build-essential cmake libopencv-dev
```

### Build

```
./build.sh
```

### Run

```
./run.sh
```

This converts the ten sentences defined in the `sentencesToChoose` array in [src/main.cc](src/main.cc) to `wav` files and saves them in the `output` folder.
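
For reference, here is a minimal sketch of what a driver like [src/main.cc](src/main.cc) might look like on top of the `Predictor` class from [src/Predictor.hpp](src/Predictor.hpp). The phoneme IDs, thread count, and power mode below are illustrative placeholders, not the demo's actual values:

```
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include "Predictor.hpp"

int main(int argc, char *argv[]) {
    if (argc != 5) {
        std::cerr << "Usage: " << argv[0]
                  << " <AM model> <VOC model> <sentence index> <output.wav>"
                  << std::endl;
        return EXIT_FAILURE;
    }

    // Placeholder phoneme IDs; the real demo looks up the phoneme sequence
    // for the sentence selected by argv[3] in its sentencesToChoose array.
    std::vector<float> phones = {82, 109, 47, 52, 1};

    Predictor predictor;
    if (!predictor.init(argv[1], argv[2], /*cpuThreadNum=*/1, "LITE_POWER_HIGH")) {
        std::cerr << "Failed to load models" << std::endl;
        return EXIT_FAILURE;
    }
    if (!predictor.runModel(phones)) {
        std::cerr << "Inference failed" << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << "Inference took " << predictor.getInferenceTime() << " ms"
              << std::endl;
    return predictor.writeWavToFile(argv[4]) ? EXIT_SUCCESS : EXIT_FAILURE;
}
```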
20 changes: 20 additions & 0 deletions demos/TTSArmLinux/build.sh
@@ -0,0 +1,20 @@
#!/bin/bash
set -e

cd "$(dirname "$(realpath "$0")")"

# load configure
. ./config.sh

# build
echo "ARM_ABI is ${ARM_ABI}"
echo "PADDLE_LITE_DIR is ${PADDLE_LITE_DIR}"

rm -rf build
mkdir -p build
cd build

cmake -DPADDLE_LITE_DIR="${PADDLE_LITE_DIR}" -DARM_ABI="${ARM_ABI}" ../src
make
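# the compiled binary is build/paddlespeech_tts_demo; run.sh invokes it from there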

echo "make successful!"
14 changes: 14 additions & 0 deletions demos/TTSArmLinux/config.sh
@@ -0,0 +1,14 @@
# configuration

ARM_ABI=armv8
#ARM_ABI=armv7hf
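# Note: ARM_ABI selects both the Paddle-Lite package downloaded below and the
# cross toolchain configured in src/CMakeLists.txt (aarch64-linux-gnu-* for
# armv8, arm-linux-gnueabihf-* for armv7hf).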

MODELS_DIR="${PWD}/models"
LIBS_DIR="${PWD}/libs"

PADDLE_LITE_DOWNLOAD_URL="https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv.tar.gz"
PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"

MODEL_DOWNLOAD_URL="https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz"
AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
27 changes: 27 additions & 0 deletions demos/TTSArmLinux/download.sh
@@ -0,0 +1,27 @@
#!/bin/bash
set -e

cd "$(dirname "$(realpath "$0")")"
basedir="$PWD"

# load configure
. ./config.sh

mkdir -p ./libs ./models

download() {
    file="$1"
    url="$2"
    dir="$3"

    cd "$dir"
    echo "Downloading $file..."
    wget -O "$file" "$url"
    echo "Extracting $file..."
    tar -vxf "$file"
}
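# Note: download() cd's into "$dir" and does not cd back, which is why each
# call below passes an absolute path derived from "$basedir".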

# download the Paddle-Lite package and models defined in config.sh,
# so the chosen ARM_ABI is honored
download "$(basename "$PADDLE_LITE_DOWNLOAD_URL")" \
    "$PADDLE_LITE_DOWNLOAD_URL" \
    "$basedir/libs"

download "$(basename "$MODEL_DOWNLOAD_URL")" \
    "$MODEL_DOWNLOAD_URL" \
    "$basedir/models"
18 changes: 18 additions & 0 deletions demos/TTSArmLinux/run.sh
@@ -0,0 +1,18 @@
#!/bin/bash
set -e

cd "$(dirname "$(realpath "$0")")"

# load configure
. ./config.sh

# create dir
rm -rf ./output
mkdir -p ./output

# run
# the third argument selects one of the ten sentences defined in the
# sentencesToChoose array in src/main.cc
for i in {1..10}; do
    (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i ./output/$i.wav)
done

ls -lh "$PWD"/output/*.wav
58 changes: 58 additions & 0 deletions demos/TTSArmLinux/src/CMakeLists.txt
@@ -0,0 +1,58 @@
cmake_minimum_required(VERSION 3.10)
set(CMAKE_SYSTEM_NAME Linux)
if(ARM_ABI STREQUAL "armv8")
    set(CMAKE_SYSTEM_PROCESSOR aarch64)
    set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
    set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
elseif(ARM_ABI STREQUAL "armv7hf")
    set(CMAKE_SYSTEM_PROCESSOR arm)
    set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
    set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
else()
    message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only armv8 and armv7hf are supported.")
    return()
endif()

project(paddlespeech_tts_demo)
message(STATUS "TARGET ARCH ABI: ${ARM_ABI}")
message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")

include_directories(${PADDLE_LITE_DIR}/include)
link_directories(${PADDLE_LITE_DIR}/libs/${ARM_ABI})
link_directories(${PADDLE_LITE_DIR}/lib)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(ARM_ABI STREQUAL "armv8")
    set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
    set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
elseif(ARM_ABI STREQUAL "armv7hf")
    set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
    set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}")
endif()

find_package(OpenMP REQUIRED)
if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
    message(STATUS "OpenMP C flags: ${OpenMP_C_FLAGS}")
    message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}")
    message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
    message(STATUS "OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}")
else()
    message(FATAL_ERROR "Could not find OpenMP!")
    return()
endif()
find_package(OpenCV REQUIRED)
if(OpenCV_FOUND OR OpenCV_CXX_FOUND)
    include_directories(${OpenCV_INCLUDE_DIRS})
    message(STATUS "OpenCV library status:")
    message(STATUS "    version: ${OpenCV_VERSION}")
    message(STATUS "    libraries: ${OpenCV_LIBS}")
    message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
else()
    message(FATAL_ERROR "Could not find OpenCV!")
    return()
endif()

add_executable(paddlespeech_tts_demo main.cc)
target_link_libraries(paddlespeech_tts_demo paddle_light_api_shared)
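
# Note: paddle_light_api_shared is a shared library, so the dynamic loader must
# be able to find libpaddle_light_api_shared.so at runtime (for example via
# LD_LIBRARY_PATH pointing at ${PADDLE_LITE_DIR}/lib), unless an rpath is added here.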
208 changes: 208 additions & 0 deletions demos/TTSArmLinux/src/Predictor.hpp
@@ -0,0 +1,208 @@
#include <algorithm>
#include <chrono>
#include <iostream>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle_api.h"

using namespace paddle::lite_api;

class Predictor {
private:
    float inferenceTime = 0;
    std::shared_ptr<PaddlePredictor> AMPredictor = nullptr;
    std::shared_ptr<PaddlePredictor> VOCPredictor = nullptr;
    std::vector<float> wav;

public:
    bool init(const std::string &AMModelPath, const std::string &VOCModelPath,
              int cpuThreadNum, const std::string &cpuPowerMode) {
        // Release model if exists
        releaseModel();

        AMPredictor = loadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
        if (AMPredictor == nullptr) {
            return false;
        }
        VOCPredictor = loadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
        if (VOCPredictor == nullptr) {
            return false;
        }

        return true;
    }

    ~Predictor() {
        releaseModel();
        releaseWav();
    }

    std::shared_ptr<PaddlePredictor> loadModel(const std::string &modelPath,
                                               int cpuThreadNum,
                                               const std::string &cpuPowerMode) {
        if (modelPath.empty()) {
            return nullptr;
        }

        // Set up MobileConfig
        MobileConfig config;
        config.set_model_from_file(modelPath);
        config.set_threads(cpuThreadNum);

        if (cpuPowerMode == "LITE_POWER_HIGH") {
            config.set_power_mode(PowerMode::LITE_POWER_HIGH);
        } else if (cpuPowerMode == "LITE_POWER_LOW") {
            config.set_power_mode(PowerMode::LITE_POWER_LOW);
        } else if (cpuPowerMode == "LITE_POWER_FULL") {
            config.set_power_mode(PowerMode::LITE_POWER_FULL);
        } else if (cpuPowerMode == "LITE_POWER_NO_BIND") {
            config.set_power_mode(PowerMode::LITE_POWER_NO_BIND);
        } else if (cpuPowerMode == "LITE_POWER_RAND_HIGH") {
            config.set_power_mode(PowerMode::LITE_POWER_RAND_HIGH);
        } else if (cpuPowerMode == "LITE_POWER_RAND_LOW") {
            config.set_power_mode(PowerMode::LITE_POWER_RAND_LOW);
        } else {
            std::cerr << "Unknown cpu power mode!" << std::endl;
            return nullptr;
        }

        return CreatePaddlePredictor<MobileConfig>(config);
    }
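
    // Note: the cpuPowerMode strings above map to Paddle-Lite PowerMode values
    // that control CPU core binding (e.g. LITE_POWER_HIGH prefers big cores,
    // LITE_POWER_LOW prefers little cores, LITE_POWER_NO_BIND lets the OS
    // schedule threads freely).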

    void releaseModel() {
        AMPredictor = nullptr;
        VOCPredictor = nullptr;
    }

    bool runModel(const std::vector<float> &phones) {
        if (!isLoaded()) {
            return false;
        }

        // Start the timer
        auto start = std::chrono::system_clock::now();

        // Run inference
        VOCOutputToWav(getAMOutput(phones));

        // Stop the timer
        auto end = std::chrono::system_clock::now();

        // Compute the elapsed time
        std::chrono::duration<float> duration = end - start;
        inferenceTime = duration.count() * 1000; // in milliseconds

        return true;
    }

    std::unique_ptr<const Tensor> getAMOutput(const std::vector<float> &phones) {
        auto phones_handle = AMPredictor->GetInput(0);
        phones_handle->Resize({static_cast<int64_t>(phones.size())});
        phones_handle->CopyFromCpu(phones.data());
        AMPredictor->Run();

        // Get the output tensor
        auto am_output_handle = AMPredictor->GetOutput(0);
        // Print the shape of the output tensor
        std::cout << "AM Output shape: ";
        auto shape = am_output_handle->shape();
        for (auto s : shape) {
            std::cout << s << ", ";
        }
        std::cout << std::endl;

        return am_output_handle;
    }

    void VOCOutputToWav(std::unique_ptr<const Tensor> &&input) {
        auto mel_handle = VOCPredictor->GetInput(0);
        // [?, 80]
        auto dims = input->shape();
        mel_handle->Resize(dims);
        auto am_output_data = input->mutable_data<float>();
        mel_handle->CopyFromCpu(am_output_data);
        VOCPredictor->Run();

        // Get the output tensor
        auto voc_output_handle = VOCPredictor->GetOutput(0);
        // Print the shape of the output tensor
        std::cout << "VOC Output shape: ";
        auto shape = voc_output_handle->shape();
        for (auto s : shape) {
            std::cout << s << ", ";
        }
        std::cout << std::endl;

        // Copy the output tensor's data
        int64_t output_size = 1;
        for (auto dim : voc_output_handle->shape()) {
            output_size *= dim;
        }
        wav.resize(output_size);
        auto output_data = voc_output_handle->mutable_data<float>();
        std::copy_n(output_data, output_size, wav.data());
    }
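
    // Note: the AM (FastSpeech2) output consumed above is a mel spectrogram of
    // shape [frames, 80]; the vocoder (Multi-Band MelGAN) converts it into
    // float PCM samples, which stay in `wav` until writeWavToFile() is called.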

    bool isLoaded() {
        return AMPredictor != nullptr && VOCPredictor != nullptr;
    }

    float getInferenceTime() {
        return inferenceTime;
    }

    const std::vector<float> & getWav() {
        return wav;
    }

    void releaseWav() {
        wav.clear();
    }

    struct WavHeader {
        // RIFF header
        char riff[4] = {'R', 'I', 'F', 'F'};
        uint32_t size = 0;
        char wave[4] = {'W', 'A', 'V', 'E'};

        // FMT header
        char fmt[4] = {'f', 'm', 't', ' '};
        uint32_t fmt_size = 16;
        uint16_t audio_format = 3;  // 3 = IEEE float PCM
        uint16_t num_channels = 1;

        // If playback speed or pitch sounds wrong, adjust the sample rate.
        // Common sample rates: 16000, 24000, 32000, 44100, 48000, 96000
        uint32_t sample_rate = 24000;

        uint32_t byte_rate = 64000;
        uint16_t block_align = 4;
        uint16_t bits_per_sample = 32;

        // DATA header
        char data[4] = {'d', 'a', 't', 'a'};
        uint32_t data_size = 0;
    };
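
    // With the defaults above (24 kHz, mono, 32-bit float), the derived fields
    // are byte_rate = 24000 * 1 * 32 / 8 = 96000 and block_align = 32 / 8 = 4;
    // writeWavToFile() recomputes both before writing, so the placeholder
    // byte_rate above is never written to disk as-is.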

    bool writeWavToFile(const std::string &wavPath) {
        std::ofstream fout(wavPath, std::ios::binary);
        if (!fout.is_open()) {
            return false;
        }

        // Fill in and write the header
        WavHeader header;
        header.data_size = wav.size() * sizeof(float);
        // RIFF chunk size = total file size minus the 8 bytes of "RIFF" + size
        header.size = sizeof(header) - 8 + header.data_size;
        header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8;
        header.block_align = header.num_channels * header.bits_per_sample / 8;
        fout.write(reinterpret_cast<const char*>(&header), sizeof(header));

        // Write the wav samples
        fout.write(reinterpret_cast<const char*>(wav.data()), header.data_size);

        fout.close();
        return true;
    }
};