From 4c92af5717dc16bbd509c700aea9a03a082a0be5 Mon Sep 17 00:00:00 2001 From: lidongdong Date: Mon, 18 Jul 2022 19:47:12 +0800 Subject: [PATCH 1/8] feat:add windows ctc-decoders compile func --- third_party/ctc_decoders/scorer.cpp | 3 ++- third_party/ctc_decoders/setup.py | 15 ++++++++------- third_party/install.bat | 24 ++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 third_party/install.bat diff --git a/third_party/ctc_decoders/scorer.cpp b/third_party/ctc_decoders/scorer.cpp index 6c1d96be36c..6e7f68cf6ba 100644 --- a/third_party/ctc_decoders/scorer.cpp +++ b/third_party/ctc_decoders/scorer.cpp @@ -13,7 +13,8 @@ #include "decoder_utils.h" using namespace lm::ngram; - +// if your platform is windows ,you need add the define +#define F_OK 0 Scorer::Scorer(double alpha, double beta, const std::string& lm_path, diff --git a/third_party/ctc_decoders/setup.py b/third_party/ctc_decoders/setup.py index ce2787e3fa5..ee81c8fd27a 100644 --- a/third_party/ctc_decoders/setup.py +++ b/third_party/ctc_decoders/setup.py @@ -89,10 +89,11 @@ def compile_test(header, library): or fn.endswith('unittest.cc')) ] # yapf: enable - -LIBS = ['stdc++'] +# LIBS = ['stdc++'] +LIBS = ['-static-libstdc++'] if platform.system() != 'Darwin': - LIBS.append('rt') + # LIBS.append('rt') + pass ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11'] @@ -126,12 +127,12 @@ def compile_test(header, library): ] setup( - name='paddlespeech_ctcdecoders', + name='paddlespeech_ldd_ctcdecoders', version='0.2.0', description="CTC decoders in paddlespeech", - author="PaddlePaddle Speech and Language Team", - author_email="paddlesl@baidu.com", + author="PaddlePaddle Speech user lidongdong compile for windows", + author_email="double_dongli@foxmail.com", url="https://github.com/PaddlePaddle/PaddleSpeech", license='Apache 2.0, GNU Lesser General Public License v3 (LGPLv3) (LGPL-3)', ext_modules=decoders_module, - py_modules=['paddlespeech_ctcdecoders']) + py_modules=['paddlespeech_ldd_ctcdecoders']) diff --git a/third_party/install.bat b/third_party/install.bat new file mode 100644 index 00000000000..8669e44476b --- /dev/null +++ b/third_party/install.bat @@ -0,0 +1,24 @@ +@echo off + +cd ctc_decoders +if not exist kenlm ( + git clone https://github.com/Doubledongli/kenlm.git + cd kenlm/ + git checkout df2d717e95183f79a90b2fa6e4307083a351ca6a + cd .. + @echo. +) + +if not exist openfst-1.6.3 ( + echo "Download and extract openfst ..." + git clone https://gitee.com/koala999/openfst.git + ren openfst openfst-1.6.3 + @echo. +) + +if not exist ThreadPool ( + git clone https://github.com/progschj/ThreadPool.git + @echo. +) +echo "Install decoders ..." +python setup.py install --num_processes 4 \ No newline at end of file From 06a44f7460df77a4c4a2831f723d56df6722c5be Mon Sep 17 00:00:00 2001 From: lidongdong Date: Mon, 18 Jul 2022 20:05:08 +0800 Subject: [PATCH 2/8] fix:change bat --- third_party/install.bat | 3 --- 1 file changed, 3 deletions(-) diff --git a/third_party/install.bat b/third_party/install.bat index 8669e44476b..0bf1e7bb127 100644 --- a/third_party/install.bat +++ b/third_party/install.bat @@ -3,9 +3,6 @@ cd ctc_decoders if not exist kenlm ( git clone https://github.com/Doubledongli/kenlm.git - cd kenlm/ - git checkout df2d717e95183f79a90b2fa6e4307083a351ca6a - cd .. @echo. ) From d21c1f2b104e337bc10c7973ad60d9934faa1ad0 Mon Sep 17 00:00:00 2001 From: lidongdong Date: Mon, 18 Jul 2022 20:09:42 +0800 Subject: [PATCH 3/8] fix:change setup --- third_party/ctc_decoders/setup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/third_party/ctc_decoders/setup.py b/third_party/ctc_decoders/setup.py index ee81c8fd27a..f1eb42467a2 100644 --- a/third_party/ctc_decoders/setup.py +++ b/third_party/ctc_decoders/setup.py @@ -89,11 +89,11 @@ def compile_test(header, library): or fn.endswith('unittest.cc')) ] # yapf: enable -# LIBS = ['stdc++'] -LIBS = ['-static-libstdc++'] +LIBS = ['stdc++'] if platform.system() != 'Darwin': - # LIBS.append('rt') - pass + LIBS.append('rt') +if platform.system() != 'Windows': + LIBS = ['-static-libstdc++'] ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11'] @@ -127,12 +127,12 @@ def compile_test(header, library): ] setup( - name='paddlespeech_ldd_ctcdecoders', + name='paddlespeech_ctcdecoders', version='0.2.0', description="CTC decoders in paddlespeech", - author="PaddlePaddle Speech user lidongdong compile for windows", - author_email="double_dongli@foxmail.com", + author="PaddlePaddle Speech and Language Team", + author_email="paddlesl@baidu.com", url="https://github.com/PaddlePaddle/PaddleSpeech", license='Apache 2.0, GNU Lesser General Public License v3 (LGPLv3) (LGPL-3)', ext_modules=decoders_module, - py_modules=['paddlespeech_ldd_ctcdecoders']) + py_modules=['paddlespeech_ctcdecoders']) From c7294fae362bbcf3305e76411e261c60d9942bd4 Mon Sep 17 00:00:00 2001 From: lidongdong Date: Mon, 18 Jul 2022 20:10:22 +0800 Subject: [PATCH 4/8] fix:change setup --- third_party/ctc_decoders/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ctc_decoders/setup.py b/third_party/ctc_decoders/setup.py index f1eb42467a2..9a8b292a07b 100644 --- a/third_party/ctc_decoders/setup.py +++ b/third_party/ctc_decoders/setup.py @@ -92,7 +92,7 @@ def compile_test(header, library): LIBS = ['stdc++'] if platform.system() != 'Darwin': LIBS.append('rt') -if platform.system() != 'Windows': +if platform.system() == 'Windows': LIBS = ['-static-libstdc++'] ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11'] From 0b9ac9ba73e2b371299c2b08d3761698737008b4 Mon Sep 17 00:00:00 2001 From: Doubledongli Date: Tue, 19 Jul 2022 12:31:50 +0800 Subject: [PATCH 5/8] fix:change bat script and readme --- third_party/README.md | 34 +++++++++++++-------------------- third_party/install_win_ctc.bat | 21 ++++++++++++++++++++ 2 files changed, 34 insertions(+), 21 deletions(-) create mode 100644 third_party/install_win_ctc.bat diff --git a/third_party/README.md b/third_party/README.md index 843d0d3b2e6..d2c8c415fd1 100644 --- a/third_party/README.md +++ b/third_party/README.md @@ -1,27 +1,19 @@ -* [python_kaldi_features](https://github.com/ZitengWang/python_kaldi_features) -commit: fc1bd6240c2008412ab64dc25045cd872f5e126c -ref: https://zhuanlan.zhihu.com/p/55371926 -license: MIT +# install ctc_decoder for windows -* [python-pinyin](https://github.com/mozillazg/python-pinyin.git) -commit: 55e524aa1b7b8eec3d15c5306043c6cdd5938b03 -license: MIT +This is bat script to install paddlespeech_ctc_decoders for windows -* [zhon](https://github.com/tsroten/zhon) -commit: 09bf543696277f71de502506984661a60d24494c -license: MIT +## Prepare your environment -* [pymmseg-cpp](https://github.com/pluskid/pymmseg-cpp.git) -commit: b76465045717fbb4f118c4fbdd24ce93bab10a6d -license: MIT +insure your environment like this: -* [chinese_text_normalization](https://github.com/speechio/chinese_text_normalization.git) -commit: 9e92c7bf2d6b5a7974305406d8e240045beac51c -license: MIT +* gcc: version >= 12.1.0 +* cmake: version >= 3.24.0 +* make: version >= 3.82.90 +* visual studio: version >= 2019 -* [phkit](https://github.com/KuangDD/phkit.git) -commit: b2100293c1e36da531d7f30bd52c9b955a649522 -license: None +## Start your bat script -* [nnAudio](https://github.com/KinWaiCheuk/nnAudio.git) -license: MIT +```shell +start install_win_ctc.bat + +``` diff --git a/third_party/install_win_ctc.bat b/third_party/install_win_ctc.bat new file mode 100644 index 00000000000..0bf1e7bb127 --- /dev/null +++ b/third_party/install_win_ctc.bat @@ -0,0 +1,21 @@ +@echo off + +cd ctc_decoders +if not exist kenlm ( + git clone https://github.com/Doubledongli/kenlm.git + @echo. +) + +if not exist openfst-1.6.3 ( + echo "Download and extract openfst ..." + git clone https://gitee.com/koala999/openfst.git + ren openfst openfst-1.6.3 + @echo. +) + +if not exist ThreadPool ( + git clone https://github.com/progschj/ThreadPool.git + @echo. +) +echo "Install decoders ..." +python setup.py install --num_processes 4 \ No newline at end of file From 25fc89edf677bd44ef1350ca040878d3f156fca9 Mon Sep 17 00:00:00 2001 From: Doubledongli Date: Tue, 19 Jul 2022 12:34:33 +0800 Subject: [PATCH 6/8] delete old bat --- third_party/install.bat | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 third_party/install.bat diff --git a/third_party/install.bat b/third_party/install.bat deleted file mode 100644 index 0bf1e7bb127..00000000000 --- a/third_party/install.bat +++ /dev/null @@ -1,21 +0,0 @@ -@echo off - -cd ctc_decoders -if not exist kenlm ( - git clone https://github.com/Doubledongli/kenlm.git - @echo. -) - -if not exist openfst-1.6.3 ( - echo "Download and extract openfst ..." - git clone https://gitee.com/koala999/openfst.git - ren openfst openfst-1.6.3 - @echo. -) - -if not exist ThreadPool ( - git clone https://github.com/progschj/ThreadPool.git - @echo. -) -echo "Install decoders ..." -python setup.py install --num_processes 4 \ No newline at end of file From 7b284fd9a992705d18759fa130627918a308605f Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 19 Jul 2022 13:16:11 +0800 Subject: [PATCH 7/8] Update README.md --- third_party/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/third_party/README.md b/third_party/README.md index d2c8c415fd1..1f3c45ec124 100644 --- a/third_party/README.md +++ b/third_party/README.md @@ -1,6 +1,13 @@ +# python_kaldi_features + +[python_kaldi_features](https://github.com/ZitengWang/python_kaldi_features) +commit: fc1bd6240c2008412ab64dc25045cd872f5e126c +ref: https://zhuanlan.zhihu.com/p/55371926 +license: MIT + # install ctc_decoder for windows -This is bat script to install paddlespeech_ctc_decoders for windows +`install_win_ctc.bat` is bat script to install paddlespeech_ctc_decoders for windows ## Prepare your environment From 4bf07009d869a6c14cafbb553c28292384d395ae Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 19 Jul 2022 13:17:10 +0800 Subject: [PATCH 8/8] Update README.md --- third_party/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/README.md b/third_party/README.md index 1f3c45ec124..98e03b0a346 100644 --- a/third_party/README.md +++ b/third_party/README.md @@ -5,7 +5,7 @@ commit: fc1bd6240c2008412ab64dc25045cd872f5e126c ref: https://zhuanlan.zhihu.com/p/55371926 license: MIT -# install ctc_decoder for windows +# Install ctc_decoder for Windows `install_win_ctc.bat` is bat script to install paddlespeech_ctc_decoders for windows