Delta-ML · zh794390558 · Nov 15, 2019 · Sep 26, 2019 · Sep 26, 2019 · Sep 26, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -11,10 +11,9 @@ before_install:
   - docker run -it -d --name travis_con --user root -v ${DELTA_PATH}:${DOCKER_DELTA} ${CI_IMAGE} bash
   - docker exec travis_con bash -c "gcc -v && g++ -v"
   - docker exec travis_con bash -c "cd ${DOCKER_DELTA}; source env.sh"
-  - docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools; touch test.done"
     #- docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools; make basic check_install test"
   - docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools; make basic check_install"
-  - docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools; git clone --depth=1 https://github.com/kaldi-asr/kaldi.git"
+  - docker exec travis_con bash -c "cd ${DOCKER_DELTA}/tools/install; bash prepare_kaldi.sh"
 
 jobs:
   include:

diff --git a/MAINTAINERS b/MAINTAINERS
@@ -0,0 +1,2 @@
+Hui Zhang <zhtclz@foxmail.com>
+Chengyun Deng <deng_chengyun@126.com>
diff --git a/delta/__init__.py b/delta/__init__.py
@@ -13,8 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 import os
 
-
 PACKAGE_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/delta/data/feat/speech_feature.py b/delta/data/feat/speech_feature.py
@@ -15,11 +15,9 @@
 # ==============================================================================
 ''' speech feat entrypoint unittest'''
 import os
-
 import numpy as np
 import delta.compat as tf
 from absl import logging
-
 from delta.data.feat import speech_ops
 from delta.layers.ops import py_x_ops
 from delta.data.feat import python_speech_features as psf
@@ -86,7 +84,15 @@ def _freq_feat_graph(feat_name, **kwargs):
         spec = py_x_ops.spectrum(
             waveforms[:, 0],
             tf.cast(sample_rate, tf.dtypes.float32),
-            output_type=1)  #output_type: 1, power spec; 2 log power spec
+            window_length=0.025,
+            frame_length=0.010,
+            output_type=1,
+            snip_edges=1,
+            raw_energy=1,
+            preEph_coeff=0.97,
+            window_type='povey',
+            remove_dc_offset=True,
+            is_fbank=False)  #output_type: 1, power spec; 2 log power spec
         spec = tf.sqrt(spec)
         # shape must be [T, D, C]
         spec = tf.expand_dims(spec, -1)

diff --git a/delta/data/feat/speech_feature_test.py b/delta/data/feat/speech_feature_test.py
@@ -16,12 +16,10 @@
 ''' speech feature entrypoint unittest'''
 import os
 from pathlib import Path
-
 import librosa
 import numpy as np
 import delta.compat as tf
 from absl import logging
-
 from delta.data.feat import speech_ops
 from delta.data.feat import speech_feature
 from delta import PACKAGE_ROOT_DIR
@@ -42,9 +40,9 @@ def setUp(self):
 
     package_root = Path(PACKAGE_ROOT_DIR)
     self.wavfile = str(
-      package_root.joinpath('data/feat/python_speech_features/english.wav'))
+        package_root.joinpath('data/feat/python_speech_features/english.wav'))
     self.featfile = str(
-      package_root.joinpath('data/feat/python_speech_features/english.npy'))
+        package_root.joinpath('data/feat/python_speech_features/english.npy'))
 
   def tearDown(self):
     ''' tear down '''

diff --git a/delta/data/feat/tf_speech_feature_test.py b/delta/data/feat/tf_speech_feature_test.py
@@ -33,8 +33,7 @@ def setUp(self):
     package_root = Path(PACKAGE_ROOT_DIR)
     self.params = tffeat.speech_params(sr=8000, bins=40, cmvn=False)
     self.wavpath = str(
-        package_root.joinpath(
-            'data/feat/python_speech_features/english.wav'))
+        package_root.joinpath('data/feat/python_speech_features/english.wav'))
     self.sr_true, self.audio_true = load_wav(str(self.wavpath), sr=8000)
 
   def test_extract_feature(self):

diff --git a/delta/data/frontend/add_noise_end_to_end.py b/delta/data/frontend/add_noise_end_to_end.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import delta.compat as tf
+from delta.utils.hparam import HParams
+from delta.data.frontend.read_wav import ReadWav
+from delta.data.frontend.add_rir_noise_aecres import Add_rir_noise_aecres
+from delta.data.frontend.write_wav import WriteWav
+from delta.data.frontend.base_frontend import BaseFrontend
+
+
+class AddNoiseEndToEnd(BaseFrontend):
+  """Read a clean wav, add rir/noise/aecres to it and write the noisy wav."""
+
+  def __init__(self, config: dict):
+    super().__init__(config)
+    self.add_noise = Add_rir_noise_aecres(config)
+    self.read_wav = ReadWav(config)
+    self.write_wav = WriteWav(config)
+
+  @classmethod
+  def params(cls, config=None):
+    """
+        Set params.
+        :param config: contains ten optional parameters:
+            --sample_rate         : Sample frequency of waveform data. (int, default = 16000)
+            --if_add_rir          : If true, add rir to audio data. (bool, default = False)
+            --rir_filelist        : FileList path of rir.(string, default = 'rirlist.scp')
+            --if_add_noise        : If true, add random noise to audio data. (bool, default = False)
+            --snr_min             : Minimum SNR adds to signal. (float, default = 0)
+            --snr_max             : Maximum SNR adds to signal. (float, default = 30)
+            --noise_filelist      : FileList path of noise.(string, default = 'noiselist.scp')
+            --if_add_aecres       : If true, add aecres to audio data. (bool, default = False)
+            --aecres_filelist     : FileList path of aecres.(string, default = 'aecreslist.scp')
+            --audio_channels      : Presumably the channel count used by ReadWav/WriteWav; TODO confirm. (int, default = 1)
+        :return: An object of class HParams, which is a set of hyperparameters as name-value pairs.
+        """
+    sample_rate = 16000
+    if_add_rir = False
+    rir_filelist = 'rirlist.scp'
+    if_add_noise = False
+    noise_filelist = 'noiselist.scp'
+    snr_min = 0
+    snr_max = 30
+    if_add_aecres = False
+    aecres_filelist = 'aecreslist.scp'
+    audio_channels = 1
+
+    hparams = HParams(cls=cls)
+    hparams.add_hparam('sample_rate', sample_rate)
+    hparams.add_hparam('if_add_rir', if_add_rir)
+    hparams.add_hparam('if_add_noise', if_add_noise)
+    hparams.add_hparam('rir_filelist', rir_filelist)
+    hparams.add_hparam('noise_filelist', noise_filelist)
+    hparams.add_hparam('snr_min', snr_min)
+    hparams.add_hparam('snr_max', snr_max)
+    hparams.add_hparam('if_add_aecres', if_add_aecres)
+    hparams.add_hparam('aecres_filelist', aecres_filelist)
+    hparams.add_hparam('audio_channels', audio_channels)
+
+    if config is not None:
+      hparams.override_from_dict(config)
+
+    return hparams
+
+  def call(self, in_wavfile, out_wavfile):
+    """
+        Read a clean wav and write a noisy wav.
+        :param in_wavfile: clean wavfile path.
+        :param out_wavfile: noisy wavfile path.
+        :return: write wav operation.
+        """
+    with tf.name_scope('add_noise_end_to_end'):
+      input_data, sample_rate = self.read_wav(in_wavfile)
+      noisy_data = self.add_noise(input_data, sample_rate) / 32768  # presumably int16 range -> [-1, 1); TODO confirm
+      write_op = self.write_wav(out_wavfile, noisy_data, sample_rate)
+
+    return write_op
diff --git a/delta/data/frontend/add_noise_end_to_end_test.py b/delta/data/frontend/add_noise_end_to_end_test.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from pathlib import Path
+import delta.compat as tf
+from delta.data.frontend.add_noise_end_to_end import AddNoiseEndToEnd
+os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+from delta import PACKAGE_ROOT_DIR
+
+
+def change_file_path(scp_path, filetype, newfilePath):
+  ''' Copy list file `filetype` to `newfilePath` (both relative to `scp_path`),
+  prefixing every line with `scp_path`; the `with` blocks close the files. '''
+  with open(scp_path + filetype, 'r') as f:
+    lines = f.readlines()
+  with open(scp_path + newfilePath, 'w') as f:
+    for line in lines:
+      f.write(scp_path + line)
+
+
+class AddNoiseEndToEndTest(tf.test.TestCase):
+  ''' AddNoiseEndToEnd unittest '''
+
+  def test_add_noise_end_to_end(self):
+    ''' add noise and rir to sm1_cln.wav and run the op that writes sm1_cln_noisy.wav '''
+    wav_path = str(
+        Path(PACKAGE_ROOT_DIR).joinpath('layers/ops/data/sm1_cln.wav'))
+    # rewrite the noise && rir file lists so every entry is prefixed with data_path
+    data_path = str(Path(PACKAGE_ROOT_DIR).joinpath('layers/ops/data')) + '/'
+    noise_file = data_path + 'noiselist_new.scp'
+    change_file_path(data_path, 'noiselist.scp', 'noiselist_new.scp')
+    rir_file = data_path + 'rirlist_new.scp'
+    change_file_path(data_path, 'rirlist.scp', 'rirlist_new.scp')
+
+    with self.cached_session(use_gpu=False, force_gpu=False) as sess:
+      config = {
+          'if_add_noise': True,
+          'noise_filelist': noise_file,
+          'if_add_rir': True,
+          'rir_filelist': rir_file
+      }
+      noisy_path = wav_path[:-4] + '_noisy.wav'  # drop the '.wav' suffix first
+      add_noise_end_to_end = AddNoiseEndToEnd.params(config).instantiate()
+      writewav_op = add_noise_end_to_end(wav_path, noisy_path)
+      sess.run(writewav_op)
+
+
+if __name__ == '__main__':
+  # run the test cases defined in this module
+  tf.test.main()
diff --git a/delta/data/frontend/add_rir_noise_aecres.py b/delta/data/frontend/add_rir_noise_aecres.py
@@ -0,0 +1,100 @@
+# Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import delta.compat as tf
+from delta.utils.hparam import HParams
+from delta.layers.ops import py_x_ops
+from delta.data.frontend.base_frontend import BaseFrontend
+
+
+class Add_rir_noise_aecres(BaseFrontend):
+  """Frontend wrapping the `add_rir_noise_aecres` op from `py_x_ops`."""
+
+  def __init__(self, config: dict):
+    super().__init__(config)
+
+  @classmethod
+  def params(cls, config=None):
+    """
+        Set params.
+        :param config: contains nine optional parameters:
+            --sample_rate         : Sample frequency of waveform data. (int, default = 16000)
+            --if_add_rir          : If true, add rir to audio data. (bool, default = False)
+            --rir_filelist        : FileList path of rir.(string, default = 'rirlist.scp')
+            --if_add_noise        : If true, add random noise to audio data. (bool, default = False)
+            --snr_min             : Minimum SNR adds to signal. (float, default = 0)
+            --snr_max             : Maximum SNR adds to signal. (float, default = 30)
+            --noise_filelist      : FileList path of noise.(string, default = 'noiselist.scp')
+            --if_add_aecres       : If true, add aecres to audio data. (bool, default = False)
+            --aecres_filelist     : FileList path of aecres.(string, default = 'aecreslist.scp')
+        :return: An object of class HParams, which is a set of hyperparameters as name-value pairs.
+        """
+    sample_rate = 16000
+    if_add_rir = False
+    rir_filelist = 'rirlist.scp'
+    if_add_noise = False
+    noise_filelist = 'noiselist.scp'
+    snr_min = 0
+    snr_max = 30
+    if_add_aecres = False
+    aecres_filelist = 'aecreslist.scp'
+
+    hparams = HParams(cls=cls)
+    hparams.add_hparam('sample_rate', sample_rate)
+    hparams.add_hparam('if_add_rir', if_add_rir)
+    hparams.add_hparam('if_add_noise', if_add_noise)
+    hparams.add_hparam('rir_filelist', rir_filelist)
+    hparams.add_hparam('noise_filelist', noise_filelist)
+    hparams.add_hparam('snr_min', snr_min)
+    hparams.add_hparam('snr_max', snr_max)
+    hparams.add_hparam('if_add_aecres', if_add_aecres)
+    hparams.add_hparam('aecres_filelist', aecres_filelist)
+
+    if config is not None:
+      hparams.override_from_dict(config)
+
+    return hparams
+
+  def call(self, audio_data, sample_rate=None):
+    """
+        Add rir, noise and/or aecres to audio data, as enabled in the config.
+        :param audio_data: the audio signal to be augmented. Should be an (1, N) tensor.
+        :param sample_rate: [option]the samplerate of the signal we working with, defaults to the configured `sample_rate`.
+        :return: A float tensor of size N containing add-noise audio.
+        """
+
+    p = self.config
+    with tf.name_scope('add_rir_noise_aecres'):
+      if sample_rate is None:  # identity test: `==` would dispatch to Tensor.__eq__
+        sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)
+      # the op below only runs if the incoming sample rate equals the configured one
+      assert_op = tf.assert_equal(
+          tf.constant(p.sample_rate), tf.cast(sample_rate, dtype=tf.int32))
+      with tf.control_dependencies([assert_op]):
+        sample_rate = tf.cast(sample_rate, dtype=float)
+        add_rir_noise_aecres_out = py_x_ops.add_rir_noise_aecres(
+            audio_data,
+            sample_rate,
+            if_add_rir=p.if_add_rir,
+            rir_filelist=p.rir_filelist,
+            if_add_noise=p.if_add_noise,
+            snr_min=p.snr_min,
+            snr_max=p.snr_max,
+            noise_filelist=p.noise_filelist,
+            if_add_aecres=p.if_add_aecres,
+            aecres_filelist=p.aecres_filelist)
+
+        return tf.squeeze(add_rir_noise_aecres_out)