Merge a230ad5 into 0196c42

MontrealCorpusTools · Oct 1, 2021 · 80e2484 · 80e2484
2 parents 0196c42 + a230ad5
commit 80e2484
Show file tree

Hide file tree

Showing 104 changed files with 3,169 additions and 3,953 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -36,7 +36,7 @@ install:
   - which python
   - which sox
   - conda list
-  - python -m montreal_forced_aligner.command_line.thirdparty download
+  - python -m montreal_forced_aligner.command_line.mfa thirdparty download
   - ls $HOME/Documents/MFA/thirdparty/bin -al
   - $HOME/Documents/MFA/thirdparty/bin/compute-mfcc-feats --help
   - $HOME/Documents/MFA/thirdparty/bin/ivector-extractor-est --help

diff --git a/docs/source/annotator.rst b/docs/source/annotator.rst
@@ -3,30 +3,30 @@
 
 .. _annotator:
 
-*********
-Annotator
-*********
+**********
+MFA Anchor
+**********
+
+Anchor is a GUI utility for MFA that allows users to modify transcripts and add/change entries in the pronunciation dictionary to interactively fix out-of-vocabulary issues.
 
 .. attention::
 
-   The GUI annotator is under development and is currently pre-alpha. Use at your own risk and please use version control
+   Anchor is under development and is currently pre-alpha. Use at your own risk and please use version control
    or back up any critical data.
 
-Currently the functionality of the Annotator GUI allows for users to modify transcripts and add/change
-entries in the pronunciation dictionary to interactively fix out of vocabulary issues.
-
-.. warning::
-
-   If you are trying to use the annotator from Windows, note that some issues will be present as native Windows use is not
-   fully supported. Specifically if you need G2P functionality, that does not function on Windows due to its dependencies
-   not being available (Pynini, Opengrm-ngram, OpenFst).
 
 To use the annotator, first follow the instructions in :ref:`installation`.  Once MFA is installed and thirdparty binaries
 have been downloaded, run the following command:
 
 .. code-block:: bash
 
-    mfa annotator
+    mfa anchor
+
+.. warning::
+
+   If you are trying to use the annotator from Windows, note that some issues will be present, as native Windows use is not
+   fully supported. Specifically, G2P functionality does not work on Windows because its dependencies
+   (Pynini, Opengrm-ngram, OpenFst) are not available there.
 
 Initial setup
 =============
@@ -132,3 +132,7 @@ The current available shortcuts are:
 
 
 
+Attribution
+===========
+
+The MFA annotator utility uses icons from FontAwesome.
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -7,6 +7,16 @@
 Changelog
 =========
 
+2.0.0b0
+-------
+
+Beta release!
+
+- Fixed an issue in transcription when using a .ARPA language model rather than one built in MFA
+- Fixed an issue in parsing filenames containing spaces
+- Added a ``mfa configure`` command to set global options.  Users can now specify a new default for arguments like ``--num_jobs``, ``--clean`` or ``--temp_directory``
+
+
 2.0.0a24
 --------
 

diff --git a/docs/source/commands.rst b/docs/source/commands.rst
@@ -52,6 +52,7 @@ Other utilities
 
    "download", "Download a model trained by MFA developers", :ref:`pretrained_models`
    "thirdparty", "Download and validate new third party binaries", :ref:`installation`
+   "configure", "Configure MFA to use customized defaults for command line arguments", :ref:`installation`
 
 
 Grapheme-to-phoneme

diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst
@@ -5,10 +5,20 @@
 Configuration
 *************
 
-Contents:
+Options available:
+
+.. option:: -h
+               --help
+
+   Display help message for the command
+
+
+
+Configuration of commands
+=========================
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 1
 
    configuration_align.rst
    configuration_transcription.rst

diff --git a/docs/source/configuration_align.rst b/docs/source/configuration_align.rst
@@ -196,7 +196,7 @@ Default training config file
      - sat:
          num_leaves: 2500
          max_gaussians: 15000
-         fmllr_power: 0.2
+         power: 0.2
          silence_weight: 0.0
          fmllr_update_type: "diag"
          subset: 10000
@@ -206,7 +206,7 @@ Default training config file
      - sat:
          num_leaves: 4200
          max_gaussians: 40000
-         fmllr_power: 0.2
+         power: 0.2
          silence_weight: 0.0
          fmllr_update_type: "diag"
          subset: 30000
@@ -246,7 +246,7 @@ Training configuration for 1.0
      - sat:
          num_leaves: 3100
          max_gaussians: 50000
-         fmllr_power: 0.2
+         power: 0.2
          silence_weight: 0.0
          cluster_threshold: 100
          fmllr_update_type: "full"

diff --git a/montreal_forced_aligner/aligner/base.py b/montreal_forced_aligner/aligner/base.py
@@ -1,10 +1,12 @@
 import os
 import logging
+import shutil
 
 from .. import __version__
 from ..multiprocessing import compile_information
 from ..config import TEMP_DIR
 
+from ..multiprocessing import convert_ali_to_textgrids
 from ..dictionary import MultispeakerDictionary
 
 
@@ -82,8 +84,17 @@ def dictionaries_for_job(self, job_name):
             return dictionary_names
         return None
 
-    def compile_information(self, model_directory, output_directory):
-        issues = compile_information(model_directory, self.corpus, self.corpus.num_jobs, self)
+    @property
+    def align_directory(self):
+        return os.path.join(self.temp_directory, 'align')
+
+    @property
+    def backup_output_directory(self):
+        return os.path.join(self.align_directory, 'textgrids')
+
+    def compile_information(self, output_directory):
+        model_directory = self.align_directory
+        issues, average_log_like = compile_information(model_directory, self.corpus, self.corpus.num_jobs, self)
         errors_path = os.path.join(output_directory, 'output_errors.txt')
         if os.path.exists(errors_path):
             self.logger.warning('There were errors when generating the textgrids. See the output_errors.txt in the '
@@ -95,9 +106,17 @@ def compile_information(self, model_directory, output_directory):
                     f.write('{}\t{}\n'.format(u, r))
             self.logger.warning('There were {} segments/files not aligned.  Please see {} for more details on why '
                                 'alignment failed for these files.'.format(len(issues), issue_path))
+        if os.path.exists(self.backup_output_directory) and os.listdir(self.backup_output_directory):
+            self.logger.info(f'Some TextGrids were not output in the output directory to avoid overwriting existing files. '
+                             f'You can find them in {self.backup_output_directory}, and if you would like to disable this '
+                             f'behavior, you can rerun with the --overwrite flag or run `mfa configure --always_overwrite`.')
 
     def export_textgrids(self, output_directory):
         """
         Export a TextGrid file for every sound file in the dataset
         """
-        raise NotImplementedError
+        if os.path.exists(self.backup_output_directory):
+            shutil.rmtree(self.backup_output_directory, ignore_errors=True)
+        convert_ali_to_textgrids(self.align_config, output_directory, self.align_directory, self.dictionary,
+                                 self.corpus, self.corpus.num_jobs)
+        self.compile_information(output_directory)
diff --git a/montreal_forced_aligner/aligner/pretrained.py b/montreal_forced_aligner/aligner/pretrained.py
@@ -1,10 +1,11 @@
 import os
 import re
+import time
 from collections import Counter
 
 from .base import BaseAligner
 from ..multiprocessing import (align, convert_ali_to_textgrids, compile_train_graphs,
-                               calc_fmllr, generate_pronunciations)
+                               calc_fmllr, generate_pronunciations, compile_information)
 from ..exceptions import KaldiProcessingError
 from ..helper import log_kaldi_errors, load_scp
 
@@ -62,17 +63,13 @@ def __init__(self, corpus, dictionary, acoustic_model, align_config,
         self.acoustic_model.export_model(self.align_directory)
         log_dir = os.path.join(self.align_directory, 'log')
         os.makedirs(log_dir, exist_ok=True)
-
+        self.align_config.logger = self.logger
         self.logger.info('Done with setup!')
 
     @property
     def model_directory(self):
         return os.path.join(self.temp_directory, 'model')
 
-    @property
-    def align_directory(self):
-        return os.path.join(self.temp_directory, 'align')
-
     def setup(self):
         self.dictionary.nonsil_phones = self.acoustic_model.meta['phones']
         super(PretrainedAligner, self).setup()
@@ -86,48 +83,25 @@ def align(self, subset=None):
         try:
             compile_train_graphs(self.align_directory, self.dictionary.output_directory,
                                  self.align_config.data_directory, self.corpus.num_jobs, self)
-            self.acoustic_model.feature_config.generate_features(self.corpus)
             log_dir = os.path.join(self.align_directory, 'log')
             os.makedirs(log_dir, exist_ok=True)
-            self.logger.info('Performing first-pass alignment...')
+
             align('final', self.align_directory, self.align_config.data_directory,
                   self.dictionary.optional_silence_csl,
                   self.corpus.num_jobs, self.align_config)
-
-            log_like = 0
-            tot_frames = 0
-            for j in range(self.corpus.num_jobs):
-                score_path = os.path.join(self.align_directory, 'ali.{}.scores'.format(j))
-                scores = load_scp(score_path, data_type=float)
-                for k, v in scores.items():
-                    log_like += v
-                    tot_frames += self.corpus.utterance_lengths[k]
-            if tot_frames:
-                self.logger.debug('Prior to SAT, average per frame likelihood (this might not actually mean anything): {}'.format(log_like/tot_frames))
-            else:
-                self.logger.debug('No files were aligned, this likely indicates serious problems with the aligner.')
+            unaligned, average_log_like = compile_information(self.align_directory, self.corpus, self.corpus.num_jobs, self)
+            self.logger.debug(f'Prior to SAT, average per frame likelihood (this might not actually mean anything): {average_log_like}')
             if not self.align_config.disable_sat and self.acoustic_model.feature_config.fmllr \
                     and not os.path.exists(os.path.join(self.align_directory, 'trans.0')):
-                self.logger.info('Calculating fMLLR for speaker adaptation...')
                 calc_fmllr(self.align_directory, self.align_config.data_directory,
                       self.dictionary.optional_silence_csl, self.corpus.num_jobs, self.align_config, initial=True, iteration='final')
-                self.logger.info('Performing second-pass alignment...')
                 align('final', self.align_directory, self.align_config.data_directory,
                       self.dictionary.optional_silence_csl,
                       self.corpus.num_jobs, self.align_config)
 
-                log_like = 0
-                tot_frames = 0
-                for j in range(self.corpus.num_jobs):
-                    score_path = os.path.join(self.align_directory, 'ali.{}.scores'.format(j))
-                    scores = load_scp(score_path, data_type=float)
-                    for k, v in scores.items():
-                        log_like += v
-                        tot_frames += self.corpus.utterance_lengths[k]
-                if tot_frames:
-                    self.logger.debug('Following SAT, average per frame likelihood (this might not actually mean anything): {}'.format(log_like/tot_frames))
-                else:
-                    self.logger.debug('No files were aligned, this likely indicates serious problems with the aligner.')
+                unaligned, average_log_like = compile_information(self.align_directory, self.corpus, self.corpus.num_jobs, self)
+                self.logger.debug(f'Following SAT, average per frame likelihood (this might not actually mean anything): {average_log_like}')
+
         except Exception as e:
             with open(dirty_path, 'w'):
                 pass
@@ -138,15 +112,6 @@ def align(self, subset=None):
         with open(done_path, 'w'):
             pass
 
-    def export_textgrids(self, output_directory):
-        """
-        Export a TextGrid file for every sound file in the dataset
-        """
-        ali_directory = self.align_directory
-        convert_ali_to_textgrids(self.align_config, output_directory, ali_directory, self.dictionary,
-                                 self.corpus, self.corpus.num_jobs, self)
-        self.compile_information(ali_directory, output_directory)
-
     def generate_pronunciations(self, output_path, calculate_silence_probs=False, min_count=1):
         pron_counts, utt_mapping = generate_pronunciations(self.align_config, self.align_directory, self.dictionary, self.corpus, self.corpus.num_jobs)
         if calculate_silence_probs:

diff --git a/montreal_forced_aligner/aligner/trainable.py b/montreal_forced_aligner/aligner/trainable.py
@@ -29,6 +29,8 @@ def __init__(self, corpus, dictionary, training_config, align_config, temp_direc
         self.pretrained_aligner = pretrained_aligner
         super(TrainableAligner, self).__init__(corpus, dictionary, align_config, temp_directory,
                                                call_back, debug, verbose, logger)
+        for trainer in self.training_config.training_configs:
+            trainer.logger = self.logger
 
     def setup(self):
         if self.dictionary is not None:
@@ -74,11 +76,6 @@ def train(self):
             previous = trainer
         previous.align(None)
 
-    def export_textgrids(self, output_directory):
-        """
-        Export a TextGrid file for every sound file in the dataset
-        """
-        ali_directory = self.training_config.values()[-1].align_directory
-        convert_ali_to_textgrids(self.align_config, output_directory, ali_directory, self.dictionary,
-                                 self.corpus, self.corpus.num_jobs, self)
-        self.compile_information(ali_directory, output_directory)
+    @property
+    def align_directory(self):
+        return self.training_config.values()[-1].align_directory
diff --git a/montreal_forced_aligner/command_line/adapt.py b/montreal_forced_aligner/command_line/adapt.py
@@ -32,13 +32,22 @@ def adapt_model(args, unknown_args=None):
         align_config = align_yaml_to_config(args.config_path)
     else:
         align_config = load_basic_align()
+    align_config.use_mp = not args.disable_mp
+    align_config.debug = args.debug
+    align_config.overwrite = args.overwrite
+    align_config.cleanup_textgrids = not args.disable_textgrid_cleanup
+
     if unknown_args:
         align_config.update_from_args(unknown_args)
     conf_path = os.path.join(data_directory, 'config.yml')
     if getattr(args, 'clean', False) and os.path.exists(data_directory):
         print('Cleaning old directory!')
         shutil.rmtree(data_directory, ignore_errors=True)
-    logger = setup_logger(command, data_directory)
+    if getattr(args, 'verbose', False):
+        log_level = 'debug'
+    else:
+        log_level = 'info'
+    logger = setup_logger(command, data_directory, console_level=log_level)
     logger.debug('ALIGN CONFIG:')
     log_config(logger, align_config)
     if os.path.exists(conf_path):
@@ -81,6 +90,8 @@ def adapt_model(args, unknown_args=None):
     acoustic_model = AcousticModel(args.acoustic_model_path, root_directory=model_directory)
     acoustic_model.log_details(logger)
     training_config = acoustic_model.adaptation_config()
+    training_config.training_configs[0].update({'beam': align_config.beam, 'retry_beam': align_config.retry_beam})
+    training_config.update_from_align(align_config)
     logger.debug('ADAPT TRAINING CONFIG:')
     log_config(logger, training_config)
     audio_dir = None
@@ -177,15 +188,4 @@ def run_adapt_model(args, unknown_args=None, downloaded_acoustic_models=None, do
     args.corpus_directory = args.corpus_directory.rstrip('/').rstrip('\\')
 
     validate_args(args, downloaded_acoustic_models, download_dictionaries)
-    adapt_model(args, unknown_args)
-
-
-if __name__ == '__main__':  # pragma: no cover
-    mp.freeze_support()
-    from montreal_forced_aligner.command_line.mfa import adapt_parser, fix_path, unfix_path, acoustic_languages, \
-        dict_languages
-
-    adapt_args, unknown = adapt_parser.parse_known_args()
-    fix_path()
-    run_adapt_model(adapt_args, unknown, acoustic_languages, dict_languages)
-    unfix_path()
+    adapt_model(args, unknown_args)