Commit: 3.0.7 (#803)

mmcauliffe committed May 3, 2024
1 parent 33b3127 commit 4499f28
Showing 23 changed files with 246 additions and 72 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -37,7 +37,7 @@ conda env create -n mfa-dev -f environment.yml
Alternatively, the dependencies can be installed via:

```
conda install -c conda-forge python=3.8 kaldi sox librosa biopython praatio tqdm requests colorama pyyaml pynini openfst baumwelch ngram
conda install -c conda-forge python=3.11 kaldi librosa biopython praatio tqdm requests colorama pyyaml pynini openfst baumwelch ngram
```

MFA can be installed in develop mode via:
2 changes: 0 additions & 2 deletions ci/docker_environment.yaml
@@ -12,8 +12,6 @@ dependencies:
- pyyaml
- dataclassy
- kaldi=*=*cpu*
- sox
- ffmpeg
- pynini
- openfst=1.8.3
- scikit-learn<1.3
11 changes: 10 additions & 1 deletion docs/source/changelog/changelog_3.0.rst
@@ -5,13 +5,22 @@
3.0 Changelog
*************

3.0.7
-----

- Added a check of the current version against the latest available version on each run
- Added a :code:`--final_clean` flag to clean temporary files at the end of each run, along with an :code:`--always_final_clean` flag for :code:`mfa configure` (see the example below)
- Removed dependencies on :code:`sox` and :code:`ffmpeg`, as audio loading is now done through :code:`librosa` in :code:`kalpy`
- Removed poorly aligned files in the training subset from further training
- Fixed an issue with handling specified words for cutoff modeling
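
For example, a sketch of enabling end-of-run cleanup (paths below are placeholders, and :code:`english_mfa` assumes the pretrained English MFA acoustic model):

::

    # make end-of-run cleanup the default for the current profile
    mfa configure --always_final_clean
    # or request it for a single run
    mfa align /path/to/corpus /path/to/dictionary.dict english_mfa /path/to/output --final_clean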

3.0.6
-----

- Fixed an issue where alignment analysis would not produce data for speech log likelihood and phone duration deviation
- Changed phone duration deviation metric to be maximum duration deviation rather than average across all phones in the utterance
- Fixed a crash when an empty phone set was specified in phone groups configuration files
- Fixed a crash when when using the :code:`--language` flag with values other than :code`japanese`, :code`thai`, :code`chinese` or :code`korean`
- Fixed a crash when using the :code:`--language` flag with values other than :code:`japanese`, :code:`thai`, :code:`chinese` or :code:`korean`

3.0.5
=====
17 changes: 15 additions & 2 deletions docs/source/user_guide/dictionary.rst
@@ -191,17 +191,30 @@ Modeling cutoffs and hesitations

Often in spontaneous speech, speakers will produce truncated or cut-off forms of the following word or words. To help model this specific case, using the flag :code:`--use_cutoff_model` will enable a mode where pronunciations are generated for cutoff words matching one of the following criteria:

1. The cutoff word matches the pattern of :code:`{start_bracket}(cutoff|hes)`, where :code:`{start_bracket}` is the set of all left side brackets defined in :code:`brackets` (:ref:`configuration_dictionary`). The following word must not be an OOV or non-speech word (silence, laughter, another cutoff, etc).
1. The cutoff word matches the pattern of :code:`{start_bracket}(cutoff|hes)`, where :code:`{start_bracket}` is the set of all left side brackets defined in :code:`brackets` (:ref:`configuration_dictionary`). Optionally, you can specify the intended word via a hyphen within the brackets (e.g., :code:`<cutoff-cut>`). If a target word isn't specified, then the immediately following word will be used, provided it is not an OOV or non-speech word (silence, laughter, another cutoff, etc.).
2. The cutoff word matches the pattern of :code:`{start_bracket}(cutoff|hes)[-_](word){end_bracket}`, where start and end brackets are defined in :code:`brackets` (:ref:`configuration_dictionary`). The :code:`word` will be used in place of the following word above, but needs to be present in the dictionary, otherwise the target word for the cutoff will default back to the following word.

The generated pronunciations will be subsequences of the following word, along with an :code:`spn` pronunciation. For example, given an utterance transcript like "<cutoff> cut off" will have the following pronunciations generated for the `English (US) MFA dictionary <https://mfa-models.readthedocs.io/en/latest/dictionary/English/English%20%28US%29%20MFA%20dictionary%20v3_0_0.html>`_:
The generated pronunciations will be subsequences of the following word, along with an :code:`spn` pronunciation. For example, consider an utterance transcript like

::

<cutoff-off> with the <cutoff> <cutoff> cut off


The following pronunciations will be generated for the `English (US) MFA dictionary <https://mfa-models.readthedocs.io/en/latest/dictionary/English/English%20%28US%29%20MFA%20dictionary%20v3_0_0.html>`_:

::

<cutoff> spn
<cutoff-cut> spn
<cutoff-cut> kʰ ɐ t
<cutoff-cut> kʰ ɐ
<cutoff-cut> kʰ
<cutoff-off> spn
<cutoff-off> ɒ f
<cutoff-off> ɒ
<cutoff-off> ɑ f
<cutoff-off> ɑ
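
For reference, a sketch of an alignment run with cutoff modeling enabled (corpus and output paths are placeholders; :code:`english_us_mfa` and :code:`english_mfa` assume the pretrained English (US) MFA dictionary and acoustic model):

::

    mfa align /path/to/corpus english_us_mfa english_mfa /path/to/output --use_cutoff_model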


.. _speaker_dictionaries:
2 changes: 0 additions & 2 deletions environment.yml
@@ -13,8 +13,6 @@ dependencies:
- pyyaml
- dataclassy
- kaldi=*=*cpu*
- sox
- ffmpeg
- scipy
- pynini
- openfst=1.8.3
44 changes: 44 additions & 0 deletions montreal_forced_aligner/abc.py
@@ -9,6 +9,7 @@
import contextlib
import logging
import os
import re
import shutil
import subprocess
import sys
@@ -28,6 +29,7 @@
get_type_hints,
)

import requests
import sqlalchemy
import yaml
from sqlalchemy.orm import scoped_session, sessionmaker
@@ -694,6 +696,30 @@ def cleanup(self) -> None:
logger.error("There was an error in the run, please see the log.")
else:
logger.info(f"Done! Everything took {time.time() - self.start_time:.3f} seconds")
if config.FINAL_CLEAN:
logger.debug(
"Cleaning up temporary files, use the --debug flag to keep temporary files."
)
if hasattr(self, "delete_database"):
if config.USE_POSTGRES:
proc = subprocess.run(
[
"dropdb",
f"--host={config.database_socket()}",
"--if-exists",
"--force",
self.identifier,
],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
check=True,
encoding="utf-8",
)
logger.debug(f"Stdout: {proc.stdout}")
logger.debug(f"Stderr: {proc.stderr}")
else:
self.delete_database()
self.clean_working_directory()
self.save_worker_config()
self.cleanup_logger()
except (NameError, ValueError): # already cleaned up
@@ -780,6 +806,24 @@ def setup_logger(self) -> None:
os.makedirs(self.output_directory, exist_ok=True)
configure_logger("mfa", log_file=self.log_file)
logger = logging.getLogger("mfa")
if config.VERBOSE:
try:
response = requests.get(
"https://api.github.com/repos/MontrealCorpusTools/Montreal-Forced-Aligner/releases/latest"
)
latest_version = response.json()["tag_name"].replace("v", "")
if current_version < latest_version:
logger.debug(
f"You are currently running an older version of MFA ({current_version}) than the latest available ({latest_version}). "
f"To update, please run mfa_update."
)
except KeyError:
pass
if re.search(r"\d+\.\d+\.\d+a", current_version) is not None:
logger.debug(
"Please be aware that you are running an alpha version of MFA. If you would like to install a more "
"stable version, please visit https://montreal-forced-aligner.readthedocs.io/en/latest/installation.html#installing-older-versions-of-mfa",
)
logger.debug(f"Beginning run for {self.data_source_identifier}")
logger.debug(f'Using "{config.CURRENT_PROFILE_NAME}" profile')
if config.USE_MP:
85 changes: 72 additions & 13 deletions montreal_forced_aligner/acoustic_modeling/trainer.py
@@ -500,6 +500,57 @@ def export_model(self, output_model_path: Path) -> None:
self.training_configs[self.final_identifier].export_model(output_model_path)
logger.info(f"Saved model to {output_model_path}")

def quality_check_subset(self):
from _kalpy.util import Int32VectorWriter
from kalpy.gmm.data import AlignmentArchive
from kalpy.utils import generate_write_specifier

with self.session() as session:
utterance_ids = set(
x[0]
for x in session.query(Utterance.id)
.filter(Utterance.in_subset == True, Utterance.duration_deviation > 10) # noqa
.all()
)
logger.debug(
f"Removing {len(utterance_ids)} utterances from subset due to large duration deviations"
)
bulk_update(session, Utterance, [{"id": x, "in_subset": False} for x in utterance_ids])
session.commit()
for j in self.jobs:
ali_paths = j.construct_path_dictionary(self.working_directory, "ali", "ark")
temp_ali_paths = j.construct_path_dictionary(
self.working_directory, "temp_ali", "ark"
)
for dict_id, ali_path in ali_paths.items():
new_path = temp_ali_paths[dict_id]
write_specifier = generate_write_specifier(new_path)
writer = Int32VectorWriter(write_specifier)
alignment_archive = AlignmentArchive(ali_path)

for alignment in alignment_archive:
if alignment.utterance_id in utterance_ids:
continue
writer.Write(str(alignment.utterance_id), alignment.alignment)
del alignment_archive
writer.Close()
ali_path.unlink()
new_path.rename(ali_path)
feat_path = j.construct_path(
j.corpus.current_subset_directory, "feats", "scp", dictionary_id=dict_id
)
feat_lines = []
with mfa_open(feat_path, "r") as feat_file:
for line in feat_file:
utterance_id = line.split(maxsplit=1)[0]
if utterance_id in utterance_ids:
continue
feat_lines.append(line)

with mfa_open(feat_path, "w") as feat_file:
for line in feat_lines:
feat_file.write(line)

def train(self) -> None:
"""
Run through the training configurations to produce a final acoustic model
@@ -527,20 +578,21 @@ def train(self) -> None:
previous.exported_model_path, self.working_directory
)
self.align()
if config.DEBUG:
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
self.collect_alignments()
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
self.collect_alignments()
self.analyze_alignments()
if self.current_subset != 0:
self.quality_check_subset()

self.set_current_workflow(trainer.identifier)
if trainer.identifier.startswith("pronunciation_probabilities"):
if config.DEBUG:
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
trainer.train_pronunciation_probabilities()
else:
trainer.train()
@@ -623,6 +675,10 @@ def compute_phone_pdf_counts(self) -> None:
logger.info("Finished accumulating transition stats!")

def finalize_training(self):
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
self.compute_phone_pdf_counts()
self.collect_alignments()
self.analyze_alignments()
@@ -662,8 +718,11 @@ def num_current_utterances(self) -> int:
def align_options(self) -> MetaDict:
"""Alignment options"""
if self.current_aligner is not None:
return self.current_aligner.align_options
return super().align_options
options = self.current_aligner.align_options
else:
options = super().align_options
options["boost_silence"] = max(1.25, options["boost_silence"])
return options

def align(self) -> None:
"""
3 changes: 2 additions & 1 deletion montreal_forced_aligner/acoustic_modeling/triphone.py
@@ -142,7 +142,7 @@ def __init__(self, args: TreeStatsArguments):
self.working_directory = args.working_directory
self.model_path = args.model_path

def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
def _run(self):
"""Run the function"""
with self.session() as session, thread_logger(
"kalpy.train", self.log_path, job_name=self.job_name
@@ -166,6 +166,7 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
feature_archive = job.construct_feature_archive(self.working_directory, dict_id)
ali_path = job.construct_path(self.working_directory, "ali", "ark", dict_id)
train_logger.debug("Feature Archive information:")
train_logger.debug(f"File: {feature_archive.file_name}")
train_logger.debug(f"CMVN: {feature_archive.cmvn_read_specifier}")
train_logger.debug(f"Deltas: {feature_archive.use_deltas}")
train_logger.debug(f"Splices: {feature_archive.use_splices}")
18 changes: 18 additions & 0 deletions montreal_forced_aligner/command_line/anchor.py
@@ -4,8 +4,11 @@
import logging
import sys

import requests
import rich_click as click

from montreal_forced_aligner import config

__all__ = ["anchor_cli"]

logger = logging.getLogger("mfa")
@@ -24,4 +27,19 @@ def anchor_cli(*args, **kwargs) -> None: # pragma: no cover
"Anchor annotator utility is not installed, please install it via `conda install -c conda-forge anchor-annotator`."
)
sys.exit(1)
if config.VERBOSE:
try:
from anchor._version import version

response = requests.get(
"https://api.github.com/repos/MontrealCorpusTools/Anchor-annotator/releases/latest"
)
latest_version = response.json()["tag_name"].replace("v", "")
if version < latest_version:
click.echo(
f"You are currently running an older version of Anchor annotator ({version}) than the latest available ({latest_version}). "
f"To update, please run mfa_update."
)
except ImportError:
pass
main()
9 changes: 8 additions & 1 deletion montreal_forced_aligner/command_line/configure.py
@@ -37,10 +37,17 @@
@click.option(
"--always_clean/--never_clean",
"clean",
help="Turn on/off clean mode where MFA will clean temporary files before each run. "
help="Turn on/off mode where MFA will clean temporary files before each run. "
f"Currently defaults to {config.CLEAN}.",
default=None,
)
@click.option(
"--always_final_clean/--never_final_clean",
"final_clean",
help="Turn on/off mode where MFA will clean temporary files at the end of each run. "
f"Currently defaults to {config.FINAL_CLEAN}.",
default=None,
)
@click.option(
"--always_verbose/--never_verbose",
"verbose",
14 changes: 1 addition & 13 deletions montreal_forced_aligner/command_line/mfa.py
@@ -3,7 +3,6 @@

import atexit
import logging
import re
import sys
import time
import warnings
@@ -118,17 +117,6 @@ def mfa_cli(ctx: click.Context) -> None:
"""
from montreal_forced_aligner.command_line.utils import check_server, start_server, stop_server

try:
from montreal_forced_aligner._version import version

if re.search(r"\d+\.\d+\.\d+a", version) is not None:
print(
"Please be aware that you are running an alpha version of MFA. If you would like to install a more "
"stable version, please visit https://montreal-forced-aligner.readthedocs.io/en/latest/installation.html#installing-older-versions-of-mfa",
file=sys.stderr,
)
except ImportError:
pass
config.load_configuration()
auto_server = False
run_check = True
@@ -182,7 +170,7 @@ def version_cli():
from montreal_forced_aligner._version import version
except ImportError:
version = None
print(version)
click.echo(version)


mfa_cli.add_command(adapt_model_cli)
6 changes: 6 additions & 0 deletions montreal_forced_aligner/command_line/utils.py
@@ -73,6 +73,12 @@ def common_options(f: typing.Callable) -> typing.Callable:
help=f"Remove files from previous runs, default is {config.CLEAN}",
default=None,
),
click.option(
"--final_clean/--no_final_clean",
"final_clean",
help=f"Remove temporary files at the end of run, default is {config.FINAL_CLEAN}",
default=None,
),
click.option(
"--verbose/--no_verbose",
"-v/-nv",
1 change: 1 addition & 0 deletions montreal_forced_aligner/config.py
@@ -136,6 +136,7 @@ def update_command_history(command_data: Dict[str, Any]) -> None:


CLEAN = False
FINAL_CLEAN = False
VERBOSE = False
DEBUG = False
QUIET = False
