Fix bug in subset for sqlite (#648)

MontrealCorpusTools · Jun 7, 2023 · 26a675e
1 parent dcf8997
commit 26a675e
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 2 deletions.
diff --git a/docs/source/changelog/changelog_2.2.rst b/docs/source/changelog/changelog_2.2.rst
@@ -5,6 +5,11 @@
 2.2 Changelog
 *************
 
+2.2.13
+======
+
+- Fixes an issue with using SQLite during subset creation for training
+
 2.2.12
 ======
 

diff --git a/montreal_forced_aligner/command_line/mfa.py b/montreal_forced_aligner/command_line/mfa.py
@@ -136,6 +136,9 @@ def mfa_cli(ctx: click.Context) -> None:
             auto_server = False
     else:
         auto_server = getattr(GLOBAL_CONFIG.global_profile, "auto_server", True)
+    if "--no_use_postgres" in sys.argv or not GLOBAL_CONFIG.current_profile.use_postgres:
+        run_check = False
+        auto_server = False
     if auto_server:
         start_server()
     elif run_check:

diff --git a/montreal_forced_aligner/corpus/base.py b/montreal_forced_aligner/corpus/base.py
@@ -1076,7 +1076,11 @@ def create_subset(self, subset: int) -> None:
                             session.query(Utterance.id)
                             .join(Utterance.speaker)
                             .filter(Speaker.dictionary_id == dict_id)
-                            .filter(Utterance.text.op("~")(r" [^ ]+ "))
+                            .filter(
+                                Utterance.text.op("~")(r" [^ ]+ ")
+                                if GLOBAL_CONFIG.current_profile.use_postgres
+                                else Utterance.text.regexp_match(r" [^ ]+ ")
+                            )
                             .filter(Utterance.ignored == False)  # noqa
                             .order_by(Utterance.duration)
                             .limit(larger_subset_num)
@@ -1144,7 +1148,11 @@ def create_subset(self, subset: int) -> None:
                     # Get all shorter utterances that are not one word long
                     larger_subset_query = (
                         session.query(Utterance.id)
-                        .filter(Utterance.text.op("~")(r"\s\S+\s"))
+                        .filter(
+                            Utterance.text.op("~")(r"\s\S+\s")
+                            if GLOBAL_CONFIG.current_profile.use_postgres
+                            else Utterance.text.regexp_match(r"\s\S+\s")
+                        )
                         .filter(Utterance.ignored == False)  # noqa
                         .order_by(Utterance.duration)
                         .limit(larger_subset_num)