ReadAlongs · joanise · Nov 18, 2024 · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,19 +17,19 @@ repos:
         entry: isort
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
     -   id: black
         name: black
         entry: black
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
     -   id: mypy
         name: mypy
         entry: mypy
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
 -   repo: https://github.com/pycqa/flake8
     # do flake8 last to avoid duplicate reports
     rev: 7.0.0

diff --git a/readalongs/text/make_package.py b/readalongs/text/make_package.py
@@ -130,37 +130,37 @@ def create_web_component_html(
     try:
         js = requests.get(JS_BUNDLE_URL, timeout=10)
         js_status_code: Any = js.status_code
-    except requests.exceptions.ReadTimeout as e:
+    except requests.exceptions.ReadTimeout as e:  # pragma: no cover
         js_status_code = "TIMEOUT"
         LOGGER.warning(e)
 
     try:
         fonts = requests.get(FONTS_BUNDLE_URL, timeout=10)
         fonts_status_code: Any = fonts.status_code
-    except requests.exceptions.ReadTimeout as e:
+    except requests.exceptions.ReadTimeout as e:  # pragma: no cover
         LOGGER.warning(e)
         fonts_status_code = "TIMEOUT"
 
-    if js_status_code != 200:
+    if js_status_code != 200:  # pragma: no cover
         LOGGER.warning(
             f"Sorry, the JavaScript bundle that is supposed to be at {JS_BUNDLE_URL} returned a {js_status_code}. Your ReadAlong will be bundled using a version that may not be up-to-date. Please check your internet connection."
         )
         with open(
             os.path.join(os.path.dirname(__file__), "bundle.js"), encoding="utf8"
         ) as f:
             js_raw = f.read()
-    else:
+    else:  # pragma: no cover
         js_raw = js.text
 
-    if fonts_status_code != 200:
+    if fonts_status_code != 200:  # pragma: no cover
         LOGGER.warning(
             f"Sorry, the fonts bundle that is supposed to be at {FONTS_BUNDLE_URL} returned a {fonts_status_code}. Your ReadAlong will be bundled using a version that may not be up-to-date. Please check your internet connection."
         )
         with open(
             os.path.join(os.path.dirname(__file__), "bundle.css"), encoding="utf8"
         ) as f:
             fonts_raw = f.read()
-    else:
+    else:  # pragma: no cover
         fonts_raw = fonts.text
 
     return BASIC_HTML.format(

diff --git a/test/basic_test_case.py b/test/basic_test_case.py
@@ -1,7 +1,10 @@
 """Common base class for the ReadAlongs test suites"""
 
 import logging
+import os
+import sys
 import tempfile
+from contextlib import contextmanager
 from pathlib import Path
 from unittest import TestCase
 
@@ -65,3 +68,30 @@ def tearDown(self):
             # Some test cases can set the logging level to DEBUG when they pass
             # --debug to a CLI command, but don't let that affect subsequent tests.
             LOGGER.setLevel(logging.INFO)
+
+
+@contextmanager
+def silence_c_stderr():
+    """Silence stderr written at the file-descriptor level, e.g., by SoundSwallower.
+
+    Note: to silence stderr for both C and Python code, combine this with
+    redirect_stderr(), but you must use silence_c_stderr() first:
+        with silence_c_stderr(), redirect_stderr(io.StringIO()):
+            # code
+
+    Loosely inspired by https://stackoverflow.com/a/24277852, but much simplified to
+    address our narrow needs, namely to silence stderr in a context manager.
+    """
+
+    stderr_fileno = sys.stderr.fileno()
+    stderr_save = os.dup(stderr_fileno)
+    stderr_fd = os.open(os.devnull, os.O_RDWR)
+    try:
+        os.dup2(stderr_fd, stderr_fileno)
+        yield
+    finally:
+        # Always restore the real stderr, even if the wrapped code raises;
+        # otherwise stderr would stay silenced for the rest of the test run.
+        os.dup2(stderr_save, stderr_fileno)
+        os.close(stderr_save)
+        os.close(stderr_fd)
diff --git a/test/run.py b/test/run.py
@@ -13,6 +13,7 @@
    other: run the other tests
 """
 
+import argparse
 import os
 import re
 import sys
@@ -97,7 +98,7 @@ def describe_suite(suite: TestSuite):
 SUITES = ["all", "dev", "e2e", "prod", "api", "other"]
 
 
-def run_tests(suite: str, describe: bool = False) -> bool:
+def run_tests(suite: str, describe: bool = False, verbosity=3) -> bool:
     """Run the specified test suite.
 
     Args:
@@ -131,18 +132,27 @@ def run_tests(suite: str, describe: bool = False) -> bool:
         describe_suite(test_suite)
         return True
     else:
-        runner = TextTestRunner(verbosity=3)
+        runner = TextTestRunner(verbosity=verbosity)
         success = runner.run(test_suite).wasSuccessful()
         if not success:
             LOGGER.error("Some tests failed. Please see log above.")
         return success
 
 
 if __name__ == "__main__":
-    describe = "--describe" in sys.argv
-    if describe:
-        sys.argv.remove("--describe")
-
-    result = run_tests("" if len(sys.argv) <= 1 else sys.argv[1], describe)
+    parser = argparse.ArgumentParser(description="Run ReadAlongs/Studio test suites.")
+    parser.add_argument("--quiet", "-q", action="store_true", help="reduce output")
+    parser.add_argument(
+        "--describe", action="store_true", help="describe the selected test suite"
+    )
+    parser.add_argument(
+        "suite",
+        nargs="?",
+        default="dev",
+        help="the test suite to run [dev]",
+        choices=SUITES,
+    )
+    args = parser.parse_args()  # argparse rejects suites not listed in SUITES
+    result = run_tests(args.suite, args.describe, 1 if args.quiet else 3)
     if not result:
         sys.exit(1)
diff --git a/test/test_anchors.py b/test/test_anchors.py
@@ -3,9 +3,11 @@
 """Unit testing for the anchors functionality in readalongs align"""
 
 import os
+from contextlib import redirect_stderr
+from io import StringIO
 from unittest import main
 
-from basic_test_case import BasicTestCase
+from basic_test_case import BasicTestCase, silence_c_stderr
 
 from readalongs.align import align_audio
 from readalongs.log import LOGGER
@@ -18,10 +20,11 @@ def test_anchors_inner_only(self):
         """Test aligning with anchors only between existing text"""
 
         # ej-fra-anchors has anchors between words/sentences only
-        results = align_audio(
-            os.path.join(self.data_dir, "ej-fra-anchors.readalong"),
-            os.path.join(self.data_dir, "ej-fra.m4a"),
-        )
+        with redirect_stderr(StringIO()):
+            results = align_audio(
+                os.path.join(self.data_dir, "ej-fra-anchors.readalong"),
+                os.path.join(self.data_dir, "ej-fra.m4a"),
+            )
         words = results["words"]
         # The input text file has 99 words, so should the aligned segments.
         self.assertEqual(len(words), 99)
@@ -39,11 +42,12 @@ def test_anchors_outer_too(self):
 
         # ej-fra-anchors2 also has anchors before the first word and after the last word
         save_temps_prefix = os.path.join(self.tempdir, "anchors2-temps")
-        results = align_audio(
-            os.path.join(self.data_dir, "ej-fra-anchors2.readalong"),
-            os.path.join(self.data_dir, "ej-fra.m4a"),
-            save_temps=save_temps_prefix,
-        )
+        with redirect_stderr(StringIO()):
+            results = align_audio(
+                os.path.join(self.data_dir, "ej-fra-anchors2.readalong"),
+                os.path.join(self.data_dir, "ej-fra.m4a"),
+                save_temps=save_temps_prefix,
+            )
         words = results["words"]
         # The input text file has 99 words, so should the aligned segments.
         self.assertEqual(len(words), 99)
@@ -83,10 +87,11 @@ def test_anchors_align_modes(self):
         with open(xml_file, "wt", encoding="utf8") as f:
             print(xml_with_anchors, file=f)
         with self.assertLogs(LOGGER, level="INFO") as cm:
-            results = align_audio(
-                xml_file,
-                os.path.join(self.data_dir, "noise.mp3"),
-            )
+            with silence_c_stderr(), redirect_stderr(StringIO()):
+                results = align_audio(
+                    xml_file,
+                    os.path.join(self.data_dir, "noise.mp3"),
+                )
         words = results["words"]
         self.assertEqual(len(words), 10)
         logger_output = "\n".join(cm.output)

diff --git a/test/test_api.py b/test/test_api.py
@@ -5,6 +5,8 @@
 """
 
 import os
+from contextlib import redirect_stderr
+from io import StringIO
 from unittest import main
 
 import click
@@ -23,13 +25,14 @@ def test_call_align(self):
         # API accepts them too.
         langs = ("fra",)  # make sure language can be an iterable, not just a list.
         with SoundSwallowerStub("t0b0d0p0s0w0:920:1520", "t0b0d0p0s1w0:1620:1690"):
-            (status, exception, log) = api.align(
-                self.data_dir / "ej-fra.txt",
-                self.data_dir / "ej-fra.m4a",
-                self.tempdir / "output",
-                langs,
-                output_formats=["html", "TextGrid", "srt"],
-            )
+            with redirect_stderr(StringIO()):
+                (status, exception, log) = api.align(
+                    self.data_dir / "ej-fra.txt",
+                    self.data_dir / "ej-fra.m4a",
+                    self.tempdir / "output",
+                    langs,
+                    output_formats=["html", "TextGrid", "srt"],
+                )
         self.assertEqual(status, 0)
         self.assertTrue(exception is None)
         self.assertIn("Words (<w>) not present; tokenizing", log)
@@ -53,16 +56,18 @@ def test_call_align(self):
             "Make sure the API call doesn't not modify my variables",
         )
 
-        (status, exception, log) = api.align("", "", self.tempdir / "errors")
+        with redirect_stderr(StringIO()):
+            (status, exception, log) = api.align("", "", self.tempdir / "errors")
         self.assertNotEqual(status, 0)
         self.assertFalse(exception is None)
 
     def test_call_make_xml(self):
-        (status, exception, log) = api.make_xml(
-            self.data_dir / "ej-fra.txt",
-            self.tempdir / "prepared.readalong",
-            ("fra", "eng"),
-        )
+        with redirect_stderr(StringIO()):
+            (status, exception, log) = api.make_xml(
+                self.data_dir / "ej-fra.txt",
+                self.tempdir / "prepared.readalong",
+                ("fra", "eng"),
+            )
         self.assertEqual(status, 0)
         self.assertTrue(exception is None)
         self.assertIn("Wrote ", log)

diff --git a/test/test_audio.py b/test/test_audio.py
@@ -41,9 +41,6 @@ def align(self, input_text_path, input_audio_path, output_path, flags):
             input_audio_path,
             output_path,
         ] + flags
-        LOGGER.info(
-            f"Aligning {input_text_path} and {input_audio_path}, outputting to {output_path}"
-        )
         return run(args, capture_output=True, check=False, encoding="utf-8")
 
     def test_mute_section(self):

diff --git a/test/test_dna_text.py b/test/test_dna_text.py
@@ -2,6 +2,8 @@
 
 """Test handling of DNA text in tokenization"""
 
+from contextlib import redirect_stderr
+from io import StringIO
 from unittest import main
 
 from basic_test_case import BasicTestCase
@@ -23,7 +25,8 @@ def test_tok_all_words(self):
 <s>Voici une deuxième phrase.</s>
 </document>"""
         xml = parse_xml(txt)
-        tokenized = tokenize_xml.tokenize_xml(xml)
+        with redirect_stderr(StringIO()):
+            tokenized = tokenize_xml.tokenize_xml(xml)
         as_txt = etree.tounicode(tokenized)
         # print(etree.tounicode(tokenized))
 
@@ -54,7 +57,8 @@ def test_tok_some_words(self):
 <s>Un <foo do-not-align="1">mot ou deux</foo> à exclure.</s>
 </document>"""
         xml = parse_xml(txt)
-        tokenized = tokenize_xml.tokenize_xml(xml)
+        with redirect_stderr(StringIO()):
+            tokenized = tokenize_xml.tokenize_xml(xml)
         as_txt = etree.tounicode(tokenized)
         # print('as_txt="' + as_txt +'"')
 
@@ -96,7 +100,8 @@ def test_tok_div_p_s(self):
 </div>
 </document>"""
         xml = parse_xml(txt)
-        tokenized = tokenize_xml.tokenize_xml(xml)
+        with redirect_stderr(StringIO()):
+            tokenized = tokenize_xml.tokenize_xml(xml)
         as_txt = etree.tounicode(tokenized)
         # print('as_txt="' + as_txt +'"')
 
@@ -143,15 +148,17 @@ def test_dna_word(self):
 
         txt = """<s xml:lang="fra">Une <w do-not-align="true">exclude</w> phrase.</s>"""
         xml = parse_xml(txt)
-        tokenized = tokenize_xml.tokenize_xml(xml)
+        with redirect_stderr(StringIO()):
+            tokenized = tokenize_xml.tokenize_xml(xml)
         self.assertRaises(RuntimeError, add_ids, tokenized)
 
     def test_dna_word_nested(self):
         """You also can't have a <w> element inside a DNA element"""
 
         txt = """<s xml:lang="fra">Une <foo do-not-align="true"><bar><w>exclude</w></bar></foo> phrase.</s>"""
         xml = parse_xml(txt)
-        tokenized = tokenize_xml.tokenize_xml(xml)
+        with redirect_stderr(StringIO()):
+            tokenized = tokenize_xml.tokenize_xml(xml)
         self.assertRaises(RuntimeError, add_ids, tokenized)