Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@ repos:
entry: isort
language: system
types: [python]
stages: [commit]
stages: [pre-commit]
- id: black
name: black
entry: black
language: system
types: [python]
stages: [commit]
stages: [pre-commit]
- id: mypy
name: mypy
entry: mypy
language: system
types: [python]
stages: [commit]
stages: [pre-commit]
- repo: https://github.com/pycqa/flake8
# do flake8 last to avoid duplicate reports
rev: 7.0.0
Expand Down
12 changes: 6 additions & 6 deletions readalongs/text/make_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,37 +130,37 @@ def create_web_component_html(
try:
js = requests.get(JS_BUNDLE_URL, timeout=10)
js_status_code: Any = js.status_code
except requests.exceptions.ReadTimeout as e:
except requests.exceptions.ReadTimeout as e: # pragma: no cover
js_status_code = "TIMEOUT"
LOGGER.warning(e)

try:
fonts = requests.get(FONTS_BUNDLE_URL, timeout=10)
fonts_status_code: Any = fonts.status_code
except requests.exceptions.ReadTimeout as e:
except requests.exceptions.ReadTimeout as e: # pragma: no cover
LOGGER.warning(e)
fonts_status_code = "TIMEOUT"

if js_status_code != 200:
if js_status_code != 200: # pragma: no cover
LOGGER.warning(
f"Sorry, the JavaScript bundle that is supposed to be at {JS_BUNDLE_URL} returned a {js_status_code}. Your ReadAlong will be bundled using a version that may not be up-to-date. Please check your internet connection."
)
with open(
os.path.join(os.path.dirname(__file__), "bundle.js"), encoding="utf8"
) as f:
js_raw = f.read()
else:
else: # pragma: no cover
js_raw = js.text

if fonts_status_code != 200:
if fonts_status_code != 200: # pragma: no cover
LOGGER.warning(
f"Sorry, the fonts bundle that is supposed to be at {FONTS_BUNDLE_URL} returned a {fonts_status_code}. Your ReadAlong will be bundled using a version that may not be up-to-date. Please check your internet connection."
)
with open(
os.path.join(os.path.dirname(__file__), "bundle.css"), encoding="utf8"
) as f:
fonts_raw = f.read()
else:
else: # pragma: no cover
fonts_raw = fonts.text

return BASIC_HTML.format(
Expand Down
26 changes: 26 additions & 0 deletions test/basic_test_case.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Common base class for the ReadAlongs test suites"""

import logging
import os
import sys
import tempfile
from contextlib import contextmanager
from pathlib import Path
from unittest import TestCase

Expand Down Expand Up @@ -65,3 +68,26 @@ def tearDown(self):
# Some test cases can set the logging level to DEBUG when they pass
# --debug to a CLI command, but don't let that affect subsequent tests.
LOGGER.setLevel(logging.INFO)


@contextmanager
def silence_c_stderr():
    """Silence stderr output written at the file-descriptor level.

    This discards what C code (e.g., SoundSwallower) writes to stderr by
    pointing the file descriptor behind sys.stderr at the null device for the
    duration of the ``with`` block, then restoring it.

    Note: to silence stderr for both C and Python code, combine this with
    redirect_stderr(), but you must use silence_c_stderr() first, because it
    needs sys.stderr to still have a real file descriptor when it is entered:
        with silence_c_stderr(), redirect_stderr(io.StringIO()):
            # code

    The original descriptor is restored even if the body raises.

    Loosely inspired by https://stackoverflow.com/a/24277852, but much simplified to
    address our narrow needs, namely to silence stderr in a context manager.
    """

    stderr_fileno = sys.stderr.fileno()
    # Push out anything Python has already buffered so it is not lost or
    # reordered once the descriptor is swapped.
    sys.stderr.flush()
    # Keep a duplicate of the real stderr so it can be put back afterwards.
    stderr_save = os.dup(stderr_fileno)
    try:
        devnull_fd = os.open(os.devnull, os.O_RDWR)
        try:
            os.dup2(devnull_fd, stderr_fileno)
        finally:
            # After dup2, stderr_fileno refers to the null device on its own;
            # the extra descriptor is no longer needed.
            os.close(devnull_fd)
        yield
    finally:
        # Always restore the real stderr, including when the body raised.
        os.dup2(stderr_save, stderr_fileno)
        os.close(stderr_save)
24 changes: 17 additions & 7 deletions test/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
other: run the other tests
"""

import argparse
import os
import re
import sys
Expand Down Expand Up @@ -97,7 +98,7 @@ def describe_suite(suite: TestSuite):
SUITES = ["all", "dev", "e2e", "prod", "api", "other"]


def run_tests(suite: str, describe: bool = False) -> bool:
def run_tests(suite: str, describe: bool = False, verbosity=3) -> bool:
"""Run the specified test suite.

Args:
Expand Down Expand Up @@ -131,18 +132,27 @@ def run_tests(suite: str, describe: bool = False) -> bool:
describe_suite(test_suite)
return True
else:
runner = TextTestRunner(verbosity=3)
runner = TextTestRunner(verbosity=verbosity)
success = runner.run(test_suite).wasSuccessful()
if not success:
LOGGER.error("Some tests failed. Please see log above.")
return success


if __name__ == "__main__":
describe = "--describe" in sys.argv
if describe:
sys.argv.remove("--describe")

result = run_tests("" if len(sys.argv) <= 1 else sys.argv[1], describe)
parser = argparse.ArgumentParser(description="Run ReadAlongs/Studio test suites.")
parser.add_argument("--quiet", "-q", action="store_true", help="reduce output")
parser.add_argument(
"--describe", action="store_true", help="describe the selected test suite"
)
parser.add_argument(
"suite",
nargs="?",
default="dev",
help="the test suite to run [dev]",
choices=SUITES,
)
args = parser.parse_args()
result = run_tests(args.suite, args.describe, 1 if args.quiet else 3)
if not result:
sys.exit(1)
33 changes: 19 additions & 14 deletions test/test_anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
"""Unit testing for the anchors functionality in readalongs align"""

import os
from contextlib import redirect_stderr
from io import StringIO
from unittest import main

from basic_test_case import BasicTestCase
from basic_test_case import BasicTestCase, silence_c_stderr

from readalongs.align import align_audio
from readalongs.log import LOGGER
Expand All @@ -18,10 +20,11 @@ def test_anchors_inner_only(self):
"""Test aligning with anchors only between existing text"""

# ej-fra-anchors has anchors between words/sentences only
results = align_audio(
os.path.join(self.data_dir, "ej-fra-anchors.readalong"),
os.path.join(self.data_dir, "ej-fra.m4a"),
)
with redirect_stderr(StringIO()):
results = align_audio(
os.path.join(self.data_dir, "ej-fra-anchors.readalong"),
os.path.join(self.data_dir, "ej-fra.m4a"),
)
words = results["words"]
# The input text file has 99 words, so should the aligned segments.
self.assertEqual(len(words), 99)
Expand All @@ -39,11 +42,12 @@ def test_anchors_outer_too(self):

# ej-fra-anchors2 also has anchors before the first word and after the last word
save_temps_prefix = os.path.join(self.tempdir, "anchors2-temps")
results = align_audio(
os.path.join(self.data_dir, "ej-fra-anchors2.readalong"),
os.path.join(self.data_dir, "ej-fra.m4a"),
save_temps=save_temps_prefix,
)
with redirect_stderr(StringIO()):
results = align_audio(
os.path.join(self.data_dir, "ej-fra-anchors2.readalong"),
os.path.join(self.data_dir, "ej-fra.m4a"),
save_temps=save_temps_prefix,
)
words = results["words"]
# The input text file has 99 words, so should the aligned segments.
self.assertEqual(len(words), 99)
Expand Down Expand Up @@ -83,10 +87,11 @@ def test_anchors_align_modes(self):
with open(xml_file, "wt", encoding="utf8") as f:
print(xml_with_anchors, file=f)
with self.assertLogs(LOGGER, level="INFO") as cm:
results = align_audio(
xml_file,
os.path.join(self.data_dir, "noise.mp3"),
)
with silence_c_stderr(), redirect_stderr(StringIO()):
results = align_audio(
xml_file,
os.path.join(self.data_dir, "noise.mp3"),
)
words = results["words"]
self.assertEqual(len(words), 10)
logger_output = "\n".join(cm.output)
Expand Down
31 changes: 18 additions & 13 deletions test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""

import os
from contextlib import redirect_stderr
from io import StringIO
from unittest import main

import click
Expand All @@ -23,13 +25,14 @@ def test_call_align(self):
# API accepts them too.
langs = ("fra",) # make sure language can be an iterable, not just a list.
with SoundSwallowerStub("t0b0d0p0s0w0:920:1520", "t0b0d0p0s1w0:1620:1690"):
(status, exception, log) = api.align(
self.data_dir / "ej-fra.txt",
self.data_dir / "ej-fra.m4a",
self.tempdir / "output",
langs,
output_formats=["html", "TextGrid", "srt"],
)
with redirect_stderr(StringIO()):
(status, exception, log) = api.align(
self.data_dir / "ej-fra.txt",
self.data_dir / "ej-fra.m4a",
self.tempdir / "output",
langs,
output_formats=["html", "TextGrid", "srt"],
)
self.assertEqual(status, 0)
self.assertTrue(exception is None)
self.assertIn("Words (<w>) not present; tokenizing", log)
Expand All @@ -53,16 +56,18 @@ def test_call_align(self):
"Make sure the API call doesn't not modify my variables",
)

(status, exception, log) = api.align("", "", self.tempdir / "errors")
with redirect_stderr(StringIO()):
(status, exception, log) = api.align("", "", self.tempdir / "errors")
self.assertNotEqual(status, 0)
self.assertFalse(exception is None)

def test_call_make_xml(self):
(status, exception, log) = api.make_xml(
self.data_dir / "ej-fra.txt",
self.tempdir / "prepared.readalong",
("fra", "eng"),
)
with redirect_stderr(StringIO()):
(status, exception, log) = api.make_xml(
self.data_dir / "ej-fra.txt",
self.tempdir / "prepared.readalong",
("fra", "eng"),
)
self.assertEqual(status, 0)
self.assertTrue(exception is None)
self.assertIn("Wrote ", log)
Expand Down
3 changes: 0 additions & 3 deletions test/test_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def align(self, input_text_path, input_audio_path, output_path, flags):
input_audio_path,
output_path,
] + flags
LOGGER.info(
f"Aligning {input_text_path} and {input_audio_path}, outputting to {output_path}"
)
return run(args, capture_output=True, check=False, encoding="utf-8")

def test_mute_section(self):
Expand Down
17 changes: 12 additions & 5 deletions test/test_dna_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

"""Test handling of DNA text in tokenization"""

from contextlib import redirect_stderr
from io import StringIO
from unittest import main

from basic_test_case import BasicTestCase
Expand All @@ -23,7 +25,8 @@ def test_tok_all_words(self):
<s>Voici une deuxième phrase.</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
as_txt = etree.tounicode(tokenized)
# print(etree.tounicode(tokenized))

Expand Down Expand Up @@ -54,7 +57,8 @@ def test_tok_some_words(self):
<s>Un <foo do-not-align="1">mot ou deux</foo> à exclure.</s>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
as_txt = etree.tounicode(tokenized)
# print('as_txt="' + as_txt +'"')

Expand Down Expand Up @@ -96,7 +100,8 @@ def test_tok_div_p_s(self):
</div>
</document>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
as_txt = etree.tounicode(tokenized)
# print('as_txt="' + as_txt +'"')

Expand Down Expand Up @@ -143,15 +148,17 @@ def test_dna_word(self):

txt = """<s xml:lang="fra">Une <w do-not-align="true">exclude</w> phrase.</s>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
self.assertRaises(RuntimeError, add_ids, tokenized)

def test_dna_word_nested(self):
"""You also can't have a <w> element inside a DNA element"""

txt = """<s xml:lang="fra">Une <foo do-not-align="true"><bar><w>exclude</w></bar></foo> phrase.</s>"""
xml = parse_xml(txt)
tokenized = tokenize_xml.tokenize_xml(xml)
with redirect_stderr(StringIO()):
tokenized = tokenize_xml.tokenize_xml(xml)
self.assertRaises(RuntimeError, add_ids, tokenized)


Expand Down
Loading