diff --git a/lrctoolbox/synced_lyrics.py b/lrctoolbox/synced_lyrics.py index 3085033..68138c2 100644 --- a/lrctoolbox/synced_lyrics.py +++ b/lrctoolbox/synced_lyrics.py @@ -5,7 +5,7 @@ import logging import re from pathlib import Path -from typing import Any, ClassVar +from typing import Any, ClassVar, Iterator from lrctoolbox.exceptions import FileTypeError from lrctoolbox.lrc_metadata import ( @@ -17,9 +17,27 @@ logger = logging.getLogger(__name__) -synced_lyrics_pattern = re.compile(r"\[(\d+:\d+.\d+)\](.*)") +synced_lyrics_pattern = re.compile( + r"(?P(?:\[\d+:\d+.\d+\])+)(?P.*)" +) lyricist_pattern = re.compile(r"Lyricist:?\s*(.*)", re.I) metadata_pattern = re.compile(r"\[(\w+):\s?(.*)\]") +timestamp_parsing_pattern = re.compile(r"\[(\d+):(\d+).(\d+)\]") + + +def parse_timestamps(timestamps: str) -> Iterator[int]: + """Parse the timestamps from the string""" + + for a_match in re.finditer(timestamp_parsing_pattern, timestamps): + ms_ = int(a_match.group(3)) + # make sure the ms is 3 digits + ms_ = ms_ * 10 ** (3 - len(str(ms_))) + timestamp_in_ms = ( + int(a_match.group(1)) * 60 * 1000 + + int(a_match.group(2)) * 1000 + + ms_ + ) + yield timestamp_in_ms class SyncedLyrics(LRCMetadata): @@ -120,7 +138,9 @@ def is_missing_any_timestamp(self) -> bool: return any(line.timestamp is None for line in self._synced_lines) @classmethod - def parse_str(cls, line: str) -> SyncedLyricLine | dict[str, str]: + def parse_str( + cls, line: str + ) -> SyncedLyricLine | dict[str, str] | list[SyncedLyricLine]: """Parse a line for lyrics or lrc metadata""" # match the lyricist match = re.search(lyricist_pattern, line) @@ -132,22 +152,16 @@ def parse_str(cls, line: str) -> SyncedLyricLine | dict[str, str]: # match the synced lyrics match = re.search(synced_lyrics_pattern, line) if match: - timestamp, lyric = match.groups() - timestamp_pattern = re.compile(r"(\d+):(\d+).(\d+)") - match = re.search(timestamp_pattern, timestamp) - if not match: - return SyncedLyricLine(lyric.strip()) - - ms_ = int(match.group(3)) - # make sure the ms is 3 digits - ms_ = ms_ * 10 ** (3 - len(str(ms_))) - timestamp_in_ms = ( - int(match.group(1)) * 60 * 1000 - + int(match.group(2)) * 1000 - + ms_ - ) - # logger.debug(synced_lyrics._lines[-1]) - return SyncedLyricLine(lyric.strip(), timestamp_in_ms) + timestamps, lyric = match.groups() + assert timestamps != "", f"Timestamps: {timestamps}" + logger.debug("Synced lyrics found: %s", line.encode("utf-8")) + valid_timestamps = list(parse_timestamps(timestamps)) + if len(valid_timestamps) == 1: + return SyncedLyricLine(lyric.strip(), valid_timestamps.pop()) + return [ + SyncedLyricLine(lyric.strip(), timestamp) + for timestamp in valid_timestamps + ] # match the metadata match = re.search(metadata_pattern, line) @@ -196,6 +210,9 @@ def load_from_lines(cls, lines: list[str]) -> SyncedLyrics: if isinstance(parsed_line, SyncedLyricLine): synced_lyrics._synced_lines.append(parsed_line) continue + if isinstance(parsed_line, list): + synced_lyrics._synced_lines.extend(parsed_line) + continue synced_lyrics.update_metadata(parsed_line) if synced_lyrics.has_timestamps_all_equal: diff --git a/tests/conftest.py b/tests/conftest.py index 68aa6e7..2f01fa6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,13 +11,18 @@ re_name = "LRCMaker" version = "1.0.0" author = "DrB" -synced_lyrics_lines = [ +synced_lyrics_lines_wrapped = [ + "[00:00.00]Foo bar", + "[00:05.00]Baz qux", + "[00:10.00][00:15.00]Quux quuz", +] +synced_lyrics_lines_unwrapped = [ "[00:00.00]Foo bar", "[00:05.00]Baz qux", "[00:10.00]Quux quuz", + "[00:15.00]Quux quuz", ] - -lines = [ +metadata_lines = [ f"[ar:{artist}]", f"[ti:{title}]", f"[al:{album}]", @@ -27,17 +32,27 @@ f"[ve:{version}]", f"[uri:{uri}]", f"[length:{length}]", -] + synced_lyrics_lines +] @pytest.fixture(scope="module") -def only_lyrics(): - return synced_lyrics_lines +def only_lyrics_wrapped(): + return synced_lyrics_lines_wrapped + + +@pytest.fixture(scope="module") +def only_lyrics_unwrapped(): + return synced_lyrics_lines_unwrapped + + +@pytest.fixture +def lines_with_metadata_wrapped(): + return metadata_lines + synced_lyrics_lines_wrapped @pytest.fixture -def lines_with_metadata(): - return lines +def lines_with_metadata_unwrapped(): + return metadata_lines + synced_lyrics_lines_unwrapped @pytest.fixture @@ -56,5 +71,5 @@ def metadata(): @pytest.fixture -def sample_synced_lyrics(lines_with_metadata): - return SyncedLyrics.load_from_lines(lines_with_metadata) +def sample_synced_lyrics(lines_with_metadata_wrapped): + return SyncedLyrics.load_from_lines(lines_with_metadata_wrapped) diff --git a/tests/test_synced_lyrics.py b/tests/test_synced_lyrics.py index 182f8be..b300ba2 100644 --- a/tests/test_synced_lyrics.py +++ b/tests/test_synced_lyrics.py @@ -1,21 +1,46 @@ -from pathlib import Path import random +from pathlib import Path import pytest from lrctoolbox.lrc_metadata import TrackMetadata from lrctoolbox.synced_lyric_line import SyncedLyricLine -from lrctoolbox.synced_lyrics import SyncedLyrics +from lrctoolbox.synced_lyrics import SyncedLyrics, parse_timestamps + + +@pytest.mark.parametrize( + "lines, expected", + [ + ( + "[00:00.00]", + [ + 0, + ], + ), + ("[00:00.00][00:05.00]", [0, 5000]), + ( + "[00:05.00]", + [ + 5000, + ], + ), + ("[14:25.565]", [14 * 60 * 1000 + 25 * 1000 + 565]), + ("[00:00.00][00:05.00][00:10.00]", [0, 5000, 10000]), + ("", []), + ], +) +def test_timestamps_parsing(lines, expected): + assert list(parse_timestamps(lines)) == expected -def test_load_from_lines(only_lyrics, metadata, lines_with_metadata): - random.shuffle(lines_with_metadata) +def test_load_from_lines(only_lyrics_unwrapped, metadata, lines_with_metadata_wrapped): + random.shuffle(lines_with_metadata_wrapped) synced_lyrics = SyncedLyrics.load_from_lines( - lines_with_metadata + [""] * 10 + lines_with_metadata_wrapped + [""] * 10 ) for key, value in metadata.items(): assert getattr(synced_lyrics, key) == value - assert synced_lyrics.lyrics == only_lyrics + assert synced_lyrics.lyrics == only_lyrics_unwrapped def test_update_metadata(sample_synced_lyrics: SyncedLyrics): @@ -118,7 +143,7 @@ def test_metadata_parsing(line, expected): @pytest.mark.parametrize( "line, expected", [ - ("[00:00.00]Foo bar", SyncedLyricLine(text="Foo bar", timestamp=0)), + ("[00:00.00] Foo bar", SyncedLyricLine(text="Foo bar", timestamp=0)), ("[00:05.00]Baz qux", SyncedLyricLine(text="Baz qux", timestamp=5000)), ( "[14:25.565]Quux quuz", @@ -127,19 +152,26 @@ def test_metadata_parsing(line, expected): ), ), ("Quux quuz", SyncedLyricLine(text="Quux quuz")), + ( + "[00:00.00][00:05.00]Foo bar", + [ + SyncedLyricLine(text="Foo bar", timestamp=0), + SyncedLyricLine(text="Foo bar", timestamp=5000), + ], + ), ], ) def test_string_parsing_lyrics(line, expected): synced_lyrics = SyncedLyrics() res = synced_lyrics.parse_str(line) - assert isinstance(res, SyncedLyricLine) + assert isinstance(res, (SyncedLyricLine, list)) assert res == expected def test_saving_to_file_no_metadata( tmp_path: Path, sample_synced_lyrics: SyncedLyrics, - only_lyrics, + only_lyrics_unwrapped, ): path = tmp_path / "foo" / "example.lrc" sample_synced_lyrics.save_to_file( @@ -148,15 +180,13 @@ def test_saving_to_file_no_metadata( assert path.exists() with path.open() as f: lines = f.read().splitlines() - assert lines == only_lyrics + assert lines == only_lyrics_unwrapped path.unlink() def test_saving_to_file_with_metadata( tmp_path: Path, sample_synced_lyrics: SyncedLyrics, - lines_with_metadata, - only_lyrics, ): path = tmp_path / "example.lrc" sample_synced_lyrics.re_name = None