Skip to content

Commit

Permalink
feat: parse multiple timestamps (#15)
Browse files Browse the repository at this point in the history
* can parse multiple timestamps from a line

* Refactor synced lyrics lines and metadata fixtures for wrapped and unwrapped lyrics
  • Loading branch information
Dr-Blank committed Feb 10, 2024
1 parent 7ded453 commit bdc8b8a
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 41 deletions.
55 changes: 36 additions & 19 deletions lrctoolbox/synced_lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import re
from pathlib import Path
from typing import Any, ClassVar
from typing import Any, ClassVar, Iterator

from lrctoolbox.exceptions import FileTypeError
from lrctoolbox.lrc_metadata import (
Expand All @@ -17,9 +17,27 @@

logger = logging.getLogger(__name__)

synced_lyrics_pattern = re.compile(r"\[(\d+:\d+.\d+)\](.*)")
synced_lyrics_pattern = re.compile(
r"(?P<timestamps>(?:\[\d+:\d+.\d+\])+)(?P<lyrics>.*)"
)
lyricist_pattern = re.compile(r"Lyricist:?\s*(.*)", re.I)
metadata_pattern = re.compile(r"\[(\w+):\s?(.*)\]")
# Captures minutes, seconds and the fractional-second digits of one tag.
# NOTE(review): the "." before the fraction is unescaped, so any single
# character is accepted as the separator -- confirm this is intentional.
timestamp_parsing_pattern = re.compile(r"\[(\d+):(\d+).(\d+)\]")


def parse_timestamps(timestamps: str) -> Iterator[int]:
    """Yield each ``[mm:ss.xx]`` tag in *timestamps* as whole milliseconds.

    Tags are yielded in the order they appear in the string; a string
    containing no tag yields nothing.

    The fractional part is interpreted positionally from the matched text:
    ``.5`` is 500 ms, ``.05`` is 50 ms and ``.565`` is 565 ms. Digits beyond
    millisecond precision are truncated so the result is always an ``int``.
    """
    for a_match in timestamp_parsing_pattern.finditer(timestamps):
        minutes, seconds, fraction = a_match.groups()
        # Pad/truncate the *text* to exactly three digits. Scaling by the
        # digit count of the parsed integer would drop leading zeros
        # ("05" -> 5 -> 500 ms) and would produce a float for 4+ digits.
        ms_ = int(fraction.ljust(3, "0")[:3])
        yield int(minutes) * 60 * 1000 + int(seconds) * 1000 + ms_


class SyncedLyrics(LRCMetadata):
Expand Down Expand Up @@ -120,7 +138,9 @@ def is_missing_any_timestamp(self) -> bool:
return any(line.timestamp is None for line in self._synced_lines)

@classmethod
def parse_str(cls, line: str) -> SyncedLyricLine | dict[str, str]:
def parse_str(
cls, line: str
) -> SyncedLyricLine | dict[str, str] | list[SyncedLyricLine]:
"""Parse a line for lyrics or lrc metadata"""
# match the lyricist
match = re.search(lyricist_pattern, line)
Expand All @@ -132,22 +152,16 @@ def parse_str(cls, line: str) -> SyncedLyricLine | dict[str, str]:
# match the synced lyrics
match = re.search(synced_lyrics_pattern, line)
if match:
timestamp, lyric = match.groups()
timestamp_pattern = re.compile(r"(\d+):(\d+).(\d+)")
match = re.search(timestamp_pattern, timestamp)
if not match:
return SyncedLyricLine(lyric.strip())

ms_ = int(match.group(3))
# make sure the ms is 3 digits
ms_ = ms_ * 10 ** (3 - len(str(ms_)))
timestamp_in_ms = (
int(match.group(1)) * 60 * 1000
+ int(match.group(2)) * 1000
+ ms_
)
# logger.debug(synced_lyrics._lines[-1])
return SyncedLyricLine(lyric.strip(), timestamp_in_ms)
timestamps, lyric = match.groups()
assert timestamps != "", f"Timestamps: {timestamps}"
logger.debug("Synced lyrics found: %s", line.encode("utf-8"))
valid_timestamps = list(parse_timestamps(timestamps))
if len(valid_timestamps) == 1:
return SyncedLyricLine(lyric.strip(), valid_timestamps.pop())
return [
SyncedLyricLine(lyric.strip(), timestamp)
for timestamp in valid_timestamps
]

# match the metadata
match = re.search(metadata_pattern, line)
Expand Down Expand Up @@ -196,6 +210,9 @@ def load_from_lines(cls, lines: list[str]) -> SyncedLyrics:
if isinstance(parsed_line, SyncedLyricLine):
synced_lyrics._synced_lines.append(parsed_line)
continue
if isinstance(parsed_line, list):
synced_lyrics._synced_lines.extend(parsed_line)
continue
synced_lyrics.update_metadata(parsed_line)

if synced_lyrics.has_timestamps_all_equal:
Expand Down
35 changes: 25 additions & 10 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
re_name = "LRCMaker"
version = "1.0.0"
author = "DrB"
synced_lyrics_lines = [
synced_lyrics_lines_wrapped = [
"[00:00.00]Foo bar",
"[00:05.00]Baz qux",
"[00:10.00][00:15.00]Quux quuz",
]
synced_lyrics_lines_unwrapped = [
"[00:00.00]Foo bar",
"[00:05.00]Baz qux",
"[00:10.00]Quux quuz",
"[00:15.00]Quux quuz",
]

lines = [
metadata_lines = [
f"[ar:{artist}]",
f"[ti:{title}]",
f"[al:{album}]",
Expand All @@ -27,17 +32,27 @@
f"[ve:{version}]",
f"[uri:{uri}]",
f"[length:{length}]",
] + synced_lyrics_lines
]


@pytest.fixture(scope="module")
def only_lyrics():
return synced_lyrics_lines
def only_lyrics_wrapped():
return synced_lyrics_lines_wrapped


@pytest.fixture(scope="module")
def only_lyrics_unwrapped():
return synced_lyrics_lines_unwrapped


@pytest.fixture
def lines_with_metadata_wrapped():
return metadata_lines + synced_lyrics_lines_wrapped


@pytest.fixture
def lines_with_metadata():
return lines
def lines_with_metadata_unwrapped():
return metadata_lines + synced_lyrics_lines_unwrapped


@pytest.fixture
Expand All @@ -56,5 +71,5 @@ def metadata():


@pytest.fixture
def sample_synced_lyrics(lines_with_metadata):
return SyncedLyrics.load_from_lines(lines_with_metadata)
def sample_synced_lyrics(lines_with_metadata_wrapped):
return SyncedLyrics.load_from_lines(lines_with_metadata_wrapped)
54 changes: 42 additions & 12 deletions tests/test_synced_lyrics.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,46 @@
from pathlib import Path
import random
from pathlib import Path

import pytest

from lrctoolbox.lrc_metadata import TrackMetadata
from lrctoolbox.synced_lyric_line import SyncedLyricLine
from lrctoolbox.synced_lyrics import SyncedLyrics
from lrctoolbox.synced_lyrics import SyncedLyrics, parse_timestamps


@pytest.mark.parametrize(
"lines, expected",
[
(
"[00:00.00]",
[
0,
],
),
("[00:00.00][00:05.00]", [0, 5000]),
(
"[00:05.00]",
[
5000,
],
),
("[14:25.565]", [14 * 60 * 1000 + 25 * 1000 + 565]),
("[00:00.00][00:05.00][00:10.00]", [0, 5000, 10000]),
("", []),
],
)
def test_timestamps_parsing(lines, expected):
assert list(parse_timestamps(lines)) == expected


def test_load_from_lines(only_lyrics, metadata, lines_with_metadata):
random.shuffle(lines_with_metadata)
def test_load_from_lines(only_lyrics_unwrapped, metadata, lines_with_metadata_wrapped):
random.shuffle(lines_with_metadata_wrapped)
synced_lyrics = SyncedLyrics.load_from_lines(
lines_with_metadata + [""] * 10
lines_with_metadata_wrapped + [""] * 10
)
for key, value in metadata.items():
assert getattr(synced_lyrics, key) == value
assert synced_lyrics.lyrics == only_lyrics
assert synced_lyrics.lyrics == only_lyrics_unwrapped


def test_update_metadata(sample_synced_lyrics: SyncedLyrics):
Expand Down Expand Up @@ -118,7 +143,7 @@ def test_metadata_parsing(line, expected):
@pytest.mark.parametrize(
"line, expected",
[
("[00:00.00]Foo bar", SyncedLyricLine(text="Foo bar", timestamp=0)),
("[00:00.00] Foo bar", SyncedLyricLine(text="Foo bar", timestamp=0)),
("[00:05.00]Baz qux", SyncedLyricLine(text="Baz qux", timestamp=5000)),
(
"[14:25.565]Quux quuz",
Expand All @@ -127,19 +152,26 @@ def test_metadata_parsing(line, expected):
),
),
("Quux quuz", SyncedLyricLine(text="Quux quuz")),
(
"[00:00.00][00:05.00]Foo bar",
[
SyncedLyricLine(text="Foo bar", timestamp=0),
SyncedLyricLine(text="Foo bar", timestamp=5000),
],
),
],
)
def test_string_parsing_lyrics(line, expected):
synced_lyrics = SyncedLyrics()
res = synced_lyrics.parse_str(line)
assert isinstance(res, SyncedLyricLine)
assert isinstance(res, (SyncedLyricLine, list))
assert res == expected


def test_saving_to_file_no_metadata(
tmp_path: Path,
sample_synced_lyrics: SyncedLyrics,
only_lyrics,
only_lyrics_unwrapped,
):
path = tmp_path / "foo" / "example.lrc"
sample_synced_lyrics.save_to_file(
Expand All @@ -148,15 +180,13 @@ def test_saving_to_file_no_metadata(
assert path.exists()
with path.open() as f:
lines = f.read().splitlines()
assert lines == only_lyrics
assert lines == only_lyrics_unwrapped
path.unlink()


def test_saving_to_file_with_metadata(
tmp_path: Path,
sample_synced_lyrics: SyncedLyrics,
lines_with_metadata,
only_lyrics,
):
path = tmp_path / "example.lrc"
sample_synced_lyrics.re_name = None
Expand Down

0 comments on commit bdc8b8a

Please sign in to comment.