In [1]:
from helpers import text_to_words_syllables_str, get_all_kjv_words_syllables_str
from pyphen import Pyphen

In [2]:
# Sample passage to look for. The helper hands back the passage's words and a
# matching sequence of per-word syllable counts (one entry per word).
words, syllables = text_to_words_syllables_str(
    """
    But “I know whom I have believed,
    and am persuaded that he is able
    to keep that which I’ve committed
    unto him against that day.”
    """
)

print(" ".join(words))

# British-English hyphenation dictionary, shared by the cells below.
hyphenator = Pyphen(lang="en_GB")

# Whole-corpus words and syllable counts, plus the (word, count) pairs whose
# count is anything other than "1".
all_kjv_words, all_kjv_syllables = get_all_kjv_words_syllables_str()
multisyllable_words = [
    (all_kjv_words[position], count)
    for position, count in enumerate(all_kjv_syllables)
    if count != "1"
]

But I know whom I have believed and am persuaded that he is able to keep that which Ive committed unto him against that day


In [3]:
# Peek at the syllable counts of the first ten words of the sample passage
# (one character per word in the printed output).
print(syllables[:10])

1111112113


In [4]:
def hyphenate_word(word: str, syllable_count: int | str, hyphenator: Pyphen):
    """
    Hyphenate ``word`` into ``syllable_count`` pieces.

    Pyphen is tried first. If the number of hyphens it inserts does not equal
    ``syllable_count - 1``, the word is instead cut into roughly equal-length
    pieces.

    Args:
        word: The word to hyphenate.
        syllable_count: Desired number of syllables; anything ``int()`` accepts
            (e.g. ``3`` or ``"3"``).
        hyphenator: Object exposing ``inserted(word)``, normally a ``Pyphen``
            instance. The notebook arbitrarily uses ``Pyphen(lang="en_GB")``
            rather than ``"en_US"``.

    Returns:
        The word with ``-`` between pieces, or the word unchanged when only one
        piece is wanted or possible.

    Raises:
        ValueError: If ``syllable_count`` cannot be converted to ``int``.
    """
    syllable_count = int(syllable_count)

    # Let Pyphen attempt the hyphenation first; accept it only when it yields
    # exactly the requested number of pieces (hyphens = syllables - 1).
    hyphenated_word = hyphenator.inserted(word)
    if hyphenated_word.count("-") == syllable_count - 1:
        return hyphenated_word

    # Fallback: equal-length split. A word cannot be cut into more non-empty
    # pieces than it has characters, so clamp the piece count; otherwise the
    # result would contain empty segments (e.g. "I" with 2 syllables -> "-I").
    segment_count = min(syllable_count, len(word))
    if segment_count <= 1:
        return word

    step = len(word) / segment_count
    # Cut positions, including both ends of the word.
    cuts = [0, *(round(i * step) for i in range(1, segment_count)), len(word)]
    return "-".join(word[start:stop] for start, stop in zip(cuts, cuts[1:]))


# Spot-check the hyphenation on the first ten multi-syllable corpus words.
for word, syllable_count in multisyllable_words[:10]:
    hyphenated = hyphenate_word(word, int(syllable_count), hyphenator)
    print(f"{word}, {syllable_count}: {hyphenated}")

beginning, 3: be-gin-ning
created, 3: cr-eat-ed
heaven, 2: heav-en
without, 2: with-out
darkness, 2: dark-ness
upon, 2: up-on
Spirit, 2: Spir-it
upon, 2: up-on
waters, 2: wa-ters
divided, 3: di-vid-ed


In [5]:
def validate_words_syllables_search_parameters(
    words_long: list[str],
    words_short: list[str],
    syllables_long: str,
    syllables_short: str,
) -> None:
    """
    Check that the word/syllable inputs of a search are mutually consistent.

    Each word list must be exactly as long as its syllable sequence, and the
    "long" inputs must be at least as long as the "short" ones.

    Raises:
        ValueError: On the first violated condition, with both offending
            lengths in the message.
    """
    # Long pair must line up one-to-one.
    n_words_long, n_syllables_long = len(words_long), len(syllables_long)
    if n_words_long != n_syllables_long:
        raise ValueError(
            f"Length mismatch. Must be equal: len(words_long) = {n_words_long}, len(syllables_long) = {n_syllables_long}"
        )

    # Short pair must line up one-to-one.
    n_words_short, n_syllables_short = len(words_short), len(syllables_short)
    if n_words_short != n_syllables_short:
        raise ValueError(
            f"Length mismatch. Must be equal: len(words_short) = {n_words_short}, len(syllables_short) = {n_syllables_short}"
        )

    # The text being searched must not be shorter than the pattern.
    if n_words_long < n_words_short:
        raise ValueError(
            f"Length mismatch. len(words_long) must be >= len(words_short): len(words_long) = {n_words_long}, len(words_short) = {n_words_short}"
        )

    # Already implied by the three checks above; kept as a defensive guard.
    if n_syllables_long < n_syllables_short:
        raise ValueError(
            f"Length mismatch. len(syllables_long) must be >= len(syllables_short): len(syllables_long) = {n_syllables_long}, len(syllables_short) = {n_syllables_short}"
        )


def match_results_to_text(
    words_long: list[str],
    words_short: list[str],
    syllables_long: str,
    syllables_short: str,
    index: int = 0,
    hyphenator: Pyphen = hyphenator,
):
    """
    Render a side-by-side comparison of a window of the long text against the
    short text.

    One output line is produced per word of ``words_short``: the long-text
    word, the short-text word, and their syllable counts. Words whose count is
    not "1" are hyphenated, and an ``X`` marks rows where the counts differ.

    Args:
        words_long: Words of the text being searched.
        words_short: Words of the pattern.
        syllables_long: Per-word syllable counts for ``words_long``.
        syllables_short: Per-word syllable counts for ``words_short``.
        index: Position in ``words_long`` where the window starts. A negative
            value counts from the end, as with normal sequence indexing.
        hyphenator: Hyphenator passed on to ``hyphenate_word``. The default is
            the module-level ``hyphenator`` as it existed when this function
            was defined.

    Returns:
        The comparison table as a single newline-terminated string (empty when
        ``words_short`` is empty).

    Raises:
        ValueError: If the word/syllable inputs have inconsistent lengths.
        IndexError: If the window ``index .. index + len(words_short)`` does
            not fit inside ``words_long``.
    """
    validate_words_syllables_search_parameters(
        words_long, words_short, syllables_long, syllables_short
    )

    long_len, short_len = len(words_long), len(words_short)
    # Normalise a negative index so the bounds check below also catches
    # windows that would otherwise silently wrap around to the start.
    start = index + long_len if index < 0 else index
    if short_len and not (0 <= start <= long_len - short_len):
        raise IndexError(
            f"Window out of range: index = {index}, len(words_short) = {short_len}, len(words_long) = {long_len}"
        )
    stop = start + short_len

    rows = []
    for word_long, syllable_long, word_short, syllable_short in zip(
        words_long[start:stop],
        syllables_long[start:stop],
        words_short,
        syllables_short,
    ):
        # Single-syllable words are shown as-is; everything else is hyphenated.
        if syllable_long != "1":
            word_long = hyphenate_word(word_long, syllable_long, hyphenator)
        if syllable_short != "1":
            word_short = hyphenate_word(word_short, syllable_short, hyphenator)
        indicator = " " if syllable_long == syllable_short else "X"
        rows.append(
            f"{word_long:^15} {indicator} {word_short:^15} / {syllable_long} {indicator} {syllable_short}\n"
        )
    return "".join(rows)
    

# Compare the sample passage with the corpus window starting at word offset
# 757980 (hard-coded; presumably found by an earlier search — TODO confirm).
print(match_results_to_text(all_kjv_words, words, all_kjv_syllables, syllables, 757980))

      for               But       / 1   1
       I                 I        / 1   1
     know              know       / 1   1
     whom              whom       / 1   1
       I                 I        / 1   1
     have              have       / 1   1
   be-lieved         be-lieved    / 2   2
      and               and       / 1   1
      am                am        / 1   1
  per-sua-ded       per-sua-ded   / 3   3
     that              that       / 1   1
      he                he        / 1   1
      is                is        / 1   1
     ab-le             ab-le      / 2   2
      to                to        / 1   1
     keep              keep       / 1   1
     that              that       / 1   1
     which             which      / 1   1
       I                Ive       / 1   1
     have       X   com-mit-ted   / 1 X 3
  com-mit-ted   X      un-to      / 3 X 2
     un-to      X       him       / 2 X 1
      him       X    agai-nst     / 1 X 2
   agai-nst     X      that       