In [1]:
from faker import Faker
from textwrap import wrap
import re
from typing import Generator, Iterator
from operator import itemgetter

In [2]:
# Faker instance (fake-data generator). None of the cells visible here reference it;
# presumably it is used elsewhere in the notebook — confirm before removing.
fake = Faker()

---

In [4]:
# Sample word that the final cell splits into syllables.
word = 'research'

In [5]:
# Candidate syllables (two- and three-letter fragments) to look for inside `word`.
# Order does not matter here: the final cell sorts them longest-first before use.
syllables = ['in','on','ou','ne','th','ar','ce','es','ng','re','ti','ing','ion','an','ed','en','er','im','io','me','ro','ss','ta']

---

# Divide a word into syllables

## Syllables

### Sort syllables

In [10]:
def sortByLength(syllables: list[str]) -> list[str]:
    """Return a new list of syllables ordered longest first.

    Syllables of equal length are ordered alphabetically (plain string
    comparison). The input is left untouched.
    """
    def longestThenAlphabetical(syllable: str) -> tuple[int, str]:
        # Negating the length makes an ascending sort put long syllables first.
        return (-len(syllable), syllable)

    return sorted(syllables, key=longestThenAlphabetical)

### Syllables in word

In [13]:
def filterSyllablesByWord(word: str, syllables: list[str]) -> Generator[str, None, None]:
    """Yield the syllables that occur in ``word``, then the leftover letter runs.

    Syllables are tried in the order given. Each one that still occurs
    (case-insensitively, as literal text) in the not-yet-claimed part of the
    word is yielded, and every occurrence of it is blanked out so that later
    syllables cannot claim the same characters. Once all syllables have been
    tried, the remaining runs of word characters (``\\w+``) are yielded in word
    order, so nothing from ``word`` is lost.

    Args:
        word: The word to inspect.
        syllables: Candidate syllables, in priority order. Any iterable of
            strings works; it is consumed lazily. Empty strings are skipped
            because they match everywhere without claiming any character.

    Yields:
        Matching syllables (spelled as given in ``syllables``) followed by the
        leftover fragments of ``word``.
    """
    remaining = word

    for syllable in syllables:
        if not syllable:
            continue

        # Escape so syllables such as '.' or '(' are matched literally instead
        # of being interpreted as regular-expression syntax.
        literalSyllable = re.compile(re.escape(syllable), re.IGNORECASE)
        if literalSyllable.search(remaining) is None:
            continue

        # Blank with spaces (not deletion) so the leftover runs stay separated.
        remaining = literalSyllable.sub(' ' * len(syllable), remaining)
        yield syllable

    yield from re.findall(r'\w+', remaining)

## Indexes

### Start positions of syllables

In [17]:
def findIndexes(word: str, syllable: str) -> list[int]:
    """Return the start index of every occurrence of ``syllable`` in ``word``.

    Matching is case-insensitive and treats ``syllable`` as literal text.
    Occurrences are reported left to right and do not overlap (searching for
    ``'aa'`` in ``'aaa'`` gives ``[0]``).

    Args:
        word: The text to search.
        syllable: The fragment to locate.

    Returns:
        Start indexes in ascending order; an empty list when ``syllable`` is
        empty or does not occur.
    """
    # An empty fragment has no meaningful position (and would match everywhere).
    if not syllable:
        return []

    # Escape so characters like '.' are literal, and scan once with finditer
    # instead of blanking and re-searching the word after every hit.
    literalSyllable = re.compile(re.escape(syllable), re.IGNORECASE)
    return [match.start() for match in literalSyllable.finditer(word)]

### Index-syllable pairs

In [20]:
def findIndexesOfSyllablePair(word: str, syllable: str) -> list[tuple[int, str]]:
    """Pair ``syllable`` with each position at which ``findIndexes`` finds it in ``word``.

    Returns:
        One ``(start index, syllable)`` tuple per index returned by
        ``findIndexes(word, syllable)``, in the same order.
    """
    pairs = []
    for position in findIndexes(word, syllable):
        pairs.append((position, syllable))
    return pairs

In [22]:
def findIndexesSyllablesPairs(word: str, syllables: Iterator[str]) -> Generator[tuple[int, str], None, None]:
    """Lazily yield ``(start index, syllable)`` pairs for the syllables found in ``word``.

    ``syllables`` is first narrowed by ``filterSyllablesByWord``; every item
    that survives is then expanded into its index pairs with
    ``findIndexesOfSyllablePair``. Pairs come out grouped by syllable, not
    sorted by index.
    """
    matchingSyllables = filterSyllablesByWord(word, syllables)
    for matched in matchingSyllables:
        for pair in findIndexesOfSyllablePair(word, matched):
            yield pair

## Sort and filter

In [25]:
def sortAndFilterIndexesSyllablesPairs(indexesSyllablesPairs: Iterator[tuple[int, str]]) -> list[tuple[int, str]]:
    """Order pairs by position and drop every pair that overlaps an accepted one.

    Pairs are visited by ascending start index; among pairs that start at the
    same index the longest syllable comes first. A pair is kept only when it
    starts at or after the end of the previously kept pair, so the result is a
    left-to-right sequence of non-overlapping syllables.

    Args:
        indexesSyllablesPairs: Any iterable of ``(start index, syllable)``.

    Returns:
        The surviving pairs in ascending index order (empty for empty input).
    """
    # First pass keeps a deterministic order for same-index, same-length
    # candidates regardless of input order; the second (stable) pass imposes
    # the real priority: position first, then length. Ranking by length rather
    # than by the syllable text matters when candidates differ in case
    # ('RES' must beat 'r' at the same index).
    byTextDescending = sorted(indexesSyllablesPairs, key=itemgetter(1), reverse=True)
    byIndexThenLength = sorted(byTextDescending, key=lambda pair: (pair[0], -len(pair[1])))

    accepted: list[tuple[int, str]] = []
    for index, syllable in byIndexThenLength:
        # Accept the first pair unconditionally, later ones only if they start
        # where the previously accepted syllable ends or further right.
        if not accepted or accepted[-1][0] + len(accepted[-1][1]) <= index:
            accepted.append((index, syllable))

    return accepted

## Divide

In [28]:
def divideBy(word: str, syllables: Iterator[str]) -> list[str]:
    """Split ``word`` into the given syllables plus leftover fragments, in word order.

    Pipeline: narrow ``syllables`` to those present in ``word``, locate every
    occurrence, then keep a left-to-right, non-overlapping selection and
    return only the syllable texts.

    NOTE(review): ``findIndexesSyllablesPairs`` runs ``filterSyllablesByWord``
    again on the already-filtered stream, so candidates are filtered twice.
    The second pass can re-split leftover fragments, so it is preserved here;
    confirm whether that is intended.
    """
    candidates = filterSyllablesByWord(word, syllables)
    locatedPairs = findIndexesSyllablesPairs(word, candidates)
    keptPairs = sortAndFilterIndexesSyllablesPairs(locatedPairs)

    # Drop the indexes; only the syllable text of each kept pair is returned.
    return list(map(itemgetter(1), keptPairs))

In [29]:
# Keep only syllables of at least two characters, ordered longest first so that
# longer syllables get the first chance to claim characters of the word.
# NOTE: filter() returns a one-shot iterator; it is exhausted once divideBy has run,
# so re-running only the second line without the first would see no syllables.
filteredSyllables = filter(lambda syllable: len(syllable) >= 2, sortByLength(syllables))
# Split `word` into the matching syllables plus leftover fragments, in word order.
dividedBySyllables = divideBy(word, filteredSyllables)