In [4]:
from faker import Faker
from textwrap import wrap
from itertools import chain
from functools import reduce
from collections import defaultdict

---

In [6]:
# Faker instance used below to generate a random sample word.
# NOTE(review): no seed is set in the visible cells, so generated values will
# presumably differ between runs — consider seeding for reproducibility.
fake = Faker()

In [7]:
# Draw one random word from Faker.
# NOTE(review): presumably sample input for the functions defined below — confirm.
word = fake.word()

---

# Divide word into syllables for search

## Spin

### Spin word

#### One spin

In [13]:
def spinWord(word: str) -> str:
    """Rotate ``word`` one position to the left.

    The first character is moved to the end, e.g. ``"abc"`` -> ``"bca"``.

    Args:
        word: String to rotate. May be empty.

    Returns:
        The rotated string. An empty or one-character string is returned
        unchanged.
    """
    # Slicing (word[:1]) rather than indexing (word[0]) keeps the empty string
    # from raising IndexError.
    return word[1:] + word[:1]

#### Spin around

In [15]:
def spinWordAround(word: str) -> list[str]:
    """List the distinct left-rotations of ``word``, starting with ``word`` itself.

    Rotations are produced in order (shift by 1, 2, ...) and collection stops
    as soon as a rotation equals the original word, so periodic words such as
    ``"abab"`` yield only their distinct rotations (``["abab", "baba"]``).

    Args:
        word: String to rotate. May be empty.

    Returns:
        A list whose first element is ``word``; for an empty or one-character
        word the list contains just that word.
    """
    spunList = [word]

    # A shift by len(word) always reproduces the original, so only shifts
    # 1 .. len(word) - 1 can contribute new rotations.
    for shift in range(1, len(word)):
        spun = word[shift:] + word[:shift]
        if spun == word:
            # The word is periodic: every later rotation would be a repeat.
            break
        spunList.append(spun)

    return spunList

## Chunk

In [17]:
def getSyllables(word: str, length: int) -> list[str]:
    """Split ``word`` into consecutive, non-overlapping chunks of ``length`` characters.

    Chunks start at offsets ``0, length, 2 * length, ...``. A leftover shorter
    than ``length`` at the end of the word is dropped.

    The word is cut by plain slicing, so every character (including spaces,
    tabs and hyphens) is treated alike and chunks always sit at fixed offsets.

    Args:
        word: String to split.
        length: Size of each chunk; must be positive.

    Returns:
        The chunks of exactly ``length`` characters, in order of appearance.

    Raises:
        ValueError: If ``length`` is not positive.
    """
    if length <= 0:
        raise ValueError(f"invalid length {length!r} (must be > 0)")

    # Last offset at which a full-length chunk still fits inside the word.
    lastStart = len(word) - length
    return [word[start:start + length] for start in range(0, lastStart + 1, length)]


def getDiphthongs(word: str) -> list[str]:
    """Return the two-character chunks of ``word`` as produced by ``getSyllables``.

    Note: "diphthong" here simply means a chunk of length 2; no phonetic
    analysis is performed.
    """
    chunkLength = 2
    return getSyllables(word, chunkLength)


def getTriphthongs(word: str) -> list[str]:
    """Return the three-character chunks of ``word`` as produced by ``getSyllables``.

    Note: "triphthong" here simply means a chunk of length 3; no phonetic
    analysis is performed.
    """
    chunkLength = 3
    return getSyllables(word, chunkLength)

## Flatten

In [19]:
def flattenSyllables(syllables: list[list[str]]) -> list[str]:
    """Concatenate a list of syllable lists into one flat list.

    Order is preserved: all items of the first inner list, then all items of
    the second, and so on. Only one level of nesting is removed.
    """
    return [syllable for group in syllables for syllable in group]

In [20]:
def divideIntoSyllables(word: str) -> list[str]:
    """Collect the unique 3- and 2-character chunks of ``word`` across its rotations.

    Every rotation returned by ``spinWordAround`` is chunked with
    ``getTriphthongs`` and ``getDiphthongs``. Chunks that do not occur as a
    substring of the original ``word`` are discarded, and duplicates are
    removed while keeping the first occurrence.

    Args:
        word: Word to analyse.

    Returns:
        Unique chunks, with all 3-character chunks listed before the
        2-character ones.
    """
    spins = spinWordAround(word)
    triphthongs = [getTriphthongs(spun) for spun in spins]
    diphthongs = [getDiphthongs(spun) for spun in spins]

    # Each group of per-rotation chunk lists is sorted (lexicographically, as
    # lists) before flattening; the longer chunks come first.
    syllables = flattenSyllables([*sorted(triphthongs), *sorted(diphthongs)])

    # A chunk taken from a rotation can span the point where the word's end
    # meets its start; keep only chunks that really occur inside the word.
    existed = [syllable for syllable in syllables if syllable in word]

    # dict.fromkeys de-duplicates in linear time and, because dicts preserve
    # insertion order, keeps the first occurrence of each chunk in place.
    return list(dict.fromkeys(existed))

### Frequencies

In [22]:
def getSyllablesFrequencies(words: list[str]) -> dict[str, int]:
    """Count how often each syllable occurs across ``words``.

    Each entry of ``words`` is split with ``divideIntoSyllables`` and every
    syllable it returns adds one to that syllable's count.

    Returns:
        A dict mapping syllable -> count whose insertion order is: highest
        count first, then longer syllables first, then alphabetical.
    """
    counts = defaultdict(int)
    for word in words:
        for syllable in divideIntoSyllables(word):
            counts[syllable] += 1

    def rank(syllable):
        # Negated numbers give descending count and length; the syllable
        # itself breaks remaining ties in ascending alphabetical order.
        return (-counts[syllable], -len(syllable), syllable)

    return {syllable: counts[syllable] for syllable in sorted(counts, key=rank)}