In [17]:
from collections import defaultdict
from pathlib import Path
from typing import List, Optional, Dict, Tuple, Set

from pysle.isletool import LexicalTool, WordNotInISLE

# Module-level pysle lexical tool built from 'ISLEdict.txt'; get_ipa_syllables()
# queries it via .lookup().
# NOTE(review): presumably the path is resolved relative to the current working
# directory -- confirm against how the notebook is launched.
isleDict = LexicalTool('ISLEdict.txt')


def get_ipa_syllables(word) -> Optional[List[List[str]]]:
    """Look up ``word`` in the ISLE dictionary and return its syllables.

    The result is the ``[0][0][0]`` element of ``isleDict.lookup(word)``.
    NOTE(review): presumably this is the syllable list of the first listed
    pronunciation, each syllable being a list of phone strings -- confirm
    against the pysle version in use.

    Returns ``None`` when the lookup raises ``WordNotInISLE``.
    """
    try:
        entries = isleDict.lookup(word)
    except WordNotInISLE:
        return None
    return entries[0][0][0]

# Each line of the input file is one word whose pieces are separated by ';'.
sy_file = Path('25K-syllabified-sorted-alphabetically.txt')
lines = sy_file.read_text().splitlines()

# `failed` counts lines that could not be aligned (word missing from ISLE, or
# the IPA syllable count differs from the number of ';'-separated pieces).
failed = 0
total = len(lines)

# word -> list of (written piece, IPA syllable with stress marks removed)
correspondence: Dict[str, List[Tuple[str, str]]] = defaultdict(list)

stress_markers = {'ˌ', 'ˈ'}
# Every distinct character seen in the stress-stripped IPA syllables.
all_chars: Set[str] = set()
for line in lines:
    word_morphemes = line.split(';')
    word = ''.join(word_morphemes)
    ipa_syllables = get_ipa_syllables(word)

    if ipa_syllables is None or len(ipa_syllables) != len(word_morphemes):
        failed += 1
        continue

    # Lengths match here, so pairing the two sequences positionally is safe.
    for written_piece, phones in zip(word_morphemes, ipa_syllables):
        # Flatten the syllable's parts into one string, dropping stress marks.
        stripped = ''.join(
            symbol
            for phone in phones
            for symbol in phone
            if symbol not in stress_markers
        )
        all_chars.update(stripped)
        correspondence[word].append((written_piece, stripped))
print(all_chars)

{'w', 'v', 'ŋ', 'p', 'ɝ', 'ɑ', 'ɾ', 'b', 'ɚ', 'ɔ', 'ʊ', 'n', 'u', 'j', 'm', 'k', 'i', 'ə', 'd', 'æ', 'ɛ', 'o', 's', 'ɵ', 'e', 'ʒ', 'l', 'z', '̩', 'ʌ', 'g', 'ɹ', 'f', 'ɪ', 'h', 'ð', 'ʃ', 'a', 't'}
