In [110]:
%reload_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
from conlanger.tools import SyllableStructures, Lexicon



In [111]:
# Load the generated language grids and their per-language weirdness scores.
#
# SECURITY: allow_pickle=True lets np.load unpickle object arrays, which can run
# arbitrary code. Only load archives that come from a trusted source.
#
# An .npz archive is returned as an NpzFile that keeps the file open, so both
# arrays are read inside a `with` block and the handle is released afterwards.
# `data` is still bound after the block, but it is closed and must not be indexed.
with np.load("./data/generated_languages.npz", allow_pickle=True) as data:
    generated_languages = np.squeeze(data["generated_languages"])
    weirdness = data["weirdness"]

# number of generated languages (leading axis of the squeezed array)
l = generated_languages.shape[0]

# indices of the languages with the highest / lowest weirdness score
most_different = np.argmax(weirdness)
least_different = np.argmin(weirdness)

# Each language is one 2-D grid; the four phoneme tables are sub-rectangles of it.
# NOTE(review): row 14 is not read by any slice below - presumably a separator or
# padding row between the pulmonic block and the smaller tables; TODO confirm.
generated_cons_pl = generated_languages[:, 0:14, 0:24]      # rows 0-13, cols 0-23
generated_cons_npl = generated_languages[:, 15:22, 0:10]    # rows 15-21, cols 0-9
generated_cons_coart = generated_languages[:, 15:18, 10:14]  # rows 15-17, cols 10-13
generated_vowels = generated_languages[:, 15:22, 14:24]     # rows 15-21, cols 14-23

generated_languages.shape, generated_cons_pl.shape, generated_cons_npl.shape, generated_cons_coart.shape, generated_vowels.shape

((100, 24, 24), (100, 14, 24), (100, 7, 10), (100, 3, 4), (100, 7, 10))

In [112]:
# Load the phoneme table dataframes, one CSV per phoneme class.


def _read_phoneme_table(path, **extra):
    """Read one phoneme table CSV as strings, indexed by its first column.

    pandas' default NA markers are switched off and only the literal "-1" is
    parsed as missing. Any ``extra`` keyword arguments are passed straight
    through to ``pd.read_csv``.
    """
    return pd.read_csv(
        path,
        dtype=str,
        index_col=[0],
        keep_default_na=False,
        na_values=["-1"],
        **extra,
    )


# NOTE(review): only this first table is read with on_bad_lines="warn"; the other
# three use the pandas default. Confirm whether that difference is intentional.
cons_pl_tbl_df = _read_phoneme_table(
    "./data/consonants_plumonic.csv",
    on_bad_lines="warn",
)
cons_npl_tbl_df = _read_phoneme_table("./data/consonants_non_plumonic.csv")
cons_coart_tbl_df = _read_phoneme_table("./data/consonants_coarticulated.csv")
vowels_tbl_df = _read_phoneme_table("./data/vowels.csv")

(
    cons_pl_tbl_df.shape,
    cons_npl_tbl_df.shape,
    cons_coart_tbl_df.shape,
    vowels_tbl_df.shape,
)

((14, 24), (7, 10), (3, 4), (7, 10))

In [113]:
def map_generated_to_phonemes(tbl):
    """Build a function that turns one generated grid into phoneme symbols.

    Parameters
    ----------
    tbl : pandas.DataFrame
        Phoneme table holding the symbol for each grid position. Cells that are
        NaN or the empty string are represented by the placeholder "*".

    Returns
    -------
    callable
        ``mapper(lang)``. For an array ``lang`` that broadcasts against ``tbl``,
        it returns a NumPy array containing the table symbol wherever
        ``lang > 0`` and "" everywhere else.
    """
    # The placeholder substitution depends only on the table, so it is done once
    # here rather than on every mapper call (the mapper runs once per language).
    symbols = tbl.replace(np.nan, "*").replace("", "*").to_numpy()

    def mapper(lang):
        return np.where(lang > 0, symbols, "")

    return mapper
    
def _phonemes_for(tbl, grids):
    """Map every language grid in ``grids`` through ``tbl`` and stack the results."""
    to_symbols = map_generated_to_phonemes(tbl)
    return np.array([to_symbols(grid) for grid in grids])


# One symbol array per phoneme class; the leading axis indexes the generated language.
generated_cons_pl_phonemes = _phonemes_for(cons_pl_tbl_df, generated_cons_pl)
generated_cons_npl_phonemes = _phonemes_for(cons_npl_tbl_df, generated_cons_npl)
generated_cons_coart_phonemes = _phonemes_for(cons_coart_tbl_df, generated_cons_coart)
generated_vowels_phonemes = _phonemes_for(vowels_tbl_df, generated_vowels)

# To inspect a single language (e.g. index 5) as a labelled table, wrap its array in
# a DataFrame that reuses the source table's labels, for example:
# display(pd.DataFrame(generated_cons_pl_phonemes[5], index=cons_pl_tbl_df.index, columns=cons_pl_tbl_df.columns))

(
    generated_cons_pl_phonemes.shape,
    generated_cons_npl_phonemes.shape,
    generated_cons_coart_phonemes.shape,
    generated_vowels_phonemes.shape,
)

((100, 14, 24), (100, 7, 10), (100, 3, 4), (100, 7, 10))

In [114]:
def _inventory(symbols):
    """Return the distinct non-empty symbols in ``symbols``, in np.unique order."""
    return [s for s in np.unique(symbols) if s != ""]


# Flatten each language's three consonant grids into a single row per language.
_consonant_rows = np.hstack(
    [
        generated_cons_pl_phonemes.reshape(l, -1),
        generated_cons_npl_phonemes.reshape(l, -1),
        generated_cons_coart_phonemes.reshape(l, -1),
    ]
)
consonants = [_inventory(row) for row in _consonant_rows]

vowels = [_inventory(row) for row in generated_vowels_phonemes.reshape(l, -1)]

# NOTE(review): glides are taken from row 6 of the pulmonic table plus row 1 of the
# co-articulated table, and nasals from row 0 of the pulmonic table. Presumably
# these are the matching manner-of-articulation rows; verify against the CSV index.
_glide_rows = np.hstack(
    [generated_cons_pl_phonemes[:, 6, :], generated_cons_coart_phonemes[:, 1, :]]
)
glides = [_inventory(row) for row in _glide_rows]
nasals = [_inventory(row) for row in generated_cons_pl_phonemes[:, 0, :]]

# Show the inventories of the languages whose weirdness is closest to each target.
for x in (0.01, 0.49, 0.99):
    idx = np.argmin(np.abs(weirdness - x))
    print(f"weirdness: {weirdness[idx]}")
    print(f"consonants: {consonants[idx]}")
    print(f"vowels: {vowels[idx]}")
    print(f"glides: {glides[idx]}")
    print(f"nasals: {nasals[idx]}\n")

weirdness: 0.0
consonants: ['b', 'd', 'f', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's', 't̠', 'w', 'x', 'ŋ', 'ɡ', 'ɹ', 'ʎ']
vowels: ['i', 'u', 'ə']
glides: ['j', 'w', 'ɹ']
nasals: ['m', 'n', 'ŋ']

weirdness: 0.5
consonants: ['b', 'd', 'f', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's', 't', 'ts', 'w', 'z', 'ŋ', 'ɡ', 'ɾ', 'ʃ']
vowels: ['a', 'e', 'i', 'o', 'u', 'ə', 'ɛ']
glides: ['j', 'w']
nasals: ['m', 'n', 'ŋ']

weirdness: 1.0
consonants: ['b', 'd', 'f', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's', 't', 'ts', 'v', 'w', 'z', 'ŋ', 'ɟ', 'ɡ', 'ɲ', 'ɾ', 'ʃ', 'ʔ']
vowels: ['a', 'e', 'i', 'o', 'u', 'ə', 'ɛ']
glides: ['j', 'w']
nasals: ['m', 'n', 'ŋ', 'ɲ']



In [115]:
# Annotated word list; the first CSV column becomes the index.
# NOTE(review): `list` is not a standard pandas dtype - confirm that the "usage"
# mapping below does what is intended.
word_list_df = pd.read_csv(
    "./data/uld/annotated_word_list.csv",
    index_col=[0],
    dtype={"usage": list},
    encoding="utf-8",
    engine="python",
)

# Keep only rows flagged in at least one of the v / n / adj / adv columns, and
# expose the "word" column under the name "meaning".
_pos_columns = ["v", "n", "adj", "adv"]
_tagged = word_list_df[_pos_columns].eq(1).any(axis=1)
word_list = word_list_df.loc[_tagged, ["word", "topic", *_pos_columns]].rename(
    columns={"word": "meaning"}
)

word_list.head(5)

Unnamed: 0,meaning,topic,v,n,adj,adv
27,be,function words,True,False,False,False
28,become,function words,True,False,False,False
29,cause,function words,True,False,False,False
30,make,function words,True,False,False,False
31,do,function words,True,False,False,False


In [121]:
# For three target weirdness levels, build a Lexicon from the generated language
# whose weirdness score is closest to the target, then print a sample syllable,
# a sample word, and the full generated lexicon.
# NOTE(review): the printed samples look randomly generated, so the output will
# presumably differ between runs unless conlanger is seeded - TODO confirm.
# NOTE(review): printing the whole lexicon produces a very large cell output.
for x in [0.1, 0.5, 0.9]:
    # index of the generated language whose weirdness is nearest to the target x
    idx = np.abs(weirdness - x).argmin()
    ss = SyllableStructures()
    # the structure is picked from the target x, not from weirdness[idx]
    structure = ss.pick(x)
    # prints the target x (the inventory cell above prints weirdness[idx] instead)
    print(f"weirdness: {x}")
    print(f"{structure}")

    # the word list is converted to a fresh list of row dicts on every iteration
    lex = Lexicon(
        syllable_structure=structure,
        word_list=word_list.to_dict(
            orient="records"
        ),
        consonants=consonants[idx],
        vowels=vowels[idx],
        glides=glides[idx],
        nasals=nasals[idx],
    )

    syllable = lex.create_syllable()
    print(f"syllable: {syllable}")

    word = lex.create_word()
    print(f"word: {word}")

    lexicon = lex.create()
    print(f"lexicon: {str(lexicon)}")

    print("\n")

weirdness: 0.1
(C)V(C)(C)
syllable: mimj
word: ə
lexicon: ŋaɹa,alt,əsuh,tənp,ləl,uluʎ,bit,hifw,ujhnitsib,ɹifidə,malr,bəunts,idbaɹɡif,akuŋts,ɡaf,aə,a,ind,ar,kiwsijtsŋəm,il,jius,əsr,hifŋjəbi,iwətnwuŋ,ətsdal,uʎ,ʎiats,paiɹnimm,sirpəw,uwɹu,ŋuhfət,tsəmrbuwafb,ikt,tapə,aɹfiŋkiɹ,fiwrifu,manɡ,uhddaɹŋəw,ulpəɹɡ,wiid,pib,pifhəʎik,nuwɡ,ləsəkaʎr,kuulʎəsb,iʎ,wuap,ij,idaspa,abutsəlts,mibhats,rusniŋəwɡ,wiŋ,nudj,ɡipaŋpaj,tsəs,itmsifts,wukjəts,ɹətə,wətuiɡ,jəpp,faŋ,in,nitu,ubkbin,ub,təwwi,ut,ŋaʎiə,ajil,liɡibfi,ikljakhaɹ,tsan,amhɡəkdlə,iŋf,ləpipfap,i,unuɡfaw,uiʎf,ud,unkikbi,kəɹn,i,fiwjif,ɹurimɡhəɡ,təwfək,uɹf,həsəd,rad,aɡu,siɡbumŋə,baliwf,kubd,ɹiwjdaad,pilɹiɹ,ŋitbɡas,ŋaɡjuɹ,amiŋ,bitət,uəbə,an,tsip,af,ədkəp,uŋutt,ibəb,isŋə,dishuut,auʎur,luŋɹu,itməɹtsku,ij,uɹwum,fi,ər,uswfaŋdənf,mədal,timatsddaŋ,əkduə,tsəɹɡ,aŋitjtsat,i,uɡpəɡdis,əɡɡuw,pabujts,ur,fəfəʎətɹ,uməhɹus,publwab,kanhəwp,ak,aibʎup,anah,əɹhtaəhh,ufifna,tsəwlfə,hihfər,ru,aisəd,asbəktat,usm,əɡ,əbhlidɹun,islmən,wihŋtsatj,ilifʎ,ə,ɹəwrəŋ,uahuwk,umat,ʎəl,jajl,