# English: sound test and plot
- This notebook produces the final output; its results are the ones to be reported in the final paper. 
- For each trial, a pair or group of sounds is plotted in a 3D space and the pairwise Mahalanobis distances are calculated. Hotelling's T-squared test is also conducted for each pair, although its result is significant in most cases. 

## Preparation

### Connect to Drive

In [1]:
# Mount Google Drive so that the project files stored there are readable
# under /content/drive. This cell only works inside Google Colab; replace it
# with the appropriate storage setup when running on an HPC cluster or locally.
from google.colab import drive
drive.mount('/content/drive')

# Project directory inside the mounted Drive.
# NOTE(review): `main_dir` holds the same path as `working_dir` defined in the
# "Define paths to be used" cell and is not referenced in the visible part of
# this notebook.
main_dir = '/content/drive/My Drive/FeatureLearning/'

Mounted at /content/drive


### Import Libs

In [2]:
! pip install pingouin

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pingouin
  Downloading pingouin-0.5.3-py3-none-any.whl (198 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.6/198.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting pandas-flavor>=0.2.0
  Downloading pandas_flavor-0.5.0-py3-none-any.whl (7.1 kB)
Collecting outdated
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: littleutils
  Building wheel for littleutils (setup.py) ... [?25l[?25hdone
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7048 sha256=c88b98621d54003af1769c7d0f3c384604f64175854892193d06a57ca43883e9
  Stored in directory: /root/.cache/pip/wheels/04/bb/0d/2d02ec45f29c48d6192476bfb59c5a0e64b605e7212374dd15
Successfully built littleut

In [3]:
import numpy as np
import random
import pickle
import scipy.stats as st
from itertools import combinations
from scipy.spatial import distance
import matplotlib.pyplot as plt
import pingouin as pg
import pandas as pd
import plotly.express as px
import torch

### Define paths to be used

In [4]:
#################################################################
#                      Directory layout                         #
#################################################################

# Colab runtime root and the mount point of the user's Google Drive.
root_dir = "/content/"
base_dir = "/content/drive/My Drive/"

# Everything belonging to this project lives under one Drive folder.
working_dir = f"{base_dir}FeatureLearning/"

# Inputs: pickled model outputs and the symbol-to-IPA spreadsheet.
save_dir = f"{working_dir}modelsave/english/"
translator_name = f"{working_dir}mapper/english_to_feats.xlsx"

# Outputs: the HTML reports and test tables written by the cells below.
data_out_dir = f"{working_dir}plotresults/english/"

### Read 
Read in the model-generated hidden representations of the unseen chunks and their corresponding tags. 

In [5]:
# Pickled model outputs for the unseen chunks: the hidden representations and
# the matching tags, one file each, both located under save_dir.
hidden_name = save_dir + "english_sounddist_hidden_mt.pkl"
tags_name = save_dir + "english_sounddist_tags_mt.pkl"

# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so these two files must come from a trusted source.
with open(hidden_name, 'rb') as f:
    hiddens = pickle.load(f)
with open(tags_name, 'rb') as f:
    tags = pickle.load(f)

### Functions
Define functions and classes to be used for plotting and evaluating the sounds' hidden representations

In [6]:
def random_get(hids, tags, intend=300):
    """Down-sample a pool of tokens to at most ``intend`` items.

    Args:
        hids: 2-D array with one row per token.
        tags: Per-token labels, aligned with the rows of ``hids``.
        intend: Number of tokens to keep.

    Returns:
        ``(hids, tags)`` untouched when the pool holds fewer than ``intend``
        tokens; otherwise ``intend`` rows drawn without replacement, with the
        same row indices applied to both inputs so they stay aligned.
    """
    n_available, _ = hids.shape  # the unpacking also insists on a 2-D input
    if n_available >= intend:
        picked = np.random.choice(n_available, intend, replace=False)
        return np.take(hids, picked, axis=0), np.take(tags, picked, axis=0)
    return hids, tags

In [7]:
def get_segs_and_tags(seg, hiddens, tags):
    """Select the tokens labelled ``seg``.

    Args:
        seg: Label value to look for.
        hiddens: Array whose first axis runs over tokens.
        tags: Array of labels, compared element-wise against ``seg``.

    Returns:
        Tuple ``(hiddens_of_seg, tags_of_seg)`` holding the entries of both
        inputs at the positions where ``tags == seg``.
    """
    matching_rows = np.where(tags == seg)[0]
    picked_hiddens = np.take(hiddens, matching_rows, axis=0)
    picked_tags = np.take(tags, matching_rows, axis=0)
    return picked_hiddens, picked_tags

In [8]:
def dist_calc(a, b):
    """Mahalanobis distance between the centroids of two samples.

    The metric is the inverse of the unweighted mean of the two per-sample
    covariance matrices.

    Args:
        a: Sample array, observations in rows and dimensions in columns.
        b: Second sample with the same number of columns.

    Returns:
        The distance between the column-wise means of ``a`` and ``b``.

    Raises:
        numpy.linalg.LinAlgError: If the averaged covariance is singular.
    """
    cov_a = np.cov(a.T)
    cov_b = np.cov(b.T)
    inverse_mean_cov = np.linalg.inv((cov_a + cov_b) / 2)
    centroid_a = np.average(a, axis=0)
    centroid_b = np.average(b, axis=0)
    return distance.mahalanobis(centroid_a, centroid_b, inverse_mean_cov)

In [9]:
def maha_dist(a, b):
    """Mahalanobis distance between two sample centroids, pooled metric.

    The covariance is estimated once from all rows of ``a`` and ``b`` stacked
    together, and its inverse is used as the metric.

    Args:
        a: Sample array, observations in rows and dimensions in columns.
        b: Second sample with the same number of columns.

    Returns:
        The distance between the column-wise means of ``a`` and ``b``.

    Raises:
        numpy.linalg.LinAlgError: If the pooled covariance is singular.
    """
    stacked = np.concatenate((a, b), axis=0)
    pooled_cov = np.cov(stacked.T)
    inverse_cov = np.linalg.inv(pooled_cov)
    mean_a, mean_b = np.average(a, axis=0), np.average(b, axis=0)
    return distance.mahalanobis(mean_a, mean_b, inverse_cov)

In [10]:
def tests(a, b):
    """Run both pairwise comparisons on two samples.

    Args:
        a: First sample (observations in rows).
        b: Second sample.

    Returns:
        Tuple ``(t2, dist)``: the result of ``pg.multivariate_ttest(a, b)``
        and the value of ``dist_calc(a, b)``.
    """
    return pg.multivariate_ttest(a, b), dist_calc(a, b)


In [11]:
def a_bunch_of_tests(bunch, names):
    """Apply ``tests`` to every unordered pair of samples.

    Args:
        bunch: Sequence of sample arrays.
        names: Labels for the samples, indexed like ``bunch``.

    Returns:
        Dict keyed by ``(names[i], names[j])`` with ``i < j`` whose values are
        the ``(t2, dist)`` tuples returned by ``tests``.
    """
    return {
        (names[i], names[j]): tests(sample_i, sample_j)
        for (i, sample_i), (j, sample_j) in combinations(enumerate(bunch), 2)
    }

In [12]:
def pca_ttest(Xt, y, divs):
    """Independent-samples t-tests between every pair of categories.

    Args:
        Xt: Values to compare, first axis over tokens.
        y: Numeric category index of each token (its position in ``divs``).
        divs: Category names.

    Returns:
        Dict keyed by ``(divs[i], divs[j])`` with ``i < j`` holding the
        corresponding ``st.ttest_ind`` result.
    """
    per_category = sep_them(Xt, y, divs)
    return {
        (divs[i], divs[j]): st.ttest_ind(per_category[i], per_category[j])
        for i, j in combinations(range(len(divs)), 2)
    }


In [13]:
def print_ress(ress):
    """Flatten pairwise test results into one table.

    Args:
        ress: Mapping ``(name_i, name_j) -> (t2, dist)`` as produced by
            ``a_bunch_of_tests``. Each ``t2`` must be a one-row DataFrame.

    Returns:
        A DataFrame with one row per pair: columns ``i`` and ``j`` first, then
        the columns of ``t2``, then ``dist``. ``None`` when ``ress`` is empty.
    """
    rows = []
    for pair, (t2_ori, dist) in ress.items():
        t2 = t2_ori.copy()  # leave the caller's frames untouched
        t2.insert(0, "j", [pair[1]], allow_duplicates=True)
        t2.insert(0, "i", [pair[0]], allow_duplicates=True)
        t2.insert(len(t2.columns), "dist", [dist], allow_duplicates=True)
        rows.append(t2)
    # A single concat at the end avoids re-copying the growing table once per
    # pair, which the previous in-loop concat did.
    return pd.concat(rows) if rows else None

In [14]:
def get_these_data(hiddens, tags, these, intend, test=False):
    """Collect (and optionally test) the tokens of several categories.

    For each label in ``these`` the matching tokens are pulled with
    ``get_segs_and_tags`` and capped with ``random_get``.

    Args:
        hiddens: Hidden representations, first axis over tokens.
        tags: Label of each token.
        these: Labels to include; a label's position becomes its numeric tag.
        intend: Per-label cap passed on to ``random_get``.
        test: When true, also run ``a_bunch_of_tests`` on the per-label
            samples.

    Returns:
        Tuple ``(hids, numtags, ress, nums)``: the selected representations
        stacked in the order of ``these``, an array with the numeric tag of
        each stacked row, the pairwise test results (``None`` unless ``test``)
        and the number of tokens kept per label.
    """
    index_of = {label: position for position, label in enumerate(these)}
    per_label_hids, per_label_tags, counts = [], [], []
    for label in these:
        label_hids, label_tags = get_segs_and_tags(label, hiddens, tags)
        label_hids, label_tags = random_get(label_hids, label_tags, intend=intend)
        per_label_hids.append(label_hids)
        per_label_tags.append(label_tags)
        counts.append(label_tags.shape[0])

    stacked_hids = np.concatenate(per_label_hids, axis=0)
    stacked_tags = np.concatenate(per_label_tags, axis=0)
    numtags = [index_of[tag] for tag in stacked_tags]
    ress = a_bunch_of_tests(per_label_hids, these) if test else None
    return stacked_hids, np.array(numtags), ress, counts


In [15]:
def get_these_data_by_group(hiddens, tags, these_groups, group_names, intend, test=False, control_total=False):
    """Collect (and optionally test) tokens pooled into named groups.

    Each group is gathered with ``get_these_data`` and every token in it is
    relabelled with the group's name.

    Args:
        hiddens: Hidden representations, first axis over tokens.
        tags: Label of each token.
        these_groups: One list of labels per group, indexed like
            ``group_names``.
        group_names: Names of the groups; a name's position becomes its
            numeric tag.
        intend: Cap on the number of tokens per label.
        test: When true, also run ``a_bunch_of_tests`` on the per-group
            samples.
        control_total: When true, the per-label cap becomes
            ``intend // len(group)``.

    Returns:
        Tuple ``(hids, numtags, ress, nums)``: the stacked representations, an
        array with the numeric group tag of each row, the pairwise test
        results (``None`` unless ``test``) and the number of tokens per group.
    """
    index_of = {name: position for position, name in enumerate(group_names)}
    per_group_hids, per_group_tags, counts = [], [], []
    for position, name in enumerate(group_names):
        members = these_groups[position]
        per_label_cap = intend // len(members) if control_total else intend
        group_hids, member_numtags, _, _ = get_these_data(
            hiddens, tags, members, per_label_cap, test=False
        )
        # Replace the within-group numeric tags by the group's own name.
        relabelled = np.full(member_numtags.shape, name)
        per_group_hids.append(group_hids)
        per_group_tags.append(relabelled)
        counts.append(relabelled.shape[0])

    stacked_hids = np.concatenate(per_group_hids, axis=0)
    stacked_tags = np.concatenate(per_group_tags, axis=0)
    numtags = [index_of[tag] for tag in stacked_tags]
    ress = a_bunch_of_tests(per_group_hids, group_names) if test else None
    return stacked_hids, np.array(numtags), ress, counts


In [16]:
def sep_them(these_hids, these_numtags, these):
    """Split stacked representations back into one array per category.

    Args:
        these_hids: Representations, first axis over tokens.
        these_numtags: Numeric tag of each token.
        these: Category list; only its length is used.

    Returns:
        List with ``len(these)`` arrays; entry ``i`` holds the rows whose
        numeric tag equals ``i``.
    """
    return [
        get_segs_and_tags(index, these_hids, these_numtags)[0]
        for index in range(len(these))
    ]

In [17]:
def minmax(arr, a=-1, b=1):
    """Linearly rescale ``arr`` so that its values span ``[a, b]``.

    Args:
        arr: NumPy array or pandas Series of numbers.
        a: Value the minimum of ``arr`` is mapped to.
        b: Value the maximum of ``arr`` is mapped to.

    Returns:
        The rescaled values, with the same shape as ``arr``. When every value
        of ``arr`` is identical the range is zero; each element is then mapped
        to the midpoint ``(a + b) / 2`` instead of the NaN that a 0/0 division
        would produce.
    """
    # Local names deliberately avoid shadowing the builtins min() and max().
    lo = arr.min()
    hi = arr.max()
    span = hi - lo
    if span == 0:
        # (arr - lo) * 0.0 keeps the container type and shape of the input.
        return (arr - lo) * 0.0 + (a + b) / 2
    return (b - a) * ((arr - lo) / span) + a

In [18]:
def operate_on(arr):
    """Normalise one column for plotting: ``minmax`` with its default bounds."""
    scaled = minmax(arr)
    return scaled

In [19]:
def framify(these_hids, these_numtags, these, t=None):
    """Build the plotting DataFrame for individual phones.

    Args:
        these_hids: Representations, one row per token; columns 0-2 are used.
        these_numtags: Numeric tag per row, i.e. an index into ``these``.
        these: Category labels.
        t: Optional object whose ``translate`` method is applied to each label
            to fill the ``IPA`` column; when falsy the labels are used as is.

    Returns:
        DataFrame with the columns of ``these_hids`` (the first three renamed
        ``dim_0``..``dim_2``), their rescaled ``dim_*_norm`` versions, and the
        columns ``Numtag``, ``Tag``, ``IPA`` and ``Size`` (constant 2).
    """
    frame = pd.DataFrame(data=these_hids).rename(
        columns={0: "dim_0", 1: "dim_1", 2: "dim_2"}
    )
    for raw, scaled in (
        ("dim_0", "dim_0_norm"),
        ("dim_1", "dim_1_norm"),
        ("dim_2", "dim_2_norm"),
    ):
        frame[scaled] = operate_on(frame[raw])

    frame["Numtag"] = these_numtags
    labels = [these[index] for index in these_numtags]
    frame["Tag"] = labels
    frame["IPA"] = [t.translate(label) for label in labels] if t else labels
    frame["Size"] = [2] * len(labels)
    return frame

In [20]:
def framify_group(these_hids, these_numtags, these, t=None):
    """Build the plotting DataFrame for groups of phones.

    Args:
        these_hids: Representations, one row per token; columns 0-2 are used.
        these_numtags: Numeric tag per row, i.e. an index into ``these``.
        these: Group names.
        t: Accepted for signature compatibility; not used here.

    Returns:
        DataFrame with the columns of ``these_hids`` (the first three renamed
        ``dim_0``..``dim_2``), their rescaled ``dim_*_norm`` versions, and the
        columns ``Numtag``, ``Tag`` and ``Size`` (constant 2).
    """
    frame = pd.DataFrame(data=these_hids).rename(
        columns={0: "dim_0", 1: "dim_1", 2: "dim_2"}
    )
    for raw, scaled in (
        ("dim_0", "dim_0_norm"),
        ("dim_1", "dim_1_norm"),
        ("dim_2", "dim_2_norm"),
    ):
        frame[scaled] = operate_on(frame[raw])

    frame["Numtag"] = these_numtags
    labels = [these[index] for index in these_numtags]
    frame["Tag"] = labels
    frame["Size"] = [2] * len(labels)
    return frame

In [21]:
def plot_scat(these_hids, these_numtags, these, x=0, y=1):
    """Show a 2-D scatter of two columns of ``these_hids``, coloured by tag.

    Args:
        these_hids: 2-D array, one row per token.
        these_numtags: Numeric tag per row, used as the colour value.
        these: Legend labels, one per category.
        x: Column plotted on the horizontal axis.
        y: Column plotted on the vertical axis.
    """
    plt.figure(figsize=(24, 16))
    points = plt.scatter(
        these_hids[:, x], these_hids[:, y], c=these_numtags, alpha=0.5
    )
    plt.xlabel(f"PC-{x}", fontsize=24)
    plt.ylabel(f"PC-{y}", fontsize=24)
    legend_handles = points.legend_elements()[0]
    plt.legend(handles=legend_handles, labels=these, prop={'size': 24})
    plt.title("Scatter plot of sounds along principle components", loc='center', fontsize=36)
    plt.show()

In [22]:
def plot_dense(these_hids, these_numtags, these, x=0):
    """Show overlaid kernel-density curves of one column, one per category.

    Args:
        these_hids: 2-D array, one row per token.
        these_numtags: Numeric tag per row (index into ``these``).
        these: Legend labels, one per category.
        x: Column whose density is drawn.
    """
    sep_these = sep_them(these_hids, these_numtags, these)
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    plt.figure(figsize=(24, 16))
    xs = np.linspace(-0.75, 0.75, 300)  # same evaluation grid for every curve
    plots = []
    for idx, this in enumerate(sep_these):
        # `gaussian_kde` was never imported by name in this notebook; reach it
        # through the `st` (scipy.stats) alias. A scalar bw_method is used
        # directly as the covariance factor, which replaces the former
        # override of covariance_factor plus the private _compute_covariance().
        density = st.gaussian_kde(this[:, x], bw_method=0.25)
        # Wrap around the colour cycle when there are more categories than
        # colours.
        color = colors[idx % len(colors)]
        plot = plt.fill_between(xs, density(xs), color=color, alpha=0.4)
        plots.append(plot)
    plt.legend(plots,
        these, prop={'size': 24})
    plt.xlabel('PC-' + str(x), fontsize=24)
    plt.title("Density of sounds along a principle component", loc='center', fontsize=36)
    plt.show()

In [23]:
def plot3d(X, y, these, t=None):
    """Render the tokens as an interactive 3-D scatter and return it as HTML.

    Args:
        X: Representations, one row per token (first three columns plotted
            after normalisation by ``framify``).
        y: Numeric tag per row (index into ``these``).
        these: Category labels.
        t: Optional translator forwarded to ``framify`` for the ``IPA`` column,
            which colours the points.

    Returns:
        The figure as an HTML fragment (``full_html=False``).
    """
    # Settings for the plot's "download image" button.
    export_options = {
        'toImageButtonOptions': {
            'format': 'png',  # one of png, svg, jpeg, webp
            'filename': 'custom_image',
            'height': 1280,
            'width': 1280,
            'scale': 1,  # Multiply title/legend/axis/canvas sizes by this factor
        }
    }

    frame = framify(X, y, these, t)
    fig = px.scatter_3d(
        frame, x="dim_0_norm", y="dim_1_norm", z="dim_2_norm", color='IPA'
    )
    fig.update_traces(marker=dict(size=2), selector=dict(mode='markers'))

    # All three axes get the same tick count and the fixed range [-1, 1].
    scene_axes = {
        axis_name: dict(nticks=8, range=[-1, 1])
        for axis_name in ("xaxis", "yaxis", "zaxis")
    }
    fig.update_layout(scene=scene_axes)

    # Legend: constant marker size, titled, boxed, anchored at x=0 / y=1.
    fig.update_layout(legend={'itemsizing': 'constant'})
    fig.update_layout(legend_title_text='Phone')
    legend_font = dict(family="Times New Roman", size=36, color="black")
    fig.update_layout(
        legend=dict(
            x=0,
            y=1,
            title_font_family="Times New Roman",
            font=legend_font,
            bordercolor="Black",
            borderwidth=1,
        )
    )

    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    fig.update_layout(scene_camera=dict(eye=dict(x=0., y=0., z=2.5)))
    return fig.to_html(full_html=False, config=export_options)

In [24]:
def plot3dGroup(X, y, these, t=None):
    """Render grouped tokens as an interactive 3-D scatter, returned as HTML.

    Args:
        X: Representations, one row per token (first three columns plotted
            after normalisation by ``framify_group``).
        y: Numeric group tag per row (index into ``these``).
        these: Group names; they colour the points via the ``Tag`` column.
        t: Forwarded to ``framify_group``.

    Returns:
        The figure as an HTML fragment (``full_html=False``).
    """
    # Settings for the plot's "download image" button.
    export_options = {
        'toImageButtonOptions': {
            'format': 'png',  # one of png, svg, jpeg, webp
            'filename': 'custom_image',
            'height': 1280,
            'width': 1280,
            'scale': 1,  # Multiply title/legend/axis/canvas sizes by this factor
        }
    }

    frame = framify_group(X, y, these, t)
    fig = px.scatter_3d(
        frame, x="dim_0_norm", y="dim_1_norm", z="dim_2_norm", color='Tag'
    )
    fig.update_traces(marker=dict(size=2), selector=dict(mode='markers'))

    # All three axes get the same tick count and the fixed range [-1, 1].
    scene_axes = {
        axis_name: dict(nticks=8, range=[-1, 1])
        for axis_name in ("xaxis", "yaxis", "zaxis")
    }
    fig.update_layout(scene=scene_axes)

    # Legend: constant marker size, titled, boxed, anchored at x=0 / y=1.
    fig.update_layout(legend={'itemsizing': 'constant'})
    fig.update_layout(legend_title_text='Class')
    legend_font = dict(family="Times New Roman", size=36, color="black")
    fig.update_layout(
        legend=dict(
            x=0,
            y=1,
            title_font_family="Times New Roman",
            font=legend_font,
            bordercolor="Black",
            borderwidth=1,
        )
    )

    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    fig.update_layout(scene_camera=dict(eye=dict(x=0., y=0., z=2.5)))
    return fig.to_html(full_html=False, config=export_options)

In [25]:
def save_table(total_out_file, outcome, nums, these):
    """Export a result table to Excel and append it to a running text log.

    Args:
        total_out_file: Path of the text log; opened in append mode.
        outcome: DataFrame to export.
        nums: Token counts, written on the "Counts" line.
        these: Labels; joined without separator to name the section and the
            Excel file (``test_<labels>.xlsx`` under ``data_out_dir``).
    """
    these_in_name = "".join(these)
    outcome.to_excel("{}test_{}.xlsx".format(data_out_dir, these_in_name))
    with open(total_out_file, "a", encoding="utf-8") as log:
        header = "{title:-^64}\n".format(title=these_in_name)
        counts_line = "Counts: {}\n".format(" | ".join(map(str, nums)))
        log.writelines((header, counts_line, outcome.to_string(), "\n\n"))

In [26]:
def pairwise_distance_stats(df):
    """Summarise the ``dist`` column of a pairwise-results table.

    Args:
        df: DataFrame with a ``dist`` column.

    Returns:
        Tuple ``(mean, median, standard deviation, maximum, minimum)``,
        obtained by applying ``np.mean``, ``np.median``, ``np.std``,
        ``np.max`` and ``np.min`` to the column.
    """
    distances = df['dist']
    summaries = (np.mean, np.median, np.std, np.max, np.min)
    return tuple(summarise(distances) for summarise in summaries)

In [27]:
def pairwise_distance_table(avg_dist, med_dist, sd_dist, max_dist, min_dist):
    """Format five summary statistics as a two-column HTML table.

    Args:
        avg_dist: Average pairwise distance.
        med_dist: Median pairwise distance.
        sd_dist: Standard deviation of the pairwise distances.
        max_dist: Maximum pairwise distance.
        min_dist: Minimum pairwise distance.

    Returns:
        HTML source of the table; every value is shown with two decimals.
    """
    labelled_values = (
        ("Average pairwise distance:", avg_dist),
        ("Median pairwise distance:", med_dist),
        ("Standard deviation of pairwise distances:", sd_dist),
        ("Maximum pairwise distance:", max_dist),
        ("Minimum pairwise distance:", min_dist),
    )
    pieces = [
        "<table border='1'>\n",
        "<tr><th>Statistic</th><th>Value</th></tr>\n",
    ]
    pieces.extend(
        "<tr><td>{}</td><td>{:.2f}</td></tr>\n".format(label, value)
        for label, value in labelled_values
    )
    pieces.append("</table>\n")
    return "".join(pieces)

In [28]:
class Maker:
    """Builds the HTML report (test table plus 3-D plot) for a list of phones."""

    def __init__(self, hiddens, tags, translator=None):
        """Store the data to draw from.

        Args:
            hiddens: Hidden representations, first axis over tokens.
            tags: Label of each token.
            translator: Optional object with a ``translate(symbol)`` method
                used to show IPA; without it the raw symbols are shown.
        """
        self.hiddens = hiddens
        self.tags = tags
        self.translator = translator

    def get_the_things(self, these, intend=3000):
        """Test and plot the phones in ``these`` and write the HTML report.

        The report is written to ``data_out_dir`` as ``<labels joined by
        "_">.html`` and the per-phone token counts are printed.

        Args:
            these: Phone labels to compare.
            intend: Cap on the number of tokens sampled per phone.
        """
        # The constructor allows translator=None, so fall back to the raw
        # symbols instead of failing on `None.translate`.
        if self.translator is None:
            these_ipas = list(these)
        else:
            these_ipas = list(map(self.translator.translate, these))
        these_in_name = "_".join(these)
        outname = "{}{}.html".format(data_out_dir, these_in_name)

        X, y, ress, nums = get_these_data(self.hiddens, self.tags, these, intend=intend, test=True)
        print(nums)
        outtable = print_ress(ress).to_html()
        outhtml = plot3d(X, y, these, t=self.translator)

        # The page declares UTF-8 and contains IPA characters, so the file
        # must be written as UTF-8 regardless of the platform's default.
        with open(outname, "w", encoding="utf-8") as f:
            f.write('<meta charset="UTF-8">')
            f.write("<h3>Phones: {}</h3>".format(", ".join(these)))
            f.write("<h3>IPA: {}</h3>".format(", ".join(these_ipas)))
            f.write("<h3>Counts: {}</h3>".format(", ".join(map(str, nums))))
            f.write("<hr>")
            f.write(outtable)
            f.write("<hr>")
            f.write(outhtml)

In [29]:
class MakerStat:
    """Report builder that also summarises the pairwise distances."""

    def __init__(self, hiddens, tags, translator=None):
        """Store the data to draw from.

        Args:
            hiddens: Hidden representations, first axis over tokens.
            tags: Label of each token.
            translator: Optional object with a ``translate(symbol)`` method
                used to show IPA; without it the raw symbols are shown.
        """
        self.hiddens = hiddens
        self.tags = tags
        self.translator = translator

    def get_the_things(self, these, intend=3000):
        """Test and plot the phones in ``these`` and write the HTML report.

        The report (test table, distance summary table, 3-D plot) is written
        to ``data_out_dir`` as ``<labels joined by "_">.html`` and the
        per-phone token counts are printed.

        Args:
            these: Phone labels to compare.
            intend: Cap on the number of tokens sampled per phone.

        Returns:
            The pairwise results table built by ``print_ress``.
        """
        # The constructor allows translator=None, so fall back to the raw
        # symbols instead of failing on `None.translate`.
        if self.translator is None:
            these_ipas = list(these)
        else:
            these_ipas = list(map(self.translator.translate, these))
        these_in_name = "_".join(these)
        outname = "{}{}.html".format(data_out_dir, these_in_name)

        X, y, ress, nums = get_these_data(self.hiddens, self.tags, these, intend=intend, test=True)
        print(nums)
        pr = print_ress(ress)
        outtable = pr.to_html()
        # Local names avoid shadowing the builtins max() and min().
        avg_dist, med_dist, sd_dist, max_dist, min_dist = pairwise_distance_stats(pr)
        outstattable = pairwise_distance_table(avg_dist, med_dist, sd_dist, max_dist, min_dist)
        outhtml = plot3d(X, y, these, t=self.translator)

        # The page declares UTF-8 and contains IPA characters, so the file
        # must be written as UTF-8 regardless of the platform's default.
        with open(outname, "w", encoding="utf-8") as f:
            f.write('<meta charset="UTF-8">')
            f.write("<h3>Phones: {}</h3>".format(", ".join(these)))
            f.write("<h3>IPA: {}</h3>".format(", ".join(these_ipas)))
            f.write("<h3>Counts: {}</h3>".format(", ".join(map(str, nums))))
            f.write("(Test table using orthography, plot legend using IPA)")
            f.write("<hr>")
            f.write(outtable)
            f.write("<hr>")
            f.write(outstattable)
            f.write("<hr>")
            f.write(outhtml)
        return pr

In [41]:
def to_t_test(df1, df2):
    """Compare the ``dist`` columns of two tables with ``st.ttest_ind``.

    Args:
        df1: DataFrame with a ``dist`` column.
        df2: DataFrame with a ``dist`` column.

    Returns:
        Tuple ``(t_stat, p_value)``.
    """
    outcome = st.ttest_ind(df1['dist'], df2['dist'])
    return outcome.statistic, outcome.pvalue

In [43]:
class MakerGroup:
    """Report builder that compares named groups of phones."""

    def __init__(self, hiddens, tags, translator=None):
        """Store the data to draw from.

        Args:
            hiddens: Hidden representations, first axis over tokens.
            tags: Label of each token.
            translator: Optional object with a ``translate(symbol)`` method
                used to show IPA; without it the raw symbols are shown.
        """
        self.hiddens = hiddens
        self.tags = tags
        self.translator = translator

    def get_the_things(self, groupnames, these, intend=3000):
        """Test and plot the groups and write the HTML report.

        The report is written to ``data_out_dir`` as ``<group names joined by
        "_">.html`` and the per-group token counts are printed.

        Args:
            groupnames: Names of the groups.
            these: One list of phone labels per group, indexed like
                ``groupnames``.
            intend: Cap on the number of tokens sampled per phone.
        """
        # The constructor allows translator=None, so fall back to the raw
        # symbols instead of failing on `None.translate`.
        if self.translator is None:
            translate = lambda symbol: symbol
        else:
            translate = self.translator.translate
        # One "/a/, /b/; " chunk per group.
        to_print_these_ipas = "".join(
            "/" + "/, /".join(map(translate, group)) + "/; " for group in these
        )

        these_in_name = "_".join(groupnames)
        outname = "{}{}.html".format(data_out_dir, these_in_name)

        X, y, ress, nums = get_these_data_by_group(self.hiddens, self.tags, these, groupnames, intend=intend, test=True)
        print(nums)
        outtable = print_ress(ress).to_html()
        outhtml = plot3dGroup(X, y, groupnames, t=self.translator)

        # The page declares UTF-8 and contains IPA characters, so the file
        # must be written as UTF-8 regardless of the platform's default.
        with open(outname, "w", encoding="utf-8") as f:
            f.write('<meta charset="UTF-8">')
            f.write("<h3>Phones: {}</h3>".format(", ".join(groupnames)))
            f.write("<h3>IPA: {}</h3>".format(to_print_these_ipas))
            f.write("<h3>Counts: {}</h3>".format(", ".join(map(str, nums))))
            # NOTE(review): plot3dGroup colours by the group name ("Tag"), not
            # by IPA, so this sentence may not describe the group plot.
            f.write("(Test table using orthography, plot legend using IPA)")
            f.write("<hr>")
            f.write(outtable)
            f.write("<hr>")
            f.write(outhtml)

In [44]:
class Translator:
    """Maps phone symbols to IPA using the ``symbol``/``ipa`` spreadsheet columns."""

    def __init__(self, filename):
        """Read the mapping from the Excel file ``filename``.

        Attributes set: ``symbols`` and ``ipa`` (row index -> cell value for
        the respective column) and ``locator`` (symbol -> row index).
        """
        table = pd.read_excel(filename)[["symbol", "ipa"]].to_dict()
        self.symbols, self.ipa = table["symbol"], table["ipa"]
        self.locator = {symbol: row for row, symbol in self.symbols.items()}

    def translate(self, s):
        """Return the IPA entry for symbol ``s``, or ``"0"`` if it is unknown."""
        if s not in self.locator:
            return "0"
        return self.ipa[self.locator[s]]

## Running on different groups

In [45]:
# Load the symbol-to-IPA mapping from the spreadsheet at translator_name.
t = Translator(translator_name)

In [46]:
# Report builder for plain lists of phones (test table + 3-D plot).
mk = Maker(hiddens, tags, translator=t)

In [47]:
# Report builder that additionally writes summary statistics of the pairwise
# distances and returns the results table.
mkst = MakerStat(hiddens, tags, translator=t)

In [48]:
# Report builder that compares named groups of phones.
mkgp = MakerGroup(hiddens, tags, translator=t)

### pairs

In [None]:
# "n" vs "l": at most 3000 tokens sampled per phone; writes n_l.html.
these = ["n", "l"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "d" vs "t": at most 3000 tokens sampled per phone.
these = ["d", "t"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "g" vs "k": at most 3000 tokens sampled per phone.
these = ["g", "k"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "jh" vs "ch" with a cap of 1900 tokens per phone; in the recorded run "jh"
# had only 1809 tokens, so all of them were used.
these = ["jh", "ch"]
mk.get_the_things(these, intend=1900)

[1809, 1900]


In [None]:
# "dh" vs "th": at most 3000 tokens sampled per phone.
these = ["dh", "th"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "zh" vs "sh" with a cap of 500 tokens per phone; in the recorded run "zh"
# had only 461 tokens, so all of them were used.
these = ["zh", "sh"]
mk.get_the_things(these, intend=500)

[461, 500]


In [None]:
# "z" vs "s": at most 3000 tokens sampled per phone.
these = ["z", "s"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "b" vs "p": at most 3000 tokens sampled per phone.
these = ["b", "p"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "v" vs "f": at most 3000 tokens sampled per phone.
these = ["v", "f"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "d" vs "jh" with a cap of 1800 tokens per phone (both reached it in the
# recorded run).
these = ["d", "jh"]
mk.get_the_things(these, intend=1800)

[1800, 1800]


In [None]:
# "t" vs "ch" with a cap of 2000 tokens per phone; in the recorded run "ch"
# had only 1944 tokens, so all of them were used.
these = ["t", "ch"]
mk.get_the_things(these, intend=2000)

[2000, 1944]


In [None]:
# "p" vs "f": at most 3000 tokens sampled per phone.
these = ["p", "f"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "t" vs "th": at most 3000 tokens sampled per phone.
these = ["t", "th"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "t" vs "s": at most 3000 tokens sampled per phone.
these = ["t", "s"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "t" vs "sh" with a cap of 2500 tokens per phone; in the recorded run "sh"
# had only 2413 tokens, so all of them were used.
these = ["t", "sh"]
mk.get_the_things(these, intend=2500)

[2500, 2413]


In [None]:
# "b" vs "v": at most 3000 tokens sampled per phone.
these = ["b", "v"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "d" vs "dh": at most 3000 tokens sampled per phone.
these = ["d", "dh"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "d" vs "z": at most 3000 tokens sampled per phone.
these = ["d", "z"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "d" vs "zh" with a cap of 500 tokens per phone; in the recorded run "zh"
# had only 461 tokens, so all of them were used.
these = ["d", "zh"]
mk.get_the_things(these, intend=500)

[500, 461]


In [None]:
# "sh" vs "ch" with a cap of 2100 tokens per phone; in the recorded run "ch"
# had only 1944 tokens, so all of them were used.
these = ["sh", "ch"]
mk.get_the_things(these, intend=2100)

[2100, 1944]


In [None]:
# "zh" vs "jh" with a cap of 500 tokens per phone; in the recorded run "zh"
# had only 461 tokens, so all of them were used.
these = ["zh", "jh"]
mk.get_the_things(these, intend=500)

[461, 500]


In [None]:
# "b" vs "m": at most 3000 tokens sampled per phone.
these = ["b", "m"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "d" vs "n": at most 3000 tokens sampled per phone.
these = ["d", "n"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


In [None]:
# "g" vs "ng": at most 3000 tokens sampled per phone.
these = ["g", "ng"]
mk.get_the_things(these, intend=3000)

[3000, 3000]


### list

In [None]:
# Three-way comparison of "b", "d" and "g" (every pair is tested); at most
# 3000 tokens sampled per phone.
these = ["b", "d", "g"]
mk.get_the_things(these, intend=3000)

[3000, 3000, 3000]


In [None]:
# Three-way comparison of "p", "t" and "k" (every pair is tested); at most
# 3000 tokens sampled per phone.
these = ["p", "t", "k"]
mk.get_the_things(these, intend=3000)

[3000, 3000, 3000]


In [None]:
# Four-way comparison of "n", "l", "dx" and "r" (every pair is tested); at
# most 3000 tokens sampled per phone.
these = ["n", "l", "dx", "r"]
mk.get_the_things(these, intend=3000)

[3000, 3000, 3000, 3000]


In [None]:
# Three-way comparison of "m", "n" and "ng" (every pair is tested); at most
# 3000 tokens sampled per phone.
these = ["m", "n", "ng"]
mk.get_the_things(these, intend=3000)

[3000, 3000, 3000]


In [None]:
# Four-way comparison of "ae", "eh", "uh" and "ih" (every pair is tested) with
# a cap of 3000 tokens per phone; in the recorded run "uh" had only 2575.
these = ["ae", "eh", "uh", "ih"]
mk.get_the_things(these, intend=3000)

[3000, 3000, 2575, 3000]


### All tokens (for calculating average dist)

In [54]:
# All vowels (monophthongs and diphthongs), capped at 3000 tokens per phone.
# mkst returns the table of pairwise results, kept in `prmd` for the t-tests
# on the distances further down.
these = ['aa', 'ae', 'ay', 'aw', 'ao', 'oy', 'ow', 'eh', 'ey', 'er', 'ah', 'uw', 'uh', 'ih', 'iy']
prmd = mkst.get_the_things(these, intend=3000)

[3000, 3000, 3000, 1803, 3000, 207, 3000, 3000, 3000, 3000, 3000, 3000, 2575, 3000, 3000]


In [55]:
# All monophthongs, capped at 3000 tokens per phone.
# The pairwise results table is kept in `prm` for the t-test further down.
these = ['aa', 'ae', 'ao', 'eh', 'er', 'ah', 'uw', 'uh', 'ih', 'iy']
prm = mkst.get_the_things(these, intend=3000)

[3000, 3000, 3000, 3000, 3000, 3000, 3000, 2575, 3000, 3000]


In [56]:
# All consonants, capped at 3000 tokens per phone.
# The pairwise results table is kept in `prc` for the t-tests further down.
these = ['m', 'n', 'ng', 'l', 'dx', 'nx', 'tq', 't', 'd', 'ch', 'jh', 'th', 'dh', 'sh', 'zh', 's', 'z', 'k', 'g', 'p', 'b', 'f', 'v']
prc = mkst.get_the_things(these, intend=3000)

[3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 1944, 1809, 3000, 3000, 2413, 461, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000]


In [57]:
# Two-sample t-test on the pairwise distances: all vowels vs. all consonants.
# The cell output is the tuple (t statistic, p-value).
to_t_test(prmd, prc)

(-2.6084814404318037, 0.009477675382234936)

In [58]:
# Two-sample t-test on the pairwise distances: monophthongs vs. all
# consonants. The cell output is the tuple (t statistic, p-value).
to_t_test(prm, prc)

(-1.6136229401967692, 0.10767476607335544)

### Features

In [None]:
# Feature comparison: phones pooled into a "+voice" and a "-voice" group.
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
these_groups = [['d', 'jh', 'dh', 'zh', 'z', 'g', 'b', 'v'], ['t', 'ch', 'th', 'sh', 's', 'k', 'p', 'f']]
group_names = ["+voice", "-voice"]
mkgp.get_the_things(group_names, these_groups, intend=3000)

[20270, 22357]


In [None]:
# Feature comparison: vowels pooled into a "front" and a "back" group.
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
these_groups = [["ae", "eh", "ey", "ih", "iy"], ["ao", "oy", "ow", "ah", "uw", "uh"]]
group_names = ["front", "back"]
mkgp.get_the_things(group_names, these_groups, intend=3000)

[15000, 14782]


In [None]:
# Feature comparison: vowels pooled into a "+high" and a "-high" group.
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
these_groups = [["ey", "ih", "iy", "oy", "ow", "uw", "uh"], ["ao", "ah", "ae", "eh"]]
group_names = ["+high", "-high"]
mkgp.get_the_things(group_names, these_groups, intend=3000)

[17782, 12000]


In [None]:
# Feature comparison: consonants pooled into "+strident" and "-strident".
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
# NOTE(review): the "-strident" list has 13 phones, but the recorded count for
# that group is 36000 = 12 x 3000, which suggests one label (possibly "j")
# matches no token in the data - confirm the label spelling.
these_groups = [["s", "z", "sh", "zh", "ch", "jh", "f", "v"], ["t", "g", "k", "d", "b", "p", "hh", "m", "n", "l", "r", "j", "w"]]
group_names = ["+strident", "-strident"]
mkgp.get_the_things(group_names, these_groups, intend=3000)

[18627, 36000]


In [None]:
# Feature comparison: "+nasal" (n, m, ng) vs. "-nasal" (d, b, g).
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
these_groups = [["n", "m", "ng"], ["d", "b", "g"]]
group_names = ["+nasal", "-nasal"]
mkgp.get_the_things(group_names, these_groups, intend=3000)

[9000, 9000]


In [None]:
# Feature comparison: "+delayed release" (ch, jh) vs. "-delayed release"
# (t, d), with a cap of 2000 tokens per phone.
these_groups = [["ch", "jh"], ["t", "d"]]
group_names = ["+delayed release", "-delayed release"]
mkgp.get_the_things(group_names, these_groups, intend=2000)

[3753, 4000]


In [None]:
# Feature comparison: "+labial" (b, p, m) vs. "-labial" (d, t, n).
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
these_groups = [["b", "p", "m"], ["d", "t", "n"]]
group_names = ["+labial", "-labial"]
mkgp.get_the_things(group_names, these_groups, intend=3000)


[9000, 9000]


In [None]:
# Feature comparison: vowels pooled into "+diphthong" and "-diphthong".
# The cap of 3000 tokens applies to each phone, not to the group as a whole.
# NOTE(review): "ay", "aa" and "ae" appear in the all-vowels list above but in
# neither group here - confirm that leaving them out is intended.
these_groups = [["aw", "oy", "ow", "ey"], ["ao", "eh", "er", "ah", "uw", "uh", "ih", "iy"]]
group_names = ["+diphthong", "-diphthong"]
mkgp.get_the_things(group_names, these_groups, intend=3000)


[8010, 23575]
