In [None]:
# Root folder of the results tree; every other path below is built from it.
# NOTE(review): machine-specific absolute path -- adapt it when running elsewhere.
root = '/Users/admin/Documents/PhD/Code/perceptual-tuning-results/'

# Folder handed to `apply_analysis` as the location of the mp score files
mp_folder = root + 'ABX/mp_scores'

# Folder handed to `apply_analysis` as `analysis_folder`
analysis_folder = root + 'ABX/analyses/RL_AmEnglish/resampling'

# Output file of the main figure
fig_path = root + 'ABX/figures/rl.pdf'

# Output file of the robustness figure
fig_robustness_path = root + 'ABX/figures/rl_robustness.pdf'

# Output file of the extended figure with baseline
# (the name previously contained a stray space, which was a syntax error)
fig_baselines_path = root + 'ABX/figures/rl_baselines.pdf'

In [None]:
# Uncomment for development/debugging
# %matplotlib inline


# Uncomment to plot nice-looking final figures
import matplotlib as mpl

# Classic matplotlib look, rendered through the pgf backend.
mpl.style.use('classic')
mpl.use("pgf")
pgf_with_custom_preamble = {
    "font.family": "serif", # use serif/main font for text elements
    "text.usetex": True,    # use inline math for ticks
    "pgf.rcfonts": False,   # don't setup fonts from rc parameters
    # The preamble is passed as ONE newline-separated string: recent
    # matplotlib releases no longer accept a list of lines for this rcParam.
    "pgf.preamble": "\n".join([
         "\\usepackage{unicode-math}",  # unicode math setup
         "\\setmainfont{Doulos SIL}" # serif font via preamble
         ])
}
mpl.rcParams.update(pgf_with_custom_preamble)


from scone_phobia import apply_analysis
from scone_phobia.utils.mp_scores import estimate_std
from scone_phobia.analyses.RL_AmEnglish import RL_AmEnglish as AE_RL
import scone_phobia.metadata.add_metadata as add_metadata
from scone_phobia.plots.catplot import custom_catplot, set_edgecolor
import matplotlib.patches as patches
import numpy as np

To avoid any dependency issues that could arise when aggregating estimates of the variability of our error estimates, and to be able to perform permutation tests, we resample our custom analysis in full, rather than aggregating estimates as one would for uncorrelated variables.

In [None]:

# Tag identifying the DPGMM models in mp score file names
dpgmm = 'dpgmm_novtln_vad'


def percTun_filt(mp_fname):
    """Select models relevant for our perceptual tuning study.

    Returns True when `mp_fname` contains the tag of one of the three
    model families of interest, False otherwise.
    """
    model_tags = ('AMtri1_sat_small_LMtri1satsmall', 'mfcc_novtln', dpgmm)
    return any(tag in mp_fname for tag in model_tags)


def RL_filt(mp_fname):
    """Select the mp files that need resampling for the RL analysis.

    For RL we only need mp files tested on AE: the file must pass
    `percTun_filt` and contain either 'BUCtest' or 'WSJtest'.
    """
    if not percTun_filt(mp_fname):
        return False
    return any(test_set in mp_fname for test_set in ('BUCtest', 'WSJtest'))

# Analysis run on the selected mp score files
analysis = AE_RL

# Keyword arguments for `apply_analysis`, gathered in one place
analysis_opts = {
    'filt': RL_filt,
    'add_metadata': add_metadata.language_register,
    'resampling': True,
    'resample_caching_scheme': 'mp_file',
    'analysis_folder': analysis_folder,
    'pickle_encoding': None,
    'resampled_pickle_encoding': "latin1",
    'verbose': 0,
}

# Fetch the analysis results together with their resampled counterparts
df_rl, boot_df_rl = apply_analysis(analysis, mp_folder, **analysis_opts)

# The errors get aggregated further below and test statistics are computed
# on them, so the resampled standard deviation estimates attached to the
# current errors are of no use: drop that column.
del df_rl['std']

In [None]:
# Average results into 2 conditions (`Am. Eng. native` vs `Jap. native' models),
# averaging over the two AE test sets and two training registers.

def condition(df_row):
    """Return the label of the experimental condition a result row belongs to.

    Parameters
    ----------
    df_row : mapping-like (e.g. a DataFrame row)
        Must provide the 'training set' and 'training language' entries;
        both are always read.

    Returns
    -------
    str
        'No language tuning' if the training set is the string 'None',
        'Am. Eng. native' if the training language is 'American English',
        'Jap. native' in every other case.
    """
    cols = ['training set', 'training language']
    x, lx = [df_row[e] for e in cols]
    if x == 'None':
        cond = 'No language tuning'  # not applicable
    elif lx == 'American English':
        cond = 'Am. Eng. native'
    else:
        cond = 'Jap. native'
    return cond


def agg_conds(df, df_is_resampled=False):
    """Average the numeric columns of `df` within each condition.

    A 'condition' column (see `condition`) is added to `df` IN PLACE, then
    the rows are grouped by model type, contrast and condition and the
    numeric columns are averaged within each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Results with at least the 'model type', 'contrast', 'training set'
        and 'training language' columns.
    df_is_resampled : bool, optional
        If True, 'batch ID', 'batch size' and 'boot ID' are added to the
        grouping columns so that each resample is averaged separately.

    Returns
    -------
    pandas.DataFrame
        One row per group, with the grouping columns kept as regular columns.
    """
    df['condition'] = [condition(row) for _, row in df.iterrows()]
    groupby_cols = ['model type', 'contrast', 'condition']
    if df_is_resampled:
        # ensure separate analysis for each resample
        groupby_cols = groupby_cols + ['batch ID', 'batch size', 'boot ID']
    # numeric_only=True: the frame still carries descriptive string columns
    # that are not grouping keys; averaging them is meaningless and raises a
    # TypeError on pandas >= 2.0, so they are explicitly left out.
    agg_df = df.groupby(groupby_cols, as_index=False).mean(numeric_only=True)
    return agg_df


# Condition-level averages, for the original results and for each resample.
# Note: `agg_conds` also adds a 'condition' column to `df_rl` and
# `boot_df_rl` in place.
df_cond = agg_conds(df_rl)
boot_df_cond = agg_conds(boot_df_rl, df_is_resampled=True)

In [None]:
# Get estimate of standard deviations for the condition-averaged errors
# (`df` is the frame plotted in the main and extended figures, which read
# its 'std' column -- presumably added by `estimate_std`; confirm in scone_phobia)
df = estimate_std(df_cond, boot_df_cond)

# As well, for control without averaging over AE test sets and training registers
# (`df_control` is the frame plotted in the robustness figure)
df_control = estimate_std(df_rl, boot_df_rl)

In [None]:
# Utility functions for plots

def clean_ticks(facetgrid):
    """Give all ticks zero width and length on every axis of the grid,
    then call `despine(left=True)` on the grid."""
    zero_size_ticks = dict(axis='both', which='both', width=0, length=0)
    for panel in facetgrid.axes.flatten():
        panel.tick_params(**zero_size_ticks)
    facetgrid.despine(left=True)

    
def set_y_grid(facetgrid):
    """Turn on the y-axis grid on every axis of the grid, after asking
    each axis to keep its grid/ticks below the other artists."""
    panels = facetgrid.axes.flatten()
    for panel in panels:
        panel.set_axisbelow(True)
        panel.grid(axis='y')

        
def set_col_titles(facetgrid, col_labels, fontsize=25):
    """Title the flattened axes of the grid with `col_labels`, in order.

    Axes and labels are paired one to one; pairing stops at the shorter
    of the two sequences.
    """
    titled_panels = zip(facetgrid.axes.flatten(), col_labels)
    for panel, title in titled_panels:
        panel.set_title(title, fontsize=fontsize)

In [None]:
# Error-bar styling reused by every figure below
err_args = dict(ecolor='k',
                capsize=2,
                elinewidth=2,
                markeredgewidth=2)

### Main figure

In [None]:
# One facet column per contrast, with its LaTeX title.
# Backslashes are doubled so that no invalid escape sequence is used;
# the resulting strings are unchanged.
col_order = ['L-R', 'W-Y', 'all_C']
col_labels = ['[\\textipa{\\*r}]-[l]', '[w]-[j]', 'C-C']

# Only the DPGMM models are shown in the main figure
x_order = [dpgmm]
x_labels = ['']

hue_order = ['Am. Eng. native', 'Jap. native']
hue_labels = ['Native', 'Non-native']

# One bar colour per condition
palette={'Am. Eng. native': 'b', 'Jap. native': 'r'}
g, gx_dict = custom_catplot(x="model type", y="error", yerr="std",
                            err_args=err_args,
                            col="contrast",
                            hue="condition",
                            data=df,
                            kind="bar",
                            order=x_order,
                            col_order=col_order,
                            hue_order=hue_order,
                            legend=False,  #True,
                            palette=palette,
                            aspect=.33)


# Single x position per facet: no x ticks/label, fixed axis limits
for ax in g.axes.flatten():
    ax.set_xticks([])
    ax.set_xlabel('')
    ax.set_xlim([-.48, .48])
    ax.set_ylim([0, 15.5])


clean_ticks(g)
set_y_grid(g)
set_col_titles(g, col_labels, fontsize=25)
for tick in g.axes[0,0].yaxis.get_major_ticks():
    # `label1` is the tick label formerly also reachable as `tick.label`,
    # an alias that recent matplotlib releases no longer provide
    tick.label1.set_fontsize(20)
g.set_ylabels('ABX error rate (in \\%)', fontsize=20)

g.savefig(fig_path)

### Figure showing robustness of the results across training and test sets

In [None]:
# Select only gmm data
df_control = df_control[df_control['model type'] == dpgmm]

# Facet titles, listed row by row (rows: contrasts, columns: test registers).
# Backslashes are doubled so that no invalid escape sequence is used;
# the resulting strings are unchanged.
facet_labels = ['[\\textipa{\\*r}]-[l]\nRead test stimuli', '[\\textipa{\\*r}]-[l]\nSpont. test stimuli',
                '[w]-[j]\nRead test stimuli', '[w]-[j]\nSpont. test stimuli',
                'C-C\nRead test stimuli', 'C-C\nSpont. test stimuli']
x_order = ["Read", "Spontaneous"]
row_order = ['L-R', 'W-Y', 'all_C']
col_order = ["Read", "Spontaneous"]
hue_order = ["American English", "Japanese"]
xticklabels = ["Read training", "Spont. training"]


# main part
g, x_dict = custom_catplot(x="training register", y="error", yerr="std",
                           order=x_order,
                           col="test register",
                           col_order=col_order,
                           row="contrast",
                           row_order=row_order,
                           hue="training language",
                           hue_order=hue_order,
                           data=df_control,
                           kind="bar",
                           err_args=err_args,
                           legend=False,
                           sharex=False,
                           aspect=1)

# labels, fontsize etc.
g.set_xticklabels(xticklabels, fontsize=15)
for axes in g.axes:
    # first axis of each row
    for tick in axes[0].yaxis.get_major_ticks():
        # `label1` is the tick label formerly also reachable as `tick.label`,
        # an alias that recent matplotlib releases no longer provide
        tick.label1.set_fontsize(20)
g.set_ylabels('ABX error rate (in \\%)', fontsize=18)
g.set_xlabels('Training register', fontsize=18)
for ax, t in zip(g.axes.flatten(), facet_labels):
    ax.tick_params(axis='both', which='both', width=0, length=0)
    ax.set_axisbelow(True)
    ax.grid(axis='y')
    ax.set_title(t, fontsize=20)
    ax.set_xlim([-.48, 1.48])
    ax.set_yticks(2*np.arange(10))
    ax.set_ylim([0, 19])
g.despine(left=True)
g.fig.tight_layout()


g.savefig(fig_robustness_path)

### Extended Figure: with baseline and topline

In [None]:
# One facet column per contrast, with its LaTeX title.
# Backslashes are doubled so that no invalid escape sequence is used;
# the resulting strings are unchanged.
col_order = ['L-R', 'W-Y', 'all_C']
col_labels = ['[\\textipa{\\*r}]-[l]', '[w]-[j]', 'C-C']

x_order = [dpgmm, 'AMtri1_sat_small_LMtri1satsmall']
x_labels = ['GMM\n(unsupervised)', 'HMM\n(supervised)']

hue_order = ['Am. Eng. native', 'Jap. native']
hue_labels = ['Native', 'Non-native']

# One bar colour per condition
palette={'Am. Eng. native': 'b', 'Jap. native': 'r'}
g, gx_dict = custom_catplot(x="model type", y="error", yerr="std",
                            err_args=err_args,
                            col="contrast",
                            hue="condition",
                            data=df,
                            kind="bar",
                            order=x_order,
                            col_order=col_order,
                            hue_order=hue_order,
                            legend=False,  #True,
                            palette=palette,
                            aspect=.66)

g.set_xticklabels(x_labels, fontsize=12)
for ax in g.axes.flatten():
    ax.set_xlim([-.48, 1.48])
    ax.set_ylim([0, 30])


clean_ticks(g)
set_y_grid(g)
set_col_titles(g, col_labels, fontsize=25)
for tick in g.axes[0,0].yaxis.get_major_ticks():
    # `label1` is the tick label formerly also reachable as `tick.label`,
    # an alias that recent matplotlib releases no longer provide
    tick.label1.set_fontsize(20)
g.set_ylabels('ABX error rate (in \\%)', fontsize=20)
g.set_xlabels('Model', fontsize=15)

# prepare baseline data: the 'mfcc_novtln' rows of `df`, one selection per contrast
baseline_data = [df[(df['contrast'] == con) & (df["model type"] == 'mfcc_novtln')]
                     for con in col_order]  # order must match order of columns
# float(...) requires each selection to hold exactly one row
baseline_data = [(float(s['error']), float(s['std'])) for s in baseline_data]

# draw baseline
for ax, (mfcc_err, mfcc_std) in zip(g.axes.flatten(), baseline_data):
    mi, ma = ax.get_xlim()
    # plot dotted line
    line = ax.plot([mi, ma], [mfcc_err, mfcc_err], 'k--')
    # put in background
    line[0].set_zorder(0)
    # add error-bands (baseline error +/- one standard deviation, full axis width)
    rect = patches.Rectangle((mi,mfcc_err-mfcc_std), ma-mi, 2*mfcc_std,
                              edgecolor=(.8, .8, .8, 1),
                              facecolor=(.8, .8, .8, 1))
    # put error-bands in background
    rect.set_zorder(0)
    ax.add_patch(rect)


g.savefig(fig_baselines_path)