## Figure 4

In [None]:
import mne
import numpy as np
import csv
from glob import glob
from tqdm.notebook import tqdm
import pandas as pd
import h5py
from scipy.spatial import ConvexHull
from matplotlib import pyplot as plt
from matplotlib import rcParams as rc
rc['pdf.fonttype'] = 42  # embed fonts as TrueType (Type 42) so text in exported PDFs stays editable
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*' and
# 3.8 removed the old names, so plt.style.use('seaborn') fails on current releases.
# Prefer the new name and fall back to the old one for older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
import sys
sys.path.append('../preprocessing/utils/')
import strf

In [None]:
# Edit these paths before running the notebook locally.
# They are glued to file names by plain f-string concatenation in later cells,
# so every path must keep its trailing '/'.

# Root folder of the EEG dataset (downloadable from OSF: https://doi.org/10.17605/OSF.IO/FNRD9)
eeg_data_path = '/path/to/dataset/'
# Local clone of the speaker_induced_suppression_EEG git repository
git_path = '/path/to/git/speaker_induced_suppression_EEG/'
# Folder where train_linear_model.ipynb saved its output. Run that notebook first if you haven't already.
h5_path = '/path/to/h5/'

In [None]:
# Colour palette used by the figures
perception_color = '#117733'
production_color = '#332288'
consistent_color = '#ddcc77'
inconsistent_color = '#aa4499'
# The convex-hull plotting cells fill their hulls with cca_color / raw_color, but
# neither name was assigned anywhere in this notebook, so a fresh
# "Restart & Run All" stopped with a NameError. Define them with the other colours.
# NOTE(review): the originally intended hex values are not visible in this notebook;
# these are placeholders chosen to pair with the red / blue scatter points --
# replace them with the values used for the published figure.
cca_color = '#cc6677'
raw_color = '#88ccee'
picks = ['F1','Fz','F2','FC1','FCz','FC2','C1','Cz','C2']
tmin,tmax = -.3, .5
# Integer lags covering [tmin, tmax) in samples.
# 128 is presumably the EEG sampling rate in Hz -- TODO confirm.
delays = np.arange(np.floor(tmin*128),np.ceil(tmax*128),dtype=int)
exclude = ['OP0001','OP0002','OP0004','OP0017','OP0020']
# Subject IDs are the last six characters of every eventfiles entry whose path
# contains 'OP0', minus the excluded subjects; sorted for a stable order.
subjs = np.sort([s[-6:] for s in glob(f'{git_path}eventfiles/*') if 'OP0' in s and s[-6:] not in exclude])
models = ['model1','model1e','model2','model2e','model3','model3e','model4','model4e']
# Feature labels per model (from the project's strf helper) and how many there are
features = {model_number:strf.get_feats(model_number=model_number,extend_labels=True) for model_number in models}
n_feats = {model_number:len(features[model_number]) for model_number in models}

### Load data

In [None]:
# Load data from hdf5, pandas
# Every dict below is keyed as result[model][subject]:
#   wts       - model weights read from the subject's hdf5 file
#   corrs     - per-channel r_value from the results CSV
#   pvals     - per-channel p_value from the results CSV
#   alphas    - per-channel best_alpha from the results CSV
#   sig_wts   - copy of wts with non-significant channels left at zero
#   sig_corrs - copy of corrs with non-significant channels left at zero
wts, corrs, pvals, sig_wts, sig_corrs, alphas = dict(), dict(), dict(), dict(), dict(), dict()
results_csv_fpath = f"{git_path}stats/lem_results.csv"
df = pd.read_csv(results_csv_fpath)
sig_threshold = 0.01  # a channel counts as significant when its p_value is below this
for m in models:
    wts[m], corrs[m], pvals[m], sig_wts[m], sig_corrs[m], alphas[m] = dict(), dict(), dict(), dict(), dict(), dict()
    # Filter the results table once per model rather than once per channel.
    model_rows = df[df['model'] == m]
    b = tqdm(subjs)
    for s in b:
        blockid = f"{s}_B1"
        b.set_description(f'Loading STRF for {s} {m}')
        with h5py.File(f"{h5_path}{s}_weights.hdf5",'r') as f:
            # f[m] raises KeyError when the dataset is missing; f.get(m) would
            # return None and np.array(None) silently yields a 0-d object array.
            wts[m][s] = np.array(f[m])
        # Channel order of the recording; only the header is read (preload=False).
        ch_names = mne.io.read_raw_brainvision(f"{eeg_data_path}{s}/{blockid}/{blockid}_cca.vhdr",
                                               preload=False,verbose=False).info['ch_names']
        # Exactly one CSV row is expected per (subject, model, channel). Check that
        # up front instead of assigning a pandas Series into a scalar array slot,
        # which is deprecated in NumPy and fails obscurely on 0 or >1 matches.
        subj_rows = model_rows[model_rows['subject'] == s].set_index('channel')
        if not subj_rows.index.is_unique:
            raise ValueError(f"{results_csv_fpath}: duplicate channel rows for subject {s}, {m}")
        missing = [ch for ch in ch_names if ch not in subj_rows.index]
        if missing:
            raise KeyError(f"{results_csv_fpath}: no {m} rows for subject {s}, channels {missing}")
        # Reorder the rows so that position i matches ch_names[i].
        subj_rows = subj_rows.loc[ch_names]
        corrs[m][s] = subj_rows['r_value'].to_numpy(dtype=float)
        pvals[m][s] = subj_rows['p_value'].to_numpy(dtype=float)
        alphas[m][s] = subj_rows['best_alpha'].to_numpy(dtype=float)
    # Extract significant weights, corrs
    for s in subjs:
        nchans = wts[m][s].shape[2]
        sig_mask = pvals[m][s][:nchans] < sig_threshold
        sig_wts[m][s] = np.zeros((len(delays),n_feats[m],nchans))
        # Channels live on the LAST axis (nchans is shape[2]), so the mask must be
        # applied there; indexing the first axis with a channel number would copy
        # delay slices instead of channels.
        sig_wts[m][s][:, :, sig_mask] = wts[m][s][:, :, sig_mask]
        sig_corrs[m][s] = np.where(sig_mask, corrs[m][s][:nchans], 0.0)

### Plotting

In [None]:
# Convex hull plot
def convex_hull(xcorrs,ycorrs,
                corr_min=0.,corr_max=0.5,
                hull_color='hotpink',hull_alpha=0.5,
                ax=None):
    '''
    Shade the convex hull of a cloud of (x, y) correlation pairs.

    Parameters
    ----------
    xcorrs, ycorrs : array-like of float
        1D sequences of correlation coefficients of equal length; element i of
        each forms one point. Lists are accepted as well as NumPy arrays.
    corr_min, corr_max : float
        Lower / upper limit applied to both axes. Ticks are placed every 0.1
        starting at corr_min, and corr_max is always added as the last tick.
    hull_color : matplotlib colour
        Fill colour of the hull polygon.
    hull_alpha : float
        Opacity of the hull polygon.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (plt.gca()).

    Returns
    -------
    None. The hull is drawn onto the axes and tight_layout is applied to the
    figure that owns them.

    Errors raised by scipy.spatial.ConvexHull (for example when the points do
    not span a 2D region) propagate unchanged.
    '''
    if ax is None:
        ax = plt.gca()
    # Fancy indexing with the hull vertices below needs real arrays, not lists.
    xcorrs = np.asarray(xcorrs)
    ycorrs = np.asarray(ycorrs)
    # Same limits and ticks on both axes so the plot stays square in data units.
    ticks = np.hstack((np.arange(corr_min,corr_max,step=0.1),corr_max))
    ax.set_xlim([corr_min,corr_max])
    ax.set_ylim([corr_min,corr_max])
    ax.set_yticks(ticks)
    ax.set_xticks(ticks)
    hull = ConvexHull(np.vstack((xcorrs,ycorrs)).T)
    # For 2D input hull.vertices lists the boundary points in counterclockwise
    # order; repeat the first vertex to close the outline explicitly.
    vhull = np.append(hull.vertices,hull.vertices[0])
    ax.fill(xcorrs[vhull],ycorrs[vhull],color=hull_color,alpha=hull_alpha)
    ax.figure.tight_layout()

In [None]:
fig = plt.figure(figsize=(5,5))
corr_min,corr_max = -.1,.6
plt.xlabel('Model correlation\n EMG regressed')
plt.ylabel('Model correlation\n EMG not regressed')
# Each entry: (model on x, model on y, hull colour, scatter colour, legend label).
# Per the axis labels, x holds the plain model and y its 'e' counterpart;
# the two groups differ in the feature-encoding scheme named in the legend.
# cca_color / raw_color are expected to be defined in an earlier cell.
hull_groups = (
    ('model1', 'model1e', cca_color, 'red', "Identical feature encoding"),
    ('model2', 'model2e', raw_color, 'blue', 'Task-specific feature encoding'),
)
for x_model, y_model, group_color, dot_color, group_label in hull_groups:
    # Pool the per-channel correlations of all subjects into one flat vector
    xcorrs = np.hstack(list(corrs[x_model].values()))
    ycorrs = np.hstack(list(corrs[y_model].values()))
    convex_hull(xcorrs,ycorrs,corr_min=corr_min,corr_max=corr_max,hull_color=group_color,hull_alpha=0.4)
    plt.scatter(xcorrs,ycorrs,s=5,color=dot_color)
    # Zero-height bar at 0: only there to give the legend a swatch in the hull colour
    plt.bar(0,0,color=group_color,label=group_label)
# Unity line: points above it have the higher correlation on the y axis
plt.plot([corr_min,corr_max],[corr_min,corr_max],color='k',alpha=0.5)

plt.legend()
plt.tight_layout()

In [None]:
fig = plt.figure(figsize=(5,5))
corr_min,corr_max = -.1,.6
plt.xlabel('Model correlation\n identical feature encoding')
plt.ylabel('Model correlation\n task-specific feature encoding')
# Each entry: (model on x, model on y, hull colour, scatter colour, legend label).
# First group: EMG regressed (model1 vs model2);
# second group: EMG not regressed (model1e vs model2e).
# cca_color / raw_color are expected to be defined in an earlier cell.
hull_groups = (
    ('model1', 'model2', cca_color, 'red', "EMG regressed"),
    ('model1e', 'model2e', raw_color, 'blue', 'EMG not regressed'),
)
for x_model, y_model, group_color, dot_color, group_label in hull_groups:
    # Pool the per-channel correlations of all subjects into one flat vector
    xcorrs = np.hstack(list(corrs[x_model].values()))
    ycorrs = np.hstack(list(corrs[y_model].values()))
    convex_hull(xcorrs,ycorrs,corr_min=corr_min,corr_max=corr_max,hull_color=group_color,hull_alpha=0.4)
    plt.scatter(xcorrs,ycorrs,s=5,color=dot_color)
    # Zero-height bar at 0: only there to give the legend a swatch in the hull colour
    plt.bar(0,0,color=group_color,label=group_label)
# Unity line: points above it have the higher correlation on the y axis
plt.plot([corr_min,corr_max],[corr_min,corr_max],color='k',alpha=0.5)
plt.legend()
plt.tight_layout()