This notebook uses the TextGrids to create event files for epoching the data in MNE.

**Please manually check your forced-aligned TextGrids for accuracy before creating these event files.** If you skip this step, the event timings will be wrong and your results will look terrible. (The TextGrids provided in this repo are all accurate.)

In [None]:
import os
import numpy as np
import csv
import re
import sys
import scipy
from glob import glob
sys.path.append("../textgrids/")
import textgrid
sys.path.append("./")
import match_filter
from fuzzywuzzy import fuzz
import warnings

In [None]:
# Machine-specific locations: edit both before running the rest of the notebook.
# git_path is joined with "preprocessing/events/..." below to find TextGrids, logfiles, and eventfiles.
# data_path presumably points at the root of the BIDS dataset (it is not used in the visible cells).
data_path = "/path/to/bids/dataset/"
git_path = "/path/to/git/kurteff2024_code/"

In [None]:
# Recording to process: subject ID and block ID.
# They are joined as "<subj>_<block>" to build file and folder names in the cells below.
subj, block = "TCH14", "B12"

### Click eventfiles

In [None]:
# Find every occurrence of the click template in the speaker audio and save the
# onsets/offsets as a three-column (onset, offset, 1) tab-delimited eventfile.

# Imported explicitly so this cell does not depend on a bare `import scipy`
# exposing the `io` / `signal` submodules (older SciPy versions do not).
from scipy.io import wavfile
from scipy.signal import resample

corr_thresh = 0.15 # adjust if you aren't finding the proper number of matches
nreps = 100 # adjust: n_trials * 2

# Also defined in the mic cell below; repeated here so the notebook runs top to bottom.
blockid = "_".join([subj,block])

# NOTE(review): `spkr_path` is not defined anywhere in the visible notebook. It is presumably
# the path to this block's speaker-channel .wav file and must be set before running this cell.
spkr_fs, spkr_audio = wavfile.read(spkr_path)
click_fs, click_audio = wavfile.read(
    os.path.join(git_path, "onsetProd", "ipad", "ipad_dcmc", "SupplementalFiles", "click.wav")
)

# Bring the click template to the speaker sampling rate, then keep column 0 only.
n_click_samples = int((click_audio.shape[0]/click_fs)*spkr_fs)
click_audio = resample(click_audio, n_click_samples)[:,0]

# Scale both signals by their maximum sample value before matching.
click_audio = click_audio/click_audio.max()
spkr_audio = spkr_audio/spkr_audio.max()

# `match_filter` is imported as a module at the top of the notebook, and a module is not
# callable. Use the function of the same name inside it when present; fall back to the
# name itself in case the import is changed to `from match_filter import match_filter`.
# NOTE(review): assumes the module exposes a function called `match_filter` -- confirm.
find_matches = getattr(match_filter, "match_filter", match_filter)
matches = find_matches(click_audio, spkr_audio, spkr_fs, corr_thresh=corr_thresh, nreps=nreps, debug=True)
click_onsets = np.sort(matches[0][:,0])
click_offsets = np.sort(matches[0][:,1])

# NOTE(review): this path contains a "csv" folder, whereas the mic/spkr eventfiles below
# are written to events/<subj>/<blockid> -- confirm which location is intended.
click_eventfile_fpath = os.path.join(git_path,"preprocessing","events","csv",subj,blockid,
                                     f"{blockid}_click_eve.txt")
if os.path.isfile(click_eventfile_fpath):
    print("Click eventfile already exists")
else:
    # np.savetxt does not create missing directories.
    os.makedirs(os.path.dirname(click_eventfile_fpath), exist_ok=True)
    click_eventfile_txt = np.column_stack(
        (click_onsets, click_offsets, np.ones(click_onsets.shape[0]))).astype(float)
    np.savetxt(click_eventfile_fpath, click_eventfile_txt, fmt="%.3f", delimiter='\t', newline='\n')

### Production (mic) eventfiles
You will need:

1. Accurate phone-, word-, and sentence-level mic TextGrids

In [None]:
# Build sentence-, word-, and phoneme-level eventfiles for the microphone (production) channel.
# Each event row is [onset, offset, event_id, transcript]. Events are written pooled ("all")
# and split by the playback condition of the trial they belong to ("el" = echolalia,
# "sh" = shuffled).


def _read_lines(fpath):
    """Return the non-empty lines of a text file as a 1-D array of str (one entry per line).

    Replaces `np.loadtxt(..., dtype=str, delimiter='\\n')`, which NumPy >= 1.23 rejects
    because a newline is no longer accepted as a delimiter.
    """
    with open(fpath, 'r') as f:
        return np.array([ln.rstrip('\r\n') for ln in f if ln.strip()], dtype=str)


def _indices_in_intervals(onsets, intervals):
    """Return integer indices of `onsets` that fall inside any [start, stop) interval.

    Indices are concatenated in interval order. The result always has an integer dtype,
    even when an interval (or every interval) contains no onsets, so it is safe to use
    for indexing.
    """
    hits = [np.flatnonzero((onsets >= start) & (onsets < stop)) for start, stop in intervals]
    return np.concatenate(hits) if hits else np.array([], dtype=int)


def _append_event(event, condition, all_events, echo_events, shuff_events):
    """Append `event` to the pooled list and to the list for its playback condition, if any."""
    all_events.append(event)
    if condition == 'echolalia':
        echo_events.append(event)
    elif condition == 'shuffled':
        shuff_events.append(event)


# Minimum fuzz.ratio score (0-100) below which a sentence match triggers a warning.
# NOTE(review): `fuzzy_thresh` was not defined anywhere in the visible notebook; 80 is a
# placeholder. It only controls the warning, not the event IDs that are written.
fuzzy_thresh = 80

# Subjects that use the "sentences_easyreading.txt" sentence list instead of "sentences.txt".
easy_reading = subj in ['S0017','S0018','S0021','S0023','S0024','TCH8','TCH14']
blockid = "_".join([subj,block])

tg_folder = os.path.join(git_path,"preprocessing","events","textgrids",subj,blockid)
sentence_tg_fpath = os.path.join(tg_folder, f"{blockid}_mic_sentence.TextGrid")
# NOTE(review): `phone_tg_fpath` was undefined in the original cell. The name below mirrors the
# "<blockid>_spkr.TextGrid" file used in the perception cell -- confirm it matches your files.
phone_tg_fpath = os.path.join(tg_folder, f"{blockid}_mic.TextGrid")
log_path = os.path.join(git_path,"preprocessing","events","logfiles",f"{blockid}.txt")

# ID lookup tables: an item's event ID is its (0-based) line index in the corresponding file.
csv_folder = os.path.join(git_path,"preprocessing","events","csv")
phoneme_ids = _read_lines(os.path.join(csv_folder,"phonemes.txt"))
word_ids = _read_lines(os.path.join(csv_folder,"words.txt"))
sn_ext = "sentences_easyreading.txt" if easy_reading else "sentences.txt"
sentence_ids = _read_lines(os.path.join(csv_folder,sn_ext))
# Strip punctuation and uppercase the reference sentences before fuzzy matching.
sentence_ids = [re.sub(r'[^\w\s]','', row).upper() for row in sentence_ids]

# Playback condition of each 'listen' trial, in logfile order.
with open(log_path,'r') as f:
    for _ in range(3):  # the first three lines are skipped; the next line is the tab-delimited header
        next(f)
    log_reader = csv.DictReader(f,delimiter='\t')
    # Built as a 1-D array directly (no squeeze) so that a single trial stays indexable.
    playback_condition = np.array([r['CurrentBlock'] for r in log_reader if r['TrialPart']=='listen'])

with open(sentence_tg_fpath) as f:
    sen_tg = textgrid.TextGrid(f.read())
# Rows are (onset, offset, text); intervals labelled 'sp' are dropped.
sentence_grid = np.array([s for s in sen_tg.tiers[0].simple_transcript if s[2] != 'sp'])
with open(phone_tg_fpath,'r') as f:
    phone_tg = textgrid.TextGrid(f.read())

# Tier 2 marks the time spans labelled 'task'; only words/phones starting inside them are kept.
task_times = phone_tg.tiers[2].simple_transcript
task_onsets = np.array([float(r[0]) for r in task_times if r[2] == 'task'])
task_offsets = np.array([float(r[1]) for r in task_times if r[2] == 'task'])
task_times = np.column_stack((task_onsets, task_offsets))

# Words (tier 1): strip punctuation, drop the non-speech labels, restrict to task spans.
word_grid = np.array(phone_tg.tiers[1].simple_transcript)
word_onsets = word_grid[:,0].astype(float); word_offsets = word_grid[:,1].astype(float)
words = np.array([re.sub(r'[^\w\s]','',w) for w in word_grid[:,2]])
word_idxs = [i for i,w in enumerate(words) if w not in ['NS','sp','CG','LG','BR','SL','LS']]
word_onsets = word_onsets[word_idxs]; word_offsets = word_offsets[word_idxs]; words = words[word_idxs]
task_inds = _indices_in_intervals(word_onsets, task_times)
words = words[task_inds]; word_onsets = word_onsets[task_inds]; word_offsets = word_offsets[task_inds]

# Phones (tier 0): drop the non-speech labels, restrict to task spans.
phone_grid = np.array(phone_tg.tiers[0].simple_transcript)
phone_onsets = phone_grid[:,0].astype(float); phone_offsets = phone_grid[:,1].astype(float)
phones = phone_grid[:,2]
phone_idxs = [i for i,p in enumerate(phones) if p not in ['ns','sp','cg','lg','br','sl','ls']]
phone_onsets = phone_onsets[phone_idxs]; phone_offsets = phone_offsets[phone_idxs]; phones = phones[phone_idxs]
task_inds = _indices_in_intervals(phone_onsets, task_times)
phones = phones[task_inds]; phone_onsets = phone_onsets[task_inds]; phone_offsets = phone_offsets[task_inds]

sentence_events, word_events, phone_events = [], [], []
sentence_events_echo, word_events_echo, phone_events_echo = [], [], []
sentence_events_shuff, word_events_shuff, phone_events_shuff = [], [], []

# The i-th sentence interval is paired with the i-th 'listen' trial from the logfile.
for i, sen in enumerate(sentence_grid):
    onset = float(sen[0]); offset = float(sen[1]); transcript = sen[2]
    condition = playback_condition[i]

    # The sentence ID is the index of the best fuzzy match among the reference sentences.
    ratios = np.array([fuzz.ratio(transcript, s) for s in sentence_ids])
    event_id = ratios.argmax()
    if ratios[event_id] < fuzzy_thresh:
        warnings.warn(f"""
            Fuzzy matching for sentence '{transcript}' falls below fuzzy_thresh.
            It was matched to MOCHA sentence '{sentence_ids[event_id]}'.
            If this is not accurate, please manually adjust the sentence eventfiles.
        """)
    _append_event([onset, offset, event_id, transcript], condition,
                  sentence_events, sentence_events_echo, sentence_events_shuff)

    # Words fully contained in this sentence; words missing from the lookup table get ID 88888.
    sen_word_idxs = np.intersect1d(np.where(word_onsets >= onset)[0], np.where(word_offsets <= offset)[0])
    for wi in sen_word_idxs:
        word_event_id = np.where(word_ids == words[wi])[0][0] if words[wi] in word_ids else 88888
        _append_event([word_onsets[wi], word_offsets[wi], word_event_id, words[wi]], condition,
                      word_events, word_events_echo, word_events_shuff)

    # Phones fully contained in this sentence; unknown phones get ID 88888 and a warning.
    sen_phone_idxs = np.intersect1d(np.where(phone_onsets >= onset)[0],np.where(phone_offsets <= offset)[0])
    for pi in sen_phone_idxs:
        if phones[pi] in phoneme_ids:
            phone_event_id = np.where(phoneme_ids == phones[pi])[0][0]
        else:
            phone_event_id = 88888
            warnings.warn(
                f"Unknown phoneme encountered at {phone_onsets[pi]}, please manually correct TextGrid.")
        _append_event([phone_onsets[pi], phone_offsets[pi], phone_event_id, phones[pi]], condition,
                      phone_events, phone_events_echo, phone_events_shuff)

event_folder = os.path.join(git_path,"preprocessing","events",subj,blockid)
if len(glob(os.path.join(event_folder, "*mic*"))) > 0:
    # Existing mic eventfiles are never overwritten.
    print("Mic eventfiles already exist.")
else:
    # np.savetxt does not create missing directories.
    os.makedirs(event_folder, exist_ok=True)
    mic_outputs = {
        "sn_all": sentence_events, "sn_el": sentence_events_echo, "sn_sh": sentence_events_shuff,
        "wr_all": word_events, "wr_el": word_events_echo, "wr_sh": word_events_shuff,
        "ph_all": phone_events, "ph_el": phone_events_echo, "ph_sh": phone_events_shuff,
    }
    for suffix, events in mic_outputs.items():
        np.savetxt(os.path.join(event_folder,f"{blockid}_mic_{suffix}.txt"),
                   events, fmt="%s", delimiter="\t", newline="\n")

### Perception (spkr) eventfiles
You will need:

1. Accurate production TextGrids (all)
2. Accurate perception TextGrids (all)

In [None]:
# Build sentence-, word-, and phoneme-level eventfiles for the speaker (perception) channel.
# Each event row is [onset, offset, event_id, transcript]. Events are written pooled ("all")
# and split into echolalia ("el") / shuffled ("sh") by whether they start before or after
# the onset of the first shuffled trial.
# Relies on names created by the production (mic) cell: blockid, sentence_grid,
# playback_condition, phoneme_ids, word_ids, sentence_ids, and event_folder.


def _add_spkr_event(event, split_time, all_events, echo_events, shuff_events):
    """Append `event` to the pooled list and to the echo/shuffled list chosen by its onset."""
    all_events.append(event)
    if event[0] >= split_time:
        shuff_events.append(event)
    else:
        echo_events.append(event)


# Minimum fuzz.ratio score (0-100) below which a sentence match triggers a warning.
# NOTE(review): `fuzzy_thresh` was not defined anywhere in the visible notebook; 80 is a
# placeholder. It only controls the warning, not the event IDs that are written.
fuzzy_thresh = 80

spkr_phone_tg_fpath = os.path.join(
    git_path,"preprocessing","events","textgrids",subj,blockid,f"{blockid}_spkr.TextGrid")
spkr_sen_tg_fpath = os.path.join(
    git_path,"preprocessing","events","textgrids",subj,blockid,f"{blockid}_spkr_sentence.TextGrid")

# Onset of the first shuffled trial, taken from the mic sentence intervals. This indexes
# `sentence_grid` (the array of sentence rows); the TextGrid object `sen_tg` itself cannot
# be indexed this way. With no shuffled trials, everything is treated as echolalia.
shuffled_trials = np.flatnonzero(np.atleast_1d(playback_condition) == 'shuffled')
shuff_start = float(sentence_grid[shuffled_trials[0],0]) if len(shuffled_trials) > 0 else np.inf

with open(spkr_phone_tg_fpath,'r') as f:
    spkr_phone_tg = textgrid.TextGrid(f.read())
    spkr_phone_tier = spkr_phone_tg.tiers[0].simple_transcript
    spkr_word_tier = spkr_phone_tg.tiers[1].simple_transcript
with open(spkr_sen_tg_fpath,'r') as f:
    spkr_sen_tg = textgrid.TextGrid(f.read())
    spkr_sen_tier = spkr_sen_tg.tiers[0].simple_transcript

# In every tier the first and last intervals are skipped ([1:-1]).
# Times are converted to float so they can be compared with `shuff_start`
# and are written in the same form as the mic eventfiles.
spkr_phone_events = []; spkr_phone_events_echo, spkr_phone_events_shuff = [], []
for event in spkr_phone_tier[1:-1]:
    onset = float(event[0]); offset = float(event[1]); transcript = event[2]
    if transcript in phoneme_ids:
        phone_id = np.where(phoneme_ids == transcript)[0][0]
    else:
        # Same fallback as the mic cell, instead of failing with an IndexError.
        phone_id = 88888
        warnings.warn(
            f"Unknown phoneme encountered at {onset}, please manually correct TextGrid.")
    _add_spkr_event([onset,offset,phone_id,transcript], shuff_start,
                    spkr_phone_events, spkr_phone_events_echo, spkr_phone_events_shuff)

spkr_word_events = []; spkr_word_events_echo, spkr_word_events_shuff = [], []
for event in spkr_word_tier[1:-1]:
    onset = float(event[0]); offset = float(event[1]); transcript = event[2]
    # Words missing from the lookup table get ID 88888.
    word_id = np.where(word_ids == transcript)[0][0] if transcript in list(word_ids) else 88888
    _add_spkr_event([onset,offset,word_id,transcript], shuff_start,
                    spkr_word_events, spkr_word_events_echo, spkr_word_events_shuff)

spkr_sen_events = []; spkr_sen_events_echo, spkr_sen_events_shuff = [], []
for event in spkr_sen_tier[1:-1]:
    onset = float(event[0]); offset = float(event[1]); transcript = event[2]
    # The sentence ID is the index of the best fuzzy match among the reference sentences.
    ratios = np.array([fuzz.ratio(transcript, s) for s in sentence_ids])
    event_id = ratios.argmax()
    if ratios[event_id] < fuzzy_thresh:
        warnings.warn(f"""
        Fuzzy matching for sentence '{transcript}' falls below fuzzy_thresh.
        It was matched to MOCHA sentence '{sentence_ids[event_id]}'.
        If this is not accurate, please manually adjust the sentence eventfiles.
        """)
    _add_spkr_event([onset,offset,event_id,transcript], shuff_start,
                    spkr_sen_events, spkr_sen_events_echo, spkr_sen_events_shuff)

if len(glob(os.path.join(event_folder,"*spkr*"))) > 0:
    # Existing spkr eventfiles are never overwritten.
    print("Spkr eventfiles already exist.")
else:
    # np.savetxt does not create missing directories.
    os.makedirs(event_folder, exist_ok=True)
    spkr_outputs = {
        "sn_all": spkr_sen_events, "sn_el": spkr_sen_events_echo, "sn_sh": spkr_sen_events_shuff,
        "wr_all": spkr_word_events, "wr_el": spkr_word_events_echo, "wr_sh": spkr_word_events_shuff,
        "ph_all": spkr_phone_events, "ph_el": spkr_phone_events_echo, "ph_sh": spkr_phone_events_shuff,
    }
    for suffix, events in spkr_outputs.items():
        np.savetxt(os.path.join(event_folder,f"{blockid}_spkr_{suffix}.txt"),
                   events, fmt="%s", delimiter="\t", newline="\n")