# Encoding the Audio

In [None]:
import sys 
import os 
import pickle 
import numpy as np
import spacy
import seaborn as sns
import matplotlib as plt 
import matplotlib.pyplot as plt 
import pandas as pd

sys.path.append(os.path.abspath("../lingpred_new/"))
from plotting import times_100, lowerCI, upperCI, reshape, colours
from encoding_analysis import regress_out_one, make_arbitrary_static_vectors, brainscore_no_coef
from utils import get_words_onsets_offsets, get_indices_per_task, make_y_matrix_per_run, get_runs

In [2]:
# select the dataset:
# -------------------
dataset = 'Goldstein' # or 
#dataset = 'Armeni'
#dataset = 'Gwilliams'

# type of audio data:
use_real_word_offsets = True

# dummy variables that are only needed to call get_runs and
# get_words_onsets_offsets below
session = 1
subject = 1
task    = '0'

# Build the dataframe containing the words.
# The per-run frames are collected first and concatenated once; growing
# words_df with pd.concat inside the loop re-copies all earlier rows on
# every iteration.
runs       = get_runs(dataset, session, subject, task)
run_frames = [
    get_words_onsets_offsets(dataset, subject=subject, session=session, run=run)
    for run in runs
]

if len(run_frames) == 1:
    # single run: use the frame exactly as returned by the helper
    words_df = run_frames[0]
elif run_frames:
    # several runs: stack them in run order (per-run index labels are kept)
    words_df = pd.concat(run_frames)
else:
    # no runs at all: keep an empty frame rather than failing inside pd.concat
    words_df = pd.DataFrame()

len(words_df)

5136

In [3]:
# Load the pickled acoustic model for the selected dataset.
# use_real_word_offsets decides which of the two saved variants is read.
if use_real_word_offsets:
    acoustics_path = '../audio/{}/acoustic_model_8_mels_averaged_per_word_using_word_offsets.pkl'.format(dataset)
else:
    acoustics_path = '../audio/{}/acoustic_model_8_mels_averaged_per_word_using_next_word_onset_as_offset.pkl'.format(dataset)

with open(acoustics_path, 'rb') as f:
    acoustics = pickle.load(f)

acoustics.shape

(5136, 9)

In [4]:
# How the indices for making the y matrix were computed
# -----------------------------------------------------
# The result is already saved under audio/<dataset>/indices_all_tasks.pkl.
# There is no need to run this again (it takes approx. 10 min); the recipe is
# kept here as a comment for reference only:
#
#     indices = get_indices_per_task(dataset)
#     indices.shape
#
#     dir_path  = '../audio/{}/'.format(dataset)
#     file_name = 'indices_all_tasks.pkl'
#     path      = dir_path + file_name
#
#     print(path)
#     f = open(path,"wb")
#     pickle.dump(indices, f)
#     f.close()

# Pick the saved indices file for the selected dataset. An unrecognised
# dataset name is rejected here instead of surfacing later as a NameError
# on `indices`.
if dataset in ['Goldstein', 'Gwilliams']:
    indices_path = '../audio/{}/indices_all_tasks.pkl'.format(dataset)
elif dataset == 'Armeni':
    indices_path = '../audio/Armeni/indices_session_1.pkl'
else:
    raise ValueError("Unknown dataset '{}': no saved indices to load".format(dataset))

# load the indices:
with open(indices_path, 'rb') as f:
    indices = pickle.load(f)

indices.shape

(5136, 157)

In [5]:
# Combine the per-word acoustic model with the loaded indices into the
# acoustic y matrix. With the data loaded above the result is 3-D:
# (length, 157, dimensionality).
# NOTE(review): presumably make_y_matrix_per_run picks rows of `acoustics`
# according to `indices` -- confirm against utils.make_y_matrix_per_run.
y_acoustics = make_y_matrix_per_run(acoustics, indices)
print('Acoustic y matrix of shape (length, 157, dimensionality)')
y_acoustics.shape

Acoustic y matrix of shape (length, 157, dimensionality)


(5136, 157, 9)

In [7]:
# Preview the first six rows of the words dataframe before it is extended:
words_df.iloc[:6]

Unnamed: 0,word,onset,offset
0,Act,3.71,3.79
1,"one,",3.99,4.19
2,monkey,4.651,4.931
3,in,4.951,5.011
4,the,5.051,5.111
5,middle.,5.151,5.391


In [8]:
# make spacy doc of the text
nlp          = spacy.load('en_core_web_lg')
word_strings = words_df["word"].astype(str).tolist()
text         = " ".join(word_strings)
doc          = nlp(text)

# The doc has more tokens than the dataframe has rows (words like "there's"
# are two tokens in the doc but one row in the dataframe), so every row gets
# the PoS of the FIRST token of its word.
#
# Rows are aligned to tokens through character offsets: `text` is the words
# joined by single spaces, so the start offset of every word is known, and
# token.idx is the character offset of a token inside `text`. This avoids
# re-running the whole pipeline on every single word just to count its
# tokens, and the alignment cannot drift when a word tokenises differently
# in isolation than it does in context.
pos_by_char_offset = {token.idx: token.pos_ for token in doc}

matched_PoS = []
char_offset = 0
for word in word_strings:
    # None is the fallback when no token starts at the expected offset
    matched_PoS.append(pos_by_char_offset.get(char_offset))
    # advance past this word and the single joining space
    char_offset += len(word) + 1

len(matched_PoS), len(words_df)

(5136, 5136)

In [9]:
# amend the words dataframe with useful information:
words_df['duration']              = words_df['offset'] - words_df['onset']
# The first word has no previous word, so its "previous duration" is filled
# with 0 to avoid a NaN. fill_value works by position; assigning through
# .loc[0, ...] works by index label and would also overwrite every other row
# labelled 0 when the index is not unique (or append a row if 0 is missing).
words_df['duration previous word']= words_df['duration'].shift(1, fill_value=0)
words_df['PoS']                   = matched_PoS
words_df['previous PoS']          = words_df['PoS'].shift(1)
words_df['word_index']            = range(len(words_df))

# Let us look at what our words dataframe looks like at the moment:
words_df.head(6)

Unnamed: 0,word,onset,offset,duration,duration previous word,PoS,previous PoS,word_index
0,Act,3.71,3.79,0.08,0.0,PROPN,,0
1,"one,",3.99,4.19,0.2,0.08,NUM,PROPN,1
2,monkey,4.651,4.931,0.28,0.2,NOUN,NUM,2
3,in,4.951,5.011,0.06,0.28,ADP,NOUN,3
4,the,5.051,5.111,0.06,0.06,DET,ADP,4
5,middle.,5.151,5.391,0.24,0.06,NOUN,DET,5


### Getting the X Matrices: GPT, GloVe and Arbitrary vectors

In [11]:

# Make X matrices for arbitrary vectors        
X_arbitrary  = make_arbitrary_static_vectors(words_df, dim=300)

# Get GloVe vectors: one spaCy doc vector per dataframe word.
# nlp.pipe processes the words as a batched stream instead of calling the
# full pipeline once per word.
X_Glove = np.vstack([word_doc.vector for word_doc in nlp.pipe(words_df.word.tolist())]) # np array of shape (nr_words, 300)

# Get GPT vectors:
if dataset == 'Goldstein':  
    with open('../audio/Goldstein/X_GPT_original_and_residualised_layer_47.pkl', 'rb') as f:
        X_matrices_GPT     = pickle.load(f)
        X_GPT              = X_matrices_GPT['X_GPT_layer_47']
elif dataset == 'Armeni':
    with open('../audio/Armeni/X_y_matrices_Glove_GPT_arbitrary_session_1.pkl', 'rb') as f:
        X_matrices_GPT     = pickle.load(f)
        X_GPT              = X_matrices_GPT['X_GPT']
else:
    # Without this branch X_GPT would be undefined (or silently left over from
    # an earlier run with another dataset) for any other dataset selection.
    raise ValueError("No GPT vectors are available for dataset '{}'".format(dataset))

# make X matrices with residualised vectors:
X_arbitrary_residualised = regress_out_one(X_arbitrary)
X_Glove_residualised     = regress_out_one(X_Glove)
X_GPT_residualised       = regress_out_one(X_GPT)

print('Arbitrary vectors have shape:', X_arbitrary.shape, ' and residualised: ', X_arbitrary_residualised.shape)
print('GloVe vectors have shape:', X_Glove.shape, ' and residualised: ', X_Glove_residualised.shape)
print('GPT vectors have shape:', X_GPT.shape, ' and residualised: ', X_GPT_residualised.shape)

Arbitrary vectors have shape: (5136, 300)  and residualised:  (5135, 300)
GloVe vectors have shape: (5136, 300)  and residualised:  (5135, 300)
GPT vectors have shape: (5136, 1600)  and residualised:  (5135, 1600)


### Audio Encoding: Main Effect

In [None]:
# Encode the acoustics from the original (non-residualised) arbitrary, GloVe
# and GPT vectors.
# The two axis swaps move the last axis of y_acoustics to the front while the
# first two axes keep their relative order.
y = y_acoustics.swapaxes(0, 1).swapaxes(0, 2)
print('y should be of shape: (nr_mels, nr_words, nr_timepoints')
print('y has shape ', y.shape)

# one encoding score set per predictor matrix, all against the same y
encoding_arbitrary = brainscore_no_coef(X_arbitrary, y)
encoding_Glove     = brainscore_no_coef(X_Glove, y)
encoding_GPT       = brainscore_no_coef(X_GPT, y)

