In [4]:
import os
import numpy as np
from keras import optimizers
from keras.callbacks import ModelCheckpoint, Callback
from keras.layers import Input, Embedding, LSTM, Dense, concatenate, dot, multiply, Lambda
from keras.models import Model, Sequential, load_model
from keras.layers.wrappers import TimeDistributed
from keras.utils import to_categorical
from keras import backend as K
from keras import initializers
from random import shuffle
import random
import csv
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from pathlib import Path
# Seed Python's `random` module so that shuffles are repeatable across runs.
# Note: this does NOT seed numpy's random generator.
random.seed(999)
# Data files live in a `Data` directory that sits next to the current working
# directory. Each path component is passed separately so os.path.join inserts
# the correct separator on every OS (a literal 'Data\\file' only works on
# Windows; on POSIX the backslash would become part of the file name).
EMBEDDINGS_PATH = os.path.join(Path(os.getcwd()).parent, 'Data', 'dic_word2vec_ding_new.npy')
SENTENCES_PATH = os.path.join(Path(os.getcwd()).parent, 'Data', 'Ding_grammatical.csv')

In [5]:
# power spectra function
def power_spectra(w2vec_array, sampling_rate=16):
    """Average power spectrum of a set of unit time series (real-input DFT).

    Args:
        w2vec_array: 2-D array laid out as (n_samples, n_units); each column
            is treated as the time series of one unit.
        sampling_rate: number of samples per second, used to scale the
            frequency axis.

    Returns:
        Tuple ``(freq, mean_ps)``: the frequency axis from ``np.fft.rfftfreq``
        and the power spectrum averaged over units.

    Side effects:
        Prints the shapes of ``freq`` and ``mean_ps``.
    """
    # One row per unit, one column per time sample.
    per_unit_series = np.transpose(w2vec_array)
    n_points = per_unit_series.shape[1]

    # Frequency bins of the one-sided spectrum for n_points samples.
    freq = np.fft.rfftfreq(n_points, d=1./sampling_rate)
    print('freq ', freq.shape)

    # Power = squared magnitude of the one-sided FFT, computed unit by unit.
    per_unit_power = [np.abs(np.fft.rfft(series)) ** 2 for series in per_unit_series]

    # Collapse the unit axis to get a single spectrum.
    mean_ps = np.mean(per_unit_power, axis=0)
    print('mean_ps', mean_ps.shape)
    return freq, mean_ps

In [6]:
def plot_results(plot_name, freq, power, act,sampling_rate=20,n_samples=60):
    """Draw the activation trace and its power spectrum, then save them as a PDF.

    Args:
        plot_name: output path without extension; '.pdf' is appended.
        freq: frequency axis; its first entry is skipped when plotting.
        power: power values aligned with ``freq``; first entry skipped as well.
        act: activation values plotted against the generated time vector, so
            it needs as many points as that vector.
        sampling_rate: samples per time unit; its inverse is the time step.
        n_samples: exclusive upper bound of the time axis.
    """
    # Time axis running from 0 up to (not including) n_samples in steps of 1/sampling_rate.
    time_axis = np.arange(0, n_samples, 1.0/sampling_rate)

    # Two stacked panels: activation on top, spectrum below.
    fig, (ax_time, ax_freq) = plt.subplots(nrows=2, ncols=1)

    ax_time.plot(time_axis, act, linewidth=1.0)
    ax_time.set_xlabel('Time')
    ax_time.set_ylabel('Activation')

    # Index 0 of both arrays is left out of the spectrum panel.
    ax_freq.plot(freq[1:], power[1:], 'r', linewidth=1.0)
    ax_freq.set_xlabel('Freq (Hz)')
    ax_freq.set_ylabel('Power')

    # Keep labels from overlapping, then write the figure to disk.
    plt.tight_layout()
    fig.savefig(plot_name+'.pdf')


In [7]:
def get_embedding_masks():
    '''
    Mask dimensions according to the FY paradigm (without the noise), and get
    50 time step representations for each word.

    Every embedding dimension of a word gets a random onset drawn uniformly
    from [15, 65). At time step i (i = 0..49) a dimension carries the word's
    embedding value if its onset is below i*5 and 0 otherwise, so dimensions
    switch on progressively and then stay on.

    Side effects:
        Re-seeds Python's global `random` module with 999.

    Returns:
        dict mapping each word in the embeddings file to a float64 array of
        shape (embedding_dim, 50).
    '''
    n_steps = 50
    # Kept for its side effect on later random.shuffle calls in the notebook.
    random.seed(999)
    # random.seed() has no effect on numpy, so the onsets are drawn from a
    # dedicated seeded generator: results are reproducible run to run and
    # numpy's global random state is left untouched.
    rng = np.random.RandomState(999)
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # embedding files from a trusted source.
    word_embeddings = np.load(EMBEDDINGS_PATH, allow_pickle=True).item()
    # Threshold applied at each time step: 0, 5, 10, ..., 245.
    thresholds = np.arange(n_steps) * 5
    words_rep = {}
    for key, vector in word_embeddings.items():
        vector = np.asarray(vector)
        # Dimension count is taken from the word itself rather than from a
        # hard-coded vocabulary entry.
        onsets = rng.uniform(low=40-25, high=40+25, size=(vector.shape[0],))
        # active[d, i] is True once dimension d has switched on at step i.
        active = onsets[:, np.newaxis] < thresholds[np.newaxis, :]
        words_rep[key] = (active * vector[:, np.newaxis]).astype(np.float64)
    return words_rep

In [12]:
def get_data_split(n_train=50):
    """Load the sentences CSV, shuffle its rows and split them into train/test.

    Args:
        n_train: number of shuffled rows placed in the training set. Defaults
            to 50, the split point that was previously hard-coded.

    Returns:
        Tuple ``(train_sentences, test_sentences)``: two lists of CSV rows,
        each row being a list of strings. The first ``n_train`` shuffled rows
        go to train, the remainder to test.
    """
    # newline='' leaves line-ending handling to the csv module, as the csv
    # documentation requires for file objects passed to csv.reader.
    with open(SENTENCES_PATH, 'r', newline='') as f:
        sentence_list = list(csv.reader(f))
    # Shuffles with the global `random` state, so the split depends on the
    # seed in effect when this is called.
    random.shuffle(sentence_list)
    train_sentences = sentence_list[:n_train]
    test_sentences = sentence_list[n_train:]
    return train_sentences, test_sentences

# Runs at cell execution time: shuffles the sentence rows with the global
# `random` state and splits them into train and test lists.
train_sentences, test_sentences = get_data_split()    
# Builds the per-word time-step representations. This call re-seeds the global
# `random` module with 999, which affects any random.shuffle executed afterwards.
word_representations = get_embedding_masks()

In [23]:
def get_input_embeddings(input_data, representations):
    '''
    Shuffle the sentences and lay their word representations side by side.

    Args:
        input_data: list of sentences, each an iterable of words. It is
            shuffled IN PLACE with the global `random` state, so the caller's
            list ends up in the order used to build the output.
        representations: mapping from word to a 2-D array; presumably the
            (embedding_dim, 50) arrays built by get_embedding_masks - confirm.

    Returns:
        A single array made by concatenating every word's representation
        along axis 1, sentence by sentence and word by word, in the shuffled
        order.
    '''
    # In-place shuffle: the caller's list is reordered too.
    random.shuffle(input_data)
    # One array per word, following sentence order and then word order.
    per_word_blocks = [
        representations[token]
        for sentence in input_data
        for token in sentence
    ]
    # Join along the column axis; the row count stays that of one representation.
    return np.concatenate(per_word_blocks, axis=1)

(300, 10000)