# Home assignment Milestone 2 -- BRAIN2SPEECH

In this project we aim to synthesize speech from brain signals.

For this we use the intracranial EEG dataset that can be found here: https://osf.io/nrgx6/

The main tasks after the data preparation:
- Create a CNN network for the task
- Evaluate the results of the prediction using the MAE (mean absolute error) metric

We used the research of NeuralinterfacingLab as the starting point of our project. The research lab used linear regression to synthesize speech from the brain signal; we use deep learning for the same task. During the project we used some scripts created by NeuralinterfacingLab.

In [1]:
# Import the data_prepare module, which contains functions that we created.
from data_prepare import train_test_split
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
import os
from sklearn.decomposition import PCA

# Prepare data for learning

We prepare the data for training the neural network. We use a function that takes the results of the last milestone and produces a train-ready dataset.

In [2]:
def generate_datasets(pt, feat_path=r'./features', window_size=4, n_components=50):
    r"""
    Generate standardized train and test datasets from one sample of the raw data.

    The function loads ``{pt}_spec.npy`` (target spectrogram) and ``{pt}_feat.npy``
    (input features) from feat_path, standardizes the features, projects them onto
    the leading principal components, stacks window_size consecutive feature frames
    into one input sample and pairs it with the spectrogram frame that directly
    follows the window. The samples are split with ``train_test_split``
    (test_size=0.1) and inputs and outputs are standardized using the statistics
    of the training part only.

    Note: this function does not remove silence and does not create a validation set.

    param pt: name of the sample
    param feat_path: path of the directory that holds the raw data files
    param window_size: number of consecutive feature frames stacked into one input sample
    param n_components: number of leading principal components kept
    return: tuple (X_train, Y_train, X_test, Y_test, mu_input, std_input, mu_output, std_output)
    """

    def _safe_std(values):
        # A constant column has zero deviation; dividing by it would produce NaN/inf
        # (and make PCA.fit fail), so such columns are scaled by 1 instead.
        deviation = np.std(values, axis=0)
        deviation[deviation == 0] = 1.0
        return deviation

    spectrogram = np.load(os.path.join(feat_path, f'{pt}_spec.npy'))
    data = np.load(os.path.join(feat_path, f'{pt}_feat.npy'))

    # Standardize data
    data = (data - np.mean(data, axis=0)) / _safe_std(data)

    # Reduce dimensions: keep the projection onto the first n_components components
    pca = PCA()
    pca.fit(data)
    data = np.dot(data, pca.components_[:n_components, :].T)

    # Creating data for CNN: stacking window_size input data and connecting it to the output
    # at the end of the window
    n_windows = data.shape[0] - window_size
    windows = np.zeros((n_windows, window_size, data.shape[1]))
    targets = np.zeros((spectrogram.shape[0] - window_size, spectrogram.shape[1]))
    for start in range(n_windows):
        targets[start, :] = spectrogram[start + window_size, :]
        windows[start, :, :] = data[start:start + window_size, :]

    # Generate train and test datasets
    X_train, X_test, Y_train, Y_test = train_test_split(windows, targets, test_size=0.1)

    # Standardize input with training statistics only, so the test set stays unseen
    mu_input = np.mean(X_train, axis=0)
    std_input = _safe_std(X_train)
    X_train = (X_train - mu_input) / std_input
    X_test = (X_test - mu_input) / std_input

    # Standardize output with training statistics only
    mu_output = np.mean(Y_train, axis=0)
    std_output = _safe_std(Y_train)
    Y_train = (Y_train - mu_output) / std_output
    Y_test = (Y_test - mu_output) / std_output

    return (X_train, Y_train, X_test, Y_test, mu_input, std_input, mu_output, std_output)

In [7]:
def prepare_data(pts=None, feat_path=r'./features'):
    """
    Prepare the datasets of all samples for deep learning.

    generate_datasets is called once per sample. The training parts of all samples
    are concatenated into one array each for inputs and outputs, while the test
    parts and the normalization parameters are kept separately per sample.

    param pts: iterable of sample names; defaults to 'sub-01' ... 'sub-10'
    param feat_path: directory passed on to generate_datasets
    return: tuple (X_train, Y_train, X_test, Y_test, norm_params) where X_test and
            Y_test are lists with one array per sample and norm_params maps the
            sample index (as a string, '0', '1', ...) to a dict with the keys
            'mu_input', 'std_input', 'mu_output' and 'std_output'
    raises ValueError: if pts is empty
    """
    # Get list of samples
    if pts is None:
        pts = ['sub-%02d' % i for i in range(1, 11)]
    pts = list(pts)
    if not pts:
        raise ValueError('pts must contain at least one sample name')

    train_inputs = []
    train_targets = []
    X_test = []
    Y_test = []
    norm_params = {}

    for i, pt in enumerate(pts):
        # Prepare data for learning
        (x_train, y_train, x_test, y_test,
         mu_input, std_input, mu_output, std_output) = generate_datasets(pt, feat_path=feat_path)
        train_inputs.append(x_train)
        train_targets.append(y_train)
        X_test.append(x_test)
        Y_test.append(y_test)
        norm_params[str(i)] = {'mu_input': mu_input, 'std_input': std_input,
                               'mu_output': mu_output, 'std_output': std_output}

    # Concatenate once at the end instead of re-copying the growing array per sample
    X_train = np.concatenate(train_inputs)
    Y_train = np.concatenate(train_targets)

    return X_train, Y_train, X_test, Y_test, norm_params

In [8]:
# Build the datasets: X_train / Y_train are the concatenated training arrays of all samples,
# X_test / Y_test are lists with one array per sample, norm_params holds the
# per-sample normalization statistics keyed by the sample index as a string.
X_train, Y_train, X_test, Y_test, norm_params = prepare_data()

# Create and fit a CNN neural network

We create a 1D CNN architecture with two convolutional layers for learning.

In [9]:
# Creating one dimensional convolution net
def make_1d_convnet(window_size, filter_length, nb_input_series=1, nb_outputs=1, nb_filter=(40, 40)):
    """
    Build and compile a one dimensional convolutional regression network.

    The model is a stack of Conv1D layers with ReLU activation (one layer per
    entry of nb_filter), followed by Flatten and a linear Dense output layer.
    It is compiled with MSE loss, the Adam optimizer (learning rate 0.001) and
    MAE as an additional metric.

    param window_size: number of time steps in one input sample
    param filter_length: kernel size used by every Conv1D layer
    param nb_input_series: number of input values per time step
    param nb_outputs: number of values predicted per sample
    param nb_filter: sequence with the number of filters of each Conv1D layer
    return: the compiled Sequential model
    raises ValueError: if nb_filter is empty
    """
    nb_filter = list(nb_filter)
    if not nb_filter:
        raise ValueError('nb_filter must contain at least one filter count')

    model = Sequential()
    for layer_idx, n_filters in enumerate(nb_filter):
        # Only the first layer declares the input shape of the network
        shape_kwargs = {'input_shape': (window_size, nb_input_series)} if layer_idx == 0 else {}
        model.add(Conv1D(filters=n_filters, kernel_size=filter_length, activation='relu', **shape_kwargs))
    model.add(Flatten())
    model.add(Dense(nb_outputs, activation='linear'))

    optimizer = Adam(learning_rate=0.001)

    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

In [10]:
# Parameters for the CNN network
# NOTE(review): the original comment stated that one data point covers 2 * 200 ms
# of EEG signal -- confirm against the feature extraction step.
filter_length = 2  # kernel size of every Conv1D layer
window_size = 2 * 2  # time steps per input sample; must match the window_size used in generate_datasets
epochs = 100
batch_size = 128
pt=0

early_stopping = EarlyStopping(patience=50, verbose=1)
checkpointer = ModelCheckpoint(filepath='1dcnn.hdf5', save_best_only=True, verbose=1)

# 50 scalar values are included in one timestep of the input data,
# 23 values are predicted for one timestep of the output spectrogram
nb_filter = [40, 40] #probably should be between the nb_input and nb_output so that we don't lose data as a result of compression

model = make_1d_convnet(window_size=window_size, filter_length=filter_length, nb_filter=nb_filter, nb_input_series=50, nb_outputs=23)
# The callbacks have to be handed to fit(); otherwise early stopping and
# checkpointing of the best model never take place.
model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1,
          callbacks=[early_stopping, checkpointer], verbose=2)

#summary
model.summary()
# saving the model (an already existing file is not overwritten)
os.makedirs('models', exist_ok=True)  # model.save fails if the target directory is missing
if not os.path.isfile('models/1d_cnn.h5'):
    model.save('models/1d_cnn.h5')

Epoch 1/100
1888/1888 - 12s - loss: 0.8415 - mae: 0.6750 - val_loss: 0.8428 - val_mae: 0.6088 - 12s/epoch - 6ms/step
Epoch 2/100


KeyboardInterrupt: 

# Evaluate the performance of the model

We evaluate the performance of the neural network using the mean absolute error (MAE) metric. After that we visualize the results as spectrograms.

In [None]:
# Predict the spectrogram frames for the first test set only (index 0 of the
# per-sample list returned by prepare_data); the other test sets are not evaluated here.
preds = model.predict(X_test[0])
# Y_test was standardized in generate_datasets, so this MAE is expressed in
# standardized units, not in the original spectrogram scale.
loss = mean_absolute_error(Y_test[0], preds)
print(f'Mean absolute error of test set:{loss}')

In [None]:
# Visual comparison: target spectrogram of test set 0 (top panel)
# against the spectrogram predicted by the model (bottom panel)
spectrogram, rec_spec = Y_test[0], preds

cm = 'viridis'
fig, ax = plt.subplots(nrows=2, sharex=True)

# Both panels are drawn the same way: time on the x axis, bins flipped so that
# the first bin ends up at the bottom of the image
for axis, spec in zip(ax, (spectrogram, rec_spec)):
    image = np.flipud(spec.T)
    axis.imshow(image, cmap=cm, interpolation=None, aspect='auto')
    axis.set_ylabel('Log Mel-Spec Bin')