# Home assignment Milestone 2 -- BRAIN2SPEECH

During the project we would like to synthesize speech from brain signals.

For this we use the intracranial EEG dataset that can be found here: https://osf.io/nrgx6/

The main tasks after the data preparation:
- Create a CNN network for the task
- Evaluate the results of the prediction using the MAE metric

We used the research of NeuralinterfacingLab as the starting point of our project. The research lab used linear regression to synthesize speech from the brain signal, and we use deep learning for the same task. During the project we used some scripts created by NeuralinterfacingLab.

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import Activation, Dense, Flatten, Conv1D, MaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD, Adam

# Import the data_prepare module, which contains functions that we created.
from data_prepare import train_test_split

# Prepare data for learning

We prepare the data for training neural networks. We use a function that applies the processing steps of the previous milestone to produce a train-ready dataset.

In [None]:
def _fit_standardizer(values):
    """Return the mean and standard deviation of `values` along axis 0.

    Entries whose standard deviation is exactly zero are replaced by 1 so that
    dividing by the returned std never produces NaN or inf.
    """
    mu = np.mean(values, axis=0)
    std = np.std(values, axis=0)
    # A constant column has std == 0; leave it centred but unscaled.
    std = np.where(std == 0, 1.0, std)
    return mu, std


def generate_datasets(pt, feat_path=r'./features', window_size=4, n_components=50):
    r"""
    Generate standardized train and test datasets from one sample of the raw data.

    The function loads ``{pt}_spec.npy`` and ``{pt}_feat.npy`` from ``feat_path``,
    z-scores the features, projects them onto the first ``n_components``
    principal components, stacks ``window_size`` consecutive feature frames into
    one input sample and pairs it with the spectrogram frame that directly
    follows the window. The windows are then split with ``train_test_split``
    (10 % test) and inputs and outputs are standardized with statistics of the
    training part only.

    NOTE(review): no silence filtering happens in this function.
    NOTE(review): the feature standardization and the PCA are fitted on the whole
    recording before the split, so the test part influences them.
    NOTE(review): assumes the feature and spectrogram arrays have the same
    number of frames - confirm against the feature extraction.

    param pt: name of the sample (file name prefix, e.g. 'sub-01')
    param feat_path: directory that contains the feature files
    param window_size: number of consecutive feature frames per input sample
    param n_components: number of leading principal components that are kept
    return: tuple (X_train, Y_train, X_test, Y_test,
            mu_input, std_input, mu_output, std_output)
    """

    spectrogram = np.load(os.path.join(feat_path, f'{pt}_spec.npy'))
    data = np.load(os.path.join(feat_path, f'{pt}_feat.npy'))

    # Standardize data
    mu, std = _fit_standardizer(data)
    data = (data - mu) / std

    # Reduce dimensions: keep the projection onto the leading components
    pca = PCA()
    pca.fit(data)
    data = np.dot(data, pca.components_[:n_components, :].T)

    # Creating data for CNN: stacking window_size input frames and connecting
    # them to the output frame right after the end of the window
    n_samples = data.shape[0] - window_size
    inputs = np.zeros((n_samples, window_size, data.shape[1]))
    outputs = np.zeros((spectrogram.shape[0] - window_size, spectrogram.shape[1]))
    for i in range(n_samples):
        outputs[i, :] = spectrogram[i + window_size, :]
        inputs[i, :, :] = data[i:i + window_size, :]

    # Generate train and test datasets
    X_train, X_test, Y_train, Y_test = train_test_split(inputs, outputs, test_size=0.1)

    # Standardize input with statistics of the training part only
    mu_input, std_input = _fit_standardizer(X_train)
    X_train = (X_train - mu_input) / std_input
    X_test = (X_test - mu_input) / std_input

    # Standardize output with statistics of the training part only
    mu_output, std_output = _fit_standardizer(Y_train)
    Y_train = (Y_train - mu_output) / std_output
    Y_test = (Y_test - mu_output) / std_output

    return (X_train, Y_train, X_test, Y_test, mu_input, std_input, mu_output, std_output)

In [None]:
def prepare_data(pts=None, feat_path=r'./features'):
    """
    Prepare the train and test datasets of several samples for deep learning.

    Calls ``generate_datasets`` once per sample and collects the results.

    param pts: iterable of sample names; defaults to 'sub-01' ... 'sub-10'
    param feat_path: directory of the feature files, passed to generate_datasets
    return: tuple (X_train, Y_train, X_test, Y_test, norm_params) where the
            first four items are lists with one array per sample (in the order
            of ``pts``) and ``norm_params`` maps the sample's position in
            ``pts`` (as a string, '0', '1', ...) to a dict with the keys
            'mu_input', 'std_input', 'mu_output' and 'std_output'.
    """
    # Get list of samples
    if pts is None:
        pts = ['sub-%02d' % i for i in range(1, 11)]

    # Containers for the prepared datasets
    norm_params = {}
    X_train, Y_train, X_test, Y_test = [], [], [], []

    # Prepare data from all samples for deep learning
    for i, pt in enumerate(pts):
        (x_train, y_train, x_test, y_test,
         mu_input, std_input, mu_output, std_output) = generate_datasets(pt, feat_path)
        X_train.append(x_train)
        Y_train.append(y_train)
        X_test.append(x_test)
        Y_test.append(y_test)
        # Keep the statistics so the standardization can be related back to each sample
        norm_params[str(i)] = {
            'mu_input': mu_input,
            'std_input': std_input,
            'mu_output': mu_output,
            'std_output': std_output,
        }

    return X_train, Y_train, X_test, Y_test, norm_params

In [2]:
# Build the datasets of all samples. X_train, Y_train, X_test and Y_test are
# lists with one array per sample; norm_params holds the per-sample mean/std
# values used for the standardization.
X_train, Y_train, X_test, Y_test, norm_params = prepare_data()

# Create and fit a CNN neural network

We create a 1D CNN architecture with two convolutional layers.

In [3]:
# Creating one dimensional convolution net
def make_1d_convnet(window_size, filter_length, nb_input_series=1, nb_outputs=1, nb_filter=(40, 40)):
    """
    Build and compile a one dimensional convolutional regression network.

    The network consists of two Conv1D layers with ReLU activation, a Flatten
    layer and a linear Dense output layer. It is compiled with the Adam
    optimizer (learning rate 0.001), MSE loss and MAE as an additional metric.

    param window_size: number of time steps in one input sample
    param filter_length: kernel size of both convolution layers
    param nb_input_series: number of values per time step of the input
    param nb_outputs: number of values predicted for each sample
    param nb_filter: pair with the number of filters of the first and the
                     second convolution layer
    return: the compiled Sequential model
    """
    # probably more filter layers should be added

    model = Sequential()
    model.add(Conv1D(filters=nb_filter[0], kernel_size=filter_length, activation='relu',
                     input_shape=(window_size, nb_input_series)))
    model.add(Conv1D(filters=nb_filter[1], kernel_size=filter_length, activation='relu'))
    model.add(Flatten())
    model.add(Dense(nb_outputs, activation='linear'))

    # `lr` is a deprecated alias of `learning_rate` in tf.keras optimizers
    optimizer = Adam(learning_rate=0.001)

    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

In [None]:
# Parameters for the CNN network
# NOTE(review): the original note says one data point covers 2 * 200 ms of EEG
# signal - confirm against the feature extraction.
filter_length = 2  # kernel size of each convolution layer, in time steps
window_size = 2 * 2  # time steps per input sample; must match the window used in generate_datasets
epochs = 100
batch_size = 128

# 50 scalar value is included in one timestep of input data, 23 freq component in one timestep of the output spectogram
nb_filter = [40, 40]  # probably should be between the nb_input and nb_output so that we don't lose data as a result of compression

# prepare_data returns one array per sample; Keras expects a single array per
# model input, so the samples are stacked along the first axis.
X_train_all = np.concatenate(X_train, axis=0)
Y_train_all = np.concatenate(Y_train, axis=0)

model = make_1d_convnet(window_size=window_size, filter_length=filter_length, nb_filter=nb_filter, nb_input_series=50, nb_outputs=23)
# NOTE: validation_split takes the validation data from the end of the arrays,
# i.e. from the samples that were concatenated last.
model.fit(X_train_all, Y_train_all, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=2)

# summary
model.summary()
# saving the model (an already existing file is kept and not overwritten)
os.makedirs('models', exist_ok=True)
if not os.path.isfile('models/1d_cnn.h5'):
    model.save('models/1d_cnn.h5')

Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


1632/1632 - 6s - loss: 5.0072 - mae: 1.5637 - val_loss: 3.9979 - val_mae: 1.4591 - 6s/epoch - 3ms/step
Epoch 2/100
1632/1632 - 5s - loss: 3.7904 - mae: 1.3837 - val_loss: 3.7533 - val_mae: 1.3546 - 5s/epoch - 3ms/step
Epoch 3/100
1632/1632 - 6s - loss: 3.6126 - mae: 1.3436 - val_loss: 3.6332 - val_mae: 1.3382 - 6s/epoch - 4ms/step
Epoch 4/100
1632/1632 - 5s - loss: 3.4738 - mae: 1.3120 - val_loss: 3.5255 - val_mae: 1.3258 - 5s/epoch - 3ms/step
Epoch 5/100
1632/1632 - 7s - loss: 3.3719 - mae: 1.2881 - val_loss: 3.4441 - val_mae: 1.3094 - 7s/epoch - 4ms/step
Epoch 6/100
1632/1632 - 6s - loss: 3.2937 - mae: 1.2706 - val_loss: 3.3686 - val_mae: 1.2770 - 6s/epoch - 4ms/step
Epoch 7/100
1632/1632 - 5s - loss: 3.2256 - mae: 1.2547 - val_loss: 3.3301 - val_mae: 1.2626 - 5s/epoch - 3ms/step
Epoch 8/100
1632/1632 - 5s - loss: 3.1791 - mae: 1.2433 - val_loss: 3.2931 - val_mae: 1.2568 - 5s/epoch - 3ms/step
Epoch 9/100
1632/1632 - 4s - loss: 3.1353 - mae: 1.2331 - val_loss: 3.2597 - val_mae: 1.2708

Epoch 72/100
1632/1632 - 7s - loss: 2.4556 - mae: 1.0686 - val_loss: 2.7847 - val_mae: 1.1340 - 7s/epoch - 4ms/step
Epoch 73/100
1632/1632 - 7s - loss: 2.4537 - mae: 1.0682 - val_loss: 2.7907 - val_mae: 1.1239 - 7s/epoch - 4ms/step
Epoch 74/100
1632/1632 - 5s - loss: 2.4498 - mae: 1.0668 - val_loss: 2.7814 - val_mae: 1.1400 - 5s/epoch - 3ms/step
Epoch 75/100
1632/1632 - 6s - loss: 2.4458 - mae: 1.0662 - val_loss: 2.7592 - val_mae: 1.1241 - 6s/epoch - 4ms/step
Epoch 76/100
1632/1632 - 6s - loss: 2.4388 - mae: 1.0645 - val_loss: 2.7568 - val_mae: 1.1241 - 6s/epoch - 4ms/step
Epoch 77/100
1632/1632 - 7s - loss: 2.4356 - mae: 1.0635 - val_loss: 2.7701 - val_mae: 1.1401 - 7s/epoch - 4ms/step
Epoch 78/100
1632/1632 - 7s - loss: 2.4334 - mae: 1.0628 - val_loss: 2.8003 - val_mae: 1.1579 - 7s/epoch - 4ms/step
Epoch 79/100
1632/1632 - 6s - loss: 2.4260 - mae: 1.0615 - val_loss: 2.7635 - val_mae: 1.1508 - 6s/epoch - 4ms/step
Epoch 80/100
1632/1632 - 7s - loss: 2.4280 - mae: 1.0613 - val_loss: 2.7

# Evaluate the performance of the model

We evaluate the performance of the neural network using the mean absolute error (MAE) metric. After that we visualize the results as spectrograms.

In [None]:
# Stack the per-sample test sets returned by prepare_data into single arrays
X_test_all = np.concatenate(X_test, axis=0)
Y_test_all = np.concatenate(Y_test, axis=0)

# The error is measured on the standardized spectrogram values
preds = model.predict(X_test_all)
loss = mean_absolute_error(Y_test_all, preds)
print(f'Mean absolute error of test set:{loss}')

In [None]:
# Viz spectrograms: reference (top) and prediction of the network (bottom)
rec_spec = preds
# Stack the per-sample test targets so that the rows line up with `preds`
spectrogram = np.concatenate(Y_test, axis=0)

cm = 'viridis'
fig, ax = plt.subplots(2, sharex=True)

# Plot spectrograms; transposed and flipped so that the bins run along the y axis
ax[0].imshow(np.flipud(spectrogram.T), cmap=cm, interpolation=None, aspect='auto')
ax[0].set_ylabel('Log Mel-Spec Bin')
ax[1].imshow(np.flipud(rec_spec.T), cmap=cm, interpolation=None, aspect='auto')
ax[1].set_ylabel('Log Mel-Spec Bin')
plt.show()