In [1]:
from google.colab import drive
import pandas as pd
import torch
from torch.utils.data import DataLoader as DL
from torch.utils.data import TensorDataset as TData
import numpy as np
import scipy


# Load the BCI Competition IV 2a CSV from Google Drive, keep a fixed set of EEG
# channels, and restrict the rows to the 'left' / 'right' labels.

# Colab-only: makes the Drive contents available under /content/drive.
drive.mount('/content/drive')
# IPython shell escape (a syntax error in a plain .py script): extracts the
# archive into the working directory.
# NOTE(review): read_csv below is pointed at the .zip itself, so this extraction
# looks redundant -- confirm before removing.
! unzip "/content/drive/MyDrive/BCICIV_2a_all_patients.csv.zip"
dataset = pd.read_csv("/content/drive/MyDrive/BCICIV_2a_all_patients.csv.zip") # loading in the csv dataset with pandas
# Discard six EEG columns that are not used further on.
dataset = dataset.drop(columns=['EEG-Fz','EEG-2', 'EEG-14', 'EEG-Pz', 'EEG-15', 'EEG-16'])
# Column layout used from here on: four metadata columns followed by the
# sixteen remaining EEG channels in a fixed order.
new_order = ['patient', 'time', 'label', 'epoch', 'EEG-5', 'EEG-0', 'EEG-C3', 'EEG-9',
       'EEG-10', 'EEG-6', 'EEG-1', 'EEG-Cz', 'EEG-11', 'EEG-12',
      'EEG-7', 'EEG-3', 'EEG-4', 'EEG-C4', 'EEG-13', 'EEG-8']
dataset = dataset[new_order]
# Keep only the two-class (left vs. right) rows. This filter has to be widened
# if more label classes are to be included.
binary_mi = dataset.loc[(dataset['label'] == 'left') | (dataset['label'] == 'right')]
# Bare expression: only shown as notebook cell output; it has no other effect.
binary_mi.label.unique() # check that our row filter worked
# Check whether consecutive rows are evenly spaced in the 'time' column.
t0 = binary_mi.time.iloc[:-1] # every time point except the last one
t1 = binary_mi.time.iloc[1:] # same series offset by one: 2nd time point through the last
# Drop the original row labels so t0 and t1 align by position when subtracted.
t0.reset_index(drop=True, inplace=True)
t1.reset_index(drop=True, inplace=True)
const_diff = ((t0 - t1 < -0.0039) & (t0 - t1 > -0.0041)) # True where the step between neighbours is about 0.004
const_diff[const_diff != True] # display only the rows where the step was not about 0.004
# Column index 2 is 'label' in the reordered frame.
binary_mi.iloc[200:, 2] # time discrepancy seems to be caused by label change
# 'time' and 'epoch' are not needed for the per-patient signals built later.
pruned_mi = binary_mi.drop(columns = ['time', 'epoch'])
patients = binary_mi.patient.unique() # 1-9
labels = binary_mi.label.unique()     # left and right
# Build one NumPy array per patient and per class. Each array holds every
# column after 'patient' and 'label', transposed so that those columns become
# the rows of the array.
left_hand, right_hand = [], []

# The class masks do not depend on the patient, so compute them once.
is_left = pruned_mi['label'] == 'left'
is_right = pruned_mi['label'] == 'right'

for patient in patients:
  from_patient = pruned_mi['patient'] == patient
  left_df = pruned_mi.loc[from_patient & is_left]
  right_df = pruned_mi.loc[from_patient & is_right]

  # Skip the two leading metadata columns, then put the remaining columns on axis 0.
  left_hand.append(left_df.iloc[:, 2:].to_numpy().T)
  right_hand.append(right_df.iloc[:, 2:].to_numpy().T)

right_hand[0].shape # shape of the first patient's right-hand array
# Fraction of every recording that is held out for evaluation. The held-out
# window is split in half: first half -> validation, second half -> test.
eval_fraction = 0.4

# Sequence length and evaluation length of the FIRST patient only. They are
# kept under their original names for reference; the split below measures
# every patient's recording individually instead of assuming these values
# apply to all of them.
left_len = left_hand[0].shape[1]
right_len = right_hand[0].shape[1]
left_eval_len = int(eval_fraction * left_len)
right_eval_len = int(eval_fraction * right_len)


def _split_signals(signals, eval_fraction):
  """Cut a random contiguous evaluation window out of every signal.

  Args:
    signals: list of 2-D arrays; the split runs along axis 1.
    eval_fraction: fraction of axis 1 that is held out per signal.

  Returns:
    (train, val, test, starts) where train/val/test are lists with one array
    per input signal and starts is an integer array holding the first index
    of each held-out window.
  """
  train, val, test, starts = [], [], [], []
  for signal in signals:
    # Lengths are taken per signal so recordings of different length neither
    # index out of bounds nor silently lose their tail.
    seq_len = signal.shape[1]
    eval_len = int(eval_fraction * seq_len)
    half = eval_len // 2

    # Random start such that the whole window fits inside the signal.
    start = np.random.randint(0, seq_len - eval_len)
    stop = start + eval_len

    # Training data is everything outside start:stop. The two pieces are
    # joined end-to-end, so the join is a jump in time that later steps will
    # treat as contiguous samples.
    train.append(np.concatenate((signal[:, :start], signal[:, stop:]), axis=1))
    val.append(signal[:, start:start + half])
    test.append(signal[:, start + half:stop])
    starts.append(start)
  return train, val, test, np.array(starts, dtype=int)


# Left-hand signals first, then right-hand signals.
left_train, left_val, left_test, left_hand_partition = _split_signals(left_hand, eval_fraction)
right_train, right_val, right_test, right_hand_partition = _split_signals(right_hand, eval_fraction)


def bandpass_filter(signal, crit_freq=(5, 40), sampling_freq=125, order=4):
  """Apply a zero-phase Butterworth band-pass filter along axis 1.

  Args:
    signal: array with at least two dimensions; filtering runs along axis 1.
    crit_freq: (low, high) band edges, in the same units as sampling_freq.
    sampling_freq: sampling rate of signal, forwarded to the filter design
      as ``fs``. Callers should pass the real rate of their data rather than
      rely on the default.
    order: order handed to the Butterworth design.

  Returns:
    Array with the same shape as signal, holding the filtered data.
  """
  # Imported locally: a bare `import scipy` does not guarantee on every SciPy
  # version that the `signal` subpackage is loaded, and the parameter named
  # `signal` must not be confused with it.
  from scipy.signal import butter, filtfilt

  b, a = butter(order, crit_freq, btype='bandpass', fs=sampling_freq)
  # filtfilt runs the filter forwards and backwards, cancelling the phase shift.
  return filtfilt(b, a, signal, axis=1)

def segmentation(signal, sampling_freq=125, window_size=1, window_shift=0.016):
  """Cut a signal into overlapping windows along axis 1 (sliding window).

  Args:
    signal: 2-D array; windows are taken along axis 1.
    sampling_freq: samples per time unit, used to convert the two durations
      below into sample counts.
    window_size: window duration; the window holds
      int(sampling_freq * window_size) samples.
    window_shift: step between window starts; the step is
      int(sampling_freq * window_shift) samples.

  Returns:
    List of slices of signal, each with w_size entries on axis 1. The list is
    empty when the signal is shorter than one window.

  Raises:
    ValueError: if the window or the shift comes out shorter than one sample.
  """
  w_size = int(sampling_freq * window_size)
  w_shift = int(sampling_freq * window_shift)
  if w_size < 1:
    raise ValueError(
        f"window_size={window_size} at sampling_freq={sampling_freq} is shorter than one sample")
  # A zero shift would never advance the window and loop forever.
  if w_shift < 1:
    raise ValueError(
        f"window_shift={window_shift} at sampling_freq={sampling_freq} is shorter than one sample")

  # Last start index at which a full window still fits inside the signal.
  last_start = signal.shape[1] - w_size
  return [signal[:, start:start + w_size] for start in range(0, last_start + 1, w_shift)]

def preprocess(signals, crit_freq=(5, 35), fs=250):
  """Band-pass filter, z-score and window every signal, in that order.

  Args:
    signals: iterable of 2-D arrays; every step works along axis 1.
    crit_freq: (low, high) band edges forwarded to bandpass_filter.
    fs: sampling rate forwarded to bandpass_filter and segmentation.

  Returns:
    Flat list with the windows of all signals, in input order.
  """
  preprocessed = []
  for signal in signals:
    # Band-pass filter the signal.
    filtered_signal = bandpass_filter(signal, crit_freq, fs)

    # Z-score every row (axis-0 entry) independently.
    mean = filtered_signal.mean(1, keepdims=True)
    std = filtered_signal.std(1, keepdims=True)
    # A perfectly flat row has zero spread; dividing by 1 there yields zeros
    # instead of NaN/inf leaking into the model inputs.
    std = np.where(std == 0, 1.0, std)
    normed_signal = (filtered_signal - mean) / std

    # Sliding-window segmentation; collect the windows of every signal in one list.
    preprocessed.extend(segmentation(normed_signal, fs))
  return preprocessed
# Run the preprocessing pipeline on every split: left-hand splits first, then
# right-hand splits.
train_left, val_left, test_left = (
    preprocess(split) for split in (left_train, left_val, left_test))
train_right, val_right, test_right = (
    preprocess(split) for split in (right_train, right_val, right_test))

# Each split lists all left-hand segments first, followed by the right-hand ones.
train_eeg = train_left + train_right
val_eeg = val_left + val_right
test_eeg = test_left + test_right

# Integer class ids in the same order as the segments: 0 = left, 1 = right.
train_labels = [0] * len(train_left) + [1] * len(train_right)
val_labels = [0] * len(val_left) + [1] * len(val_right)
test_labels = [0] * len(test_left) + [1] * len(test_right)
def _stack_segments(segments):
  """Copy a list of equally shaped 2-D arrays into one (n, rows, cols) tensor."""
  first = segments[0]
  stacked = torch.zeros((len(segments), first.shape[0], first.shape[1]))
  for idx, segment in enumerate(segments):
    # Copy each segment before handing it to torch, then write it into its
    # slot of the pre-allocated tensor.
    stacked[idx] = torch.from_numpy(segment.copy())
  return stacked


def _one_hot(class_ids):
  """Return a (n, 2) tensor with a 1 in the column named by each class id."""
  encoded = torch.zeros(len(class_ids), 2)
  for row, class_id in enumerate(class_ids):
    encoded[row][class_id] = 1
  return encoded


# Inputs: one tensor per split, filled sample by sample from the segment lists.
train_eeg_tensor = _stack_segments(train_eeg)
valid_eeg_tensor = _stack_segments(val_eeg)
test_eeg_tensor = _stack_segments(test_eeg)

# Targets: one-hot rows, column 0 for label 0 and column 1 for label 1.
train_label_tensor = _one_hot(train_labels)
valid_label_tensor = _one_hot(val_labels)
test_label_tensor = _one_hot(test_labels)

# Pair the inputs with their targets.
train_ds = TData(train_eeg_tensor, train_label_tensor)
valid_ds = TData(valid_eeg_tensor, valid_label_tensor)
test_ds = TData(test_eeg_tensor, test_label_tensor)

# Batched loaders; all three use the same settings (batch size 64, reshuffled,
# last incomplete batch discarded).
# NOTE(review): shuffling and dropping the last batch also applies to the
# validation and test loaders, so some evaluation samples are skipped --
# confirm this is intended.
loader_options = dict(batch_size=64, shuffle=True, drop_last=True)
train_dl = DL(train_ds, **loader_options)
valid_dl = DL(valid_ds, **loader_options)
test_dl = DL(test_ds, **loader_options)

ModuleNotFoundError: No module named 'google'