# Data gathering

In [3]:
import mne
import numpy as np 
import matplotlib.pyplot as plt
import torch
import os
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import ShuffleSplit, cross_val_score, train_test_split
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms

from Data import download_EEGBCI


ModuleNotFoundError: No module named 'Data'

In [None]:
# Ask for explicit confirmation before downloading subjects 1-109, runs 1-14.
# The answer is compared against whole words (tuple membership) rather than
# tested as a substring of "Yy": with a substring test the empty string
# (just pressing Enter) counts as a match and would start the full download.
answer = input("Are you sure you want to download the entire dataset? (Y/N)")

if answer.strip().lower() in ("y", "yes"):
    total_subjects = list(range(1, 110))
    runs = list(range(1, 15))
    # NOTE(review): the meaning of the trailing `False` flag is defined by
    # Data.download_EEGBCI, which is not visible here — confirm before changing.
    download_EEGBCI(total_subjects, runs, './EEGData', False)
    print("Downloaded everything!")

else:
    print("Download cancelled.")

In [5]:
main_folder = './EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0'

# os.scandir yields entries in arbitrary, platform-dependent order. Sort the
# folders so that the positional indices used below (and the index-based
# selections made on the resulting lists) always refer to the same folders.
subdirectories = sorted(f.path for f in os.scandir(main_folder) if f.is_dir())

# Positions in the sorted folder list whose recordings do not have a
# sequence length of 9760 and therefore cannot be stacked with the others.
excluded_indices = {13, 50, 68, 96, 108}

opened_files = []   # paths of the '*01.edf' files (eyes opened)
closed_files = []   # paths of the '*02.edf' files (eyes closed)

for i, subdirectory in enumerate(subdirectories):
    if i in excluded_indices:
        # Deliberately skipped: report it as such instead of "No files found".
        print(f"Skipping {subdirectory} (excluded index {i})")
        continue

    # Sorted for a deterministic processing/print order within a folder.
    files = sorted(os.listdir(subdirectory))
    if not files:
        print(f"No files found in {subdirectory}")
        continue

    for file in files:
        if file.endswith('01.edf'):
            # This is data for eyes opened
            eyes_opened = os.path.join(subdirectory, file)
            print(eyes_opened)
            opened_files.append(eyes_opened)

        if file.endswith('02.edf'):
            # This is data for eyes closed
            eyes_closed = os.path.join(subdirectory, file)
            print(eyes_closed)
            closed_files.append(eyes_closed)

./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S001\S001R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S001\S001R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S002\S002R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S002\S002R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S003\S003R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S003\S003R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S004\S004R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S004\S004R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S005\S005R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S005\S005R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S006\S006R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S006\S006R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S007\S007R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S007\S007R02.edf
./EEGData/MNE-eegbci-data/files/eegmmidb/1.0.0\S008\S008R01.edf
./EEGData/MNE-eegbci-data/files/eegmmidb

In [50]:
large_open_data = []    # sample arrays read from the eyes-opened files
large_closed_data = []  # sample arrays read from the eyes-closed files

# Read every EDF file (opened list first, then closed) and keep its sample
# array only when the first dimension has length 64.
for edf_paths, kept_arrays in (
    (opened_files, large_open_data),
    (closed_files, large_closed_data),
):
    for edf_path in edf_paths:
        raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)
        samples = raw.get_data(verbose=False)
        if len(samples) == 64:
            kept_arrays.append(samples)

print(len(large_open_data))    # number of eyes-opened recordings kept
print(len(large_closed_data))  # number of eyes-closed recordings kept

# Stack the per-file arrays into one array per condition.
training_data_open = np.array(large_open_data)
training_data_closed = np.array(large_closed_data)
print(training_data_open.shape)
print(training_data_closed.shape)


104
104
(104, 64, 9760)
(104, 64, 9760)


In [51]:
def get_fft(data, nperseg=256, noverlap=128, channels=64):
    """Compute an averaged log-power spectrum for every channel of every recording.

    Each channel is cut into overlapping segments of ``nperseg`` samples
    (hop size ``nperseg - noverlap``). Every segment is multiplied by a Hann
    window, its squared rFFT magnitude is taken, the per-segment spectra are
    averaged, and ``log10(power + 1e-10)`` is returned.

    Note: the result is a plain ``log10`` of the averaged power. No factor of
    10 is applied, so the values are not decibels.

    Args:
        data: Iterable of 2-D tensors indexed ``[channel, sample]`` (for
            example a 3-D tensor of shape ``(recordings, channels, samples)``).
        nperseg: Number of samples per segment.
        noverlap: Number of samples shared by consecutive segments.
        channels: Number of leading channels of each recording to process.

    Returns:
        Tensor of shape ``(len(data), channels, nperseg // 2 + 1)``.

    Raises:
        IndexError: If a recording has fewer than ``channels`` channels.
    """
    hop = nperseg - noverlap

    # Hann window, built once in float32; moved to each recording's device below
    # so inputs that do not live on the CPU can be processed as well.
    base_window = torch.hann_window(nperseg, dtype=torch.float)

    all_fft = []
    for x in data:
        if x.shape[0] < channels:
            raise IndexError(
                f"recording has {x.shape[0]} channels, expected at least {channels}"
            )

        window = base_window.to(x.device)

        # (channels, n_segments, nperseg): all channels are segmented at once.
        segments = x[:channels].unfold(-1, nperseg, hop)

        # Power spectrum of every windowed segment.
        power = torch.abs(torch.fft.rfft(segments * window, dim=-1)) ** 2

        # Average over segments, then take log10 (epsilon avoids log10(0)).
        all_fft.append(torch.log10(power.mean(dim=-2) + 1e-10))

    return torch.stack(all_fft, dim=0)



def normalize_data(input_data):
    """Min-max scale ``input_data`` linearly into the range [-1, 1].

    The minimum of the input maps to -1 and the maximum to 1.

    A constant input (max == min) has no range to scale by; dividing would
    produce 0/0 = NaN for every element. In that case an all-zero float array
    of the same shape is returned instead (0 is the midpoint of the target
    range).

    Args:
        input_data: Array-like of numeric values.

    Returns:
        Array with the same shape as ``input_data``, scaled to [-1, 1].
    """
    lowest = np.min(input_data)
    span = np.max(input_data) - lowest

    if span == 0:
        # Constant signal: avoid the 0/0 division that would yield NaNs.
        return np.zeros(np.shape(input_data))

    # Scale the data to range 0-1
    unit_scaled = (input_data - lowest) / span

    # Then scale from range 0-1 to range -1 to 1
    return 2 * unit_scaled - 1


In [59]:

# Positions in training_data_closed of the recordings that are kept; every
# other recording is filtered out as bad data.
keep_closed = [3,6,7,9,14,15,16,18,19,22,23,25,26,27,28,29,32,33,40,41,42,45,46,48,49,51,52,56,57,59,63,66,70,71,73,75,76,77,78,80,89,95,99,100,101]

# Keep only the sample window 3152:6304 of each selected recording.
data_np = [
    recording[:, 3152:6304]
    for position, recording in enumerate(training_data_closed)
    if position in keep_closed
]

# Normalize each of the 64 channels of every recording independently.
short_data_normalized = [
    [normalize_data(recording[ch]) for ch in range(64)]
    for recording in data_np
]

np.save('normalized-training-closed-64ch', short_data_normalized)


In [62]:

# Positions in training_data_open of the recordings that are kept; every
# other recording is filtered out as bad data.
keep_open = [3,4,5,7,9,10,11,12,14,15,16,17,18,19,21,22,24,25,26,34,37,41,45,48,49,52,56,57,60,63,64,65,66,73,80,87,89,90,96,99,100,101]

# Keep only the sample window 3152:6304 of each selected recording.
data_np = [
    recording[:, 3152:6304]
    for position, recording in enumerate(training_data_open)
    if position in keep_open
]

# Normalize each of the 64 channels of every recording independently.
short_data_normalized = [
    [normalize_data(recording[ch]) for ch in range(64)]
    for recording in data_np
]

np.save('normalized-training-open-64ch', short_data_normalized)
