In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mne
import os
import seaborn as sns
from glob import glob
import warnings
from autopreprocess_pipeline import *
from autopreprocessing import dataset as ds

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPool1D, Dense, Flatten, Dropout

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
# Root folder of the locally stored TDBRAIN dataset (machine-specific absolute Windows path).
main_path = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\'

In [3]:
# Load the participants table (the original .xlsx file must be converted to .csv beforehand).
df = pd.read_csv(os.path.join(main_path, 'TDBRAIN_participants.csv'))

# Only the participant ID and the diagnosis label are needed from all columns.
df_subset = df[['participants_ID', 'formal_status']]

# Out of all statuses (classes) only HEALTHY and ADHD are kept; .copy() detaches the result from `df`.
df_filtered = df_subset[df_subset['formal_status'].isin(['HEALTHY', 'ADHD'])].copy()

# Build the output path from main_path so the dataset root is defined in one place only.
filtered_file_path = os.path.join(main_path, 'TDBRAIN_ID_and_status.csv')
df_filtered.to_csv(filtered_file_path, index=False)  # save the filtered table

In [4]:
import shutil

# Folder that holds one sub-folder per participant.
folders_path = os.path.join(main_path, 'TDBRAIN_derivatives_csv\\derivatives')

folders = os.listdir(folders_path)

# Only the HEALTHY and ADHD participants' folders are needed; their IDs are in the
# participants_ID column of df_filtered.
participant_ids = df_filtered['participants_ID'].tolist()
ids_to_keep = set(participant_ids)  # set membership is O(1) per folder

# WARNING: this cell permanently deletes data. Refuse to run when the ID list is empty
# (e.g. a column mismatch in the CSV), because every folder would then be removed.
if not ids_to_keep:
    raise ValueError("No HEALTHY/ADHD participant IDs found; refusing to delete any folders.")

for folder in folders:
    if folder not in ids_to_keep:
        folder_path = os.path.join(folders_path, folder)
        if os.path.isdir(folder_path):
            print(f"Removing folder: {folder}")
            shutil.rmtree(folder_path)
        else:
            print(f"Skipping {folder} as it is not a directory.")

# the dataset is now significantly smaller, from 90 GB to 8 GB

In [5]:
# Preview the first rows; only a cell's last expression is displayed, so a bare
# `df_filtered` line before it would have no effect.
df_filtered.head()

Unnamed: 0,participants_ID,formal_status
245,sub-87974617,HEALTHY
246,sub-87974621,HEALTHY
247,sub-87974665,HEALTHY
248,sub-87974709,HEALTHY
249,sub-87974841,HEALTHY


In [6]:
# Electrode names copied from the header of one .csv EEG recording file.
text = 'Fp1,Fp2,F7,F3,Fz,F4,F8,FC3,FCz,FC4,T7,C3,Cz,C4,T8,CP3,CPz,CP4,P7,P3,Pz,P4,P8,O1,Oz,O2,VPVA,VNVB,HPHL,HNHR,Erbs,OrbOcc,Mass'

# Split the comma-separated header into a list of channel-name strings.
channel_names = [str(label) for label in text.split(',')]
print(channel_names)

['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FC3', 'FCz', 'FC4', 'T7', 'C3', 'Cz', 'C4', 'T8', 'CP3', 'CPz', 'CP4', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'Oz', 'O2', 'VPVA', 'VNVB', 'HPHL', 'HNHR', 'Erbs', 'OrbOcc', 'Mass']


In [7]:
# Input folder: the per-participant folders under folders_path.
# NOTE(review): `sourcepath` is not referenced again in the visible cells (varargsin uses folders_path directly).
sourcepath = folders_path
# Output folder for the preprocessed recordings.
preprocpath = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects'

In [8]:
# Keyword bundle passed to autopreprocess_standard: where to read raw data and where to write results.
varargsin = {
    'sourcepath' : folders_path,
    'preprocpath' : preprocpath
}

In [9]:
# Run the preprocessing pipeline over every subject found under 'sourcepath'.
# NOTE(review): autopreprocess_standard is not defined in this notebook; presumably it comes from
# the star import of autopreprocess_pipeline — confirm.
autopreprocess_standard(varargsin=varargsin)

D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\derivatives
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\processed_subjects
250 files listed
122 subjects
[INFO]: processing subject: 0 of 122
sub-87974617_ses-1_task-restEC_eeg.csv
Eye artifact correction: correcting 59 VEOG eye artifact(s)
Eye artifact correction: correcting 15 HEOG eye artifact(s)
EMG detection: detected 5 artifact(s)
Jump/ baseline shift : 0 jumps/baselineshifts detected
kurtosis: 1 samples with kurtosis detected
swing-detection: 1 samples with extreme voltage swing detected
EB detection: detected 0 artifact(s)
saving data 

(1, 32, 60287)
1
3
HERE
sub-87974617_ses-1_task-restEO_eeg.csv
Eye artifact correction: correcting 29 VEOG eye artifact(s)
Eye artifact correction: correcting 57 HEOG eye artifact(s)
EMG detection: detected 12 artifact(s)
Jump/ baseline shift : 0 jumps/baselineshifts detected
kurtosis: 0 samples with kurtosis detected
swing-detection: 0 samples with swing-detection
EB detection:

In [6]:
def chop_string_from_end(string, char, flip):
    """Cut `string` at the last occurrence of `char`.

    Args:
        string: Text to cut.
        char: Substring searched for from the right (may be longer than one character).
        flip: Falsy -> return the tail starting AT the match (the match itself is included).
              Truthy -> return the head that precedes the match (the match is excluded).

    Returns:
        The selected part of `string`, or `string` unchanged when `char` does not occur.
    """
    cut = string.rfind(char)
    if cut == -1:
        return string
    return string[:cut] if flip else string[cut:]



In [24]:
def segment_csv(path_to_file, path_to_dir, window_length=5000, stride=500, max_segments=110):
    """Cut one preprocessed recording into overlapping windows and save each window as a CSV.

    Args:
        path_to_file: Path of the preprocessed recording (.csv) to segment.
        path_to_dir: Folder that receives the segment files (created if missing).
        window_length: Number of rows (samples) per segment.
        stride: Row offset between the starts of consecutive segments.
        max_segments: Upper bound on segments written per recording; the default of 110
            keeps the previously hard-coded limit. Pass None to write every full window.

    Only complete windows are written: a recording too short for the requested number of
    windows yields fewer files instead of truncated or empty segments.

    Raises:
        ValueError: If `window_length` or `stride` is not positive.
    """
    if window_length <= 0 or stride <= 0:
        raise ValueError("window_length and stride must be positive")

    print(path_to_file)
    print(path_to_dir)
    df = pd.read_csv(path_to_file)

    # Drop the artifact marker and the non-scalp channels before segmenting.
    df = df.drop(columns=['artifacts', 'VEOG', 'HEOG', 'Erbs', 'OrbOcc', 'Mass'])

    # The file-name stem is loop-invariant: keep everything from the last backslash on,
    # then cut the "eeg_csv..." suffix (the leading backslash is intentionally retained).
    subject_name = chop_string_from_end(path_to_file, "\\", flip=0)
    clean_name = chop_string_from_end(subject_name, "eeg_csv", flip=1)

    os.makedirs(path_to_dir, exist_ok=True)

    # Number of windows that fit completely inside the recording.
    n_full_windows = max(0, (len(df) - window_length) // stride + 1)
    n_segments = n_full_windows if max_segments is None else min(n_full_windows, max_segments)

    for window_idx in range(n_segments):
        start = window_idx * stride
        sub_df = df.iloc[start:start + window_length]

        # Segment files are numbered from 1, matching the file names produced before.
        seg_path = path_to_dir + "\\" + clean_name + "_seg_" + str(window_idx + 1) + ".csv"

        # The index is written on purpose: downstream cells drop the resulting 'Unnamed: 0' column.
        sub_df.to_csv(seg_path)
    

In [8]:
# Output folder for the segment files written by segment_csv.
path_to_dir = "D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\segmented_subjects"

In [7]:

def find_csv_files(directory):
    """
    Collect every .csv file below `directory`, descending into all subdirectories.

    Returns a list of paths (each joined onto its containing folder) in os.walk order;
    the list is empty when nothing matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(directory)
        for name in names
        if name.endswith(".csv")
    ]


In [10]:
# Collect every preprocessed recording (.csv) below the processed_subjects folder.
csv_files = find_csv_files("D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects")

In [11]:
# Display the collected paths. NOTE(review): this prints the whole list; a slice such as csv_files[:5] keeps the output short.
csv_files

['D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects\\sub-87974617\\ses-1\\eeg\\csv_data_csv_120.574s\\sub-87974617_ses-1_task-restEC_eeg_csv_120.574scsv.csv',
 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects\\sub-87974617\\ses-1\\eeg\\csv_data_csv_120.578s\\sub-87974617_ses-1_task-restEO_eeg_csv_120.578scsv.csv',
 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects\\sub-87974621\\ses-1\\eeg\\csv_data_csv_120.574s\\sub-87974621_ses-1_task-restEO_eeg_csv_120.574scsv.csv',
 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects\\sub-87974621\\ses-1\\eeg\\csv_data_csv_120.64s\\sub-87974621_ses-1_task-restEC_eeg_csv_120.64scsv.csv',
 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects\\sub-87974665\\ses-1\\eeg\\csv_data_csv_120.572s\\sub-87974665_ses-1_task-restEO_eeg_csv_120.572scsv.csv',
 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\

In [20]:
# Segment every preprocessed recording into overlapping windows under path_to_dir.
for file in csv_files: 
    segment_csv(path_to_file = file, path_to_dir = path_to_dir)

D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\processed_subjects\sub-87974617\ses-1\eeg\csv_data_csv_120.574s\sub-87974617_ses-1_task-restEC_eeg_csv_120.574scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\segmented_subjects
\sub-87974617_ses-1_task-restEC_
saving segment 1 of the patient to D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\segmented_subjects\\sub-87974617_ses-1_task-restEC__seg_1.csv, window that captures row 1000 to 6000
\sub-87974617_ses-1_task-restEC_
saving segment 2 of the patient to D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\segmented_subjects\\sub-87974617_ses-1_task-restEC__seg_2.csv, window that captures row 2000 to 7000
\sub-87974617_ses-1_task-restEC_
saving segment 3 of the patient to D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\segmented_subjects\\sub-87974617_ses-1_task-restEC__seg_3.csv, window that captures row 3000 to 8000
\sub-87974617_ses-1_task-restEC_
saving segment 4 of the patient to D:\TDBR

In [8]:
print(df_filtered)
# Lookup table: participant ID -> diagnosis label (a later duplicate ID overrides an earlier one).
df_dict = dict(zip(df_filtered['participants_ID'], df_filtered['formal_status']))
df_dict

     participants_ID formal_status
245     sub-87974617       HEALTHY
246     sub-87974621       HEALTHY
247     sub-87974665       HEALTHY
248     sub-87974709       HEALTHY
249     sub-87974841       HEALTHY
...              ...           ...
1265    sub-88073205          ADHD
1277    sub-88074021          ADHD
1292    sub-88075053       HEALTHY
1293    sub-88075101          ADHD
1321    sub-88076989          ADHD

[124 rows x 2 columns]


{'sub-87974617': 'HEALTHY',
 'sub-87974621': 'HEALTHY',
 'sub-87974665': 'HEALTHY',
 'sub-87974709': 'HEALTHY',
 'sub-87974841': 'HEALTHY',
 'sub-87974973': 'HEALTHY',
 'sub-87976193': 'HEALTHY',
 'sub-87976369': 'HEALTHY',
 'sub-87976413': 'HEALTHY',
 'sub-87976457': 'HEALTHY',
 'sub-87976461': 'HEALTHY',
 'sub-87976505': 'HEALTHY',
 'sub-87976641': 'HEALTHY',
 'sub-87976773': 'HEALTHY',
 'sub-87976817': 'HEALTHY',
 'sub-87976953': 'HEALTHY',
 'sub-87977045': 'HEALTHY',
 'sub-87980197': 'HEALTHY',
 'sub-87980241': 'HEALTHY',
 'sub-87980329': 'HEALTHY',
 'sub-87980373': 'HEALTHY',
 'sub-87980417': 'HEALTHY',
 'sub-87980689': 'HEALTHY',
 'sub-87980869': 'HEALTHY',
 'sub-87980913': 'HEALTHY',
 'sub-87982225': 'HEALTHY',
 'sub-87982849': 'HEALTHY',
 'sub-88008997': 'HEALTHY',
 'sub-88015117': 'ADHD',
 'sub-88015565': 'ADHD',
 'sub-88024205': 'ADHD',
 'sub-88025421': 'ADHD',
 'sub-88025917': 'ADHD',
 'sub-88026949': 'ADHD',
 'sub-88028433': 'ADHD',
 'sub-88029425': 'ADHD',
 'sub-88029557':

In [10]:
# Folder with the preprocessed subject folders, and one destination folder per class.
source_dir = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\processed_subjects'
target_dir_adhd = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\adhd'
target_dir_healthy = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\healthy'

In [11]:

# Move each preprocessed subject folder into the folder of its class.
# The destinations must exist first: shutil.move onto a missing path RENAMES the source
# to that path instead of moving it inside.
os.makedirs(target_dir_healthy, exist_ok=True)
os.makedirs(target_dir_adhd, exist_ok=True)

for subject in os.listdir(source_dir):
    # Folder names are participant IDs; .get avoids a KeyError on stray entries.
    status = df_dict.get(subject)

    if status == "HEALTHY":
        shutil.move(os.path.join(source_dir, subject), target_dir_healthy)
    elif status == "ADHD":
        shutil.move(os.path.join(source_dir, subject), target_dir_adhd)
    else:
        print(f"Skipping {subject}: no HEALTHY/ADHD label found.")
                          


In [15]:
# Subject folder names per class. os.listdir returns entries in arbitrary order, so sort
# them to make the train/validation/test split below reproducible.
adhd_dir = sorted(os.listdir(target_dir_adhd))
healthy_dir = sorted(os.listdir(target_dir_healthy))

In [18]:
# Turn the bare folder names into full paths; the same variable names now hold lists of paths.
adhd_dir = [os.path.join(target_dir_adhd,subject) for subject in adhd_dir]
healthy_dir = [os.path.join(target_dir_healthy,subject) for subject in healthy_dir]

In [19]:
# Destination folders for the subject-level training / validation / testing split.
training_path = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\training'
validation_path = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\validation'
testing_path = 'D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\testing'


In [20]:

# Class-balanced, subject-level split: pair the i-th healthy subject with the i-th ADHD
# subject and send both to the same split (31 pairs train, 8 validation, 8 testing).
n_train, n_val, n_test = 31, 8, 8

# The destinations must exist: shutil.move onto a missing path renames the source instead
# of moving it inside.
for split_dir in (training_path, validation_path, testing_path):
    os.makedirs(split_dir, exist_ok=True)

# Bound by the smaller class so neither list is indexed out of range; pairs beyond the
# configured split sizes are left where they are, as before.
n_pairs = min(len(healthy_dir), len(adhd_dir), n_train + n_val + n_test)

for i in range(n_pairs):
    if i < n_train:
        destination = training_path
    elif i < n_train + n_val:
        destination = validation_path
    else:
        destination = testing_path

    shutil.move(healthy_dir[i], destination)
    shutil.move(adhd_dir[i], destination)

In [21]:
# Collect the recordings (.csv) that ended up in each split folder.
training_csv_files = find_csv_files(training_path)
validation_csv_files = find_csv_files(validation_path)
testing_csv_files = find_csv_files(testing_path)

In [9]:
# Output folders for the per-split segment files.
training_segmented_path =   "D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\training_segmented"
validation_segmented_path =  "D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\validation_segmented"
testing_segmented_path = "D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\testing_segmented"

In [25]:
# Segment every training recording into overlapping windows.
for training_file in training_csv_files:
    segment_csv(path_to_file = training_file, path_to_dir = training_segmented_path)

D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training\sub-87974617\ses-1\eeg\csv_data_csv_120.574s\sub-87974617_ses-1_task-restEC_eeg_csv_120.574scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training\sub-87974617\ses-1\eeg\csv_data_csv_120.578s\sub-87974617_ses-1_task-restEO_eeg_csv_120.578scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training\sub-87974621\ses-1\eeg\csv_data_csv_120.574s\sub-87974621_ses-1_task-restEO_eeg_csv_120.574scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training\sub-87974621\ses-1\eeg\csv_data_csv_120.64s\sub-87974621_ses-1_task-restEC_eeg_csv_120.64scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\training_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_de

In [None]:
# Segment every validation recording into overlapping windows.
for validation_file in validation_csv_files:
    segment_csv(path_to_file = validation_file, path_to_dir = validation_segmented_path)
   

D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation\sub-88042705\ses-1\eeg\csv_data_csv_120.016s\sub-88042705_ses-1_task-restEC_eeg_csv_120.016scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation\sub-88042705\ses-1\eeg\csv_data_csv_120.016s\sub-88042705_ses-1_task-restEO_eeg_csv_120.016scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation\sub-88044189\ses-1\eeg\csv_data_csv_120.016s\sub-88044189_ses-1_task-restEC_eeg_csv_120.016scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation\sub-88044189\ses-1\eeg\csv_data_csv_120.016s\sub-88044189_ses-1_task-restEO_eeg_csv_120.016scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\validation_segmented
D:\TDBRAIN_dataset\TDBRA

In [None]:
# Segment every testing recording into overlapping windows.
for testing_file in testing_csv_files:
    segment_csv(path_to_file = testing_file, path_to_dir = testing_segmented_path) 

D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing\sub-88052645\ses-1\eeg\csv_data_csv_119.976s\sub-88052645_ses-1_task-restEC_eeg_csv_119.976scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing\sub-88052645\ses-1\eeg\csv_data_csv_119.986s\sub-88052645_ses-1_task-restEO_eeg_csv_119.986scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing\sub-88053137\ses-1\eeg\csv_data_csv_119.652s\sub-88053137_ses-1_task-restEO_eeg_csv_119.652scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing\sub-88053137\ses-1\eeg\csv_data_csv_119.674s\sub-88053137_ses-1_task-restEC_eeg_csv_119.674scsv.csv
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivatives_csv\testing_segmented
D:\TDBRAIN_dataset\TDBRAIN_data\TDBRAIN_derivati

In [19]:
# Full paths of all segment files, one list per split.
training_filepaths = [
    os.path.join(training_segmented_path, name)
    for name in os.listdir(training_segmented_path)
]
validation_filepaths = [
    os.path.join(validation_segmented_path, name)
    for name in os.listdir(validation_segmented_path)
]
testing_filepaths = [
    os.path.join(testing_segmented_path, name)
    for name in os.listdir(testing_segmented_path)
]

# Empty containers for the samples (X_*) and their labels (y_*).
X_train, X_val, X_test = [], [], []
y_train, y_val, y_test = [], [], []

In [20]:
# One shared StandardScaler instance; the loader cells below call fit/transform on it.
from sklearn.preprocessing import StandardScaler 
scaler = StandardScaler() 

In [26]:
# NOTE(review): this silences ALL warnings for the rest of the session, including ones that
# may point at real problems; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
from scipy.stats import kurtosis,skew, entropy
from scipy.signal import welch 

def extract_features(segment, fs=500.0):
    """Summarise one EEG segment as a single row of per-channel features.

    For every column of `segment` the row contains mean, standard deviation, skewness,
    kurtosis and the entropy of the value histogram, followed by the Welch power spectral
    density at every frequency bin.

    Args:
        segment: DataFrame with one column per channel and one row per sample.
        fs: Sampling frequency in Hz handed to `welch`. The default of 500 matches the
            recordings used here (about 60287 samples in 120.574 s). With welch's own
            default (fs=1.0) all frequencies fall in 0-0.5, so the two-decimal column
            labels collide and most PSD bins silently overwrite each other.

    Returns:
        A one-row DataFrame (index 0) with columns `mean_<ch>`, `std_<ch>`, `skew_<ch>`,
        `kurtosis_<ch>`, `entropy_<ch>` per channel, then `psd_<ch>_<freq>Hz` per channel
        and frequency bin.
    """
    mean_values = segment.mean()
    std_values = segment.std()
    skewness_values = segment.apply(skew)
    kurtosis_values = segment.apply(kurtosis)
    entropy_values = segment.apply(lambda x: entropy(np.histogram(x, bins='auto')[0]))

    # Collect everything in a dict and build the frame once; growing a DataFrame cell by
    # cell with .loc is slow and fragments it.
    features = {}

    for col_name in segment.columns:
        features[f'mean_{col_name}'] = mean_values[col_name]
        features[f'std_{col_name}'] = std_values[col_name]
        features[f'skew_{col_name}'] = skewness_values[col_name]
        features[f'kurtosis_{col_name}'] = kurtosis_values[col_name]
        features[f'entropy_{col_name}'] = entropy_values[col_name]

    freq_values, psd_values = welch(segment, fs=fs, axis=0)

    # Two decimals are enough at the default fs; fall back to more digits if a caller's
    # fs would make two labels identical, so no PSD bin is ever overwritten.
    freq_labels = [f'{freq:.2f}Hz' for freq in freq_values]
    if len(set(freq_labels)) != len(freq_labels):
        freq_labels = [f'{freq:.6g}Hz' for freq in freq_values]

    for i, col_name in enumerate(segment.columns):
        for j, freq_label in enumerate(freq_labels):
            features[f'psd_{col_name}_{freq_label}'] = psd_values[j, i]

    return pd.DataFrame([features])

In [None]:
warnings.filterwarnings('ignore')

# Build the lists from scratch so re-running this cell cannot duplicate samples.
train_feature_rows = []
train_labels = []

for segment_path in training_filepaths:
    # Segment files are named "<participant_ID>_ses-..._seg_<k>.csv".
    subject_name = os.path.basename(segment_path).split("_")[0]
    status = df_dict[subject_name]
    if status not in ("HEALTHY", "ADHD"):
        continue  # no label -> no sample, keeps X and y aligned

    segment = pd.read_csv(segment_path)
    # Drop the two index columns written by the earlier to_csv calls.
    segment = segment.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])

    feature_file = extract_features(segment)

    # Append sample and label together, only after feature extraction succeeded.
    train_feature_rows.append(feature_file.to_numpy(dtype=np.float32))
    train_labels.append(1 if status == "HEALTHY" else 0)  # HEALTHY -> 1, ADHD -> 0

# Fit the scaler ONCE on the whole training matrix. Fitting it on a single-row sample
# (as was done per file) subtracts the sample from itself and turns every feature into 0.
if train_feature_rows:
    scaled_train = scaler.fit_transform(np.vstack(train_feature_rows))
    X_train = [scaled_row[np.newaxis, :] for scaled_row in scaled_train]  # keep one (1, n_features) array per sample
else:
    X_train = []
y_train = train_labels

print (f"Training data loaded! Training set: {len(X_train)}, labels: {len(y_train)}")

In [None]:
warnings.filterwarnings('ignore')

# Build the lists from scratch so re-running this cell cannot duplicate samples.
val_feature_rows = []
val_labels = []

for segment_path in validation_filepaths:
    # Segment files are named "<participant_ID>_ses-..._seg_<k>.csv".
    subject_name = os.path.basename(segment_path).split("_")[0]
    status = df_dict[subject_name]
    if status not in ("HEALTHY", "ADHD"):
        continue  # no label -> no sample, keeps X and y aligned

    segment = pd.read_csv(segment_path)
    # Drop the two index columns written by the earlier to_csv calls.
    segment = segment.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])

    feature_file = extract_features(segment)

    # Append sample and label together, only after feature extraction succeeded.
    val_feature_rows.append(feature_file.to_numpy(dtype=np.float32))
    val_labels.append(1 if status == "HEALTHY" else 0)  # HEALTHY -> 1, ADHD -> 0

# Only TRANSFORM with the scaler the training cell has already fitted. Refitting on each
# single-row validation sample turns every feature into 0, and refitting on validation
# data at all would standardise it differently from the training data.
if val_feature_rows:
    scaled_val = scaler.transform(np.vstack(val_feature_rows))
    X_val = [scaled_row[np.newaxis, :] for scaled_row in scaled_val]  # keep one (1, n_features) array per sample
else:
    X_val = []
y_val = val_labels

print (f"Validation data loaded! Validation set: {len(X_val)}, labels: {len(y_val)}")

In [None]:
warnings.filterwarnings('ignore')

# Build the lists from scratch so re-running this cell cannot duplicate samples.
test_feature_rows = []
test_labels = []

for segment_path in testing_filepaths:
    # Segment files are named "<participant_ID>_ses-..._seg_<k>.csv".
    subject_name = os.path.basename(segment_path).split("_")[0]
    status = df_dict[subject_name]
    if status not in ("HEALTHY", "ADHD"):
        continue  # no label -> no sample, keeps X and y aligned

    segment = pd.read_csv(segment_path)
    # Drop the two index columns written by the earlier to_csv calls.
    segment = segment.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])

    feature_file = extract_features(segment)

    # Append sample and label together, only after feature extraction succeeded.
    test_feature_rows.append(feature_file.to_numpy(dtype=np.float32))
    test_labels.append(1 if status == "HEALTHY" else 0)  # HEALTHY -> 1, ADHD -> 0

# Only TRANSFORM with the scaler the training cell has already fitted. Refitting on each
# single-row test sample turns every feature into 0, and refitting on test data at all
# would standardise it differently from the training data.
if test_feature_rows:
    scaled_test = scaler.transform(np.vstack(test_feature_rows))
    X_test = [scaled_row[np.newaxis, :] for scaled_row in scaled_test]  # keep one (1, n_features) array per sample
else:
    X_test = []
y_test = test_labels

print (f"Testing data loaded! Testing set: {len(X_test)}, labels: {len(y_test)}")

In [16]:
# Make sure all data points have the same shape: keep the samples with the most common
# shape and drop the rest, together with their labels.
# The list is rebuilt instead of calling pop() inside range(len(...)), which skips the
# element after every removal and finally indexes past the end of the shortened list.
# Using the most common shape (rather than a fixed 5000 rows) also works when the samples
# are (1, n_features) feature vectors.
if X_train:
    train_shapes = [sample.shape for sample in X_train]
    expected_train_shape = max(sorted(set(train_shapes)), key=train_shapes.count)
    kept_train = [k for k, shape in enumerate(train_shapes) if shape == expected_train_shape]
    X_train = [X_train[k] for k in kept_train]
    y_train = [y_train[k] for k in kept_train]

In [17]:

# Keep only the test samples with the most common shape and drop the rest, together with
# their labels. The list is rebuilt instead of calling pop() inside range(len(...)), which
# skips elements and finally indexes past the end of the shortened list.
if X_test:
    test_shapes = [sample.shape for sample in X_test]
    expected_test_shape = max(sorted(set(test_shapes)), key=test_shapes.count)
    kept_test = [k for k, shape in enumerate(test_shapes) if shape == expected_test_shape]
    X_test = [X_test[k] for k in kept_test]
    y_test = [y_test[k] for k in kept_test]

In [18]:
# Keep only the validation samples with the most common shape and drop the rest, together
# with their labels. The list is rebuilt instead of calling pop() inside range(len(...)),
# which skips elements and finally indexes past the end of the shortened list.
if X_val:
    val_shapes = [sample.shape for sample in X_val]
    expected_val_shape = max(sorted(set(val_shapes)), key=val_shapes.count)
    kept_val = [k for k, shape in enumerate(val_shapes) if shape == expected_val_shape]
    X_val = [X_val[k] for k in kept_val]
    y_val = [y_val[k] for k in kept_val]

In [None]:
# Persist the training samples; np.save converts the list to one array, so all samples must share a shape.
np.save('D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\X_train_feat.npy', X_train)

In [None]:
# Persist the validation samples; np.save converts the list to one array, so all samples must share a shape.
np.save('D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\X_val_feat.npy', X_val)

In [None]:
# Persist the testing samples; np.save converts the list to one array, so all samples must share a shape.
np.save('D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\X_test_feat.npy', X_test)

In [None]:
# Persist the training labels (1 = HEALTHY, 0 = ADHD).
np.save('D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\y_train.npy', y_train)

In [None]:
# Persist the validation labels (1 = HEALTHY, 0 = ADHD).
np.save('D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\y_val.npy', y_val)

In [None]:
# Persist the testing labels (1 = HEALTHY, 0 = ADHD).
np.save('D:\\TDBRAIN_dataset\\TDBRAIN_data\\TDBRAIN_derivatives_csv\\y_test.npy', y_test)

In [26]:
# Sanity check: show the first training sample.
# NOTE(review): the multi-row output stored below looks like a scaled raw segment, not a one-row feature vector — it may stem from an earlier version of the loader cell; confirm.
print(X_train[0])

[[-0.06853217  0.13024262 -0.26404363 ... -0.2552741  -0.30648053
  -0.24734335]
 [ 0.18882382  0.46046364 -0.17605898 ... -0.12242406 -0.09219442
  -0.150283  ]
 [ 0.07247509  0.47129804 -0.47226825 ... -0.24597633 -0.13356379
  -0.299339  ]
 ...
 [-0.54245245 -0.36154056 -1.5062174  ...  1.6792283   1.558501
   0.26918095]
 [-0.30273855  0.02557625 -1.2416021  ...  1.7155733   1.5622216
   0.24125347]
 [-0.05825629  0.342113   -0.7361453  ...  1.9700726   1.7540317
   0.503733  ]]
