<a href="https://colab.research.google.com/github/Janina712/MLTSA_FinalProject/blob/main/Preprocess_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [1]:
pip install mne matplotlib

Collecting mne
  Downloading mne-1.0.2-py3-none-any.whl (7.5 MB)
[K     |████████████████████████████████| 7.5 MB 13.6 MB/s 
Installing collected packages: mne
Successfully installed mne-1.0.2


In [42]:
import numpy as np
import mne 
import pandas as pd
import os
import pylab as pl
import re
import requests
from bs4 import BeautifulSoup
from pathlib import Path
from tqdm import tqdm
import shutil
from collections import Counter, defaultdict

from joblib import Parallel, delayed

tqdm.pandas()

Uploading files

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
drive_tuh_eeg_loc = Path('/content/drive/MyDrive/Classes/22S-PHYS-667/project/isip_tuh_eeg')

In [6]:
working_folder = Path('/content/drive/MyDrive/Classes/22S-PHYS-667/project/temp_data')

Creating a dataframe that will contain the name of all the files

In [7]:
downloaded_files = list(Path(drive_tuh_eeg_loc).glob('*'))
downloaded_files[:3]

[PosixPath('/content/drive/MyDrive/Classes/22S-PHYS-667/project/isip_tuh_eeg/train__02_tcp_le__058__00005804__s002_2008_11_05__00005804_s002_t003.tse'),
 PosixPath('/content/drive/MyDrive/Classes/22S-PHYS-667/project/isip_tuh_eeg/train__02_tcp_le__058__00005804__s002_2008_11_05__00005804_s002_t004.tse'),
 PosixPath('/content/drive/MyDrive/Classes/22S-PHYS-667/project/isip_tuh_eeg/train__02_tcp_le__058__00005804__s002_2008_11_05__00005804_s002_t005.tse')]

In [8]:
# how many files we have in total
len(downloaded_files)

13270

In [None]:
files_raw_df = pd.DataFrame(
  [fpath.name for fpath in downloaded_files], 
  columns = ['filename']
)

In [None]:
files_raw_df

Unnamed: 0,filename
0,train__02_tcp_le__058__00005804__s002_2008_11_...
1,train__02_tcp_le__058__00005804__s002_2008_11_...
2,train__02_tcp_le__058__00005804__s002_2008_11_...
3,train__02_tcp_le__058__00005804__s002_2008_11_...
4,train__02_tcp_le__060__00006083__s003_2010_10_...
...,...
13265,dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13266,dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13267,dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13268,dev__01_tcp_ar__085__00008512__s002_2012_01_09...


In [None]:
files_raw_df['filename'][4].split('__')

['train',
 '02_tcp_le',
 '060',
 '00006083',
 's003_2010_10_11',
 '00006083_s003_t001.tse']

In [None]:
def split_fname(row):

  split_row_str = row['filename'].split('__')

  fname, file_ext = split_row_str[-1].split('.')

  row['category'] = split_row_str[0]
  row['1_idk'] = split_row_str[1]
  row['2_idk'] = split_row_str[2]
  row['3_idk'] = split_row_str[3]
  row['4_idk'] = split_row_str[4]
  row['file_name'] = fname #fname WITHOUT file extension
  row['file_ext'] = file_ext #file extension (.edf or .tse)

  return row

In [None]:
# not the most efficient way, but it's expressive

files_raw_df = files_raw_df.progress_apply(
  split_fname, 
  axis=1
)

100%|██████████| 13270/13270 [01:06<00:00, 198.96it/s]


In [None]:
# saving result
files_raw_df.to_pickle(
    Path(working_folder).joinpath('files_raw_df.pkl')
)

In [9]:
# loading result

files_raw_df = pd.read_pickle(
    Path(working_folder).joinpath('files_raw_df.pkl')
)
files_raw_df = files_raw_df.rename(
  columns = {
    '1_idk':'configuration',
    '2_idk':'index',
    '3_idk':'patient_#',
    '4_idk':'session'
  }
)

Creating a dictionary with edf files.

In [10]:
files_raw_df['filename']

0        train__02_tcp_le__058__00005804__s002_2008_11_...
1        train__02_tcp_le__058__00005804__s002_2008_11_...
2        train__02_tcp_le__058__00005804__s002_2008_11_...
3        train__02_tcp_le__058__00005804__s002_2008_11_...
4        train__02_tcp_le__060__00006083__s003_2010_10_...
                               ...                        
13265    dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13266    dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13267    dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13268    dev__01_tcp_ar__085__00008512__s002_2012_01_09...
13269    dev__01_tcp_ar__085__00008512__s002_2012_01_09...
Name: filename, Length: 13270, dtype: object

In [None]:
file_path = drive_tuh_eeg_loc.joinpath(files_raw_df['filename'][7332])
file_path

PosixPath('/content/drive/MyDrive/Classes/22S-PHYS-667/project/isip_tuh_eeg/train__02_tcp_le__053__00005347__s001_2008_11_24__00005347_s001_t001.edf')

In [None]:
edf_fnames_list = files_raw_df[
  files_raw_df['file_ext'] == 'edf'
]['filename']

raw_files_dict = dict()
for fname in tqdm(edf_fnames_list[:50]):
  data_obj = mne.io.read_raw_edf(
    file_path,
    verbose=False,
  )
  raw_files_dict[fname] = data_obj

100%|██████████| 50/50 [00:01<00:00, 25.79it/s]


In [None]:
## access by filename
raw_files_dict[
  'train__03_tcp_ar_a__072__00007221__s003_2010_11_01__00007221_s003_t004.edf'
]

0,1
Measurement date,"November 24, 2008 13:24:15 GMT"
Experimenter,Unknown
Digitized points,0 points
Good channels,41 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available
Sampling frequency,250.00 Hz
Highpass,0.00 Hz
Lowpass,125.00 Hz


Filling our dataframe with the data from time series.

In [20]:
#The channels that should overlap across different sessions based on the article CITE 
channels = [
  'EEG FP1-REF',
  'EEG FP2-REF',
  'EEG F3-REF',
  'EEG F4-REF',
  'EEG C3-REF',
  'EEG C4-REF',
  'EEG P3-REF',
  'EEG P4-REF',
  'EEG O1-REF',
  'EEG O2-REF',
  'EEG F7-REF',
  'EEG F8-REF',
  'EEG T3-REF',
  'EEG T4-REF',
  'EEG T5-REF',
  'EEG T6-REF',
  'EEG FZ-REF',
  'EEG CZ-REF',
  'EEG PZ-REF',
  'EEG A1-REF',
  'EEG A2-REF'
]

In [72]:
work_data = files_raw_df.copy()

From here we will take a subset of the data: only AR configuration and get rid of the tse names.

In [78]:
work_data_AR = work_data[
  (work_data['configuration'] == '01_tcp_ar') & \
  (work_data['file_ext'] == 'edf')
]

In [79]:
work_data_AR

Unnamed: 0,filename,category,configuration,index,patient_#,session,file_name,file_ext
8000,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,00010563,s002_2013_07_17,00010563_s002_t003,edf
8001,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,00010563,s002_2013_07_17,00010563_s002_t004,edf
8002,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,00010563,s002_2013_07_17,00010563_s002_t005,edf
8003,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,00010563,s003_2013_07_17,00010563_s003_t000,edf
8004,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,00010563,s003_2013_07_17,00010563_s003_t001,edf
...,...,...,...,...,...,...,...,...
13265,dev__01_tcp_ar__085__00008512__s002_2012_01_09...,dev,01_tcp_ar,085,00008512,s002_2012_01_09,00008512_s002_t000,edf
13266,dev__01_tcp_ar__085__00008512__s002_2012_01_09...,dev,01_tcp_ar,085,00008512,s002_2012_01_09,00008512_s002_t001,edf
13267,dev__01_tcp_ar__085__00008512__s002_2012_01_09...,dev,01_tcp_ar,085,00008512,s002_2012_01_09,00008512_s002_t002,edf
13268,dev__01_tcp_ar__085__00008512__s002_2012_01_09...,dev,01_tcp_ar,085,00008512,s002_2012_01_09,00008512_s002_t003,edf


## `Preprocessing`

In [71]:
def preprocessing(file, text_output=False):
  #read the data
  data = mne.io.read_raw_edf(
      file, 
      verbose=text_output
  )

  data = data.copy().load_data()
  
  #notch filter 60 Hz - to get rid of the noise because of the power outlets
  eeg_picks = mne.pick_types(data.info, eeg=True)
  freqs = []
  for i in range(0, int(data.info['sfreq']/2), 60):
    if i != 0:
      freqs.append(i)
  data_notch = data.notch_filter(freqs=np.asarray(freqs), picks=eeg_picks, verbose=text_output)

  #high pass filter
  data_filter = data_notch.filter(l_freq=0.2, h_freq=None, verbose=text_output)

  #downsampling
  data_done = data_filter.resample(sfreq=200, verbose=text_output)

  #channel reduction
  #print(data.ch_names)
  kick_out = "yes"
  channels2drop = []
  for j in range(0, len(data_done.ch_names)):
    for i in range(0, len(channels)):
      if data_done.ch_names[j] == channels[i]:
        kick_out = "no"
        break
      else:
        kick_out = "yes"
    if kick_out == "yes":
      #print(f"kick out {j}")
      channels2drop.append(j)
    #else:
      #print(f"keep {j}")
    names2drop = []
  for i in range(0,len(channels2drop)):
    names2drop.append(data_done.ch_names[channels2drop[i]])
  for drop in names2drop:
    data_done.drop_channels(data_done.ch_names[data_done.ch_names.index(drop)])

  return data_done

### working with dicts

In [81]:
edf_fnames_list = work_data_AR['filename']
len(edf_fnames_list)

4913

In [82]:
def Extract_bands(fname):

  #for fname in tqdm(fnames_list):
  file_path = drive_tuh_eeg_loc.joinpath(fname)
  dict_key = file_path.stem.split('__')[-1]
  print(dict_key)

  timeseries = preprocessing(file_path)

  channels_dict = dict(
    zip(
      timeseries.ch_names,
      timeseries.get_data()
    )
  )
    
    #freqs_dict[dict_key] = channels_dict

  return (dict_key, channels_dict)

#### Parallel processing

In [39]:
import multiprocessing

In [41]:
n_cpus = multiprocessing.cpu_count()
n_cpus

2

In [None]:
%%time
results = Parallel(n_jobs=n_cpus)(delayed(Extract_bands)(fname) for fname in tqdm(edf_fnames_list[:]))

  3%|▎         | 152/4913 [02:36<2:52:15,  2.17s/it]

In [60]:
channels_small_df = pd.DataFrame(dict(results)).T

In [62]:
channels_small_df.shape

(20, 19)

### `.tse` files

In [116]:
fnames_list[8000]

'train__01_tcp_ar__105__00010563__s002_2013_07_17__00010563_s002_t003.edf'

In [125]:
tse_dict = dict()

for fname in tqdm(fnames_list):
  file_path = drive_tuh_eeg_loc.joinpath(fname).with_suffix('.tse')
  dict_key = file_path.stem.split('__')[-1]

  labels = pd.read_csv(
    file_path,
    sep=" ", 
    index_col=False, 
    header=0, 
    names = ['start','stop','seizure']
  ).to_numpy()

  tse_dict[dict_key] = labels

  return func(*args, **kwargs)
100%|██████████| 20/20 [00:00<00:00, 138.06it/s]


In [126]:
tse_dict

{'00010563_s002_t003': array([[0.0, 601.0, 'bckg']], dtype=object),
 '00010563_s002_t004': array([[0.0, 300.0, 'bckg']], dtype=object),
 '00010563_s002_t005': array([[0.0, 601.0, 'bckg']], dtype=object),
 '00010563_s003_t000': array([[0.0, 300.0, 'bckg']], dtype=object),
 '00010563_s003_t001': array([[0.0, 30.0, 'bckg']], dtype=object),
 '00010563_s003_t002': array([[0.0, 562.0, 'bckg']], dtype=object),
 '00010563_s003_t003': array([[0.0, 601.0, 'bckg']], dtype=object),
 '00010563_s003_t005': array([[0.0, 775.0, 'bckg']], dtype=object),
 '00010563_s003_t006': array([[0.0, 601.0, 'bckg']], dtype=object),
 '00010563_s003_t007': array([[0.0, 300.0, 'bckg']], dtype=object),
 '00010563_s003_t008': array([[0.0, 601.0, 'bckg']], dtype=object),
 '00010563_s004_t000': array([[0.0, 300.0, 'bckg']], dtype=object),
 '00010563_s004_t001': array([[0.0, 601.0, 'bckg']], dtype=object),
 '00010563_s004_t002': array([[0.0, 458.0, 'bckg']], dtype=object),
 '00010584_s003_t000': array([[0.0, 3526.0, 'bckg

### Multiple seizure example

In [128]:
## sometimes we may have multiple seizures in one .tse file...

labels = pd.read_csv(
  '/content/drive/MyDrive/Classes/22S-PHYS-667/project/isip_tuh_eeg/train__01_tcp_ar__104__00010418__s012_2014_05_30__00010418_s012_t001.tse', 
  sep=" ", 
  index_col=False, 
  header=0, 
  names = ['start','stop','seizure']
)

  return func(*args, **kwargs)


In [130]:
labels

Unnamed: 0,start,stop,seizure
0,0.0000,4.9724,bckg
1,4.9724,36.3315,fnsz
2,36.3315,41.8066,bckg
3,41.8066,88.4144,fnsz
4,88.4144,92.0718,bckg
...,...,...,...
100,2140.1160,2192.0442,bckg
101,2192.0442,2222.0110,fnsz
102,2222.0110,2294.9724,bckg
103,2294.9724,2346.5856,fnsz


## Putting everything together

In [147]:
files_raw_small_df = (
  files_raw_df[
    (files_raw_df['file_name'].isin(channels_small_df.index)) & \
    (files_raw_df['file_ext'] == 'edf')
  ]
  .drop('file_ext', axis=1)
  .set_index('file_name')
)
files_raw_small_df

Unnamed: 0_level_0,filename,category,configuration,index,patient_#,session
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00010563_s002_t003,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17
00010563_s002_t004,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17
00010563_s002_t005,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17
00010563_s003_t000,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17
00010563_s003_t001,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17
00010563_s003_t002,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17
00010563_s003_t003,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17
00010563_s003_t005,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17
00010563_s003_t006,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17
00010563_s003_t007,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17


In [155]:
files_raw_channels_small_df = files_raw_small_df.join(channels_small_df)

In [156]:
files_raw_channels_small_df

Unnamed: 0_level_0,filename,category,configuration,index,patient_#,session,EEG FP1-REF,EEG FP2-REF,EEG F3-REF,EEG F4-REF,...,EEG F8-REF,EEG T3-REF,EEG T4-REF,EEG T5-REF,EEG T6-REF,EEG FZ-REF,EEG CZ-REF,EEG PZ-REF,EEG A1-REF,EEG A2-REF
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00010563_s002_t003,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17,"[2.7726555877600295e-10, 1.3483732842699443e-0...","[-3.906051578001337e-10, 2.4807970640912983e-0...","[-1.9284389162122937e-10, 1.0694757868255472e-...","[-5.096480914428079e-11, 4.047438658809519e-06...",...,"[-6.779612036502368e-10, 1.01314964825149e-06,...","[4.146017629996594e-09, -2.0413263864026096e-0...","[1.444056012902292e-09, 3.817044838007959e-07,...","[-2.910958452221951e-09, -3.250484807099009e-0...","[1.8423372397872572e-09, -7.1615811255855e-07,...","[-1.364304852525069e-10, 3.5260483039175664e-0...","[1.3531881762796245e-10, 6.983863806835304e-07...","[-2.3495119248263287e-10, -7.617383775562379e-...","[9.596059492877374e-10, -1.1888197912083615e-0...","[1.0519972902783143e-11, -2.1400490121479406e-..."
00010563_s002_t004,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17,"[-1.0278586830940462e-08, -1.7158036103551026e...","[2.7648617251111243e-09, -2.8490217427264524e-...","[3.288941204731503e-09, 4.617119704569293e-07,...","[3.399964489573073e-09, -5.345430061192608e-06...",...,"[-3.791814708396617e-09, -2.3557223077320197e-...","[8.658498750608017e-09, 1.7031133962228388e-07...","[1.780158122957924e-09, -2.152228442797806e-06...","[-4.916698865387427e-10, -2.9856703770832174e-...","[-6.258704822517387e-09, -1.8593219919890405e-...","[9.815260117792147e-10, -2.4137912796958633e-0...","[-2.357383015178091e-10, -5.355394184505534e-0...","[-1.0067458116184742e-09, -1.6493666048317114e...","[-4.7017904553916475e-09, -1.2061141044168896e...","[-1.1048988390260309e-08, -1.0048047870747276e..."
00010563_s002_t005,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17,"[3.7111311706420487e-09, -1.0474257222754713e-...","[7.219823200366963e-09, -3.1951117881894467e-0...","[2.1782993270312925e-09, -8.533492755460114e-0...","[-1.365985882998853e-09, 7.092433104769807e-06...",...,"[3.154214444439315e-09, 1.3830401655190407e-06...","[-8.948886072381209e-10, -7.939269490082801e-0...","[3.07454363042913e-09, 1.4140713109054442e-06,...","[6.674844103884688e-09, -4.037450817687943e-06...","[-3.61287734508141e-09, 6.247470115147276e-06,...","[-2.062432726832931e-09, 5.42573298081137e-07,...","[-8.346167589202069e-10, -3.2631357231385514e-...","[2.2304470728923623e-10, 3.7421245926520404e-0...","[1.4396886880297788e-09, -7.661145779436604e-0...","[6.327147966320168e-10, 1.1833053986962973e-05..."
00010563_s003_t000,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-4.837242318016071e-09, -1.9639659553544583e-...","[-1.7168631924658783e-09, 1.0420288689012175e-...","[-2.8357483900477646e-09, 1.3444340192373916e-...","[-4.3997702726288667e-10, -4.612748503245249e-...",...,"[-1.381539345352619e-09, -3.90994710010459e-06...","[-1.140736912419709e-09, 7.245232568610588e-07...","[2.6767669024911234e-09, 1.6102607124345142e-0...","[4.665380888551219e-09, 9.506710232528226e-07,...","[2.351093927809803e-09, 2.4800401575907515e-06...","[-1.0594241716063995e-10, -8.909057146788971e-...","[-4.790726036099662e-10, -1.4461463449301e-06,...","[4.02520870030525e-10, 1.1526165134603241e-08,...","[-5.771001920778108e-09, 3.624165205087055e-08...","[4.244714350965895e-09, -2.003986424867263e-06..."
00010563_s003_t001,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-5.683698910306182e-08, 2.654920991845486e-06...","[-3.337798055224077e-08, 2.8095137800880105e-0...","[3.7954605921399086e-08, 1.4489212326297262e-0...","[-1.2525189500901834e-08, 3.12269697219255e-06...",...,"[-4.327304264203067e-08, -2.5217132311470646e-...","[8.124202650339955e-08, 5.883211872988577e-07,...","[-4.9073445727403074e-08, 1.3883104676019777e-...","[1.9867273141848352e-08, 1.6805965603429214e-0...","[-6.673399352104671e-08, -1.2191897638578423e-...","[3.058758964062181e-09, 1.9130796523937236e-06...","[-1.39453577867126e-09, 1.223609540591494e-06,...","[-9.876131746069375e-09, 1.3391255464096026e-0...","[7.511533652103386e-08, -4.399158557750114e-06...","[-7.913696451822553e-08, -1.2159126319610358e-..."
00010563_s003_t002,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-4.613358915999018e-10, -5.243869036036797e-0...","[-4.1246462276526514e-10, -9.543901320175684e-...","[-5.484526684402299e-10, -2.1174855547093227e-...","[-3.5085980731332755e-10, -7.3871225385397745e...",...,"[-3.409403487688364e-10, -6.641916749552722e-0...","[-2.471222288174396e-10, -1.5998212353519763e-...","[3.560273512375617e-10, -5.359288920287434e-06...","[-6.567244381443758e-10, 2.879367831613315e-06...","[1.510845125331402e-10, 2.0258697505688547e-06...","[-8.613313612695906e-11, -5.150451641934368e-0...","[9.799790565080853e-11, -4.0385994802705475e-0...","[-2.1126261850481188e-11, 2.457331504765216e-0...","[4.1481001466082793e-10, -9.948579663495728e-0...","[8.651925698029443e-10, 9.84267632527315e-07, ..."
00010563_s003_t003,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-3.1030153232894724e-09, 2.6443218951453036e-...","[-8.971146409032446e-10, 1.1031042093479806e-0...","[5.2315126296753265e-11, 7.656501271459317e-07...","[-2.652004470779339e-10, -2.9984080510449806e-...",...,"[-2.6741713828135756e-10, 1.1047148881648794e-...","[1.3567402272723733e-10, 1.2025867222383958e-0...","[-5.777699439523222e-10, 4.932800750813714e-07...","[6.369467279348021e-11, 1.4661917689292248e-06...","[-6.289645788535529e-10, 8.893596887272733e-09...","[-1.421038842592268e-10, 8.620853763148133e-07...","[-1.9155916188028957e-10, 1.0979258115392094e-...","[-3.289868598740529e-10, 3.2878389130604496e-0...","[-5.085247983235884e-10, 1.7416438684886157e-0...","[-4.6283627546002407e-10, -2.4509360997565706e..."
00010563_s003_t005,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-1.5638016623179072e-09, 2.5693829738035775e-...","[-6.282785829763435e-10, 1.3231054135216873e-0...","[-1.2295303642273871e-09, -7.797051892109223e-...","[3.14323865033923e-10, 3.397631854061873e-06, ...",...,"[8.203941375688234e-10, 7.724395611754991e-07,...","[5.306540093799605e-10, -2.992699653322829e-06...","[1.1852913655373385e-09, -6.634447350287491e-0...","[-9.570734073316434e-11, -2.0605144650521068e-...","[-2.861695760260764e-10, -9.603862578819737e-0...","[4.5248088755976866e-10, 1.947185483198898e-06...","[7.810247989331348e-10, 2.440118159558692e-06,...","[2.356757986622703e-10, -1.1219438983667394e-0...","[-1.182091692651481e-10, -1.8698989032631304e-...","[-6.857753535389122e-10, 5.091381682199744e-07..."
00010563_s003_t006,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-6.495787518410752e-09, -7.356208953377029e-0...","[-1.0568681881728678e-08, 5.467890300245419e-0...","[-8.309821840474762e-09, 6.1922162598058515e-0...","[1.327336900887148e-08, -2.5080112879921364e-0...",...,"[-1.7739357684241724e-08, 1.661927726339644e-0...","[-2.576003418465825e-09, -6.911241103431908e-0...","[-3.5655980368991606e-09, -5.055351012705168e-...","[-9.678191348083156e-09, -6.402695204906061e-0...","[1.0491993458324128e-08, -1.0338859821020052e-...","[3.596380080829031e-09, -6.27598042190159e-06,...","[2.3427878116237836e-09, -9.985513967624007e-0...","[7.797417129399957e-10, -5.386643567678042e-07...","[2.3756813219638975e-09, 1.0324806520045635e-0...","[-7.589743523939152e-10, -2.1477608529645205e-..."
00010563_s003_t007,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-1.0278586830940462e-08, -1.7158036103551026e...","[2.7648617251111243e-09, -2.8490217427264524e-...","[3.288941204731503e-09, 4.617119704569293e-07,...","[3.399964489573073e-09, -5.345430061192608e-06...",...,"[-3.791814708396617e-09, -2.3557223077320197e-...","[8.658498750608017e-09, 1.7031133962228388e-07...","[1.780158122957924e-09, -2.152228442797806e-06...","[-4.916698865387427e-10, -2.9856703770832174e-...","[-6.258704822517387e-09, -1.8593219919890405e-...","[9.815260117792147e-10, -2.4137912796958633e-0...","[-2.357383015178091e-10, -5.355394184505534e-0...","[-1.0067458116184742e-09, -1.6493666048317114e...","[-4.7017904553916475e-09, -1.2061141044168896e...","[-1.1048988390260309e-08, -1.0048047870747276e..."


In [157]:
files_raw_channels_small_df['labels'] = (
    files_raw_channels_small_df
      .index
      .map(
          lambda x: tse_dict[x] if x in tse_dict else np.NaN
      )
)

In [160]:
## your grive folder

working_folder

PosixPath('/content/drive/MyDrive/Classes/22S-PHYS-667/project/temp_data')

In [159]:
#saving

files_raw_channels_small_df.to_pickle(
    working_folder.joinpath('files_raw_channels_small_df.pkl')
)

In [161]:
#reading

files_raw_channels_small_df = pd.read_pickle(
    working_folder.joinpath('files_raw_channels_small_df.pkl')
)

In [162]:
files_raw_channels_small_df

Unnamed: 0_level_0,filename,category,configuration,index,patient_#,session,EEG FP1-REF,EEG FP2-REF,EEG F3-REF,EEG F4-REF,...,EEG T3-REF,EEG T4-REF,EEG T5-REF,EEG T6-REF,EEG FZ-REF,EEG CZ-REF,EEG PZ-REF,EEG A1-REF,EEG A2-REF,labels
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00010563_s002_t003,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17,"[2.7726555877600295e-10, 1.3483732842699443e-0...","[-3.906051578001337e-10, 2.4807970640912983e-0...","[-1.9284389162122937e-10, 1.0694757868255472e-...","[-5.096480914428079e-11, 4.047438658809519e-06...",...,"[4.146017629996594e-09, -2.0413263864026096e-0...","[1.444056012902292e-09, 3.817044838007959e-07,...","[-2.910958452221951e-09, -3.250484807099009e-0...","[1.8423372397872572e-09, -7.1615811255855e-07,...","[-1.364304852525069e-10, 3.5260483039175664e-0...","[1.3531881762796245e-10, 6.983863806835304e-07...","[-2.3495119248263287e-10, -7.617383775562379e-...","[9.596059492877374e-10, -1.1888197912083615e-0...","[1.0519972902783143e-11, -2.1400490121479406e-...","[[0.0, 601.0, bckg]]"
00010563_s002_t004,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17,"[-1.0278586830940462e-08, -1.7158036103551026e...","[2.7648617251111243e-09, -2.8490217427264524e-...","[3.288941204731503e-09, 4.617119704569293e-07,...","[3.399964489573073e-09, -5.345430061192608e-06...",...,"[8.658498750608017e-09, 1.7031133962228388e-07...","[1.780158122957924e-09, -2.152228442797806e-06...","[-4.916698865387427e-10, -2.9856703770832174e-...","[-6.258704822517387e-09, -1.8593219919890405e-...","[9.815260117792147e-10, -2.4137912796958633e-0...","[-2.357383015178091e-10, -5.355394184505534e-0...","[-1.0067458116184742e-09, -1.6493666048317114e...","[-4.7017904553916475e-09, -1.2061141044168896e...","[-1.1048988390260309e-08, -1.0048047870747276e...","[[0.0, 300.0, bckg]]"
00010563_s002_t005,train__01_tcp_ar__105__00010563__s002_2013_07_...,train,01_tcp_ar,105,10563,s002_2013_07_17,"[3.7111311706420487e-09, -1.0474257222754713e-...","[7.219823200366963e-09, -3.1951117881894467e-0...","[2.1782993270312925e-09, -8.533492755460114e-0...","[-1.365985882998853e-09, 7.092433104769807e-06...",...,"[-8.948886072381209e-10, -7.939269490082801e-0...","[3.07454363042913e-09, 1.4140713109054442e-06,...","[6.674844103884688e-09, -4.037450817687943e-06...","[-3.61287734508141e-09, 6.247470115147276e-06,...","[-2.062432726832931e-09, 5.42573298081137e-07,...","[-8.346167589202069e-10, -3.2631357231385514e-...","[2.2304470728923623e-10, 3.7421245926520404e-0...","[1.4396886880297788e-09, -7.661145779436604e-0...","[6.327147966320168e-10, 1.1833053986962973e-05...","[[0.0, 601.0, bckg]]"
00010563_s003_t000,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-4.837242318016071e-09, -1.9639659553544583e-...","[-1.7168631924658783e-09, 1.0420288689012175e-...","[-2.8357483900477646e-09, 1.3444340192373916e-...","[-4.3997702726288667e-10, -4.612748503245249e-...",...,"[-1.140736912419709e-09, 7.245232568610588e-07...","[2.6767669024911234e-09, 1.6102607124345142e-0...","[4.665380888551219e-09, 9.506710232528226e-07,...","[2.351093927809803e-09, 2.4800401575907515e-06...","[-1.0594241716063995e-10, -8.909057146788971e-...","[-4.790726036099662e-10, -1.4461463449301e-06,...","[4.02520870030525e-10, 1.1526165134603241e-08,...","[-5.771001920778108e-09, 3.624165205087055e-08...","[4.244714350965895e-09, -2.003986424867263e-06...","[[0.0, 300.0, bckg]]"
00010563_s003_t001,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-5.683698910306182e-08, 2.654920991845486e-06...","[-3.337798055224077e-08, 2.8095137800880105e-0...","[3.7954605921399086e-08, 1.4489212326297262e-0...","[-1.2525189500901834e-08, 3.12269697219255e-06...",...,"[8.124202650339955e-08, 5.883211872988577e-07,...","[-4.9073445727403074e-08, 1.3883104676019777e-...","[1.9867273141848352e-08, 1.6805965603429214e-0...","[-6.673399352104671e-08, -1.2191897638578423e-...","[3.058758964062181e-09, 1.9130796523937236e-06...","[-1.39453577867126e-09, 1.223609540591494e-06,...","[-9.876131746069375e-09, 1.3391255464096026e-0...","[7.511533652103386e-08, -4.399158557750114e-06...","[-7.913696451822553e-08, -1.2159126319610358e-...","[[0.0, 30.0, bckg]]"
00010563_s003_t002,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-4.613358915999018e-10, -5.243869036036797e-0...","[-4.1246462276526514e-10, -9.543901320175684e-...","[-5.484526684402299e-10, -2.1174855547093227e-...","[-3.5085980731332755e-10, -7.3871225385397745e...",...,"[-2.471222288174396e-10, -1.5998212353519763e-...","[3.560273512375617e-10, -5.359288920287434e-06...","[-6.567244381443758e-10, 2.879367831613315e-06...","[1.510845125331402e-10, 2.0258697505688547e-06...","[-8.613313612695906e-11, -5.150451641934368e-0...","[9.799790565080853e-11, -4.0385994802705475e-0...","[-2.1126261850481188e-11, 2.457331504765216e-0...","[4.1481001466082793e-10, -9.948579663495728e-0...","[8.651925698029443e-10, 9.84267632527315e-07, ...","[[0.0, 562.0, bckg]]"
00010563_s003_t003,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-3.1030153232894724e-09, 2.6443218951453036e-...","[-8.971146409032446e-10, 1.1031042093479806e-0...","[5.2315126296753265e-11, 7.656501271459317e-07...","[-2.652004470779339e-10, -2.9984080510449806e-...",...,"[1.3567402272723733e-10, 1.2025867222383958e-0...","[-5.777699439523222e-10, 4.932800750813714e-07...","[6.369467279348021e-11, 1.4661917689292248e-06...","[-6.289645788535529e-10, 8.893596887272733e-09...","[-1.421038842592268e-10, 8.620853763148133e-07...","[-1.9155916188028957e-10, 1.0979258115392094e-...","[-3.289868598740529e-10, 3.2878389130604496e-0...","[-5.085247983235884e-10, 1.7416438684886157e-0...","[-4.6283627546002407e-10, -2.4509360997565706e...","[[0.0, 601.0, bckg]]"
00010563_s003_t005,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-1.5638016623179072e-09, 2.5693829738035775e-...","[-6.282785829763435e-10, 1.3231054135216873e-0...","[-1.2295303642273871e-09, -7.797051892109223e-...","[3.14323865033923e-10, 3.397631854061873e-06, ...",...,"[5.306540093799605e-10, -2.992699653322829e-06...","[1.1852913655373385e-09, -6.634447350287491e-0...","[-9.570734073316434e-11, -2.0605144650521068e-...","[-2.861695760260764e-10, -9.603862578819737e-0...","[4.5248088755976866e-10, 1.947185483198898e-06...","[7.810247989331348e-10, 2.440118159558692e-06,...","[2.356757986622703e-10, -1.1219438983667394e-0...","[-1.182091692651481e-10, -1.8698989032631304e-...","[-6.857753535389122e-10, 5.091381682199744e-07...","[[0.0, 775.0, bckg]]"
00010563_s003_t006,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-6.495787518410752e-09, -7.356208953377029e-0...","[-1.0568681881728678e-08, 5.467890300245419e-0...","[-8.309821840474762e-09, 6.1922162598058515e-0...","[1.327336900887148e-08, -2.5080112879921364e-0...",...,"[-2.576003418465825e-09, -6.911241103431908e-0...","[-3.5655980368991606e-09, -5.055351012705168e-...","[-9.678191348083156e-09, -6.402695204906061e-0...","[1.0491993458324128e-08, -1.0338859821020052e-...","[3.596380080829031e-09, -6.27598042190159e-06,...","[2.3427878116237836e-09, -9.985513967624007e-0...","[7.797417129399957e-10, -5.386643567678042e-07...","[2.3756813219638975e-09, 1.0324806520045635e-0...","[-7.589743523939152e-10, -2.1477608529645205e-...","[[0.0, 601.0, bckg]]"
00010563_s003_t007,train__01_tcp_ar__105__00010563__s003_2013_07_...,train,01_tcp_ar,105,10563,s003_2013_07_17,"[-1.0278586830940462e-08, -1.7158036103551026e...","[2.7648617251111243e-09, -2.8490217427264524e-...","[3.288941204731503e-09, 4.617119704569293e-07,...","[3.399964489573073e-09, -5.345430061192608e-06...",...,"[8.658498750608017e-09, 1.7031133962228388e-07...","[1.780158122957924e-09, -2.152228442797806e-06...","[-4.916698865387427e-10, -2.9856703770832174e-...","[-6.258704822517387e-09, -1.8593219919890405e-...","[9.815260117792147e-10, -2.4137912796958633e-0...","[-2.357383015178091e-10, -5.355394184505534e-0...","[-1.0067458116184742e-09, -1.6493666048317114e...","[-4.7017904553916475e-09, -1.2061141044168896e...","[-1.1048988390260309e-08, -1.0048047870747276e...","[[0.0, 300.0, bckg]]"


In [163]:
files_raw_channels_small_df.columns

Index(['filename', 'category', 'configuration', 'index', 'patient_#',
       'session', 'EEG FP1-REF', 'EEG FP2-REF', 'EEG F3-REF', 'EEG F4-REF',
       'EEG C3-REF', 'EEG C4-REF', 'EEG P3-REF', 'EEG P4-REF', 'EEG O1-REF',
       'EEG O2-REF', 'EEG F7-REF', 'EEG F8-REF', 'EEG T3-REF', 'EEG T4-REF',
       'EEG T5-REF', 'EEG T6-REF', 'EEG FZ-REF', 'EEG CZ-REF', 'EEG PZ-REF',
       'EEG A1-REF', 'EEG A2-REF', 'labels'],
      dtype='object')

In [164]:
files_raw_channels_small_df.at['00010563_s003_t007', 'EEG FP1-REF']

array([-1.02785868e-08, -1.71580361e-06, -2.19807984e-06, ...,
       -3.40245002e-06, -3.05912752e-06, -8.48780747e-07])

In [167]:
files_raw_df['file_name'].drop_duplicates()

0       00005804_s002_t003
1       00005804_s002_t004
2       00005804_s002_t005
3       00005804_s002_t006
4       00006083_s003_t001
               ...        
6995    00009578_s033_t009
6996    00009578_s033_t010
6997    00009578_s033_t011
6998    00009578_s033_t012
6999    00009578_s034_t000
Name: file_name, Length: 6635, dtype: object