In [2]:
import pandas as pd
import mat73
from datetime import datetime,timedelta
import numpy as np
from tqdm import tqdm
from utils import *
import h5py

In [3]:
df_brandon = pd.read_csv('work_tables/EEGs_And_Reports_20231024.csv')

In [4]:
df = pd.read_excel('tables/batch1.xlsx')
df = df.rename({'file':'HashFolderName'},axis=1)
df = df.rename({'time':'eventTime'},axis=1)
df=df.merge(df_brandon[['HashFolderName','SiteID','SessionID_new','EEGFolder']],on='HashFolderName')

# generate event time relative to event start
df['startTime'] = df.HashFolderName.apply(get_time_from_filename)
df['relativeTime'] = ((df.eventTime-df.startTime).dt.total_seconds()).astype(int)

In [5]:
# df[['HashFolderName', 'class', 'annotation', 'eventTime', 'startTime', 'relativeTime']].to_excel('tables_with_relative_time/batch1.xlsx',index=False)

In [6]:
# process dataframe
root = 'bdsp/opendata/EEG/data/'
windowsize = 15
desired_channels= ['Fp1','F3','C3','P3','F7','T3','T5','O1','Fz','Cz','Pz','Fp2','F4','C4','P4','F8','T4','T6','O2']
                  
hf = h5py.File('SauronData.h5', 'w')

In [7]:
def process_EEG_file(root,SiteID,EEGFolder,HashFolderName,eeg_start_time,next=True):
    # generate path to file and filename
    path = SiteID+'/'+EEGFolder+'/'+HashFolderName+'/'
    index=0
    # insert 0 to create Hashfoldername_0_date_time
    if next:
        eeg_start_time = eeg_start_time+timedelta(hours=12)
        index = 1
    eeg_start_time_str = datetime.strftime(eeg_start_time,'%Y%m%d_%H%M%S')
    filename = '_'.join([HashFolderName.split('_')[0],str(index),eeg_start_time_str])+'.mat'
    
    # load signal and data
    signal = mat73.loadmat(root+path+filename)
    data=signal['data']
    Fs=signal['Fs']
    channels=signal['channels']

    return data,Fs,channels

In [8]:
# process EEG file
n=0
for HashFolderName in tqdm(df.HashFolderName.unique()[20:100]):
    # get all events from same hashfolder
    sub_df = df[df.HashFolderName==HashFolderName]
    sub_df = sub_df.sort_values(by='relativeTime')
    # get knowledge about file location
    SiteID,EEGFolder,HashFolderName = sub_df.iloc[0][['SiteID','EEGFolder','HashFolderName']].to_list()
    # load file and metadata
    eeg_start_time = sub_df.iloc[0].startTime
    data,Fs,available_channels = process_EEG_file(root,SiteID,EEGFolder,HashFolderName,eeg_start_time,next=False)
    if Fs!=200:
        print('FS is not 200!!!!')
    # save all events from the eeg file
    for j in tqdm(range(len(sub_df)),leave=False):
        n+=1
        # get time relative to beginning of file 
        eeg_start_time = sub_df.iloc[j].startTime
        eventTime = sub_df.iloc[j].eventTime 
        relativeTime = int((eventTime-eeg_start_time).total_seconds())
        if relativeTime > 1e6:
            continue
        # load next file if relative event time > 12h, since engineers broke up the files!
        if relativeTime > data.shape[1]/Fs:
            print(f'data shape in seconds {data.shape[1]/200}')
            print(f'relative time in seconds {relativeTime}')
            data,Fs,available_channels = process_EEG_file(root,SiteID,EEGFolder,HashFolderName,eeg_start_time,next=True)
        # get snippet from eeg file and save it somewhere
        snippet = get_right_event_time(data=data,
                                    relative_event_time=relativeTime,
                                    windowsize=windowsize,
                                    Fs=Fs)
        snippet = get_right_event_channels(data=snippet,
                                        desired_channels=desired_channels,
                                        available_channels=available_channels)
        name = HashFolderName.split('_')[0]+'_'+datetime.strftime(eventTime,'%Y%m%d_%H%M%S')
        if name in hf:
            continue
        # save in h5py file
        hf.create_dataset(name=name,data=snippet,dtype='f4',compression='gzip')
        np.save('SauronData/'+name,snippet)


  0%|          | 0/80 [00:00<?, ?it/s]

  9%|▉         | 7/80 [00:17<02:58,  2.45s/it]

data shape in seconds 43200.0
relative time in seconds 68197




data shape in seconds 42513.58
relative time in seconds 68197




data shape in seconds 42513.58
relative time in seconds 68197




data shape in seconds 42513.58
relative time in seconds 68197




data shape in seconds 42513.58
relative time in seconds 68203




data shape in seconds 42513.58
relative time in seconds 68203




data shape in seconds 42513.58
relative time in seconds 68217




data shape in seconds 42513.58
relative time in seconds 68217




data shape in seconds 42513.58
relative time in seconds 68221




data shape in seconds 42513.58
relative time in seconds 68221




data shape in seconds 42513.58
relative time in seconds 68227




data shape in seconds 42513.58
relative time in seconds 68227




data shape in seconds 42513.58
relative time in seconds 68229




data shape in seconds 42513.58
relative time in seconds 68229




data shape in seconds 42513.58
relative time in seconds 68233




data shape in seconds 42513.58
relative time in seconds 68233




data shape in seconds 42513.58
relative time in seconds 68235




data shape in seconds 42513.58
relative time in seconds 68235




data shape in seconds 42513.58
relative time in seconds 68237




data shape in seconds 42513.58
relative time in seconds 68237




data shape in seconds 42513.58
relative time in seconds 68240




data shape in seconds 42513.58
relative time in seconds 68240




data shape in seconds 42513.58
relative time in seconds 68242




data shape in seconds 42513.58
relative time in seconds 68242




data shape in seconds 42513.58
relative time in seconds 68251




data shape in seconds 42513.58
relative time in seconds 68251




data shape in seconds 42513.58
relative time in seconds 68253




data shape in seconds 42513.58
relative time in seconds 68253




data shape in seconds 42513.58
relative time in seconds 68254




data shape in seconds 42513.58
relative time in seconds 68254




data shape in seconds 42513.58
relative time in seconds 68260




data shape in seconds 42513.58
relative time in seconds 68260




data shape in seconds 42513.58
relative time in seconds 68266




data shape in seconds 42513.58
relative time in seconds 68266




data shape in seconds 42513.58
relative time in seconds 68267




data shape in seconds 42513.58
relative time in seconds 68267




data shape in seconds 42513.58
relative time in seconds 68269


  9%|▉         | 7/80 [07:15<1:15:45, 62.27s/it]


KeyboardInterrupt: 

In [50]:
24648.625/3600

6.846840277777778

In [35]:

def get_right_event_time(data,relative_event_time,windowsize,Fs):
    # takes a piece of signal of shape [channel, ts], the timestamp of the event and the windowsize
    # returns the event pm windowsize, shape [channel, windowsize]
    
    snippet_start = int((relative_event_time-windowsize/2)*Fs)
    snippet_end = int((relative_event_time+windowsize/2)*Fs)
    return data[:,snippet_start:snippet_end]

In [24]:
g = h5py.File('SauronData.h5','r')

In [26]:
g[name]

<HDF5 dataset "000876e332515afc590efb5235e6970af4b9e12b564d60dd424c15c362fd5888_20100315_092048": shape (20, 3000), type "<f4">