In [1]:
import warnings
warnings.simplefilter('ignore')
from pyedflib import EdfReader
from matplotlib import pyplot as plt
from tsfresh.feature_extraction import extract_features, MinimalFCParameters
import pandas as pd
import numpy as np

In [2]:
WINDOW_LENGTH=20             #Длина скользящего окна в секундах(нечётное значение)
PATH='data/'
CHANNELS_LIST='channels.xlsx'

In [4]:
class EmgFCPParameters(MinimalFCParameters):
    """
    Класс настройки списка извлекаемых признаков
    """
    def __init__(self):
        MinimalFCParameters.__init__(self)
        
        to_del=['length']
        for param in to_del:
            del self[param]

In [5]:
def movie_window(file,chnum,wsec=WINDOW_LENGTH):
    """
    Функция скользящего окна заданной длительности, на выходе получаем numpy.array,
    содержащий оконные срезы для каждой секунды
    """
    frq=file.getSampleFrequency(chnum)
    record=file.readSignal(chnum)
    rlen=file.getNSamples()[chnum]
    
    rsec=rlen//frq
    record_windows=np.zeros((rsec-(wsec//2)*2,wsec*frq))
    
    for i,row in enumerate(record_windows):
        record_windows[i]=record[i*frq:(i+wsec)*frq]
    
    return record_windows

In [6]:
def to_tsfresh_DataFrame(record_windows,wsec=WINDOW_LENGTH):
    """
    Функция формирующая DataFrame для извлечения признаков с помощью tsfresh,
    из numpy.array поллученного movie_window
    """
    df=pd.DataFrame(record_windows)
    df=df.stack(level=0).reset_index()
    df.columns=['wtime_sec','samples','values']
    df['wtime_sec']=df['wtime_sec']+WINDOW_LENGTH//2+1
    return df

In [7]:
def get_params(records,params=EmgFCPParameters()):
    """
    Функция извлечения признаков из DataFrame, полученного с помощью to_tsfresh_DataFrame
    """
    return extract_features(records,
                            default_fc_parameters=params,
                            column_id='wtime_sec',
                            column_sort="samples")

In [8]:
def movie_window_params(filename,chnum,path=PATH,wsec=WINDOW_LENGTH):
    """
    """
    EDFData=EdfReader(path+filename)
    record_windows=movie_window(EDFData,chnum,wsec=WINDOW_LENGTH)
    dataf=to_tsfresh_DataFrame(record_windows)
    params=get_params(dataf)
    params['File']=filename
    params['Channel']=chnum
    EDFData._close()
    return params

In [9]:
def multifiles_params(channels=CHANNELS_LIST,path=PATH):
    channels_df=pd.read_excel(path+channels)
    params_df_list=[]
    
    for i,row in channels_df.iterrows():
        print('File '+str(row['File'])+', Channel '+str(row['Channel']))
        %%time
        params_df_list.append(movie_window_params(str(row['File'])+'.edf',row['Channel']))
    
    params_df = pd.concat(params_df_list).reset_index()
    params_df.index.names=['Index']
    params_df.to_csv(PATH+'channels'+'_params.csv')
    
    return params_df

In [10]:
params_df_list=multifiles_params()

File 1, Channel 19
CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 5.96 µs


Feature Extraction: 100%|██████████| 10/10 [00:06<00:00,  1.48it/s]


File 2, Channel 7
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 3.81 µs


Feature Extraction: 100%|██████████| 10/10 [00:07<00:00,  1.29it/s]


File 2, Channel 9
CPU times: user 1e+03 ns, sys: 1 µs, total: 2 µs
Wall time: 4.29 µs


Feature Extraction: 100%|██████████| 10/10 [00:07<00:00,  1.30it/s]


File 3, Channel 7
CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 3.81 µs


Feature Extraction: 100%|██████████| 10/10 [00:06<00:00,  1.54it/s]


File 3, Channel 8
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 4.05 µs


Feature Extraction: 100%|██████████| 10/10 [00:06<00:00,  1.49it/s]


File 3, Channel 9
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.05 µs


Feature Extraction: 100%|██████████| 10/10 [00:06<00:00,  1.54it/s]


File 4, Channel 15
CPU times: user 1 µs, sys: 0 ns, total: 1 µs
Wall time: 4.05 µs


Feature Extraction: 100%|██████████| 10/10 [00:07<00:00,  1.31it/s]


In [None]:
len(params_df_list)

In [None]:
params_df_list[0].head()

In [None]:
result = pd.concat(params_df_list).reset_index()

In [None]:
result.to_csv(PATH+'channels'+'_params.csv')

In [None]:
result.index.names=['Index']