In [22]:
import pywt
import math
import numpy as np
import pandas as pd
from pyentrp import entropy
from tqdm.notebook import tqdm
from scipy.signal import welch
from scipy.integrate import simps
from scipy.stats import skew, kurtosis, variation
from sklearn.utils import shuffle

In [23]:
from ipynb.fs.full.Preprocessing import removeNonNumericValues, dimentionalityReduction, featureNormalization, undersamplingClusterCentroids, oversamplingSMOTE

Time Domain Features

In [24]:
def computeTimeDomainFeatures (x):
    """Compute eleven time-domain descriptors of a one-dimensional signal.

    Parameters
    ----------
    x : array-like
        Signal samples. The input is passed through ``np.asarray``, so plain
        lists are accepted as well as NumPy arrays.

    Returns
    -------
    tuple
        ``(mean, var, skew, kurtosis, std, median, zcr, cv, rms, p2p, sampEn)``

        * ``zcr`` - number of sign changes between consecutive samples divided
          by the number of samples.
        * ``cv`` - ``scipy.stats.variation(x)``, or NaN when the mean is
          exactly zero.
        * ``rms`` - root mean square of the samples.
        * ``p2p`` - maximum minus minimum.
        * ``sampEn`` - first element returned by
          ``entropy.sample_entropy(x, 1)``.

        Every entry is NaN when ``x`` is empty.
    """
    x = np.asarray(x)
    if x.size == 0:
        # max()/min() raise on an empty array and the remaining statistics
        # are undefined, so report NaN for all eleven features instead.
        return (math.nan,) * 11

    mean = np.mean(x)
    var = np.var(x)
    sk = skew(x)
    kurt = kurtosis(x)
    std = np.std(x)
    median = np.median(x)
    # A negative product of neighbouring samples marks a sign change.
    zcr = ((x[:-1] * x[1:]) < 0).sum() / len(x)
    # The coefficient of variation divides by the mean; skip it when that is zero.
    cv = variation(x) if mean != 0 else math.nan
    rms = np.sqrt(x.dot(x) / x.size)
    p2p = x.max() - x.min()
    sampEn = entropy.sample_entropy(x, 1)[0]
    return mean, var, sk, kurt, std, median, zcr, cv, rms, p2p, sampEn

Spectral Features

In [25]:
def psd (x, fs, win):
    """Yield the absolute power of ``x`` in five EEG frequency bands.

    The power spectral density is estimated with Welch's method and then
    integrated over each band with Simpson's rule, following
    https://raphaelvallat.com/bandpower.html

    Parameters
    ----------
    x : array-like
        One-dimensional signal.
    fs : float
        Sampling frequency of ``x`` in Hz.
    win : int
        Segment length in samples, handed to ``welch`` as ``nperseg``.

    Yields
    ------
    float
        Band power for 0.5-4 Hz (delta), 4-8 Hz (theta), 8-12 Hz (alpha),
        12-30 Hz (beta) and 30-100 Hz (gamma), in that order. Band edges are
        inclusive on both sides. A band that contains fewer than two
        frequency bins yields 0.0.
    """
    # SciPy renamed ``simps`` to ``simpson`` and later removed the old name.
    try:
        from scipy.integrate import simpson
    except ImportError:
        from scipy.integrate import simps as simpson

    band_edges = (0.5, 4, 8, 12, 30, 100)
    freqs, density = welch(x, fs, nperseg=win)
    # Simpson's rule has to step by the spacing of the PSD frequency bins;
    # stepping by the band width would inflate every band power.
    freq_res = freqs[1] - freqs[0] if len(freqs) > 1 else 0.0

    for low, high in zip(band_edges[:-1], band_edges[1:]):
        in_band = np.logical_and(freqs >= low, freqs <= high)
        if np.count_nonzero(in_band) < 2:
            # Nothing to integrate over (e.g. the band lies above Nyquist).
            yield 0.0
        else:
            yield simpson(density[in_band], dx=freq_res)

Correlation Features

In [26]:
def compute_correlation (left, right):
    """Return the largest absolute value of the full cross-correlation of two signals."""
    xcorr = np.correlate(left, right, mode='full')
    magnitudes = np.abs(xcorr)
    return magnitudes.max()

Feature Extraction

In [27]:
def feature_extraction (df, sample_rate, step, pca_tolerance, undersampling_rate, oversampling_neighbors):
    """Build, balance and save a per-window feature table from a labelled recording.

    ``df`` is cut into consecutive windows of ``step`` rows. For every column
    except the last one, time-domain and band-power features are computed per
    window; the window label is the most frequent value of its ``'seizure'``
    column. The table is then normalized, dimension-reduced, undersampled,
    oversampled, shuffled and written to ``Features.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample, including a ``'seizure'`` column.
        NOTE(review): every column but the last is treated as a signal
        channel, so ``'seizure'`` is presumably the last column - confirm.
    sample_rate
        Sampling frequency forwarded to ``psd``.
    step : int
        Number of rows per window.
    pca_tolerance
        Forwarded to ``dimentionalityReduction``.
    undersampling_rate
        Forwarded to ``undersamplingClusterCentroids``.
    oversampling_neighbors
        Forwarded to ``oversamplingSMOTE``.

    Returns
    -------
    The shuffled, re-indexed feature table that was written to ``Features.csv``.
    """
    time_labels = ('mean', 'var', 'skew', 'kurt', 'std', 'median', 'zcr', 'cv', 'rms', 'p2p', 'sampEn')
    band_labels = ('deltaPower', 'thetaPower', 'alphaPower', 'betaPower', 'gammaPower')

    print('Feature Extraction')
    ft = pd.DataFrame()
    n_channels = df.shape[1] - 1
    for row, start in enumerate(tqdm(range(0, df.shape[0], step))):
        window = df.iloc[start:start + step]
        for channel in range(n_channels):
            signal = np.array(window.iloc[:, channel])
            suffix = str(channel)

            # Time Domain Features
            time_values = computeTimeDomainFeatures(signal)
            for label, value in zip(time_labels, time_values):
                ft.loc[row, label + suffix] = value

            # Frequency Domain Features
            delta, theta, alpha, beta, gamma = psd(signal, sample_rate, signal.shape[0])
            for label, value in zip(band_labels, (delta, theta, alpha, beta, gamma)):
                ft.loc[row, label + suffix] = value

        # The window takes the label that occurs most often among its rows.
        ft.loc[row, 'seizure'] = window['seizure'].value_counts().idxmax()

    removeNonNumericValues(ft)

    ft = featureNormalization(ft)
    print('Normalized features')

    removeNonNumericValues(ft)

    shape_before_reduction = ft.shape
    print('Reducing features dimension')
    ft = dimentionalityReduction(ft, pca_tolerance)
    removeNonNumericValues(ft)
    print('Dimensions reduced from', shape_before_reduction, 'to', ft.shape)
    class_counts = ft.seizure.value_counts()

    print('Undersampling the majority class using Cluster Centroid Method')
    predictors = ft.loc[:, ft.columns != 'seizure']
    ft = undersamplingClusterCentroids(predictors, ft['seizure'], undersampling_rate)
    removeNonNumericValues(ft)
    print('Majority class downsampled from (', class_counts[0], ', ', ft.shape[1], ') to ', ft.shape, sep = '')

    shape_before_oversampling = ft.shape
    print('Oversampling the minority class using SMOTE')
    predictors = ft.loc[:, ft.columns != 'seizure']
    ft = oversamplingSMOTE(predictors, ft['seizure'], oversampling_neighbors)
    ft = shuffle(ft)
    ft.reset_index(drop = True, inplace = True)
    removeNonNumericValues(ft)
    print('Minority class upsampled from (', shape_before_oversampling[0], ', ', ft.shape[1], ') to ', ft.shape, sep='')

    print('Writing features to a csv file\n')
    ft.to_csv('Features.csv', index = False)

    return ft