In [30]:
# Imports

import numpy as np
import pandas as pd
import math
from antropy import entropy
import scipy
import scipy.signal
from envelope_derivative_operator.energy_operators import edo
from scipy.signal import periodogram, welch
from math import factorial, log
from sklearn.preprocessing import StandardScaler

In [31]:
# Install necessary git repositories and pip installations

!git clone https://github.com/ktapani/Neonatal_Seizure_Detection.git
!git clone https://github.com/raphaelvallat/antropy.git
!git clone https://github.com/otoolej/envelope_derivative_operator.git
!pip install numba


fatal: destination path 'Neonatal_Seizure_Detection' already exists and is not an empty directory.
fatal: destination path 'antropy' already exists and is not an empty directory.
fatal: destination path 'envelope_derivative_operator' already exists and is not an empty directory.


Defaulting to user installation because normal site-packages is not writeable


In [32]:
%%time
# Read your respective window lengths file

df = pd.read_csv('filtered_babydf8sec.csv')

Wall time: 1min 6s


In [33]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376
0,0.232631,0.039205,0.098541,0.079062,0.083939,0.090445,0.110909,0.107838,0.137610,0.171921,...,0.234945,0.261150,0.315332,0.332926,0.366597,0.366889,0.397063,0.412831,0.448537,0.0
1,0.539992,0.460765,0.553275,0.487640,0.442014,0.517044,0.593288,0.578477,0.573702,0.583565,...,0.672798,0.744710,0.813363,0.846694,0.876487,0.868818,0.833288,0.735268,0.639516,0.0
2,0.362426,0.397163,0.397632,0.458057,0.454505,0.446639,0.453360,0.461071,0.437925,0.427263,...,0.661528,0.704895,0.758532,0.772183,0.788902,0.758788,0.732208,0.665439,0.603165,0.0
3,0.415197,0.490017,0.218434,0.499493,0.399304,0.286767,0.460972,0.713644,0.743057,1.000000,...,0.798678,0.834783,0.883986,0.898679,0.918330,0.884407,0.851774,0.787249,0.736760,0.0
4,0.476225,0.479276,0.606380,0.533275,0.570582,0.685238,0.803167,0.762588,0.728798,0.963084,...,0.721789,0.695447,0.686879,0.665844,0.655183,0.638625,0.654217,0.638667,0.632710,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47871,0.488102,0.502134,0.588716,0.607865,0.718876,0.831261,0.752249,0.735296,0.740510,0.731361,...,0.784606,0.695721,0.687957,0.724864,0.722990,0.667013,0.587140,0.513907,0.461304,0.0
47872,0.539242,0.508276,0.531474,0.609622,0.610614,0.657697,0.654039,0.631143,0.622604,0.625237,...,0.470962,0.439785,0.402310,0.370513,0.344387,0.340144,0.355831,0.393229,0.440012,0.0
47873,0.448058,0.472335,0.507852,0.534972,0.740174,0.807509,0.844256,0.602954,0.703639,0.735538,...,0.510859,0.516461,0.510747,0.516636,0.511382,0.517539,0.512646,0.519028,0.514399,0.0
47874,0.489793,0.480871,0.490465,0.481622,0.491105,0.482328,0.491704,0.482982,0.492256,0.483578,...,0.531250,0.529430,0.531520,0.529182,0.532820,0.533967,0.537935,0.538004,0.542877,0.0


<ins>Features considered in this notebook:</ins>

Total number of features considered = 11

- Mean
- Standard Deviation
- Kurtosis
- Skewness
- Shannon Entropy
- Activity
- Mobility
- Complexity
- Permutation Entropy
- Sample Entropy
- Approximate Entropy

In [34]:
# Hyperparams

window_length = 256

In [35]:
%%time
class NeonatalSeizureFeatures:
    def __init__(self, row):
        self.row = row
        
    def skewness(self):
        row = np.array(self.row)
        row = row[:-1]
        row = np.reshape(row, (21, window_length))
        return (pd.Series(scipy.stats.skew(x, axis = 0, bias = False) for x in row))
    
    
df_new = df.apply(lambda row: NeonatalSeizureFeatures(row).skewness(), axis = 1)

df_new

Wall time: 2min 9s


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,2.088194,2.123238,3.120415,2.744761,2.108340,1.829132,4.429741,2.663853,1.620943,0.692711,...,0.018254,0.853528,0.643709,-0.225833,2.264196,1.101369,1.523516,2.150685,0.455329,-0.258997
1,-0.197330,0.316557,-0.134352,-0.540564,-0.175964,0.303900,0.204909,-0.404367,-0.226587,-0.421878,...,0.378744,-0.008765,-0.186909,0.240875,-0.090522,0.107907,0.302043,-0.206059,0.862391,-0.523474
2,0.825586,0.287372,1.285785,-0.174788,0.767077,0.129310,-0.135743,0.575171,0.134405,0.648657,...,0.097237,0.147250,0.158343,0.060428,0.302851,-0.040672,-0.025404,0.140460,0.860181,-0.330807
3,0.461669,0.141922,0.060863,0.253853,0.878535,0.223711,-0.084912,0.372252,0.427899,0.229068,...,-0.503395,0.189591,-0.060631,0.389600,0.093202,0.150008,-0.154486,0.255622,0.802439,-0.445344
4,-0.000342,-0.745698,-0.138074,-0.673309,-0.230322,-0.418107,-0.066131,-0.215620,-0.210343,0.315491,...,-1.132955,-0.205406,-0.852046,-0.650675,-0.790878,-0.874377,-0.892368,-1.163513,0.521628,-0.540062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47871,-0.102955,-0.128181,-0.006627,0.042244,-0.233012,0.304780,-0.573856,-0.422882,-0.059915,0.013442,...,-0.148186,0.160760,0.095981,-0.386310,-0.393453,0.008656,-0.139752,0.183041,-1.832759,0.934717
47872,-0.412135,-0.142941,-0.403287,-0.754478,0.405512,-0.065443,-0.843650,0.444632,-0.386131,0.125943,...,0.396117,-0.108831,-0.112750,0.402102,0.105658,0.063346,-0.276880,0.207832,-1.297717,0.223776
47873,0.238373,-0.072595,0.441496,-0.231579,-0.182844,-0.104527,-0.382736,0.214196,0.132875,-0.001261,...,0.021898,0.222613,0.118806,0.329106,-0.007343,0.123478,-0.192110,0.059401,-2.006171,-0.098951
47874,-0.114141,0.770778,0.661211,0.993502,0.771859,1.011869,0.863772,0.660351,0.992457,0.740917,...,0.654195,1.062996,0.975580,0.964626,1.186280,0.855589,0.822121,0.929133,-2.084958,-1.856468


### Helper Methods:

In [36]:
def hMob(x):
    row = np.array(x)
    return (np.sqrt(np.var(np.gradient(x)) / np.var(x)))

### Feature Methods:

In [37]:
# Feature Methods

def feature_mean(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(np.mean(x, axis = 0) for x in row))

def feature_stddev(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(np.std(x, axis = 0) for x in row))

def kurtosis(row):
    row = np.array(row)
    annotation = row[-1]
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(scipy.stats.kurtosis(x, axis = 0, bias = False) for x in row))

def skewness(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(scipy.stats.skew(x, axis = 0, bias = False) for x in row))
    
def spectral_entropy(row, sf = 32, nperseg = window_length, axis = 1):
    row = np.array(row)
    annotation = row[-1]
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    _, psd = welch(row, sf, nperseg=nperseg, axis=axis)
    psd_norm = psd / psd.sum(axis=axis, keepdims=True)
    se = - np.where(psd_norm == 0, 0, psd_norm * np.log(psd_norm) / np.log(2)).sum(axis=axis)
    return pd.Series(se)

def hjorthActivity(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(np.var(x, axis = 0) for x in row))

def hjorthMobility(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(np.sqrt(np.var(np.gradient(x)) / np.var(x)) for x in row))

def hjorthComplexity(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series((hMob(np.gradient(x)) / hMob(x)) for x in row))

def permutation_entropy(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(entropy.perm_entropy(x) for x in row))

def sample_entropy(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(entropy.sample_entropy(x) for x in row))

def approximate_entropy(row):
    row = np.array(row)
    row = row[:-1]
    row = np.reshape(row, (21, window_length))
    return (pd.Series(entropy.app_entropy(x) for x in row))

In [38]:
list_of_feature_methods = [feature_mean, feature_stddev, kurtosis, skewness, spectral_entropy, hjorthActivity, hjorthMobility, 
                          hjorthComplexity, permutation_entropy, sample_entropy, approximate_entropy]

In [39]:
%%time
df_list = list()
for i, j in zip(list_of_feature_methods, range(len(list_of_feature_methods))):
    print("Epoch %d ..." % (j+1))
    df_temp = df.apply(lambda row: i(row), axis = 1)
    df_list.append(df_temp)

new_df = pd.concat(df_list, axis = 1)
new_df

Epoch 1 ...
Epoch 2 ...
Epoch 3 ...
Epoch 4 ...
Epoch 5 ...
Epoch 6 ...
Epoch 7 ...
Epoch 8 ...
Epoch 9 ...
Epoch 10 ...
Epoch 11 ...
Wall time: 29min 23s


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,0.237731,0.271438,0.231115,0.205129,0.327846,0.298531,0.155484,0.231727,0.258646,0.391060,...,0.904726,0.958898,0.916750,0.908312,0.691119,0.835005,0.876676,0.913564,0.998378,0.802795
1,0.525592,0.472005,0.559939,0.545135,0.515483,0.434181,0.408683,0.525175,0.517135,0.577948,...,0.884518,1.069202,0.927021,0.829646,0.796822,0.812203,0.905807,0.838565,0.966738,0.560600
2,0.362238,0.453657,0.367739,0.569930,0.386825,0.446326,0.533002,0.406928,0.497937,0.409507,...,0.898972,0.921123,0.896250,0.894655,0.996300,0.986507,1.018279,0.934882,0.774752,0.506077
3,0.426183,0.443488,0.503179,0.446013,0.308886,0.478375,0.502533,0.460015,0.477389,0.515927,...,0.769690,1.004485,0.893069,0.775634,0.976204,0.856234,0.889472,0.878188,0.711451,0.497412
4,0.443872,0.547165,0.449588,0.596705,0.471281,0.568231,0.481293,0.445354,0.469777,0.465266,...,0.681920,0.722205,0.789209,0.753635,0.792819,0.815069,0.695286,0.873937,0.763576,0.521764
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47871,0.517827,0.569006,0.498979,0.476836,0.569441,0.439424,0.564618,0.598256,0.473253,0.501800,...,1.124100,1.039616,1.063122,1.129749,1.049808,1.010132,1.103496,0.966013,0.775144,0.381793
47872,0.550222,0.532460,0.568019,0.599673,0.431463,0.509925,0.600544,0.381907,0.539138,0.447469,...,0.999687,1.094155,1.118183,0.961279,1.029952,1.010014,1.094238,1.025693,0.895212,0.457229
47873,0.429925,0.529781,0.431279,0.478944,0.519904,0.506410,0.524140,0.519448,0.437079,0.460148,...,1.020006,0.952002,1.028193,0.997588,1.088019,0.926116,0.970077,0.991673,0.938932,0.537083
47874,0.486987,0.470565,0.479240,0.461068,0.475725,0.455243,0.461272,0.483860,0.451289,0.477916,...,0.207869,0.159339,0.179733,0.174451,0.158623,0.135969,0.183733,0.176790,0.140877,0.134715


In [40]:
feature_df = pd.concat([new_df, df[df.columns[-1]]], axis = 1)
feature_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,5376
0,0.237731,0.271438,0.231115,0.205129,0.327846,0.298531,0.155484,0.231727,0.258646,0.391060,...,0.958898,0.916750,0.908312,0.691119,0.835005,0.876676,0.913564,0.998378,0.802795,0.0
1,0.525592,0.472005,0.559939,0.545135,0.515483,0.434181,0.408683,0.525175,0.517135,0.577948,...,1.069202,0.927021,0.829646,0.796822,0.812203,0.905807,0.838565,0.966738,0.560600,0.0
2,0.362238,0.453657,0.367739,0.569930,0.386825,0.446326,0.533002,0.406928,0.497937,0.409507,...,0.921123,0.896250,0.894655,0.996300,0.986507,1.018279,0.934882,0.774752,0.506077,0.0
3,0.426183,0.443488,0.503179,0.446013,0.308886,0.478375,0.502533,0.460015,0.477389,0.515927,...,1.004485,0.893069,0.775634,0.976204,0.856234,0.889472,0.878188,0.711451,0.497412,0.0
4,0.443872,0.547165,0.449588,0.596705,0.471281,0.568231,0.481293,0.445354,0.469777,0.465266,...,0.722205,0.789209,0.753635,0.792819,0.815069,0.695286,0.873937,0.763576,0.521764,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47871,0.517827,0.569006,0.498979,0.476836,0.569441,0.439424,0.564618,0.598256,0.473253,0.501800,...,1.039616,1.063122,1.129749,1.049808,1.010132,1.103496,0.966013,0.775144,0.381793,0.0
47872,0.550222,0.532460,0.568019,0.599673,0.431463,0.509925,0.600544,0.381907,0.539138,0.447469,...,1.094155,1.118183,0.961279,1.029952,1.010014,1.094238,1.025693,0.895212,0.457229,0.0
47873,0.429925,0.529781,0.431279,0.478944,0.519904,0.506410,0.524140,0.519448,0.437079,0.460148,...,0.952002,1.028193,0.997588,1.088019,0.926116,0.970077,0.991673,0.938932,0.537083,0.0
47874,0.486987,0.470565,0.479240,0.461068,0.475725,0.455243,0.461272,0.483860,0.451289,0.477916,...,0.159339,0.179733,0.174451,0.158623,0.135969,0.183733,0.176790,0.140877,0.134715,0.0


In [52]:
feature_df.to_csv('Full_feature_data8sec.csv', index = False)

In [53]:
feature_df.columns = [i for i in range(232)]
feature_df.columns

Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            222, 223, 224, 225, 226, 227, 228, 229, 230, 231],
           dtype='int64', length=232)

In [54]:
np.isinf(feature_df).values.any()

False

In [55]:
feature_df1 = feature_df.replace([np.inf, -np.inf], np.nan)
feature_df1.dropna(inplace = True)

In [56]:
feature_df1.reset_index(drop = True, inplace = True)

## Principal Component Analysis (PCA):

In [57]:
# Imports

from sklearn.decomposition import PCA

In [58]:
# Set hyperparams for PCA

n_components = 20
random_state = 32

### PCA 20

In [62]:
pca_20 = PCA(n_components = n_components, random_state = random_state)
feature_df_20 = pca_20.fit_transform(feature_df1[feature_df1.columns[:-1]])
# feature_df_20 = pca_20.fit_transform(feature_df1[feature_df1[[-1]]])

In [63]:
pca_20_df = pd.DataFrame(data = feature_df_20)
pca_20_df = pd.concat([pca_20_df, feature_df1[feature_df1.columns[-1]]], axis = 1)
pca_20_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,231
0,25.022356,-6.799958,-5.016250,4.581315,-11.306539,4.426036,13.180123,2.459563,8.429075,-1.679232,...,-1.494234,4.459849,-3.105590,-2.073293,-1.532261,4.878658,-2.927456,7.759248,6.571644,0.0
1,-2.844736,-1.265252,1.738377,0.484612,0.022045,0.443640,-0.925560,-0.217946,0.753162,0.682743,...,-0.814074,0.368062,1.119930,0.535137,1.137608,-0.489140,-0.113873,-0.023101,-0.634012,0.0
2,-3.071476,-1.015506,2.156695,-2.202537,-4.595725,1.106969,-2.112005,-0.310945,3.380887,-1.529304,...,-0.904437,0.732868,0.189963,-0.246212,0.543588,-0.534095,-0.192307,0.155150,0.785082,0.0
3,-4.920069,-1.329368,1.986788,-0.895050,2.066811,2.509160,-0.354943,0.878185,0.526922,1.098313,...,-0.291981,1.617216,-0.228556,-0.022398,-0.291541,0.186455,0.086554,-0.927143,0.308171,0.0
4,0.770259,-1.559405,2.545578,-1.118795,0.540490,3.438093,-0.584477,-2.273167,-3.289307,-0.905880,...,0.524261,0.664633,-1.651213,-1.361999,-1.512542,0.667240,-0.696905,-0.379874,0.483805,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47871,-7.751959,0.775206,-1.339623,-1.686491,1.232281,0.097654,0.221657,-0.863374,0.297561,-0.053956,...,-0.157246,-0.393818,-0.650976,-0.662818,0.515276,0.427902,0.185611,-0.440108,0.142428,0.0
47872,-3.741647,-1.106958,-0.421251,-0.477359,0.693464,-0.077072,0.238473,1.366970,1.266009,-0.304988,...,-0.439573,-0.249050,1.318808,-1.624189,0.165268,0.033013,0.355185,-0.390979,-0.268773,0.0
47873,-6.544203,1.804694,-1.886439,-1.361567,0.314316,-1.266477,-0.810861,0.313862,-1.334900,-0.490203,...,0.001488,-0.108921,-0.897002,0.157056,-0.289176,-0.042475,0.363187,-0.053521,-0.137937,0.0
47874,19.540495,16.103253,-7.299638,-4.989192,3.433336,3.265255,0.011561,-0.609568,-0.453411,0.539367,...,-0.946517,-0.247224,-0.455614,0.385181,-0.063998,1.154159,-0.096725,-0.039695,2.445721,0.0


In [71]:
pca_20_df.to_csv('PCA_20_features.csv', index = False)

### PCA 50

In [73]:
n_components = 50

pca_50 = PCA(n_components = n_components, random_state = random_state)
feature_df_50 = pca_50.fit_transform(feature_df1[feature_df1.columns[:-1]])

pca_50_df = pd.DataFrame(data = feature_df_50)
pca_50_df = pd.concat([pca_50_df, feature_df1[feature_df1.columns[-1]]], axis = 1)
pca_50_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,231
0,25.022356,-6.799958,-5.016250,4.581315,-11.306539,4.426036,13.180123,2.459563,8.429075,-1.679232,...,-0.267965,-0.033910,-1.107760,-0.685061,0.584467,0.821474,0.709462,-0.498239,0.016951,0.0
1,-2.844736,-1.265252,1.738377,0.484612,0.022045,0.443640,-0.925560,-0.217946,0.753162,0.682743,...,0.007542,0.184258,-0.098010,0.121566,-0.111186,-0.746491,0.038035,-0.119754,0.093184,0.0
2,-3.071476,-1.015506,2.156695,-2.202537,-4.595725,1.106969,-2.112005,-0.310945,3.380887,-1.529304,...,-0.349426,-0.632636,0.014604,0.587698,0.154267,0.198887,0.001966,0.624264,0.068145,0.0
3,-4.920069,-1.329368,1.986788,-0.895050,2.066811,2.509160,-0.354943,0.878185,0.526922,1.098313,...,-0.289939,-0.153739,-0.083112,-0.078163,0.506903,0.076371,-0.216515,-0.036905,-0.369917,0.0
4,0.770259,-1.559405,2.545578,-1.118795,0.540490,3.438093,-0.584477,-2.273167,-3.289307,-0.905880,...,-0.138687,-0.455890,-0.243039,-0.050084,-0.337291,-0.299087,-0.465478,0.163042,0.087136,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47871,-7.751959,0.775206,-1.339623,-1.686491,1.232281,0.097654,0.221657,-0.863374,0.297561,-0.053956,...,-0.235280,-0.113446,0.814575,-0.261449,0.170903,0.221106,0.267184,-0.433643,0.458923,0.0
47872,-3.741647,-1.106958,-0.421251,-0.477359,0.693464,-0.077072,0.238473,1.366970,1.266009,-0.304988,...,-0.528106,-0.367886,-0.090466,0.639809,0.465343,-0.570838,0.232643,-0.072028,-0.510053,0.0
47873,-6.544203,1.804694,-1.886439,-1.361567,0.314316,-1.266477,-0.810861,0.313862,-1.334900,-0.490203,...,-0.420342,-0.505307,0.278991,0.161658,-0.269128,0.201929,0.163739,-0.138535,-0.209167,0.0
47874,19.540495,16.103253,-7.299638,-4.989192,3.433336,3.265255,0.011561,-0.609568,-0.453411,0.539367,...,0.138311,-0.054185,-0.010468,-0.055329,-0.014803,0.339775,0.253236,-0.012497,0.090463,0.0


In [75]:
pca_50_df.to_csv('PCA_50_features.csv', index = False)

### PCA 70

In [77]:
n_components = 70

pca_70 = PCA(n_components = n_components, random_state = random_state)
feature_df_70 = pca_70.fit_transform(feature_df1[feature_df1.columns[:-1]])

pca_70_df = pd.DataFrame(data = feature_df_70)
pca_70_df = pd.concat([pca_70_df, feature_df1[feature_df1.columns[-1]]], axis = 1)

pca_70_df.to_csv('PCA_70_features.csv', index = False)

### PCA 100

In [78]:
n_components = 100

pca_100 = PCA(n_components = n_components, random_state = random_state)
feature_df_100 = pca_100.fit_transform(feature_df1[feature_df1.columns[:-1]])

pca_100_df = pd.DataFrame(data = feature_df_100)
pca_100_df = pd.concat([pca_100_df, feature_df1[feature_df1.columns[-1]]], axis = 1)

pca_100_df.to_csv('PCA_100_features.csv', index = False)