Test of the functions for extracting spectral features on real data
(PSD functions in module 'extract_spectral_features.py').

The example signal used below (row 2 of the dataset) is also used in the script 'classification_wavelets_fourier'.

In [1]:

import pandas as pd
import numpy as np
from scipy.signal import find_peaks

from extract_spectral_features import welch64, PSD_welch64, get_first_n, get_first_n_peaks

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the recordings from the CSV file; the path is only needed for loading.
file_path = '../dat/Epileptic_Seizure_Recognition.csv'
data = pd.read_csv(file_path)
del file_path
print(data.shape)  # (11500, 180)

# Discard the 1st column (Unnamed); all remaining columns are kept.
data = data.drop(columns=[data.columns[0]])

(11500, 180)


In [3]:
# The last column label is the target ("y");
# every label before it names a feature column.
*features, target = data.columns
print(len(features))  # 178

178


In [4]:
# Take the feature values of the row labelled 2 as the example signal
# on which the feature-extraction functions are tested.
ex_signal = data.loc[:, features].loc[2]
print(type(ex_signal))  # Series
print(ex_signal.shape)  # (178,)

ex_signal.plot()

<class 'pandas.core.series.Series'>
(178,)


<Axes: >

In [5]:
# PSD of the example signal. welch64 returns the frequency grid together with
# the PSD, whereas PSD_welch64 returns the PSD only.
ex_frex, ex_PSD = welch64(ex_signal)
print(type(ex_frex))  # ndarray
print(ex_frex.shape)  # (33,)
print(type(ex_PSD))  # ndarray
print(ex_PSD.shape)  # (33,)
# Check the expectation instead of only eyeballing the printed shapes.
assert ex_frex.shape == ex_PSD.shape, 'frequency grid and PSD differ in length'

ex_PSD2 = PSD_welch64(ex_signal)
print('Is the PSD identical?', np.array_equal(ex_PSD, ex_PSD2))  # True
assert np.array_equal(ex_PSD, ex_PSD2), 'welch64 and PSD_welch64 disagree'

print(ex_frex[0:5])  # [0, 1/64, 1/32, 3/64, 1/16]
print(ex_PSD[0:6])
# approximately [3800, 10400, 14600, 15000, 16200, 8700]

# Among the values printed above the largest one is at index 4,
# i.e. at f = 4/64 = 1/16, which corresponds to a period T == 64/4 == 16
# (in samples, assuming the frequency grid is in cycles per sample —
# TODO confirm against welch64).
plt.xlabel('frequency')
plt.ylabel('PSD')
plt.semilogy(ex_frex, ex_PSD)

<class 'numpy.ndarray'>
(33,)
<class 'numpy.ndarray'>
(33,)
Is the PSD identical? True
[0.       0.015625 0.03125  0.046875 0.0625  ]
[ 3811.94293955 10356.35574327 14642.3711002  14988.33882263
 16193.69692577  8743.34763352]


[<matplotlib.lines.Line2D at 0x7c5e85fec050>]

In [6]:
# Indices of all local maxima of the PSD. find_peaks returns a tuple
# (indices, properties); only the indices are used here.
ex_all_peak_indices = find_peaks(ex_PSD)[0]
print(type( ex_all_peak_indices))  # ndarray
print(ex_all_peak_indices.shape)   # (4,)
print(ex_all_peak_indices)  # [4 13 19 28]

# get_first_n applied to the sorted peak indices.
ex_4_peak_indices = get_first_n(np.sort(ex_all_peak_indices))
print(type(ex_4_peak_indices))  # list
print(ex_4_peak_indices) # [4, 13, 19, 28]
assert isinstance(ex_4_peak_indices, list)

# get_first_n_peaks works directly on the PSD. For this signal the result
# starts with the 4 peak indices; the remaining 4 entries are presumably the
# PSD values at those indices (the first one equals ex_PSD[4]) — verify
# against the module.
ex_ind_values = get_first_n_peaks(ex_PSD)
print(type(ex_ind_values))  # ndarray (not a list)
print(ex_ind_values) # [4. , 13. , 19. , 28. , 16200., 130., 49., 70.]
# (the last 4 numbers above are approximations)
assert isinstance(ex_ind_values, np.ndarray)
assert list(ex_ind_values[:4]) == list(ex_4_peak_indices)

<class 'numpy.ndarray'>
(4,)
[ 4 13 19 28]
<class 'list'>
[4, 13, 19, 28]
<class 'numpy.ndarray'>
[4.00000000e+00 1.30000000e+01 1.90000000e+01 2.80000000e+01
 1.61936969e+04 1.29670915e+02 4.89831752e+01 7.03900495e+01]
