## 09/03/2019

- Comparing the progression of different feature values over the course of the EP study for AF and non-AF patients.

In [1]:
import sys
sys.path.insert(0, '/Users/matthewashman/github/MasterProject2018')

# Import necessary modules. Set settings. Import data.
import math
import numpy as np
import pandas as pd
import random
import pywt
import matplotlib.pyplot as plt
from statsmodels.robust import mad
from tsfresh.feature_extraction import feature_calculators
from FeatureExtraction.feature_tools import detect_peaks
from IPython.display import display, clear_output, HTML

import pdb

plt.style.use('default')

X = pd.read_pickle('/Users/matthewashman/github/MasterProject2018/Data/extracted_segments.pkl')

  from pandas.core import datetools


In [2]:
X.columns

Index(['Channel', 'Coupling Interval', 'Data', 'Patient', 'S1/S2', 'Type'], dtype='object')

In [19]:
feature_list = []


# Extract features
for i, row in X_S2.iterrows():
    clear_output(wait=True)
    display('Extracting Features: ' + str(round(100*i/X_S2.index[-1],3)) + '%')

    # Get typical response for this patient and channel
    # Bad apples
    if (((row['Type'] + row['Patient']) == 'af8') & (row['Channel'] == 'CS5-6')):
        typical_response = X_S1[(X_S1['Type']==row['Type']) & 
                           (X_S1['Patient']==row['Patient']) &
                           (X_S1['Channel']==row['Channel'])
                           ].sort_values(by=['Coupling Interval'], ascending=False).iloc[2]
    elif (((row['Type'] + row['Patient']) == 'at1') & (row['Channel'] == 'CS1-2')):
        typical_response = X_S1[(X_S1['Type']==row['Type']) & 
                           (X_S1['Patient']==row['Patient']) &
                           (X_S1['Channel']==row['Channel'])
                           ].sort_values(by=['Coupling Interval'], ascending=False).iloc[4]
    else:
        typical_response = X_S1[(X_S1['Type']==row['Type']) & 
                               (X_S1['Patient']==row['Patient']) &
                               (X_S1['Channel']==row['Channel'])
                               ].sort_values(by=['Coupling Interval'], ascending=False).iloc[0]

    typical_feature_dict = get_good_feature_dict(typical_response['Data'])
    feature_dict = get_good_feature_dict(row['Data'])

    # Normalise by subtracting 'typical' feature values
    for k, v in feature_dict.items():
        feature_dict[k] = v - typical_feature_dict[k]

    # Fill in the other column values
    for col, value in row.iteritems():
        feature_dict[col] = value

    feature_list.append(feature_dict)

'Extracting Features: 100.0%'

In [20]:
features = pd.DataFrame(feature_list)

In [17]:
feature_evolution_list = []
filled_feature_evolution_list = []

patient_types = features['Type'].unique()
cis = features['Coupling Interval'].unique()
cis = np.sort(cis)
feature_names = features.drop(['Type', 'Patient', 'Coupling Interval', 'Channel', 'Data', 'S1/S2'], axis=1).columns

for patient_type in patient_types:
    patients = features[features['Type']==patient_type]['Patient'].unique()
    for patient in patients:
        patient_features = features[(features['Patient']==patient) & (features['Type']==patient_type)]
        patient_cis = patient_features['Coupling Interval'].unique()
        patient_cis = np.sort(patient_cis)
        channels = patient_features['Channel'].unique()
                
        for channel in channels:
            patient_feature_evolution_dict = {}
            filled_patient_feature_evolution_dict = {}
            for feature in feature_names:
                feature_evolution = np.empty(len(cis))
                filled_feature_evolution = np.empty(len(cis))
                
                filled_feature_evolution[:] = np.nan
                feature_evolution[:] = np.nan
                for i, ci in enumerate(cis):
                    if ci in patient_cis:
                        try:
                            feature_evolution[i] = patient_features.loc[(patient_features['Coupling Interval']==ci) & (patient_features['Channel']==channel)][feature].values[0]
                        except IndexError:
                             continue

                patient_feature_evolution_dict[feature] = feature_evolution
                
                # Fill in the missing values up to the last coupling interval (want to retain this information)
                try:
                    first_ci = np.where(~np.isnan(feature_evolution))[0][0]
                    last_ci = np.where(~np.isnan(feature_evolution))[0][-1]
                except IndexError:
                    first_ci = 0
                    last_ci = 0
                
                filled_feature_evolution[last_ci+1:] = 0
                
                for i in range(last_ci, first_ci, -1):
                    if np.isnan(feature_evolution[i]):
                        filled_feature_evolution[i] = feature_evolution[i+1]
                    else:
                        filled_feature_evolution[i] = feature_evolution[i]
                        
                filled_patient_feature_evolution_dict[feature] = filled_feature_evolution
        
            patient_feature_evolution_dict['Patient'] = patient
            patient_feature_evolution_dict['Type'] = patient_type
            patient_feature_evolution_dict['Channel'] = channel
            
            filled_patient_feature_evolution_dict['Patient'] = patient
            filled_patient_feature_evolution_dict['Type'] = patient_type
            filled_patient_feature_evolution_dict['Channel'] = channel
            
            feature_evolution_list.append(patient_feature_evolution_dict)
            filled_feature_evolution_list.append(filled_patient_feature_evolution_dict)

In [18]:
# Convert feature_evolution_list to a DataFrame
feature_evolution_df = pd.DataFrame(feature_evolution_list)
filled_feature_evolution_df = pd.DataFrame(filled_feature_evolution_list)

In [19]:
filled_feature_evolution_df.head()

Unnamed: 0,Approximate Entropy: m=3 r=0.7,Channel,Conduction Delay: set_thresh=False,Index Mass Quantile: q=0.6,Label 1,Label 2,Location of Maximum Energy: M=14,Number of Peaks: set_thresh=False,Patient,Percentage Fractionation: thresh=0.01,Power Spectral Entropy,Ratio Beyond 1xSTD,Sample Entropy Around Max Energy: width=60 r=0.025,Type,"Width of Maximum Energy: M=14, width_thresh=0.2"
0,"[nan, 0.016678350605747405, 0.0049198855794055...",CS1-2,"[nan, 16.0, 8.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0,...","[nan, 0.08730158730158732, 0.03968253968253971...","[nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, 18.0, 9.0, 4.0, 3.0, 2.0, 1.0, 0.0, -1.0...","[nan, -1.0, 0.0, -1.0, 1.0, -1.0, -1.0, -2.0, ...",1,"[nan, 2.3999999999999995, 8.8, 1.6000000000000...","[nan, -0.12913011359661297, -0.258766185088053...","[nan, 0.031746031746031744, 0.0079365079365079...","[nan, -0.1357682250390539, -0.2027192925820090...",af,"[nan, 24.0, 15.0, -8.0, -10.0, -11.0, -12.0, -..."
1,"[nan, 0.010876864735488767, 0.0133671729837686...",CS3-4,"[nan, 26.0, 16.0, 6.0, 3.0, 1.0, 1.0, 0.0, -1....","[nan, 0.17460317460317462, 0.11111111111111113...","[nan, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, 19.0, 11.0, 5.0, 2.0, 0.0, 0.0, -1.0, -2...","[nan, 0.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -...",1,"[nan, 1.5999999999999996, 0.7999999999999989, ...","[nan, 0.05019840700969436, 0.2829384265266688,...","[nan, 0.015873015873015872, 0.0238095238095238...","[nan, 0.004074206569512115, 0.1295203477630996...",af,"[nan, 1.0, 3.0, 2.0, -2.0, -1.0, -2.0, -1.0, -..."
2,"[nan, 0.13403687631292605, 0.16319505003577706...",CS5-6,"[nan, 31.0, 34.0, -1.0, 6.0, 1.0, -1.0, -1.0, ...","[nan, 0.35714285714285715, 0.25396825396825395...","[nan, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, 33.0, 35.0, 15.0, 3.0, 2.0, 1.0, 0.0, -1...","[nan, 5.0, 6.0, 7.0, 4.0, 1.0, 1.0, 1.0, 1.0, ...",1,"[nan, 16.8, 18.400000000000002, 28.0, 20.00000...","[nan, -0.15624194597589458, -0.168556300993880...","[nan, 0.1349206349206349, 0.12698412698412698,...","[nan, 0.8471307620494434, 0.3528782720083229, ...",af,"[nan, 38.0, 49.0, 43.0, 10.0, 7.0, 4.0, 4.0, 2..."
3,"[nan, nan, nan, -0.0011435893217871529, -0.008...",CS1-2,"[nan, nan, nan, 5.0, 0.0, 0.0, -3.0, -3.0, -4....","[nan, nan, nan, 0.05555555555555558, 0.0079365...","[nan, nan, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, nan, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, nan, nan, -1.0, -7.0, 0.0, -2.0, -3.0, -...","[nan, nan, nan, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...",2,"[nan, nan, nan, -7.2, -7.2, 0.0, 0.0, 5.599999...","[nan, nan, nan, -0.4948837666185901, -0.320082...","[nan, nan, nan, 0.047619047619047616, 0.031746...","[nan, nan, nan, 0.28728606644994353, 0.2610252...",af,"[nan, nan, nan, 6.0, 4.0, 3.0, 1.0, 0.0, 3.0, ..."
4,"[nan, nan, nan, 0.020679915205361055, 0.011600...",CS3-4,"[nan, nan, nan, 9.0, 3.0, 4.0, 2.0, 2.0, 0.0, ...","[nan, nan, nan, 0.047619047619047616, 0.0, 0.0...","[nan, nan, nan, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[nan, nan, nan, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[nan, nan, nan, 6.0, 0.0, 2.0, 3.0, 2.0, 1.0, ...","[nan, nan, nan, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, ...",2,"[nan, nan, nan, -1.5999999999999988, -1.599999...","[nan, nan, nan, 0.1900065619864657, 0.27280862...","[nan, nan, nan, 0.0, -0.015873015873015872, -0...","[nan, nan, nan, 0.7074216381219381, 0.13119579...",af,"[nan, nan, nan, 0.0, -2.0, -1.0, 7.0, -1.0, 0...."


In [105]:
%matplotlib qt
# Comparing fractionation label curves between patient types
colours = ['#e25238', '#6997e5']
patients_of_interest = ['af', 'ep']
labels = [1,0]

features_to_remove = ['Label 1', 'Label 2', 'Conduction Delay: set_thresh=False', 'Power Spectral Entropy']
features_of_interest = [f for f in feature_names if f not in features_to_remove]

features_of_interest1 = features_of_interest[:(round(len(features_of_interest)/2))]

fig, axes = plt.subplots(nrows=len(features_of_interest1), ncols=3, figsize=(16, 9))
for patient_type, colour in zip(patients_of_interest, colours):
    patients = feature_evolution_df[feature_evolution_df['Type']==patient_type]['Patient'].unique()
    for patient in patients:
        for i,feature in enumerate(features_of_interest1):
            x = np.arange(230, 410, 10)
            try:
                cs12_feature_ev = np.squeeze(feature_evolution_df.loc[(feature_evolution_df['Type']==patient_type) & 
                                                                      (feature_evolution_df['Patient']==patient) &
                                                                      (feature_evolution_df['Channel']=='CS1-2')
                                                                      ][feature].values[0])
            except IndexError:
                cs12_feature_ev = np.empty(len(x))
                cs12_feature_ev[:] = np.nan
                
            x_cs12 = x[~np.isnan(cs12_feature_ev)]
            cs12_feature_ev = cs12_feature_ev[~np.isnan(cs12_feature_ev)]
            
            try:
                cs34_feature_ev = np.squeeze(feature_evolution_df.loc[(feature_evolution_df['Type']==patient_type) & 
                                                                      (feature_evolution_df['Patient']==patient) &
                                                                      (feature_evolution_df['Channel']=='CS3-4')
                                                                      ][feature].values[0])
            except IndexError:
                cs34_feature_ev = np.empty(len(x))
                cs34_feature_ev[:] = np.nan
                
            x_cs34 = x[~np.isnan(cs34_feature_ev)]
            cs34_feature_ev = cs34_feature_ev[~np.isnan(cs34_feature_ev)]
            
            try:
                cs56_feature_ev = np.squeeze(feature_evolution_df.loc[(feature_evolution_df['Type']==patient_type) & 
                                                                     (feature_evolution_df['Patient']==patient) &
                                                                     (feature_evolution_df['Channel']=='CS5-6')
                                                                     ][feature].values[0])
            except IndexError:
                cs56_feature_ev = np.empty(len(x))
                cs56_feature_ev[:] = np.nan
                
            x_cs56 = x[~np.isnan(cs56_feature_ev)]
            cs56_feature_ev = cs56_feature_ev[~np.isnan(cs56_feature_ev)]

            # Plot conduction delay curves for patient
            axes[i,0].plot(x_cs12, cs12_feature_ev, color=colour, alpha=0.7)
            axes[i,1].plot(x_cs34, cs34_feature_ev, color=colour, alpha=0.7)
            axes[i,2].plot(x_cs56, cs56_feature_ev, color=colour, alpha=0.7)

for ax, feature in zip(axes[:,1], features_of_interest1):
    ax.set_title(feature, fontsize=8)

for ax in axes.flatten():
    ax.set_xlim(200,400)
    ax.grid()
    
        
plt.suptitle('Fractionation Labels for AF (red) and EP (blue).')
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.05, hspace=0.35)

plt.show()

## Current average curves

In [11]:
%matplotlib qt
# Comparing fractionation label curves between patient types
colours = ['#e25238', '#6997e5']
patients_of_interest = ['af', 'avnrt']
labels = [1,0]

features_to_remove = ['Label 2', 'Conduction Delay: set_thresh=False', 'Power Spectral Entropy']
features_of_interest = [f for f in feature_names if f not in features_to_remove]

features_of_interest1 = features_of_interest[:(round(len(features_of_interest)/2))]
features_of_interest2 = features_of_interest[(round(len(features_of_interest)/2)):]
features_of_interest3 = ['Location of Maximum Energy: M=14', 'Number of Peaks: set_thresh=False']

fig, axes = plt.subplots(nrows=len(features_of_interest3), ncols=3, figsize=(16, 9))
for patient_type, colour in zip(patients_of_interest, colours):
    patients = filled_feature_evolution_df[filled_feature_evolution_df['Type']==patient_type]['Patient'].unique()
    for patient in patients:
        for i,feature in enumerate(features_of_interest3):
            x = np.arange(230, 410, 10)
            try:
                cs12_feature_ev = np.squeeze(filled_feature_evolution_df.loc[(filled_feature_evolution_df['Type']==patient_type) & 
                                                                            (filled_feature_evolution_df['Patient']==patient) &
                                                                            (filled_feature_evolution_df['Channel']=='CS1-2')
                                                                            ][feature].values[0])
            except IndexError:
                cs12_feature_ev = np.empty(len(x))
                cs12_feature_ev[:] = np.nan
                
            x_cs12 = x[~np.isnan(cs12_feature_ev)]
            cs12_feature_ev = cs12_feature_ev[~np.isnan(cs12_feature_ev)]
            
            # Store running mean of feature values
            cs12_feature_rm = np.zeros(len(cs12_feature_ev))
            
            for j in range(len(cs12_feature_ev)):
                end_idx = min(len(cs12_feature_ev), j+3)
                cs12_feature_rm[j] = np.mean(cs12_feature_ev[j:end_idx])
                            
            try:
                cs34_feature_ev = np.squeeze(filled_feature_evolution_df.loc[(filled_feature_evolution_df['Type']==patient_type) & 
                                                                            (filled_feature_evolution_df['Patient']==patient) &
                                                                            (filled_feature_evolution_df['Channel']=='CS3-4')
                                                                            ][feature].values[0])
            except IndexError:
                cs34_feature_ev = np.empty(len(x))
                cs34_feature_ev[:] = np.nan
                
            x_cs34 = x[~np.isnan(cs34_feature_ev)]
            cs34_feature_ev = cs34_feature_ev[~np.isnan(cs34_feature_ev)]
            
            # Store running mean of feature values
            cs34_feature_rm = np.zeros(len(cs34_feature_ev))
            
            for j in range(len(cs34_feature_ev)):
                end_idx = min(len(cs12_feature_ev), j+3)
                cs34_feature_rm[j] = np.mean(cs34_feature_ev[j:end_idx])
            
            try:
                cs56_feature_ev = np.squeeze(filled_feature_evolution_df.loc[(filled_feature_evolution_df['Type']==patient_type) & 
                                                                           (filled_feature_evolution_df['Patient']==patient) &
                                                                           (filled_feature_evolution_df['Channel']=='CS5-6')
                                                                           ][feature].values[0])
            except IndexError:
                cs56_feature_ev = np.empty(len(x))
                cs56_feature_ev[:] = np.nan
                
            x_cs56 = x[~np.isnan(cs56_feature_ev)]
            cs56_feature_ev = cs56_feature_ev[~np.isnan(cs56_feature_ev)]
            
            # Store running mean of feature values
            cs56_feature_rm = np.zeros(len(cs56_feature_ev))
            
            for j in range(len(cs56_feature_ev)):
                end_idx = min(len(cs12_feature_ev), j+3)
                cs56_feature_rm[j] = np.mean(cs56_feature_ev[j:end_idx])

            # Plot conduction delay curves for patient
            axes[i,0].plot(x_cs12, cs12_feature_rm, color=colour, alpha=0.7)
            axes[i,1].plot(x_cs34, cs34_feature_rm, color=colour, alpha=0.7)
            axes[i,2].plot(x_cs56, cs56_feature_rm, color=colour, alpha=0.7)

for ax, feature in zip(axes[:,1], features_of_interest3):
    ax.set_title(feature, fontsize=8)

for ax in axes.flatten():
    ax.set_xlim(200,400)
    ax.grid()
    
        
plt.suptitle('Running Average (N=3) Feature Values for AF (red) and AVNRT (blue).')
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.05, hspace=0.35)

plt.show()

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [20]:
cs34_1_peak = features[(features['Number of Peaks: set_thresh=False']==1) & (features['Channel']=='CS3-4')]

In [21]:
%matplotlib qt
patient_types = features['Type'].unique()

patient_types = ['avnrt', 'ep']

for patient_type in patient_types:
    patients = features[features['Type']==patient_type]['Patient'].unique()
    if patient_type == 'avrt':
        patients = patients[1:]
        
    for patient in patients: 
        patient_features = features[(features['Type']==patient_type) & (features['Patient']==patient) & (features['S1/S2']=='S2')]
        
        patient_cis = patient_features['Coupling Interval'].unique()

        fig, axes = plt.subplots(nrows=len(patient_cis), ncols=3, sharex=True, figsize=(16,9))
        
        for i,ci in enumerate(patient_cis):
            cs12 = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS1-2'))]['Data'].values[0]
            cs12_label = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS1-2'))]['Label 1'].values[0]
            cs12_num_peaks = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS1-2'))]['Number of Peaks: set_thresh=False'].values[0]
            cs34 = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS3-4'))]['Data'].values[0]
            cs34_label = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS3-4'))]['Label 1'].values[0]
            cs34_num_peaks = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS3-4'))]['Number of Peaks: set_thresh=False'].values[0]
            cs56 = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS5-6'))]['Data'].values[0]
            cs56_label = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS5-6'))]['Label 1'].values[0]
            cs56_num_peaks = patient_features[(patient_features['Coupling Interval']==ci) & ((patient_features['Channel']=='CS5-6'))]['Number of Peaks: set_thresh=False'].values[0]
            
            cs12_peaks = get_peaks(cs12, 0.2)
            cs34_peaks = get_peaks(cs34, 0.2)
            cs56_peaks = get_peaks(cs56, 0.2)
            
            axes[i][0].plot(np.arange(len(cs12)), cs12)
            axes[i][0].hold(True)
            axes[i][0].plot(cs12_peaks[0], cs12_peaks[1], 'kx')
            plt.text(0.8, 0.3, 'Label: ' + cs12_label, fontweight='bold', fontsize=8, transform=axes[i][0].transAxes)
            plt.text(0.8, 0.6, 'Peaks: ' + str(cs12_num_peaks), fontweight='bold', fontsize=8, transform=axes[i][0].transAxes)
            axes[i][1].plot(np.arange(len(cs34)), cs34)
            axes[i][1].hold(True)
            axes[i][1].plot(cs34_peaks[0], cs34_peaks[1], 'kx')
            plt.text(0.8, 0.3, 'Label: ' + cs34_label, fontweight='bold', fontsize=8, transform=axes[i][1].transAxes)
            plt.text(0.8, 0.6, 'Peaks: ' + str(cs34_num_peaks), fontweight='bold', fontsize=8, transform=axes[i][1].transAxes)
            axes[i][2].plot(np.arange(len(cs56)), cs56)
            axes[i][2].hold(True)
            axes[i][2].plot(cs56_peaks[0], cs56_peaks[1], 'kx')
            plt.text(0.8, 0.3, 'Label: ' + cs56_label, fontweight='bold', fontsize=8, transform=axes[i][2].transAxes)
            plt.text(0.8, 0.6, 'Peaks: ' + str(cs56_num_peaks), fontweight='bold', fontsize=8, transform=axes[i][2].transAxes)

            
        
        plt.suptitle('Convolved signal for: ' + patient_type + ' ' + patient)
        plt.draw()
        plt.waitforbuttonpress()
        plt.close()

    See the API Changes document (http://matplotlib.org/api/api_changes.html)
    for more details.
    See the API Changes document (http://matplotlib.org/api/api_changes.html)
    for more details.
    See the API Changes document (http://matplotlib.org/api/api_changes.html)
    for more details.


IndexError: index 0 is out of bounds for axis 0 with size 0

In [5]:
# A shitty conduction delay detector
def get_delay(x, amp_thresh=None, set_thresh=False):
    if (set_thresh==True):
        if any(abs(x)>amp_thresh):
            return np.argmax(abs(x)>amp_thresh)
        else:
            return len(x)
    else:    
        return np.argmax(abs(x)>(max(abs(x))/2))
    
def denoise(x):
    # Obtain Daubechies N=6 wavelet coefficients
    waveletCoefs = pywt.wavedec(x, 'db7', mode='per')

    # Throw away coefficients corresponding to noise
    sigma = mad(waveletCoefs[-1])
    uThresh = 1*sigma*np.sqrt(2*np.log(len(x)))
    denoised = waveletCoefs[:]
    denoised[1:] = (pywt._thresholding.hard(i, value=uThresh) for i in denoised[1:])

    # Reconstruct the original signal
    xDenoised = pywt.waverec(denoised, 'db7', mode='per')

    return xDenoised

def get_peaks(x, height_thresh, scale_amp=None, set_scale=False, plot = False):
    x = np.array(x)
    
    # Get height_thresh
    if set_scale:
        height_thresh = height_thresh*scale_amp
    else:
        height_thresh = height_thresh*max(abs(x))
    
    # Denoise x
    xdn = denoise(x)

    # Detect peaks using detect_peaks
    pos_peak_idx = detect_peaks(xdn, mph=height_thresh, threshold = 0)
    neg_peak_idx = detect_peaks((-xdn), mph=height_thresh, threshold = 0)
    peak_idx = np.concatenate([pos_peak_idx, neg_peak_idx])
    peak_idx = np.sort(peak_idx)
    # Edge indeces aren't detected
    peak_idx = peak_idx[(peak_idx != 0) & (peak_idx != (len(xdn)-1))]

    new_peak_idx = []
    peak_amp = []
    if (len(peak_idx) > 0):
        new_peak_idx.append(peak_idx[0])
        mp_thresh = 0.2*max(abs(x))
        for i in range(len(peak_idx)-1):
            idx = peak_idx[i]
            idx_next = peak_idx[i+1]
            mid_point = int((idx_next+idx)/2)
            if (max([abs(x[idx_next]-x[mid_point]), abs(x[idx]-x[mid_point])]) > mp_thresh):
                new_peak_idx.append(idx_next)

        peak_idx = np.array(new_peak_idx)
        peak_amp = x[peak_idx]

    if plot == True:
        fig, [ax1] = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(8,8))
        ax1.plot(x, 'b' , xdn, 'r--', peak_idx, peak_amp, 'kx')
        #plt.title(fileName)
        ax1.set_xlabel('Sample')
        ax1.set_ylabel('Normalised amplitude')
        ax1.legend(['Original segment', 'Denoised segment', 'Detected peaks'])

        plt.draw()
        plt.waitforbuttonpress(0) # this will wait for indefinite time
        plt.close(fig)


    return peak_idx, peak_amp

def sample_entropy(U, m, r):

    def _maxdist(x_i, x_j):
        result = max([abs(ua-va) for ua, va in zip(x_i, x_j)])
        return result

    def _phi(m):
        x = np.zeros([N,m-1])
        for i in range(N-m+1):
            x[i,:] = U[i:i+m-1]

        C = 0
        for i in range(len(x)):
            for j in range(len(x)):
                if i != j:
                    if _maxdist(x[i,:], x[j,:]) <= r:
                        C = C + 1

        return C

    U = U/max(abs(U))
    N = len(U)

    return -np.log(_phi(m+1)/_phi(m))

def percentage_fractionation(x, peak_idxs, thresh=0.01, sr=1000):
    # Get peak indexes and amplitude
    peak_idx_diffs = np.diff(peak_idxs)
    frac_time = 0
    frac_time = np.sum(peak_idx_diffs[peak_idx_diffs < thresh*sr])
    prcnt_frac = (frac_time/len(x))*100
    return prcnt_frac

def get_local_sample_entropy(x, centre_idx, width, m=2, r=0.05):
    # Ensure width is odd
    if ((width%2) == 0):
        width += 1
        
    if (centre_idx < (width-1)/2):
        return sample_entropy(x[:width+1], m, r)
    elif (centre_idx > (len(x)-1-(width-1)/2)):
        return sample_entropy(x[len(x)-1-width:], m, r)
    else:
        return sample_entropy(x[int(centre_idx-(width-1)/2):int(centre_idx+(width+1)/2)], m, r)
    
def get_location_of_max_energy(x, M=14):
    v = np.ones(M)
    x_ = np.convolve(abs(x), v)
    return (np.argmax(x_) + math.floor(M/2))
        
def get_local_peaks(x, centre_idx, width=25, height_thresh=0.1):
    if ((width%2) == 0):
        width += 1
        
    if (centre_idx < (width-1)/2):
        return get_peaks(x[:width+1], height_thresh)
    elif (centre_idx > (len(x)-1-(width-1)/2)):
        return get_peaks(x[len(x)-1-width:], height_thresh)
    else:
        return get_peaks(x[int(centre_idx-(width-1)/2):int(centre_idx+(width+1)/2)], height_thresh)
    
def get_pse(x):
    x_fft = np.fft.rfft(x)
    x_P = (1/len(x_fft))*np.absolute(x_fft)**2
    x_p = x_P/sum(x_P)
    pse = np.sum([(-p*np.log2(p)) for p in x_p])
    if pse == np.nan:
        pdb.set_trace()
        print('WTF')
    return pse

def get_local_pse(x, centre_idx, width=50):
    if ((width%2) == 0):
        width += 1
        
    if (centre_idx < (width-1)/2):
        return get_pse(x[:width+1])
    elif (centre_idx > (len(x)-1-(width-1)/2)):
        return get_pse(x[len(x)-1-width:])
    else:
        return get_pse(x[int(centre_idx-(width-1)/2):int(centre_idx+(width+1)/2)])
    
def get_spectral_centroid(x):
    x_fft = np.fft.rfft(x)
    x_spectrum = np.absolute(x_fft)
    normalized_spectrum = x_spectrum/sum(x_spectrum)
    normalized_frequencies = np.arange(0, len(x_spectrum), 1)
    return sum(normalized_frequencies * normalized_spectrum)

def get_local_spectral_centroid(x, centre_idx, width=50):
    if ((width%2) == 0):
        width += 1
        
    if (centre_idx < (width-1)/2):
        return get_spectral_centroid(x[:width+1])
    elif (centre_idx > (len(x)-1-(width-1)/2)):
        return get_spectral_centroid(x[len(x)-1-width:])
    else:
        return get_spectral_centroid(x[int(centre_idx-(width-1)/2):int(centre_idx+(width+1)/2)])
    
def get_local_energy(x, centre_idx, width=60):
    if ((width%2) == 0):
        width += 1
        
    if (centre_idx < (width-1)/2):
        return np.sum(x[:width+1]**2)
    elif (centre_idx > (len(x)-1-(width-1)/2)):
        return np.sum(x[len(x)-1-width:]**2)
    else:
        return np.sum(x[int(centre_idx-(width-1)/2):int(centre_idx+(width+1)/2)]**2)
    
def get_width_max_energy(x, M=14, width_thresh=0.2):
    v = np.ones(M)
    x_ = np.convolve(abs(x), v)
    if any(x_[np.argmax(x_):] < width_thresh*np.max(x_)):
        end_idx = np.argmax(x_) + np.argmax(x_[np.argmax(x_):] < width_thresh*np.max(x_))
    else:
        end_idx = len(x_)-1
    if any(x_[np.argmax(x_)::-1] < width_thresh*np.max(x_)):  
        start_idx = np.argmax(x_) - np.argmax(x_[np.argmax(x_)::-1] < width_thresh*np.max(x_))
    else:
        start_idx = 0
    return (end_idx - start_idx)

In [15]:
def get_good_feature_dict(x, col_prefix=''):
    feature_dict = {}
    height_thresh=0.2
    
#     feature_dict[col_prefix + 'Maximum Absolute Value'] = max(abs(x))
    
    # Hand engineered features
    x = x/max(abs(x))
    feature_dict[col_prefix + 'Conduction Delay: set_thresh=False'] = get_delay(x)
    peaks = get_peaks(x, height_thresh)
    feature_dict[col_prefix + 'Number of Peaks: set_thresh=False'] = len(peaks[0])
    feature_dict[col_prefix + 'Percentage Fractionation: thresh=0.01'] = percentage_fractionation(x, peaks[0], thresh=0.01)
    
    # Denoise x for remaining features
    x = denoise(x)
    max_energy_idx = get_location_of_max_energy(x)
    feature_dict[col_prefix + 'Location of Maximum Energy: M=14'] = max_energy_idx
    feature_dict[col_prefix + 'Sample Entropy Around Max Energy: width=60 r=0.025'] = get_local_sample_entropy(x, max_energy_idx, 60, m=2, r=0.025)
    feature_dict[col_prefix + 'Width of Maximum Energy: M=14, width_thresh=0.2'] = get_width_max_energy(x, M=14, width_thresh=0.2)
    
    # Temporal features
    feature_dict[col_prefix + 'Approximate Entropy: m=3 r=0.7'] = feature_calculators.approximate_entropy(x, 3, 0.7)
    imq = feature_calculators.index_mass_quantile(x, [{'q': 0.6}])
    feature_dict[col_prefix + 'Index Mass Quantile: q=0.6'] = imq[0][1]
    feature_dict[col_prefix + 'Ratio Beyond 1xSTD'] = feature_calculators.ratio_beyond_r_sigma(x, 1)
    
    # Spectral features
    feature_dict[col_prefix + 'Power Spectral Entropy'] = get_pse(x)
    
    return feature_dict
    

def get_hand_engineered_feature_dict(x, thresh_cd=None, set_thresh_cd=False, thresh_peaks=None, set_thresh_peaks=False, show_peaks=False, col_prefix = ''):
    feature_dict = {}
    sf = max(abs(x))
    x = x/max(abs(x))

    # Hand engineered features
    if set_thresh_cd:
        thresh_cd = thresh_cd/sf
        feature_dict[col_prefix + 'Conduction Delay: set_thresh=True'] = get_delay(x, thresh_cd, set_thresh_cd)
        feature_dict[col_prefix + 'Conduction Delay: set_thresh=False'] = get_delay(x)
    else:
        feature_dict[col_prefix + 'Conduction Delay: set_thresh=False'] = get_delay(x)
    
    height_thresh=0.1
    if set_thresh_peaks:
        thresh_peaks = thresh_peaks/sf
        peaks = get_peaks(x, height_thresh, thresh_peaks, set_thresh_peaks, plot=False)
        feature_dict[col_prefix + 'Number of Peaks: set_thresh=True'] = len(peaks[0])
        peaks = get_peaks(x, height_thresh)
        feature_dict[col_prefix + 'Number of Peaks: set_thresh=False'] = len(peaks[0])
    else:
        peaks = get_peaks(x, height_thresh)
        feature_dict[col_prefix + 'Number of Peaks: set_thresh=False'] = len(peaks[0])
    
    peaks = get_peaks(x, height_thresh)
    feature_dict[col_prefix + 'Percentage Fractionation: thresh=0.01'] = percentage_fractionation(x, peaks[0], thresh=0.01)
    
    # Denoise x for remaining features
    x = denoise(x)
    
    max_energy_idx = get_location_of_max_energy(x)
    feature_dict[col_prefix + 'Location of Maximum Energy: M=14'] = max_energy_idx
    feature_dict[col_prefix + 'Sample Entropy Around Max Energy: width=60 r=0.025'] = get_local_sample_entropy(x, max_energy_idx, 60, m=2, r=0.025)
    feature_dict[col_prefix + 'Energy Around Max Energy'] = get_local_energy(x, max_energy_idx, 60)
    min_idx = np.argmin(x)
    max_idx = np.argmax(x)
    feature_dict[col_prefix + 'Peaks Between Min and Max'] = len([i for i in peaks[0] if ((i > min_idx) & (i < max_idx))])
    feature_dict[col_prefix + 'Width of Maximum Energy: M=14, width_thresh=0.4'] = get_width_max_energy(x, M=14, width_thresh=0.4)
    feature_dict[col_prefix + 'Width of Maximum Energy: M=14, width_thresh=0.2'] = get_width_max_energy(x, M=14, width_thresh=0.2)

    return feature_dict

def get_spectral_feature_dict(x, col_prefix = ''):
    feature_dict = {}
    # Denoise and normalise x for remaining features
    x = denoise(x)
    x = x/max(abs(x))
    
    feature_dict[col_prefix + 'Power Spectral Entropy'] = get_pse(x)
    feature_dict[col_prefix + 'Spectral Centroid'] = get_spectral_centroid(x)
    max_energy_idx = get_location_of_max_energy(x)
    feature_dict[col_prefix + 'Power Spectral Entropy Around Maximum Energy: width=30'] = get_local_pse(x, max_energy_idx, width=30)
    feature_dict[col_prefix + 'Spectral Centroid Around Maximum Energy: width=30'] = get_local_spectral_centroid(x, max_energy_idx, width=30)
    feature_dict[col_prefix + 'Power Spectral Entropy Around Maximum Energy: width=60'] = get_local_pse(x, max_energy_idx, width=60)
    feature_dict[col_prefix + 'Spectral Centroid Around Maximum Energy: width=60'] = get_local_spectral_centroid(x, max_energy_idx, width=60)
    
    return feature_dict
    
def get_temporal_feature_dict(x, col_prefix = ''):

    feature_dict = {}
    feature_dict[col_prefix + 'Maximum Absolute Value'] = np.max(abs(x))
    
    # Denoise and normalise x for remaining features
    x = denoise(x)
    x = x/max(abs(x))


    erbc = feature_calculators.energy_ratio_by_chunks(x, [{'num_segments':10, 'segment_focus':3}, {'num_segments':10, 'segment_focus':2}])
    feature_dict[col_prefix + 'Energy Ratio by Chunks: num_segments=10 segment_focus=2'] = erbc[1][1]
    feature_dict[col_prefix + 'Energy Ratio by Chunks: num_segments=10 segment_focus=3'] = erbc[0][1]
    feature_dict[col_prefix + 'Approximate Entropy: m=3 r=0.7'] = feature_calculators.approximate_entropy(x, 3, 0.7)
    feature_dict[col_prefix + 'Ratio Beyond 5xSTD'] = feature_calculators.ratio_beyond_r_sigma(x, 5)
    feature_dict[col_prefix + 'Ratio Beyond 4xSTD'] = feature_calculators.ratio_beyond_r_sigma(x, 4)
    feature_dict[col_prefix + 'Ratio Beyond 3xSTD'] = feature_calculators.ratio_beyond_r_sigma(x, 3)
    feature_dict[col_prefix + 'Ratio Beyond 2xSTD'] = feature_calculators.ratio_beyond_r_sigma(x, 2)
    feature_dict[col_prefix + 'Ratio Beyond 1xSTD'] = feature_calculators.ratio_beyond_r_sigma(x, 1)
    # A fraction q of the mass lies to the left of i. (Alternative to conduction delay?)
    imq = feature_calculators.index_mass_quantile(x, [{'q': 0.6}, {'q': 0.4}])
    feature_dict[col_prefix + 'Index Mass Quantile: q=0.6'] = imq[0][1]
    feature_dict[col_prefix + 'Index Mass Quantile: q=0.4'] = imq[1][1]
    

    return feature_dict

ERROR! Session/line number was not unique in database. History logging moved to new session 300
