In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import os
import glob

from scipy.signal import find_peaks


from IPython import display

**Summary**

This notebook generates a feature matrix from the matrices with Fourier spectra for electrode and optical mapping. 

Features description:

-  *freq i* - frequency of the i$^{th}$ highest peak
-  *height i* - height of the i$^{th}$ highest peak
-  *width i* - width of the i$^{th}$ highest peak
-  *prominence i* - prominence of the i$^{th}$ highest peak
-  *#peaks_th* - number of peaks for the given threshold (th)
-  *low_freq_noise* - presence of low-frequency noise (the frequency of one of the n highest peaks lies in the interval from 0 to lf_th Hz)


### Upload data

In [38]:
# Folder that holds the input spectrum CSV files (absolute, machine-specific path).
path = r'C:\Users\Public\Documents\HRS'

In [39]:
# Load the electrode (EL) and optical-mapping (OM) spectrum tables; the first CSV
# column becomes the row index.
# The file paths are built with os.path.join instead of concatenating '\Full ...':
# '\F' is an invalid escape sequence in a non-raw string literal and triggers a
# warning on current Python versions.
el_sig = pd.read_csv(os.path.join(path, 'Full spectrum of EL no noise.csv'), index_col=0)
om_sig = pd.read_csv(os.path.join(path, 'Full spectrum of OM no noise.csv'), index_col=0)

### Feature generation

In [29]:
"""
Function that generates a pd.DataFrame with the number of peaks for a (th*100)% threshold. 

Parameters: 

full_df: DataFrame
Dataframe with Fourier spectrum

th: float, from 0 to 1
threshold

Returns: 

features: DataFrame, shape=(full_df.shape[1]/2, 1)
number of peaks
"""

def number_of_peaks(full_df, th):
    """
    Count the peaks of every spectrum column for a relative threshold.

    Parameters
    ----------
    full_df : DataFrame
        Dataframe with Fourier spectra. Only every second column (starting with
        the first one) is analysed and the last row is left out.
    th : float, from 0 to 1
        Fraction of the column's tallest peak height that is handed to
        ``scipy.signal.find_peaks`` as its ``threshold`` argument.

    Returns
    -------
    DataFrame, shape=(number of analysed columns, 1)
        Peak count per analysed column in a single column named
        ``'#peaks_' + str(th)``. A column for which the count cannot be
        computed (e.g. it has no peak at all) gets 0.
    """
    # NOTE(review): ``threshold`` in find_peaks is the required vertical distance
    # of a peak to its neighbouring samples, not a minimal peak height. Confirm
    # that this (rather than ``height=``) is the intended criterion.
    spectra = full_df[full_df.columns[::2]][:-1]

    # First pass: heights of all peaks (height=0 makes find_peaks report them).
    heights_per_column = [
        find_peaks(spectra[name], height=0)[1]['peak_heights']
        for name in spectra
    ]

    # Second pass: count the peaks that satisfy the relative threshold.
    counts = []
    for position, heights in enumerate(heights_per_column):
        try:
            tallest = np.max(heights)  # raises ValueError when there is no peak
            selected, _ = find_peaks(spectra.iloc[:, position], threshold=th * tallest)
            count = selected.shape[0]
        except ValueError:
            count = 0
        counts.append(count)

    return pd.DataFrame(counts, columns=['#peaks_' + str(th)])

In [30]:
# Preview the peak counts of the optical-mapping spectra for th = 0.1.
number_of_peaks(om_sig, 0.1).head()

Unnamed: 0,#peaks_0.1
0,2
1,2
2,2
3,2
4,2


In [31]:
"""
Function that generates a pd.DataFrame with different properties of the n highest peaks. Properties - values of frequency, 
height, width of the peak at half of the peak height, prominence and existence of second harmonics. 

More details about the properties: https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

Parameters: 

full_df: DataFrame
Dataframe with Fourier spectrum

n: int
number of peaks

Returns: 

features: DataFrame, shape=(full_df.shape[1]/2, n*4 + 2)
properties of the n highest peaks
"""

def properties_of_peaks(full_df, n):
    """
    Build a feature table describing the n highest peaks of every spectrum.

    Parameters
    ----------
    full_df : DataFrame
        Dataframe with Fourier spectra. Columns are read in pairs: every second
        column starting with the first one is a spectrum, the column right after
        it is taken as the matching frequency axis. The last row is left out.
    n : int
        Number of highest peaks to describe.

    Returns
    -------
    features : DataFrame, shape=(number of spectrum columns, n*4 + 2)
        One row per spectrum with the columns
        ``freq i``, ``height i``, ``width i``, ``prominence i`` (i = 0..n-1,
        ordered from the highest peak downwards), ``second harmonics``
        (1 if the ratio of any two of the selected peak frequencies lies strictly
        between 1.9 and 2.1, else 0) and ``Label for OM and EL`` (1 if the
        spectrum has at least n peaks, else 0). Spectra with fewer than n peaks
        get an all-zero row.
    """
    spectra = full_df[full_df.columns[::2]][:-1]
    freq_axes = full_df[full_df.columns[1::2]][:-1]
    n_spectra = spectra.shape[1]

    # Every row starts as the "not enough peaks" row (all zeros, label 0) and is
    # only overwritten as a whole once n peaks are available. This avoids the
    # partially filled frequency list that previously broke the final reshape
    # when a spectrum had some, but fewer than n, peaks.
    freq = np.zeros((n_spectra, n))
    height = np.zeros((n_spectra, n))
    width = np.zeros((n_spectra, n))
    prominence = np.zeros((n_spectra, n))
    second_harmonics = np.zeros(n_spectra, dtype=int)
    el_om_labels = np.zeros(n_spectra, dtype=int)

    for i in range(n_spectra):
        peaks, props = find_peaks(spectra.iloc[:, i], height=0, width=0,
                                  prominence=0, rel_height=0.5)

        # Too few peaks, or no frequency column paired with this spectrum:
        # keep the zero row (same outcome as the former IndexError fallback).
        if len(peaks) < n or i >= freq_axes.shape[1]:
            continue

        # Positions (within `peaks`) of the n highest peaks, highest first.
        top = np.argsort(props['peak_heights'])[:-(n + 1):-1]

        # Peak indices are row positions, so use .iloc explicitly instead of
        # relying on how Series[...] interprets integer keys.
        top_freqs = freq_axes.iloc[:, i].iloc[peaks[top]].to_numpy(dtype=float)

        freq[i] = top_freqs
        height[i] = props['peak_heights'][top]
        width[i] = props['widths'][top]
        prominence[i] = props['prominences'][top]

        # Second-harmonic flag: any pair of selected frequencies with a ratio of
        # 2 +/- 5%. Division by a zero frequency yields inf/nan, which simply
        # fails the comparison; the warnings are silenced on purpose.
        with np.errstate(divide='ignore', invalid='ignore'):
            ratios = np.divide.outer(top_freqs, top_freqs)
        second_harmonics[i] = int(np.any((ratios > 1.9) & (ratios < 2.1)))

        el_om_labels[i] = 1

    features = pd.concat(
        [
            pd.DataFrame(freq, columns=['freq ' + str(k) for k in range(n)]),
            pd.DataFrame(height, columns=['height ' + str(k) for k in range(n)]),
            pd.DataFrame(width, columns=['width ' + str(k) for k in range(n)]),
            pd.DataFrame(prominence, columns=['prominence ' + str(k) for k in range(n)]),
            pd.DataFrame(second_harmonics, columns=['second harmonics']),
            pd.DataFrame(el_om_labels, columns=['Label for OM and EL']),
        ],
        axis=1,
    )

    return features

In [32]:
# Preview the features of the 3 highest peaks for the first 10 optical-mapping spectra.
properties_of_peaks(om_sig, 3).head(10)

Unnamed: 0,freq 0,freq 1,freq 2,height 0,height 1,height 2,width 0,width 1,width 2,prominence 0,prominence 1,prominence 2,second harmonics,Label for OM and EL
0,8.464329,16.928658,8.101572,10.374795,3.31586,2.178117,2.450946,2.342132,0.763458,10.318862,3.270278,0.785858,1,1
1,8.464329,16.928658,8.101572,9.461274,3.385808,1.963774,2.450297,2.369185,0.776503,9.36424,3.348777,0.751408,1,1
2,8.464329,16.928658,8.827086,8.283743,3.562782,1.764353,2.488626,2.503358,0.870345,8.233088,3.544764,1.078912,1,1
3,8.464329,16.928658,8.101572,10.061106,3.975445,2.17193,2.477451,2.42166,0.759426,10.022571,3.970497,0.721759,1,1
4,8.464329,16.928658,8.101572,10.101136,3.772985,2.169996,2.479566,2.382755,0.774018,10.063779,3.75549,0.770464,1,1
5,11.851852,7.407407,10.37037,6.916545,2.11789,2.042651,2.690968,2.586576,3.665881,6.837914,1.965191,1.725456,0,1
6,11.851852,7.407407,10.37037,5.891806,2.558669,2.488401,2.844981,2.563077,4.57055,5.857605,2.398579,2.159726,0,1
7,8.433048,7.407407,11.965812,3.564296,2.941879,2.921768,2.130969,2.341211,2.653561,3.501452,2.419434,2.89101,0,1
8,11.851852,10.37037,13.333333,8.262071,2.204336,1.949323,2.691714,3.128592,3.402426,8.203706,1.87665,1.407809,0,1
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [33]:
"""
Function that generates the final pd.DataFrame with all features

Parameters: 

full_df: DataFrame
Dataframe with Fourier spectrum

n: int
number of peaks

th1: float, from 0 to 1
first threshold

th2: float, from 0 to 1
second threshold

path: str
path to save the matrix

name: str
file name (without the '.csv' extension) that is appended to path

download: bool
save or do not save the feature matrix as a CSV file

Returns: 

features: DataFrame
features for full_df dataframe
"""

def create_feature_df(full_df, n, th1, th2, path, name, download=False):
    """
    Assemble the complete feature matrix for one spectrum dataframe.

    Parameters
    ----------
    full_df : DataFrame
        Dataframe with Fourier spectra; must contain a row labelled 'target'.
    n : int
        Number of highest peaks passed to ``properties_of_peaks``.
    th1, th2 : float, from 0 to 1
        Thresholds passed to ``number_of_peaks`` (one count column each).
    path : str
        Location prefix of the output file.
    name : str
        File name that is appended to ``path``; '.csv' is added automatically.
    download : bool
        If True, the matrix is written to ``path + name + '.csv'``.

    Returns
    -------
    features : DataFrame
        Peak counts for th1 and th2, the peak properties and the target column,
        side by side.
    """
    peak_properties = properties_of_peaks(full_df=full_df, n=n)
    counts_th1 = number_of_peaks(full_df=full_df, th=th1)
    counts_th2 = number_of_peaks(full_df=full_df, th=th2)

    # 'target' row of the spectrum columns (every second column), turned into a
    # one-column frame with a fresh positional index.
    target_row = full_df[full_df.columns[::2]].loc['target']
    target = pd.DataFrame(target_row.reset_index().drop('index', axis=1))

    # The leading empty frame is kept so the result matches concatenating onto
    # an initially empty DataFrame exactly.
    parts = [pd.DataFrame(), counts_th1, counts_th2, peak_properties, target]
    features = pd.concat(parts, axis=1)

    if download == True:
        features.to_csv(path + name + '.csv')
    return features

In [34]:
def electode_optical_matrix(electrode_df, optical_df, path, name, download=False):
    """
    Join electrode and optical feature matrices column-wise.

    The electrode matrix contributes all of its columns except 'target' and
    'Label for OM and EL'; the optical matrix is appended unchanged. Only rows
    whose (optical) 'Label for OM and EL' equals 1 are kept, after which the
    label column is removed and the rows are renumbered from 0.

    Neither input frame is modified (the previous implementation dropped the
    columns from the caller's ``electrode_df`` in place, so a second call with
    the same frame raised a KeyError).

    Parameters
    ----------
    electrode_df : DataFrame
        Electrode features; must contain 'target' and 'Label for OM and EL'.
    optical_df : DataFrame
        Optical features; must contain 'Label for OM and EL'.
    path : str
        Location prefix of the output file.
    name : str
        File name that is appended to ``path``; '.csv' is added automatically.
    download : bool
        If true, the result is written to ``path + name + '.csv'``.

    Returns
    -------
    el_om_features : DataFrame
        Combined feature matrix. Feature names shared by both inputs appear
        twice (electrode columns first).

    Raises
    ------
    KeyError
        If ``electrode_df`` lacks one of the columns that are removed.
    """
    electrode_part = electrode_df.drop(columns=['target', 'Label for OM and EL'])
    el_om_features = pd.concat([electrode_part, optical_df], axis=1)

    # After the drop above, the only remaining label column is the optical one.
    keep_rows = el_om_features['Label for OM and EL'] == 1
    el_om_features = (
        el_om_features[keep_rows]
        .drop(columns=['Label for OM and EL'])
        .reset_index(drop=True)
    )

    if download:
        el_om_features.to_csv(path + name + '.csv')
    return el_om_features

In [44]:
def download_feature_matrices(path, peak_counts=range(3, 6)):
    """
    Generate and save the feature matrices for several numbers of peaks.

    For every peak count the electrode feature matrix and the combined
    electrode + optical matrix are written as CSV files below ``path``; the
    optical matrix on its own is only computed, not written. The spectra are
    taken from the notebook-level ``el_sig`` and ``om_sig`` frames, with
    thresholds 0.05 and 0.1 for the peak counts.

    Parameters
    ----------
    path : str
        Folder prefix for the output files. The file names start with a
        backslash, so ``path`` must not end with a separator.
    peak_counts : iterable of int, optional
        Numbers of highest peaks to generate matrices for (default 3, 4 and 5).
    """
    for i in peak_counts:
        # Raw string literals keep the leading backslash without relying on the
        # invalid '\F' escape sequence of a normal string literal.
        electrode_df = create_feature_df(
            el_sig, n=i, th1=0.05, th2=0.1, path=path,
            name=r'\Feature matrix EL no noise ' + str(i) + ' peaks',
            download=True,
        )
        optical_df = create_feature_df(
            om_sig, n=i, th1=0.05, th2=0.1, path=path,
            name=r'\Feature matrix OM ' + str(i) + ' peaks',
            download=False,
        )
        electode_optical_matrix(
            electrode_df, optical_df,
            path=path,
            name=r'\Feature matrix EL+OM no noise ' + str(i) + ' peaks',
            download=True,
        )

In [45]:
# NOTE: this rebinds the global `path` (so far the input-data folder) to the output folder.
path = r'C:\Users\Public\Documents\HRS\Features new'
# Write the feature matrices into that folder.
download_feature_matrices(path)

-------------------------------------------------------