In [14]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline


from sklearn.preprocessing import StandardScaler
from scipy.signal import find_peaks
from skimage.restoration import denoise_tv_chambolle, denoise_wavelet
import os

import imageio

from tqdm import tqdm_notebook

import warnings
warnings.filterwarnings("ignore")

In [15]:
# NOTE(review): absolute, machine-specific location of the raw data; it has to
# be adjusted before the notebook can run on another machine.
path = r'C:\Users\ecath\Desktop\Research\Raw Data'

# The file paths are assembled with os.path.join instead of being concatenated
# from non-raw string literals: a backslash followed by 'S' is an invalid escape
# sequence that only works by accident and triggers a SyntaxWarning on recent
# Python versions.
# index_col=0 uses the first CSV column as the row index.
spec_el_ = pd.read_csv(os.path.join(path, 'Spectrum LD', 'Spectrum of electrode LD.csv'), index_col=0)
spec_opt_ = pd.read_csv(os.path.join(path, 'Spectrum LD', 'Spectrum of optical LD.csv'), index_col=0)

In [16]:
def scaling(df):
    """Return a copy of ``df`` whose '_yf' columns are standardised.

    Every column whose name contains '_yf' is standardised with
    ``StandardScaler`` using all rows except the last one. Zeros in those
    columns are treated as missing values: they are replaced by NaN before the
    scaler is fitted and written back as 0 afterwards. All other columns are
    returned unchanged.

    The input frame is never modified; callers must use the return value.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with string column names; the last row is excluded from scaling.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df``. Because the
        scaled values are aligned on the index, the last row of every '_yf'
        column is NaN in the result. If ``df`` has no '_yf' column, an
        unchanged copy is returned.
    """
    y_col = [col for col in df.columns if '_yf' in col]

    # Work on a copy so that the caller's frame is left untouched.
    scaled = df.copy()
    if not y_col:
        # Nothing to scale; StandardScaler would raise on a zero-column array.
        return scaled

    # Drop the last row and mark zeros as missing so they do not influence the
    # fitted mean / standard deviation.
    d = scaled[y_col].iloc[:-1].replace(0, np.nan)

    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(d.values)
    df_ = pd.DataFrame(scaled_features, columns=d.columns, index=d.index)

    # Missing values go back to 0; the assignment aligns on the index, so the
    # excluded last row becomes NaN in the '_yf' columns.
    scaled[y_col] = df_.fillna(value=0)
    return scaled

In [17]:
# Preview of the standardised driver traces: keep only the columns whose
# 'target' row equals 1, then scale their '_yf' columns and display the result.
df = spec_el_
drivers = df.T[df.T.target == 1].T
scaling(drivers)

Unnamed: 0,1_Bsk_1_F7_yf,1_Bsk_1_F7_xf,1_Bsk_1_F8_yf,1_Bsk_1_F8_xf,1_Bsk_1_G6_yf,1_Bsk_1_G6_xf,1_Bsk_1_G7_yf,1_Bsk_1_G7_xf,1_Bsk_1_G8_yf,1_Bsk_1_G8_xf,...,H96_F88_Bsk_1_H6_yf,H96_F88_Bsk_1_H6_xf,H96_F88_Bsk_1_H7_yf,H96_F88_Bsk_1_H7_xf,H96_F88_Bsk_2_E3_yf,H96_F88_Bsk_2_E3_xf,H96_F88_Bsk_2_F3_yf,H96_F88_Bsk_2_F3_xf,H96_F88_Bsk_2_G3_yf,H96_F88_Bsk_2_G3_xf
0,-1.291127,1.099946,-1.099846,1.099946,-1.046409,1.099946,-1.106613,1.099946,-1.070817,1.099946,...,-0.395176,1.117721,-0.359503,1.117721,-0.558461,1.117721,-0.312555,1.117721,-0.489812,1.117721
1,-1.272198,1.199941,-1.111534,1.199941,-1.006539,1.199941,-1.122157,1.199941,-1.075725,1.199941,...,-0.454204,1.241912,-0.328145,1.241912,-0.393208,1.241912,-0.302892,1.241912,-0.338838,1.241912
2,-1.281188,1.299936,-1.059462,1.299936,-1.009254,1.299936,-1.099897,1.299936,-1.104586,1.299936,...,-0.430619,1.366103,-0.354270,1.366103,-0.350013,1.366103,-0.305570,1.366103,-0.264345,1.366103
3,-1.297038,1.399931,-1.098634,1.399931,-1.034033,1.399931,-1.105758,1.399931,-1.090738,1.399931,...,-0.464070,1.490294,-0.362487,1.490294,-0.532979,1.490294,-0.306378,1.490294,-0.323644,1.490294
4,-1.199581,1.499926,-1.067413,1.499926,-0.987901,1.499926,-1.098889,1.499926,-1.018539,1.499926,...,-0.418477,1.614485,-0.421268,1.614485,-0.395821,1.614485,-0.297293,1.614485,-0.412340,1.614485
5,-1.250774,1.599921,-1.103858,1.599921,-1.069877,1.599921,-1.069169,1.599921,-1.074411,1.599921,...,-0.202318,1.738677,-0.461780,1.738677,-0.301882,1.738677,-0.279178,1.738677,-0.284840,1.738677
6,-1.100144,1.699916,-1.077682,1.699916,-1.025581,1.699916,-1.091084,1.699916,-0.986796,1.699916,...,-0.358006,1.862868,-0.442283,1.862868,-0.486050,1.862868,-0.288676,1.862868,-0.376855,1.862868
7,-1.238088,1.799912,-0.964220,1.799912,-1.006038,1.799912,-1.061373,1.799912,-0.931795,1.799912,...,-0.252649,1.987059,-0.312972,1.987059,-0.514868,1.987059,-0.279262,1.987059,-0.467857,1.987059
8,-1.138315,1.899907,-0.964478,1.899907,-0.997650,1.899907,-1.092279,1.899907,-0.878207,1.899907,...,-0.180904,2.111250,-0.243093,2.111250,-0.309901,2.111250,-0.262661,2.111250,-0.097896,2.111250
9,-1.236461,1.999902,-1.087805,1.999902,-0.893469,1.999902,-1.034889,1.999902,-1.028897,1.999902,...,-0.122811,2.235441,-0.402044,2.235441,-0.401376,2.235441,-0.259625,2.235441,-0.614476,2.235441


In [18]:
def make_subdf(df, sc=False):
    """Split ``df`` into all / driver / non-driver columns and drop the 'target' row.

    The row labelled 'target' flags every column: columns where it equals 1
    form the ``drivers`` subset, columns where it equals 0 the ``nondrivers``
    subset.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a row labelled 'target'. It is not modified.
    sc : bool, default False
        When true, each of the three frames is passed through ``scaling``
        independently before the 'target' row is removed.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df, drivers, nondrivers)``, each without the 'target' row.
    """
    # Transpose once so the 'target' row can be used as a boolean row filter.
    transposed = df.transpose()
    drivers = transposed[transposed.target == 1].transpose()
    nondrivers = transposed[transposed.target == 0].transpose()

    if sc:
        # The two subsets are already fresh frames; the full table is copied
        # first so the caller's frame is untouched even if scaling works in place.
        drivers = scaling(drivers)
        nondrivers = scaling(nondrivers)
        df = scaling(df.copy())

    drivers = drivers.drop(['target'])
    nondrivers = nondrivers.drop(['target'])
    df = df.drop(['target'])

    return (df, drivers, nondrivers)



# Split each spectrum table into (all columns, drivers, nondrivers); sc=True
# requests standardised '_yf' columns, and the 'target' row is removed from
# every returned frame.
spec_el, drivers, nondrivers = make_subdf(spec_el_, sc=True)
spec_opt, drivers_opt, nondrivers_opt = make_subdf(spec_opt_, sc=True)

In [20]:
def max_peak_to_noise(df):
    """Compute a peak-to-spread ratio for every '_yf' column of ``df``.

    For each column whose name contains '_yf':

    1. samples equal to 0 are discarded;
    2. local maxima with a height of at least 0 are located with
       ``scipy.signal.find_peaks``;
    3. the mean height of the two tallest peaks (or of the single peak, if
       only one exists) is divided by the standard deviation of the remaining
       samples (pandas ``Series.std``), rounded to two decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with string column names.

    Returns
    -------
    pandas.DataFrame
        One column named 'SNR', indexed by the '_yf' column names. The value
        is 0 when the standard deviation is exactly 0 and NaN when no peak is
        found or the standard deviation is undefined.
    """
    y_col = [col for col in df.columns if '_yf' in col]
    y_df = df[y_col]
    snr = []

    for col in y_col:
        column = y_df[col]
        s = column[column != 0]

        sd = s.std(axis=0)
        _, properties = find_peaks(s, height=0)
        heights = properties['peak_heights']

        if sd == 0:
            # No spread at all: report 0 instead of dividing by zero.
            ratio = 0.0
        elif heights.size == 0:
            # No peak detected: the ratio is undefined.
            ratio = np.nan
        else:
            # Average the two tallest peaks to damp a single outlier.
            mean_max = np.mean(np.sort(heights)[-2:])
            ratio = np.round(mean_max / sd, 2)
        snr.append(ratio)

    snr = pd.DataFrame(snr, columns=['SNR'], index=y_df.columns)

    return snr

# Display the per-column peak-to-noise table for the scaled electrode spectra.
max_peak_to_noise(spec_el)

Unnamed: 0,SNR
1_Bsk_1_F7_yf,3.89
1_Bsk_1_F8_yf,4.62
1_Bsk_1_G6_yf,4.78
1_Bsk_1_G7_yf,5.01
1_Bsk_1_G8_yf,4.43
1_Bsk_1_H6_yf,3.53
1_Bsk_1_H7_yf,3.61
1_Bsk_1_H8_yf,3.74
10_Epi_Bsk_1_D4_yf,8.68
10_Epi_Bsk_1_E4_yf,5.14
