In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from scipy.signal import lfilter, butter

import os
import glob

from tqdm import tqdm
%matplotlib inline

# Upload the data 

In [45]:
path = r'C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files'
os.listdir(path)

['H72_F75_Electrode_KH_OM_KH.csv',
 'H79_F108_csv_annot_OM_KH.csv',
 'H79_F125_csv_annot MISSING H8_OM_KH.csv',
 'H81_F158_csv_annot_KH.csv',
 'H81_F171_csv_annot_OM_KH.csv',
 'H82_F111_csv_annot_OM_KH.csv',
 'H96_F61_csv_annot_OM_KH.csv',
 'H96_F88_csv_annot_OM_KH.csv']

In [47]:
data_list = []
for filename in glob.glob(os.path.join(path, '*.csv')):
#     if filename != path + '\H72_F75_Electrode_KH_OM_KH.csv':
    data = pd.read_csv(filename, header=0)
    print(filename, data.shape)
    data_list.append(data)

C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H72_F75_Electrode_KH_OM_KH.csv (12207, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H79_F108_csv_annot_OM_KH.csv (8270, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H79_F125_csv_annot MISSING H8_OM_KH.csv (8775, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H81_F158_csv_annot_KH.csv (8138, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H81_F171_csv_annot_OM_KH.csv (10173, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H82_F111_csv_annot_OM_KH.csv (10173, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H96_F61_csv_annot_OM_KH.csv (8191, 384)
C:\Users\ecath\Desktop\Research\Raw Data\Ex Vivo CSV Files\H96_F88_csv_annot_OM_KH.csv (8191, 384)


# Division by classes 

In [48]:
def class_splitting(data_list):
    
    drivers = []
    nondrivers = []
    noises = []

    for df in data_list:
        driver = pd.DataFrame()
        nondriver = pd.DataFrame()
        noise = pd.DataFrame()
        for col in df.columns[1::3]:
            curr_index = list(df.columns).index(col)
            prev_index = curr_index - 1
            next_index = curr_index + 1
            prev_col = df.iloc[:,prev_index]
            next_col = df.iloc[:,next_index]
            if df[col][0] == 1:
                driver[df.columns[prev_index]] = prev_col
                driver[df.columns[next_index]] = next_col
            elif df[col][0] == 0:
                nondriver[df.columns[prev_index]] = prev_col
                nondriver[df.columns[next_index]] = next_col
            elif df[col][0] == -1:
                noise[df.columns[prev_index]] = prev_col
                noise[df.columns[next_index]] = next_col
        drivers.append(driver)
        nondrivers.append(nondriver)
        noises.append(noise)
        
    return(drivers, nondrivers, noises)

In [49]:
drivers, nondrivers, noises = class_splitting(data_list)

# Butterworth highpass for 1Hz

In [50]:
def butter_highpass(data, highcut, fs, order=2):
    
    nyq = 0.5 * fs
    high = highcut / nyq
    b, a = butter(order, high, btype='highpass', analog=False)
    y = lfilter(b, a, data)
    
    return y

# Fourier transform

In [51]:
# function that delete NaN and fully zero columns
def del_nul_and_nan(df):
    
    df.fillna(value=0, axis=1, inplace=True)
    
#################add to drop off zero columns######################    
#     for col in df.columns:
#         if df[col].sum() == 0:
#             df.drop([col], axis = 1, inplace = True)  
#################add to drop off zero columns######################    
    
    return df

In [52]:
#create DataFrame with signal spectrum cutted on 20Hz for one dataframe from one class

def spec_and_freq_for_single_df(df, N, F, low_freq=1.0):
    
    fft_out = pd.DataFrame()
    
    for col in df.columns:
        filtered_yf = butter_highpass(df[col], low_freq, F, order=2)
        fft_yf = np.fft.fft(filtered_yf) #spectrum
        fft_xf = np.fft.fftfreq(N, 1/F) #frequencies
        
        fft_20_index = np.argwhere((fft_xf < 20) & (fft_xf > 0))        
        fft_yf_20 = fft_yf[fft_20_index] #cutting on 20Hz
        fft_xf_20 = fft_xf[fft_20_index] #cutting on 20Hz
        
        fft_yf_20 = pd.DataFrame(np.abs(fft_yf_20) / N, columns=[col + '_yf'])
        fft_xf_20 = pd.DataFrame(fft_xf_20, columns=[col + '_xf']) 

        fft_out = pd.concat([fft_out, fft_yf_20, fft_xf_20], axis=1)
        
    return(fft_out)

In [54]:
def el_n_om_spec_n_freq(class_data_list):
    
    all_fft_el = []
    all_fft_om = []

    for k, df in enumerate(class_data_list):

        electrode_signal = df[df.columns[::2]]
        optical_signal = df[df.columns[1::2]]

        electrode_signal = del_nul_and_nan(electrode_signal)
        optical_signal = del_nul_and_nan(optical_signal)        

        N = df.shape[0] # Number of sample points
        Fs_el = 1017.25 # sampling rate
        Fs_om = 1000.0

        fft_el = spec_and_freq_for_single_df(electrode_signal, N, Fs_el, low_freq=1.0)
        fft_om = spec_and_freq_for_single_df(optical_signal, N, Fs_om, low_freq=1.0)
        
        all_fft_el.append(fft_el)
        all_fft_om.append(fft_om)
        
    return(all_fft_el, all_fft_om)

In [55]:
def add_target_and_concat_electrode(class_data_list, label):
    
    all_fft_el, _ = el_n_om_spec_n_freq(class_data_list)
    
    m = []
    
    for i in range(len(all_fft_el)):
        m.append(all_fft_el[i].shape[0])

    for i in tqdm(range(len(all_fft_el))):
        mx = np.max(m) - all_fft_el[i].shape[0]

        zeros = np.zeros(shape=(mx, all_fft_el[i].shape[1]))
        zeros = pd.DataFrame(zeros, columns=all_fft_el[i].columns, index=np.arange(all_fft_el[i].shape[0], np.max(m)))
        
        all_fft_el[i] = all_fft_el[i].append(zeros)
        
        target = np.full(shape=(1, all_fft_el[i].shape[1]), fill_value = label)
        target = pd.DataFrame(target, columns=all_fft_el[i].columns, index=['target'])

        all_fft_el[i] = all_fft_el[i].append(target)
    
    all_fft = pd.DataFrame()
    
    for i in range(len(all_fft_el)):
        all_fft = pd.concat([all_fft, all_fft_el[i]], axis=1)

    return(all_fft)

In [56]:
def add_target_and_concat_optical(dataframe, label):
   
    _, all_fft_om = el_n_om_spec_n_freq(dataframe)
    
    m = []
    
    for i in range(len(all_fft_om)):
        m.append(all_fft_om[i].shape[0])

    for i in tqdm(range(len(all_fft_om))):
        mx = np.max(m) - all_fft_om[i].shape[0]

        zeros = np.zeros(shape=(mx, all_fft_om[i].shape[1]))
        zeros = pd.DataFrame(zeros, columns=all_fft_om[i].columns, index=np.arange(all_fft_om[i].shape[0], np.max(m)))
        
        all_fft_om[i] = all_fft_om[i].append(zeros)

        target = np.full(shape=(1, all_fft_om[i].shape[1]), fill_value = label)
        target = pd.DataFrame(target, columns=all_fft_om[i].columns, index=['target'])

        all_fft_om[i] = all_fft_om[i].append(target)
    
    all_fft = pd.DataFrame()
    
    for i in range(len(all_fft_om)):
        all_fft = pd.concat([all_fft, all_fft_om[i]], axis=1)

    return(all_fft)

# Concat dataframes with spectrum for each class and download them

In [57]:
def spectrum(signal_type, path, name):
    if signal_type == 'electrode':
        driver_spectrum = add_target_and_concat_electrode(drivers, 1)
        nondriver_spectrum = add_target_and_concat_electrode(nondrivers, 0)

        full_spectrum = pd.concat([driver_spectrum, nondriver_spectrum], axis=1)
        full_spectrum.to_csv(path + name, sep=',', index=True)
    
    if signal_type == 'optical':
        driver_spectrum = add_target_and_concat_optical(drivers, 1)
        nondriver_spectrum = add_target_and_concat_optical(nondrivers, 0)

        full_spectrum = pd.concat([driver_spectrum, nondriver_spectrum], axis=1)
        full_spectrum.to_csv(path + name, sep=',', index=True)
    

In [58]:
path = r'C:\Users\Public\Documents\HRS'
spectrum('electrode', path, '\Full spectrum of EL no noise.csv')
spectrum('optical', path, '\Full spectrum of OM no noise.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 520.30it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 260.15it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 244.38it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 199.13it/s]
