In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Number of samples per unit of time; sample positions are divided by this
# value to build the time axes below.
# NOTE(review): presumably the video frame rate in frames per second - confirm.
sample_freq = 30

In [None]:
def preprocessing(signal, accuracy, sigma):
    """Keep confident, non-outlier ``y`` samples and attach their z-scores.

    Parameters
    ----------
    signal : pandas.DataFrame
        Table with at least the columns ``"y"`` and ``"likelihood"``.
    accuracy : float
        Rows are kept only when ``likelihood`` is strictly greater than this.
    sigma : float
        Rows are kept only when the z-score of ``y`` lies strictly between
        ``-sigma`` and ``+sigma``.

    Returns
    -------
    pandas.DataFrame
        Columns ``"y"`` and ``"zscore"``; the index of the surviving rows is
        the one they had in ``signal``.

    Notes
    -----
    The mean and standard deviation used for the z-score are computed after
    the likelihood filter, i.e. only from the confident rows.
    """
    # Validate both required columns up front (KeyError if one is missing).
    tracked = signal.loc[:, ["y", "likelihood"]]
    confident = tracked.loc[tracked["likelihood"] > accuracy, ["y"]]

    # Standardise y and store it under its own column name before joining.
    standardised = (confident - confident.mean()) / confident.std()
    scored = confident.join(standardised.rename(columns={'y': 'zscore'}))

    # Strict inequalities on both sides; NaN z-scores fail both comparisons.
    inside_band = (scored["zscore"] > -sigma) & (scored["zscore"] < sigma)
    return scored[inside_band]

In [None]:
# Tracking CSV analysed in this notebook.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# elsewhere until this points at a local copy of the file.
data_path = 'C:/users/aaron/JupyterNotebook/W. Zhang Lab/projects/heartbeat/experiments/TimeSeriousSignalClustering/2022-01-22 12-16-24 00_10_00-00_11_00DLC_resnet50_0122_thirsty_1Jan23shuffle1_20000.csv'

# header=2 makes the third line of the file the column header row.
# Only rows with likelihood > 0.8 and a y z-score strictly inside (-3, 3) are kept.
data = preprocessing(pd.read_csv(data_path, header=2), accuracy=0.8, sigma=3)
data

In [None]:
# Preview of the first 100 retained rows; x is the original row index scaled
# by sample_freq, so rows removed in preprocessing show up as gaps in x.
plt.figure(figsize=(10, 3), dpi=144)
plt.plot(data.index[:100] / sample_freq, data["y"].iloc[:100], linewidth=0.5)
plt.show()

In [None]:
from scipy import signal

In [None]:
# find_peaks returns (indices, properties); only the indices are kept.
# They are positions within data.y (0 .. len(data)-1), not index labels.
# distance=3 asks for neighbouring peaks to be at least 3 samples apart.
peaks = signal.find_peaks(data["y"], distance=3)[0].astype(int)
peaks.shape

In [None]:
# Full trace with the detected peaks overlaid in red.
# Both the trace and the markers use the zero-based position within `data`
# divided by sample_freq, so each marker sits exactly on the sample it refers
# to (the trace was previously drawn one sample later than the markers).
plt.figure(figsize=[10,3], dpi=144)
plt.plot(np.arange(len(data))/sample_freq, data.y, linewidth=0.3)
plt.scatter(peaks/sample_freq, data.iloc[peaks].y, c='r', s=0.5)
plt.show()

In [None]:
def getIntervals(arr):
    """Return, for every entry of ``arr``, the mean spacing to its neighbours.

    Parameters
    ----------
    arr : 1-D array-like with at least two entries
        Positions (e.g. peak times) in ascending order.

    Returns
    -------
    numpy.ndarray of float, same length as ``arr``
        * first entry: distance to the next entry,
        * last entry: distance to the previous entry,
        * every other entry: average of the distance to the previous and to
          the next entry.

    Raises
    ------
    AssertionError
        If ``arr`` is not one-dimensional or has fewer than two entries.

    Notes
    -----
    The input is never modified and the result is always floating point, so
    integer input does not truncate the averaged values.
    """
    arr = np.asarray(arr)
    assert(len(arr.shape) == 1 and len(arr) > 1)

    # gaps[i] is the distance between entry i and entry i + 1.
    gaps = np.diff(arr.astype(float))

    out = np.empty(len(arr), dtype=float)
    out[0] = gaps[0]        # only a right-hand neighbour exists
    out[-1] = gaps[-1]      # only a left-hand neighbour exists
    # Interior entries: mean of the gap before and the gap after.
    out[1:-1] = (gaps[:-1] + gaps[1:]) / 2
    return out

In [None]:
def findBottoms(wave, peaks):
    """Locate the lowest sample in every segment delimited by ``peaks``.

    The wave is cut into ``len(peaks) + 1`` consecutive segments:
    start -> first peak, peak -> next peak, ..., last peak -> end of wave.
    Each segment starts at its left boundary (inclusive) and stops just
    before its right boundary; the final segment runs to the last sample
    inclusive.

    Parameters
    ----------
    wave : 1-D array-like
        Signal values.
    peaks : 1-D array-like of int
        Ascending positions of the peaks within ``wave``.

    Returns
    -------
    numpy.ndarray of int, length ``len(peaks) + 1``
        Position within ``wave`` of the minimum of each segment. With no
        peaks this is a one-element array holding the position of the global
        minimum.
    """
    wave = np.asarray(wave)
    peaks = np.asarray(peaks, dtype=int)

    if len(peaks) == 0:
        # Single segment covering the whole wave: return the *position* of
        # the minimum, as an array, so callers can slice it like any other result.
        return np.array([np.argmin(wave)], dtype=int)

    # Segment boundaries; len(wave) as the last edge keeps the final sample
    # inside the last segment.
    edges = np.concatenate([[0], peaks, [len(wave)]])
    bottoms = np.array(
        [lo + np.argmin(wave[lo:hi]) for lo, hi in zip(edges[:-1], edges[1:])],
        dtype=int,
    )
    return bottoms
            
def getHeartBeats(wave, min_distance):
    """Describe every detected peak of ``wave`` by its flanking minima.

    Peaks are found with ``scipy.signal.find_peaks(wave, distance=min_distance)``
    and the minima between them with ``findBottoms``.

    Parameters
    ----------
    wave : 1-D array-like
        Signal values. The input is converted to a NumPy array first, so a
        pandas Series is indexed by position rather than by label.
    min_distance : number
        Passed to ``find_peaks`` as ``distance`` (minimum number of samples
        between neighbouring peaks).

    Returns
    -------
    pandas.DataFrame
        One row per peak with the columns

        * ``left_width``   - samples from the preceding minimum to the peak
        * ``right_width``  - samples from the peak to the following minimum
        * ``left_height``  - peak value minus the preceding minimum's value
        * ``right_height`` - peak value minus the following minimum's value
        * ``peak_y``       - value of the wave at the peak
    """
    # Positional indexing is required below; label-based Series indexing
    # would align the subtractions on index and produce NaN.
    wave = np.asarray(wave)

    peaks = signal.find_peaks(wave, distance=min_distance)[0].astype(int)
    bottoms = findBottoms(wave, peaks)

    # bottoms has one more entry than peaks: entry i precedes peak i and
    # entry i + 1 follows it.
    before, after = bottoms[:-1], bottoms[1:]
    peak_y = wave[peaks]

    df = pd.DataFrame({
        'left_width': peaks - before,
        'right_width': after - peaks,
        'left_height': peak_y - wave[before],
        'right_height': peak_y - wave[after],
        'peak_y': peak_y})
    return df
    

In [None]:
# Per-peak shape features of the cleaned y trace, using the same 3-sample
# minimum peak spacing as the earlier find_peaks call. The column is handed
# over as a plain NumPy array.
heart_data = getHeartBeats(data["y"].to_numpy(), min_distance=3)
heart_data

In [None]:
# One scatter plot per (x, y) feature pair, each shown as its own figure.
for x_col, y_col in [
    ('left_width', 'left_height'),
    ('right_width', 'right_height'),
    ('peak_y', 'right_height'),
    ('peak_y', 'left_height'),
]:
    sns.scatterplot(data=heart_data, x=x_col, y=y_col)
    plt.show()

In [None]:
# peak_widths measures each peak at rel_height=0.5 and returns the widths, the
# heights at which they were measured, and the interpolated left/right
# crossing positions - all expressed in samples.
widths, width_heights, left_ips, right_ips = signal.peak_widths(data.y, peaks, rel_height=0.5)

# Sample-based quantities are rescaled by 1000 / sample_freq.
# NOTE(review): that is milliseconds if sample_freq is in Hz - confirm.
wave_data = pd.DataFrame(dict(
    idx=peaks,
    width=1000*widths/sample_freq,
    height=width_heights,
    left_ip=1000*left_ips/sample_freq,
    right_ip=1000*right_ips/sample_freq,
    t=1000*peaks/sample_freq,
    interval=getIntervals(1000*peaks/sample_freq),
))
wave_data

In [None]:
# Distribution of the peak widths (200 bins).
plt.figure(figsize=(10, 3), dpi=144)
wave_data["width"].hist(bins=200)
plt.show()

# Distribution of the peak-to-peak intervals (20 bins).
plt.figure(figsize=(10, 3), dpi=144)
wave_data["interval"].hist(bins=20)
plt.show()

In [None]:
# Peak width in detection order; the y axis is clipped to 0-600 so larger
# widths run off the top of the plot.
plt.figure(figsize=(10, 3), dpi=144)
wave_data["width"].plot(linewidth=0.5)
plt.ylim(0, 600)
plt.show()

# Peak-to-peak interval in detection order (no clipping).
plt.figure(figsize=(10, 3), dpi=144)
wave_data["interval"].plot(linewidth=0.5)
plt.show()

In [None]:
# Trace with the unusually wide peaks (width > 2000) highlighted in red.
# The rows of the wide peaks are selected once and reused for both axes, and
# their x positions are scaled by sample_freq like the trace itself instead
# of a hard-coded 30.
wide_peak_rows = data.iloc[wave_data.loc[wave_data.width > 2000, 'idx']]

plt.figure(figsize=[10,3], dpi=144)
plt.plot(data.index/sample_freq, data.y, linewidth=0.5)
plt.scatter(wide_peak_rows.index/sample_freq, wide_peak_rows.y, c='r', s=3)
plt.show()

In [None]:
# Summary statistics of the peak widths. 'std' is the sample standard
# deviation (it previously repeated the mean).
wave_stats = pd.Series({
    'mean': wave_data.width.mean(),
    'std': wave_data.width.std(),
    'median': wave_data.width.median(),
})
wave_stats