In [171]:
import os
import random
import numpy as np
import pandas as pd
from tensorflow import keras
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
# from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import f1_score, log_loss, accuracy_score
import matplotlib
from scipy import signal as sig

In [183]:
# Read one comma-separated sensor log into `raw_data` (columns: timestamp, BVP, Temp).
# Alternative input kept from the original cell:
#   '/home/taehoon/cs565/modeling/data/arduino_ppg/stress_2.log'
log_path = '/home/taehoon/cs565/modeling/data/arduino_ppg/mixed_hs.log'

# The context manager closes the handle even if reading raises.
with open(log_path, 'r') as f:
    line = f.readlines()

# The first line of the log is skipped (presumably a header or a partial
# first record -- TODO confirm against the logger).
line = line[1:]

signal = []
for l in line:
    l = l.split('\n')[0]
    # Skip blank lines (e.g. a trailing newline at the end of the log); they
    # would otherwise yield a ragged list and break the column slicing below.
    if not l.strip():
        continue
    signal.append(l.split(','))

# Convert once instead of rebuilding the array for every column.
signal_arr = np.array(signal)
raw_data = pd.DataFrame(
    {
        'timestamp': signal_arr[:, 0],
        'BVP': signal_arr[:, 1],
        'Temp': signal_arr[:, 2],
    },
    dtype='float',
)

In [184]:
# Windowing configuration shared by the feature-extraction cells.

# Window length; the windowing cells multiply it by 1000 to obtain `wnd_ms`.
window_size = 30
# Fraction of each window shared with the next one (the step between window
# ends is wnd_ms * (1 - overlap_ratio)).
overlap_ratio = 0.9
# Per-sensor sampling rate. 1000.0 / 15.0 presumably means one sample every
# 15 ms -- TODO confirm against the logger.
hz = dict(BVP=1000.0 / 15.0, TEMP=1000.0 / 15.0)

In [185]:
# Minimum spacing between detected BVP peaks, derived from a 200-per-60 s
# upper bound on the beat rate.
max_beat_per_sec = 200 / 60
# Shortest interval between two consecutive beats, in seconds.
min_sec_per_beat = 1 / max_beat_per_sec
# The same interval expressed in BVP samples. int() truncates, so the
# arithmetic is kept exactly as written to avoid shifting the result by one.
dist = int(min_sec_per_beat * hz["BVP"])

In [186]:
# Window length on the timestamp axis (assumes timestamps are in
# milliseconds -- TODO confirm).
wnd_ms = window_size * 1000
# Optional trimming step left disabled in the original cell:
# raw_data = raw_data[1000:]

# First and last timestamp of the recording.
startT, endT = raw_data['timestamp'].min(), raw_data['timestamp'].max()

# Each entry of WINDOWS is the END time of one window. The first window ends
# one full window after the start; consecutive ends are spaced by the
# non-overlapping part of a window.
WINDOWS = np.arange(
    startT + wnd_ms,
    endT,
    wnd_ms * (1 - overlap_ratio),
)
# Column names of the feature table: four summary statistics per sensor, one
# extra peak-frequency column for BVP, and a trailing label column.
base_stats = ["min", "max", "mean", "std"]

features = []
for sensor in ("BVP", "TEMP"):
    feature_list = (base_stats + ["peak_f"]) if sensor == "BVP" else list(base_stats)
    features.extend(f"{sensor}_{feature}" for feature in feature_list)
features.append("labels")

In [187]:
# Sanity check: show the sample count of each channel side by side.
tuple(raw_data[column].shape for column in ('BVP', 'Temp'))

((16640,), (16640,))

In [188]:
# Build the feature table for the currently loaded recording: one row per
# sliding window, labelled 0.
filtered_data = pd.DataFrame({"timestamp": raw_data['timestamp'], "BVP": raw_data['BVP'], "TEMP": raw_data['Temp']})
# Peak-detection threshold: the mean BVP level over the whole recording.
height = raw_data['BVP'].mean()

# Collect rows in a plain list and build the frame once at the end instead of
# enlarging the DataFrame on every iteration.
rows = []
for w in WINDOWS:
    # `w` is the end time of the window; the start lies one window length earlier.
    win_start, win_end = w - wnd_ms, w

    # Samples with win_start < timestamp <= win_end.
    in_window = (filtered_data["timestamp"] > win_start) & (filtered_data["timestamp"] <= win_end)
    data_wnd = filtered_data.loc[in_window]
    bvp_sig_wnd, temp_sig_wnd = data_wnd['BVP'], data_wnd['TEMP']

    # Peaks above `height` that are at least `dist` samples apart, divided by
    # the window length to give a per-window_size-unit rate.
    peaks = sig.find_peaks(bvp_sig_wnd, height=height, distance=dist)[0]
    n_peak = len(peaks) / window_size

    # Order must match `features`: BVP stats, BVP peak rate, TEMP stats, label.
    rows.append([
        bvp_sig_wnd.min(),
        bvp_sig_wnd.max(),
        bvp_sig_wnd.mean(),
        bvp_sig_wnd.std(),
        n_peak,
        temp_sig_wnd.min(),
        temp_sig_wnd.max(),
        temp_sig_wnd.mean(),
        temp_sig_wnd.std(),
        0,
    ])

# Explicit float dtype keeps every column (including `labels`) numeric, which
# matches how the table was displayed before.
FEATURES_TIME = pd.DataFrame(rows, columns=features, dtype=float)

display(FEATURES_TIME)

Unnamed: 0,BVP_min,BVP_max,BVP_mean,BVP_std,BVP_peak_f,TEMP_min,TEMP_max,TEMP_mean,TEMP_std,labels
0,472.0,590.0,511.574393,31.348438,1.600000,31.58,38.35,34.126159,0.694451,0.0
1,472.0,590.0,511.882145,31.126257,1.600000,31.58,38.35,34.126808,0.691796,0.0
2,472.0,590.0,511.801720,30.556729,1.600000,31.58,36.09,34.116540,0.688379,0.0
3,472.0,589.0,511.670547,30.045978,1.600000,31.58,36.09,34.118968,0.688283,0.0
4,472.0,589.0,511.694332,29.541384,1.633333,31.58,35.45,34.115420,0.683808,0.0
...,...,...,...,...,...,...,...,...,...,...
70,469.0,595.0,511.024304,30.335767,1.466667,29.65,36.74,34.145478,0.707745,0.0
71,469.0,595.0,511.807186,30.226282,1.500000,29.65,36.74,34.154722,0.708992,0.0
72,469.0,591.0,511.485063,29.640358,1.500000,29.65,36.74,34.149646,0.713148,0.0
73,469.0,590.0,511.402532,29.477371,1.533333,29.65,36.74,34.144749,0.712971,0.0


In [178]:
# Read one comma-separated sensor log into `raw_data` (columns: timestamp, BVP, Temp).
# Alternative input kept from the original cell:
#   '/home/taehoon/cs565/modeling/data/arduino_ppg/baseline_2.log'
log_path = '/home/taehoon/cs565/modeling/data/arduino_ppg/stress_2.log'

# The context manager closes the handle even if reading raises.
with open(log_path, 'r') as f:
    line = f.readlines()

# The first line of the log is skipped (presumably a header or a partial
# first record -- TODO confirm against the logger).
line = line[1:]

signal = []
for l in line:
    l = l.split('\n')[0]
    # Skip blank lines (e.g. a trailing newline at the end of the log); they
    # would otherwise yield a ragged list and break the column slicing below.
    if not l.strip():
        continue
    signal.append(l.split(','))

# Convert once instead of rebuilding the array for every column.
signal_arr = np.array(signal)
raw_data = pd.DataFrame(
    {
        'timestamp': signal_arr[:, 0],
        'BVP': signal_arr[:, 1],
        'Temp': signal_arr[:, 2],
    },
    dtype='float',
)

In [179]:
# Window length on the timestamp axis (assumes timestamps are in
# milliseconds -- TODO confirm).
wnd_ms = window_size * 1000
# Optional trimming step left disabled in the original cell:
# raw_data = raw_data[1000:]

# First and last timestamp of the recording that is currently loaded.
startT, endT = raw_data['timestamp'].min(), raw_data['timestamp'].max()

# Each entry of WINDOWS is the END time of one window. The first window ends
# one full window after the start; consecutive ends are spaced by the
# non-overlapping part of a window.
WINDOWS = np.arange(
    startT + wnd_ms,
    endT,
    wnd_ms * (1 - overlap_ratio),
)

In [180]:
# Extract one feature row per sliding window from the currently loaded
# recording and append them to FEATURES_TIME with label 1.
# NOTE: FEATURES_TIME must already exist, and re-running this cell appends the
# same rows again -- rebuild FEATURES_TIME first if a clean table is needed.
filtered_data = pd.DataFrame({"timestamp": raw_data['timestamp'], "BVP": raw_data['BVP'], "TEMP": raw_data['Temp']})
# Peak-detection threshold: the mean BVP level over the whole recording.
height = raw_data['BVP'].mean()

# Collect rows in a plain list and append them in one step instead of
# enlarging the DataFrame on every iteration.
stress_rows = []
for w in WINDOWS:
    # `w` is the end time of the window; the start lies one window length earlier.
    win_start, win_end = w - wnd_ms, w

    # Samples with win_start < timestamp <= win_end.
    in_window = (filtered_data["timestamp"] > win_start) & (filtered_data["timestamp"] <= win_end)
    data_wnd = filtered_data.loc[in_window]
    bvp_sig_wnd, temp_sig_wnd = data_wnd['BVP'], data_wnd['TEMP']

    # Peaks above `height` that are at least `dist` samples apart, divided by
    # the window length to give a per-window_size-unit rate.
    peaks = sig.find_peaks(bvp_sig_wnd, height=height, distance=dist)[0]
    n_peak = len(peaks) / window_size

    # Order must match the FEATURES_TIME columns: BVP stats, BVP peak rate,
    # TEMP stats, label.
    stress_rows.append([
        bvp_sig_wnd.min(),
        bvp_sig_wnd.max(),
        bvp_sig_wnd.mean(),
        bvp_sig_wnd.std(),
        n_peak,
        temp_sig_wnd.min(),
        temp_sig_wnd.max(),
        temp_sig_wnd.mean(),
        temp_sig_wnd.std(),
        1,
    ])

# Nothing to append when the recording is shorter than one window.
if stress_rows:
    FEATURES_TIME = pd.concat(
        [
            FEATURES_TIME,
            pd.DataFrame(stress_rows, columns=FEATURES_TIME.columns, dtype=float),
        ],
        ignore_index=True,
    )

display(FEATURES_TIME)

Unnamed: 0,BVP_min,BVP_max,BVP_mean,BVP_std,BVP_peak_f,TEMP_min,TEMP_max,TEMP_mean,TEMP_std,labels
0,448.0,583.0,511.181726,21.641745,1.766667,29.65,37.38,33.311416,0.706680,0.0
1,448.0,583.0,511.253928,22.195743,1.766667,29.65,35.77,33.315606,0.700874,0.0
2,448.0,583.0,511.040020,22.608726,1.766667,29.65,35.77,33.310765,0.702675,0.0
3,448.0,583.0,511.063797,22.888080,1.733333,29.65,35.77,33.313980,0.703755,0.0
4,448.0,583.0,511.176292,23.073986,1.700000,29.65,35.77,33.334002,0.700589,0.0
...,...,...,...,...,...,...,...,...,...,...
125,470.0,564.0,511.933097,18.970367,1.766667,30.29,36.74,33.392235,0.686335,1.0
126,470.0,564.0,512.155094,18.767156,1.766667,30.29,36.74,33.395808,0.685584,1.0
127,470.0,564.0,512.362392,18.718854,1.766667,30.29,36.74,33.397755,0.684812,1.0
128,470.0,564.0,511.546883,18.639732,1.733333,30.29,36.74,33.399230,0.685126,1.0


In [189]:

# test = pd.concat([test_stress, test_base], axis = 0)
# test.drop(["BVP_peak_f", "BVP_weight_amp_avg", "BVP_weight_energy_avg", "BVP_power_entropy"], axis = 1, inplace=True)
# test.drop(["BVP_n_peak"], axis = 1, inplace=True)

# Separate the label column from the feature columns.
y_test = FEATURES_TIME["labels"]
X_test = FEATURES_TIME.drop(columns=["labels"])

# Min-max scale every feature column.
# NOTE(review): the scaler is fitted on the very data it then transforms. If
# these rows are meant to be scored by a model trained elsewhere, the scaler
# fitted on the training data should presumably be reused -- confirm.
scaler = MinMaxScaler().fit(X_test)

# Show the fitted per-column scale and offset, one array per line.
print(scaler.scale_, scaler.min_, sep="\n")

test_scaled = scaler.transform(X_test)

[ 0.03846154  0.03448276  0.9601399   0.11026747  5.          0.51813472
  0.25839793  8.67846634 27.73248941]
[ -17.5         -19.93103448 -490.59940324   -2.85611668   -7.33333333
  -15.3626943    -9.16020672 -295.87922175  -18.96369939]
