Libraries

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn import metrics
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import glob
from scipy import stats
import datetime as dt

Data Preparation

In [25]:
# Read the three raw recordings of subject 46343 into DataFrames.
# Column names are supplied explicitly, so the first row of every file is
# read as data rather than as a header.
#   acceleration  : space-separated -> timedelta, accX, accY, accZ
#   heart rate    : comma-separated -> timedelta, heartrate
#   labeled sleep : space-separated -> timedelta, sleep
ACC = pd.read_csv(
    '46343_acceleration.txt',
    names=['timedelta', 'accX', 'accY', 'accZ'],
    sep=' ',
)
HeartR = pd.read_csv(
    '46343_heartrate.txt',
    names=['timedelta', 'heartrate'],
    sep=',',
)
SleepL = pd.read_csv(
    '46343_labeled_sleep.txt',
    names=['timedelta', 'sleep'],
    sep=' ',
)

In [26]:
# Inspect the first / last 'timedelta' of ACC, HeartR and SleepL to see over
# which time range the three recordings lie close together.
ACC_min_date, ACC_max_date = ACC['timedelta'].min(), ACC['timedelta'].max()
HeartR_min_date, HeartR_max_date = HeartR['timedelta'].min(), HeartR['timedelta'].max()
SleepL_min_date, SleepL_max_date = SleepL['timedelta'].min(), SleepL['timedelta'].max()

# Print start / end of every signal so the common start_timedelta and
# end_timedelta can be read off.
acc_line = "Acc start    : "  + str(ACC_min_date) + " | Acc end    : " + str(ACC_max_date)
heart_line = "HeartR start : "  + str(HeartR_min_date) + " | HeartR end : " + str(HeartR_max_date)
sleep_line = "SleepL start : "  + str(SleepL_min_date) + "             | SleepL end : " + str(SleepL_max_date)
print(acc_line)
print(heart_line)
print(sleep_line)

Acc start    : -124489.16105 | Acc end    : 17643.046417
HeartR start : -556410.36066 | HeartR end : 16980.47229
SleepL start : 0             | SleepL end : 16980


In [27]:
# Keep only the time window in which all three signals (ACC, HeartR, SleepL)
# have data: it starts at the latest of the three start times and ends at the
# earliest of the three end times. Filtering each frame by its *own* min/max
# would merely drop its first and last row instead of intersecting the ranges.
start_timedelta = max(ACC_min_date, HeartR_min_date, SleepL_min_date)
end_timedelta = min(ACC_max_date, HeartR_max_date, SleepL_max_date)

# between() is inclusive on both ends, so samples lying exactly on the window
# boundaries are kept. .copy() turns each selection into an independent frame
# so that the column assignments in the following cells do not act on a view
# of the raw data (SettingWithCopyWarning).
ACC_new = ACC[ACC['timedelta'].between(start_timedelta, end_timedelta)].copy()
HeartR_new = HeartR[HeartR['timedelta'].between(start_timedelta, end_timedelta)].copy()
SleepL_new = SleepL[SleepL['timedelta'].between(start_timedelta, end_timedelta)].copy()

In [None]:
# ------------ Rounding ACC (Rounding to 1 sec) -------------------------------
# Convert the numeric offsets to Timedelta and round them to the nearest
# second. Rounding a timedelta column goes through the .dt accessor
# (Series.round only accepts a number of decimals).
# NOTE(review): the unit is taken to be seconds, judging by the printed
# ranges (sleep labels span 0..16980) - confirm against the data description.
ACC_new = ACC_new.assign(
    timedelta=pd.to_timedelta(ACC_new['timedelta'], unit='s').dt.round('1s')
)

# Several raw samples share the same second after rounding: average them.
# A single groupby over all three axes yields exactly one 'timedelta' key
# column; concatenating three per-axis results side by side would repeat the
# key three times and break the later merge on 'timedelta'.
ACC_new2 = (
    ACC_new
    .groupby('timedelta')[['accX', 'accY', 'accZ']]
    .mean()
    .reset_index()
)

In [None]:
# ------------ Rounding Heart Rate (Rounding to 1 sec) -------------------------------
# Convert the numeric offsets to Timedelta and round to the nearest second
# (timedelta rounding goes through the .dt accessor).
# NOTE(review): the unit is taken to be seconds - confirm against the data
# description.
HeartR_new = HeartR_new.assign(
    timedelta=pd.to_timedelta(HeartR_new['timedelta'], unit='s').dt.round('1s')
)

# Put the signal on a regular 1-second grid: median of the readings within
# each second, then forward-fill the seconds that have no reading.
resample_rule = '1s'
HeartR_new2 = (
    HeartR_new
    .set_index('timedelta')
    .resample(resample_rule)
    .median()
    .ffill()
)

In [None]:
# ------------ Rounding Sleep Label (Rounding to 1 sec) -------------------------------
# Convert the numeric offsets to Timedelta and round to the nearest second
# (timedelta rounding goes through the .dt accessor).
# NOTE(review): the unit is taken to be seconds - confirm against the data
# description.
SleepL_new = SleepL_new.assign(
    timedelta=pd.to_timedelta(SleepL_new['timedelta'], unit='s').dt.round('1s')
)

# Put the labels on a regular 1-second grid and forward-fill the gaps.
# The label is a class code, so the last label seen in each second is kept:
# a median of two different codes could produce a value that is not a class.
resample_rule = '1s'
SleepL_new2 = (
    SleepL_new
    .set_index('timedelta')
    .resample(resample_rule)
    .last()
    .ffill()
)


Merge Data and Standardize Data

In [None]:
# ------------Merge All Data -------------------------------
# The accelerometer frame drives the merge: every accelerometer row receives
# the most recent heart-rate and sleep-label value at or before its timestamp
# (merge_asof matches backward by default).
# HeartR_new2 and SleepL_new2 carry 'timedelta' as their index after the
# resampling step, so the right-hand key is taken from the index explicitly.
df = pd.merge_asof(ACC_new2, HeartR_new2, left_on='timedelta', right_index=True)
df = pd.merge_asof(df, SleepL_new2, left_on='timedelta', right_index=True)

In [None]:
# Fill NA
# Heart rate : rows without a matched reading get the median heart rate.
# Sleep label: rows without a matched label get 0.
# fillna returns a new frame, so the result has to be assigned back.
df = df.fillna({
    'heartrate': df['heartrate'].median(),
    'sleep': 0,
})

# Drop column: the time key is not used as a model feature. columns= is
# required (a bare label is looked up in the row index); errors='ignore'
# keeps the cell re-runnable once the column is already gone.
df = df.drop(columns='timedelta', errors='ignore')

In [None]:
# Standardized data
feature_columns = ['accX', 'accY', 'accZ', 'heartrate']
label_columns = ['sleep']

# Scale every feature column to zero mean and unit variance. The scaled array
# is wrapped back into a DataFrame so column names and the row index survive.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split further down - fit it on the training rows only if leakage into the
# test set matters for the evaluation.
scaler = StandardScaler()
df_feature = pd.DataFrame(
    scaler.fit_transform(df[feature_columns]),
    columns=feature_columns,
    index=df.index,
)
df_label = df[label_columns]

In [None]:
# Visualize signals: feature columns in the upper panel, sleep label in the
# lower one. Both panels share the x axis (the row index of the frames).
fig, (ax_feature, ax_label) = plt.subplots(2, 1, sharex=True, figsize=(12, 6))
df_feature.plot(ax=ax_feature, title='Features')
df_label.plot(ax=ax_label, title='Sleep label')
ax_label.set_xlabel('row index')
fig.tight_layout();  # trailing ';' keeps the return value out of the cell output

Model Preparation (SVM)

In [None]:
# Train Test Split
# Train / Test Preparation (two options - enable one of them at a time)

# Feature matrix and 1-D label vector taken from the frames prepared above.
X = df_feature.to_numpy()
Y = df_label.to_numpy().ravel()
# --------------------------------------------------------- #
# Option#1: ordered split without shuffling - the first 70 % of the rows are
# used for training, the remaining rows for testing.
train_size = int(np.floor(0.7 * len(X)))
X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]
# --------------------------------------------------------- #
# Option #2: random split
# seed = 4
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=seed)
# --------------------------------------------------------- #
# Perform Linear Regression -> All variables
lr = LinearRegression()
# Train
lr.fit(X_train, Y_train)
# Test
y_test_pred_lr = lr.predict(X_test)
# R^2 on the test rows. r2_score takes (y_true, y_pred) in that order and is
# not symmetric; lr.score reports the same quantity and serves as a cross-check.
print(metrics.r2_score(Y_test, y_test_pred_lr))
print(lr.score(X_test, Y_test))

In [None]:
# Model training parameters for the SVC models created in the next cell:
# c_val is passed as C, gmm as gamma (rbf kernel) and d as degree (poly kernel).
c_val, gmm, d = 100, 0.1, 2

In [None]:
# Model initialize: one SVC per kernel, all sharing the same C.
# Only the rbf model takes gamma and only the poly model takes degree.
svc_lin = SVC(
    C=c_val,
    kernel='linear',
)
svc_rbf = SVC(
    C=c_val,
    kernel='rbf',
    gamma=gmm,
)
svc_poly = SVC(
    C=c_val,
    kernel='poly',
    degree=d,
)

In [None]:
# Model Training
# fit() trains the estimator in place and returns the estimator itself, so
# the return value is not stored - in particular not under a '*_pred' name,
# which is reserved for the predictions computed in the next cell.
svc_rbf.fit(X_train, Y_train)
svc_poly.fit(X_train, Y_train);  # trailing ';' suppresses the estimator repr

In [None]:
# Model Testing (Predict): run both trained classifiers on the held-out rows.
svc_rbf_pred, svc_poly_pred = (
    classifier.predict(X_test) for classifier in (svc_rbf, svc_poly)
)

SVM Prediction Report

In [None]:
# Model Confusion Matrix of SVC_rbf, SVC_poly
# (rows: true label, columns: predicted label)
print('SVC rbf')
print(confusion_matrix(Y_test, svc_rbf_pred))
print('SVC poly')
print(confusion_matrix(Y_test, svc_poly_pred))

In [None]:
# Model Classification Report of SVC_rbf, SVC_poly
# classification_report returns one multi-line string; it has to be printed,
# otherwise the cell shows its repr with literal '\n' escapes.
print('SVC rbf')
print(classification_report(Y_test, svc_rbf_pred))
print('SVC poly')
print(classification_report(Y_test, svc_poly_pred))

Hyperparameter Tuning (GridSearchCV)

In [None]:
# Model list: default-configured regressors keyed by a short label.
regression = {
    'LR': LinearRegression(),
    'SVR': SVR(),
}



# Candidate hyperparameter values for the SVC search, one list per parameter
# (judging by the names: C, gamma and polynomial degree).
C_list = [
    0.1, 1.0, 10.0,
    100.0, 200.0, 500.0,
]
Gamma_list = [
    0.01, 0.1,
    1.0, 10,
]
d_list = [
    2,
    3,
]

In [None]:
# Perform GridSearchCV() for the SVC classifier
# The estimator must be a scikit-learn estimator instance.
model = SVC()
n_jobs = 1

# One sub-grid per kernel so that each kernel is only combined with the
# parameters it is configured with in this notebook (gamma for rbf, degree
# for poly). The value lists come from the cell above.
params = [
    {'kernel': ['rbf'], 'C': C_list, 'gamma': Gamma_list},
    {'kernel': ['poly'], 'C': C_list, 'degree': d_list},
]

grid = GridSearchCV(estimator = model,
                    n_jobs = n_jobs,
                    verbose = 10,
                    scoring = 'accuracy',
                    cv = 2,
                    param_grid = params)
grid_result = grid.fit(X_train, Y_train)