In [None]:
import pandas as pd
import numpy as np
import warnings
from xgboost import XGBClassifier
import joblib
from app.util import time_parameters, process_wrist, get_freq_intensity, get_rmssd, get_train_data, extract_features, resample
warnings.filterwarnings("ignore")

In [None]:
# Restore the two pretrained models used by the classification and regression
# stages further down.
CLASSIFICATION_MODEL_PATH = "./app/classification_model.json"
REGRESSION_MODEL_PATH = "./app/regression_model.joblib"

# Classification model: an XGBClassifier restored from its saved JSON file.
model_classification = XGBClassifier()
model_classification.load_model(CLASSIFICATION_MODEL_PATH)

# Regression model: restored with joblib.
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted source.
loaded_rf = joblib.load(REGRESSION_MODEL_PATH)

In [None]:
# Columns kept from each sensor file: the time column plus the three axes.
ACC_COLUMNS = ['Time', 'accX', 'accY', 'accZ']
GYRO_COLUMNS = ['Time', 'rotX', 'rotY', 'rotZ']

# Load the raw accelerometer and gyroscope recordings.
df_acc_raw = pd.read_csv('./data/accelerometer.csv')
df_gyro_raw = pd.read_csv('./data/gyroscope.csv')

# Resample both signals on their 'Time' column.
# NOTE(review): 20 is presumably the target sampling rate in Hz — confirm
# against app.util.resample.
df_acc_resampled = resample(df_acc_raw[ACC_COLUMNS], 'Time', 20)
df_gyro_resampled = resample(df_gyro_raw[GYRO_COLUMNS], 'Time', 20)

# Post-process the resampled frames (per the original note: adds a datetime
# derived from the Unix timestamp).
df_acc = process_wrist(df_acc_resampled)
df_gyro = process_wrist(df_gyro_resampled)

# Linearly rescale every accelerometer axis so that its observed maximum and
# minimum land on the reference extremes taken from the original data.
targets = ['accX', 'accY', 'accZ']

# Reference extremes per axis, stored as [max, min].
min_max = {'accX':[38.03060682003315, -33.763857951531044],
           'accY':[34.77019433156978, -43.30149280531167],
           'accZ':[37.98169060088745, -36.9844767541086]}

for axis in targets:
    ref_max, ref_min = min_max[axis]
    samples = df_acc[axis]
    observed_max = samples.max()
    observed_min = samples.min()
    # NOTE(review): a constant column makes this denominator zero and turns
    # the whole axis into NaN — confirm the input always varies.
    scale = (ref_max - ref_min) / (observed_max - observed_min)
    # Anchored at the maximum: observed_max maps to ref_max, observed_min to ref_min.
    df_acc[axis] = scale * (samples - observed_max) + ref_max
    
# Segmentation: walk the recording one minute at a time and collect, for every
# minute, the two intensity features and the gyro/acc windows fed to the
# feature extractor.
st_ceil, et_floor = time_parameters(df_gyro)  # start and end time derived from the gyroscope data

window_size = 60
minute_wrist = []          # start time of every examined minute
l_intensity_freq = []      # get_freq_intensity result per minute
l_intensity_rmssd_l1 = []  # get_rmssd (l1 norm) result per minute
data_training = []         # gyro + acc windows per minute

# Number of whole minutes between start and end. total_seconds() is required
# here: .seconds is only the sub-day component of a timedelta, so a recording
# of 24 h or more would be truncated and a negative span would wrap around to
# a large positive count. A negative result simply yields an empty range.
n_minutes = int((et_floor - st_ceil).total_seconds() // 60)
one_minute = pd.DateOffset(minutes=1)

start_time = st_ceil

# Examine each minute
for _ in range(n_minutes):
    minute_wrist.append(start_time)
    end_time = start_time + one_minute

    # Accelerometer rows inside the half-open interval [start_time, end_time).
    in_minute = (df_acc['Datetime'] >= start_time) & (df_acc['Datetime'] < end_time)
    temp = df_acc.loc[in_minute].reset_index(drop=True)

    # NOTE(review): 100 is passed here while the signals were resampled with
    # 20 earlier in this cell — confirm what get_freq_intensity expects.
    l_intensity_freq.append(get_freq_intensity(temp, 100, 1, False))
    l_intensity_rmssd_l1.append(get_rmssd(temp, norm='l1'))

    # Gyroscope windows first, accelerometer windows second.
    gyro_windows = get_train_data(df_gyro, start_time, window_size, 'gyro')
    acc_windows = get_train_data(df_acc, start_time, window_size, 'acc')
    data_training.append(gyro_windows + acc_windows)

    start_time = end_time
    
        
# Zero-pad every window that came back short, so that all six series of every
# minute have the same length before they are stacked into one array.
expected_len = int(window_size * 20)
for channels in data_training:
    for j in range(6):
        n_missing = expected_len - channels[j].shape[0]
        # Only pad when samples are missing; a window that is already long
        # enough is left untouched.
        if n_missing > 0:
            padding = pd.Series([0] * n_missing)
            # Series.append was removed in pandas 2.0; pd.concat with
            # ignore_index=True is the supported equivalent.
            channels[j] = pd.concat([channels[j], padding], ignore_index=True)

np_training = np.array(data_training)
data_train = extract_features(np_training)

estimation = []

# Stage 1: classify every minute from its extracted features.
classification = model_classification.predict(data_train)

# Demographic inputs of the regression model.
# TODO: these are hard-coded; they need to come from real demographic info.
gender = 1.0
age = 34
BMI = 36

# Stage 2: class 0 gets a fixed estimate of 1.0; every other class is sent
# through the regression model.
# Regression feature order (TODO in the original: complete the feature set):
# 'gender', 'age', 'BMI', 'Intensity (Freq)', 'gender_age', 'gender_BMI',
# 'age_BMI', 'age_gender_BMI', 'Intensity (RMSSD_l1)'
for idx, label in enumerate(classification):
    if label == 0:
        estimation.append(1.0)
        continue

    demographics = [gender, age, BMI]
    interactions = [gender*age, gender*BMI, age*BMI, age*gender*BMI]
    feature_complete = (
        demographics
        + [l_intensity_freq[idx]]
        + interactions
        + [l_intensity_rmssd_l1[idx]]
    )
    feature_row = np.array(feature_complete).reshape(1, 9)
    estimation.append(loaded_rf.predict(feature_row)[0])

In [None]:
# Final per-minute table: the start time of each examined minute next to its
# estimate. The first column key was misspelled 'timstamp' and is corrected here.
pd.DataFrame({'timestamp': minute_wrist, 'mets': estimation})