In [1]:
import pandas as pd
import os
import numpy as np
import pytz
from datetime import datetime
import warnings
import re
from sklearn import linear_model, metrics
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from helper_preprocess import actigraph_add_datetime, watch_add_datetime, get_intensity, get_met_fitbit, get_met_freedson, get_met_vm3, get_metcart, get_met_matcart, get_train_data, extract_features
from helper_extraction import generate_table
from helper_model import get_intensity_coef, build_classification_model, pred_activity, set_realistic_met_estimate
warnings.filterwarnings("ignore")

### Content
<ol>
    <li>Align activity logs with Actigraph and IMU data</li>
    <li>Generate minute-level features from Actigraph and IMU</li>
    <li>Get minute-level MET values from MetCart logs</li>
</ol>

In [2]:
# Root folder of the Phase 2 participant data. The lab network share is the
# default; set the CALORIE_HARMONY_ROOT environment variable to point the
# notebook at a copy mounted elsewhere without editing this cell.
_DEFAULT_ROOT_PATH_FSM = 'Y:/PrevMed/Alshurafa_Lab/Lab_Common/CalorieHarmony/A. Phase 2 Participants/'
# File names are appended to ROOT_PATH_FSM by plain string concatenation, so the
# value is normalised to end with exactly one '/'.
ROOT_PATH_FSM = (os.environ.get('CALORIE_HARMONY_ROOT') or _DEFAULT_ROOT_PATH_FSM).rstrip('/\\') + '/'

# Sub-paths handed to generate_table for the resampled wrist sensor streams
# (presumably resolved relative to each participant's folder - confirm in helper_extraction).
PATH_RESAMPLE_ACC = '/Wild/Wrist/Clean/Resampled/Accelerometer/'
PATH_RESAMPLE_GYRO = '/Wild/Wrist/Clean/Resampled/Gyroscope/'

In [3]:
# Participant IDs used in the analysis: 1000 through 1015, with 1001 left out
# (its MetCart entry below is 'NA').
participant_list = [str(pid) for pid in range(1000, 1016) if pid != 1001]

# Per-participant MetCart timestamp string (MM/DD/YYYY HH:MM:SS), keyed by participant ID.
met_cart_dic = {
    '1000': '01/27/2022 13:43:15',
    '1001': 'NA',
    '1002': '02/09/2022 10:30:45',
    '1003': '01/12/2022 15:47:36',
    '1004': '01/21/2022 13:49:59',
    '1005': '02/12/2022 11:57:42',
    '1006': '03/22/2022 10:23:05',
    '1007': '03/28/2022 12:49:21',
    '1008': '04/12/2022 12:11:31',
    '1009': '04/15/2022 12:37:46',
    '1010': '05/06/2022 11:14:05',
    '1011': '05/11/2022 12:02:56',
    '1012': '05/24/2022 16:42:47',
    '1013': '05/27/2022 11:34:26',
    '1014': '06/03/2022 11:50:35',
    '1015': '06/06/2022 10:41:15',
}

# Numeric estimate attached to each activity label (presumably a reference MET
# value - confirm against the helpers that consume it). Keys are kept exactly
# as written, including the trailing space in 'Sweeping slowly '.
activity_estimate = {
    'Typing on a computer while seated': 1.3,
    'Rest': 1,
    'Walking 2 mph on treadmill': 2.8,
    'Walking 3.5 mph on treadmill': 4.3,
    'Standing while fidgeting': 1.8,
    'Squats (shoulder length legs, get down to 90 degree angle)': 5,
    'Reading a book or magazine while reclining': 1.3,
    'General aerobics video': 7.3,
    'Sweeping slowly ': 2.3,
    'Push-ups against the wall': 3.8,
    'Running 4 mph on a treadmill': 6,
    'Lying down while doing nothing': 1.3,
    'Chester Step Test (0.25 m step at a rate of 30 steps per minute)': 8,
}

#### Participants' weight extraction

In [4]:
# Build participant_weight: participant ID (as str) -> body weight in kg.
LBS_TO_KG = 0.45359237  # exact pound-to-kilogram factor

df_weight = pd.read_excel(ROOT_PATH_FSM + 'Participant Measurement Record.xlsx')
mix_list = list(df_weight['Google Fit H/W'])
participant_ids = list(df_weight['P ID'])

weight_list = []
for pid, entry in zip(participant_ids, mix_list):
    # Each cell is read as "<something>/<weight in lbs>" (presumably height/weight,
    # going by the column name); the weight is everything after the first '/'.
    _, slash, weight_text = str(entry).partition('/')
    if not slash:
        # Covers blank (NaN) cells and entries without a separator, and names the
        # offending row instead of failing with an opaque error.
        raise ValueError(
            "Participant %s: cannot parse 'Google Fit H/W' value %r (expected '<h>/<w>')" % (pid, entry)
        )
    weight_list.append(float(weight_text) * LBS_TO_KG)  # convert lbs to kg

# Rows of the sheet are paired positionally: the i-th ID gets the i-th weight.
participant_weight = {str(pid): kg for pid, kg in zip(participant_ids, weight_list)}

### Complete LOSO

In [14]:
# Leave-one-subject-out (LOSO) evaluation.
# For each participant in turn:
#   1. build the training set from every other participant and fit the activity model,
#   2. predict activities for the held-out participant,
#   3. derive the final MET estimate from those predictions and the per-participant
#      intensity coefficients of the training subjects,
#   4. record classification metrics and R2 of each MET source against MetCart,
#   5. save the held-out participant's result table to data_phase_2/<id>/result/.
participant_list = ['1000','1002','1003','1004','1005','1006','1007','1008','1009','1010','1011','1012','1013','1014','1015']

# One entry per completed fold; these lists feed the summary DataFrame.
l_participant = []
l_accuracy = []
l_f1 = []
l_r2_est = []
l_r2_gf = []
l_r2_vm3 = []
l_r2_freedson = []

for participant in participant_list:
    train_list = []
    y_list = []
    regression_coef_list = []

    print('Leaving '+ participant +' out:')
    leftout = participant
    rest = [p for p in participant_list if p != leftout]

    # NOTE(review): generate_table is re-run for every training participant in every
    # fold (15 x 15 calls in total). If it is deterministic and its outputs are not
    # mutated downstream, computing each participant's table once would give the
    # same results much faster - confirm before caching.
    for p in rest:
        data_train, y_train, table = generate_table(PATH_RESAMPLE_ACC, PATH_RESAMPLE_GYRO, ROOT_PATH_FSM, met_cart_dic, participant_weight, activity_estimate, p)
        train_list.append(data_train)
        y_list.append(y_train)
        regression_coef_list.append(get_intensity_coef(table, gt_type='MetCart'))

    data_train_all = np.concatenate(train_list)
    y_train_all = np.concatenate(y_list)
    model = build_classification_model(data_train_all, y_train_all)

    data_test, y_test, table_test = generate_table(PATH_RESAMPLE_ACC, PATH_RESAMPLE_GYRO, ROOT_PATH_FSM, met_cart_dic, participant_weight, activity_estimate, leftout)
    # Rows with missing values are dropped from the test table before prediction.
    table_pred = pred_activity(data_test, model, table_test.dropna().reset_index(drop=True))

    # Score the classifier once and reuse the values for both the log line and the summary.
    y_pred = np.array(table_pred['model_classification'])
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("Activity test accuracy: %g" % accuracy)
    table_final = set_realistic_met_estimate(table_pred, regression_coef_list)

    # Compute every metric first and append afterwards, so a failure part-way
    # through a fold cannot leave the result lists with unequal lengths.
    met_truth = table_final['MET (MetCart)']
    f1_weighted = metrics.f1_score(y_test, y_pred, average='weighted')
    r2_est = metrics.r2_score(met_truth, table_final['estimation'])
    r2_gf = metrics.r2_score(met_truth, table_final['MET (GoogleFit)'])
    r2_vm3 = metrics.r2_score(met_truth, table_final['MET (VM3)'])
    r2_freedson = metrics.r2_score(met_truth, table_final['MET (Freedson)'])

    l_participant.append(participant)
    l_accuracy.append(accuracy)
    l_f1.append(f1_weighted)
    l_r2_est.append(r2_est)
    l_r2_gf.append(r2_gf)
    l_r2_vm3.append(r2_vm3)
    l_r2_freedson.append(r2_freedson)

    path_save = 'data_phase_2/' + str(participant) + '/result/'
    # makedirs also creates the missing 'data_phase_2/<id>/' parents (os.mkdir would
    # raise FileNotFoundError) and is a no-op when the folder already exists.
    os.makedirs(path_save, exist_ok=True)
    table_final.to_csv(path_save + 'result_table.csv', index=False)

Leaving 1000 out:
Processing:  1002
Processing:  1003
Processing:  1004
Processing:  1005
Processing:  1006
Processing:  1007
Processing:  1008
Processing:  1009
Processing:  1010
Processing:  1011
Processing:  1012
Processing:  1013
Processing:  1014
Processing:  1015
Processing:  1000
Activity test accuracy: 0.887931
Leaving 1002 out:
Processing:  1000
Processing:  1003
Processing:  1004
Processing:  1005
Processing:  1006
Processing:  1007
Processing:  1008
Processing:  1009
Processing:  1010
Processing:  1011
Processing:  1012
Processing:  1013
Processing:  1014
Processing:  1015
Processing:  1002
Activity test accuracy: 0.842105
Leaving 1003 out:
Processing:  1000
Processing:  1002
Processing:  1004
Processing:  1005
Processing:  1006
Processing:  1007
Processing:  1008
Processing:  1009
Processing:  1010
Processing:  1011
Processing:  1012
Processing:  1013
Processing:  1014
Processing:  1015
Processing:  1003
Activity test accuracy: 0.908257
Leaving 1004 out:
Processing:  1000
P

In [17]:
# Summarise the LOSO run: one row per held-out participant, one column per
# metric collected in the evaluation loop.
loso_columns = {
    'Participant (LOSO)': l_participant,
    'Activity Accuracy': l_accuracy,
    'Activity F1': l_f1,
    'R2 Estimation': l_r2_est,
    'R2 GoogleFit': l_r2_gf,
    'R2 VM3': l_r2_vm3,
    'R2 Freedson': l_r2_freedson,
}
df_result_LOSO = pd.DataFrame(loso_columns)
df_result_LOSO

Unnamed: 0,Participant (LOSO),Activity Accuracy,Activity F1,R2 Estimation,R2 GoogleFit,R2 VM3,R2 Freedson
0,1000,0.887931,0.88524,0.364686,0.032577,0.669411,0.541347
1,1002,0.842105,0.842105,0.479106,-0.228826,0.660617,0.405313
2,1003,0.908257,0.905652,0.248727,-0.190792,0.597895,0.567348
3,1004,0.917431,0.916027,0.44021,0.309772,0.268547,0.689752
4,1005,0.953488,0.952701,0.173217,0.328384,0.622523,0.655544
5,1006,0.90991,0.908765,0.355842,0.083716,0.834717,0.767562
6,1007,0.925,0.923556,0.451137,0.139572,0.67786,0.737133
7,1008,0.905172,0.902895,0.407338,-0.267472,0.826266,0.723418
8,1009,0.936508,0.934996,0.478624,-0.160717,0.744735,0.622011
9,1010,0.908333,0.906569,0.318065,0.219246,0.785789,0.787265
