In [1]:
import os
import re

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from numpy import set_printoptions
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import log_loss
import lightgbm as lgbm
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt import space_eval
import xgboost as xgb


### Light Gradient Boosting Machine (LightGBM) algorithm

In [None]:
#train data
os.chdir(r"filepath")
dataset = pd.read_excel('filename.xlsx')

#data split
dataset_train = dataset[:int(0.75*len(dataset))]
dataset_test = dataset[int(0.75*len(dataset)):]

#training data
X_train = dataset_train[['BPM_chest','ACCx_thigh','ACCmag_uchest','ACCx_uchest','ACCz_thigh', 'ACCy_thigh','Tskin_lpthigh',
                  'Tskin_rpthigh','Tskin_rshin','ACCy_uchest']]
y_train = dataset_train[["TDEE_avg"]]
import re
X_train = X_train.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))

#test data

real_tdee = dataset_test[["TDEE_avg"]]
X_test = dataset_test[['BPM_chest','ACCx_thigh','ACCmag_uchest','ACCx_uchest','ACCz_thigh', 'ACCy_thigh','Tskin_lpthigh',
                  'Tskin_rpthigh','Tskin_rshin','ACCy_uchest']]
X_test = X_test.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))


In [None]:
#definition of the tuning space

space= {
    'learning_rate':    hp.choice('learning_rate',    np.arange(0.05, 0.31, 0.05)),
    'max_depth':        hp.choice('max_depth',        np.arange(5, 16, 1, dtype=int)),
    'min_child_weight': hp.choice('min_child_weight', np.arange(1, 8, 1, dtype=int)),
    'colsample_bytree': hp.choice('colsample_bytree', np.arange(0.3, 0.8, 0.1)),
    'subsample':        hp.uniform('subsample', 0.8, 1),
    'n_estimators':     100,
    
}
xgb_fit_params = {
    'eval_metric': 'rmse',
    'early_stopping_rounds': 10,
    'verbose': False
}
xgb_para = dict()
xgb_para['reg_params'] = space
xgb_para['fit_params'] = xgb_fit_params
xgb_para['loss_func' ] = lambda y, pred: np.sqrt(mean_absolute_error(y, pred))


In [None]:
def objective(space):
    clf=lgbm.LGBMRegressor(**space)
    
    evaluation = [( X_train, y_train), ( X_test, real_tdee)]
    
    clf.fit(X_train, y_train,eval_set=evaluation,
                **xgb_para['fit_params'])
    

    pred = clf.predict(X_test)
    mae = mean_absolute_error(real_tdee/1400, pred/1400)
    mape = mean_absolute_percentage_error(real_tdee/1400, pred/1400)
    print ("SCORE:", mape)
    return {'loss': mae, 'status': STATUS_OK }

In [None]:
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)

print("The best hyperparameters are : ","\n")
print(best_hyperparams)

In [None]:
#use best hyperparameters found with tuning to train the model

clf=lgbm.LGBMRegressor(colsample_bytree = 0.3, learning_rate=0.05, max_depth = 5, min_child_weight = 1, subsample = 0.8,
                    n_estimators = 100)
    
    
evaluation = [( X_train, y_train), ( X_test, real_tdee)]
clf.fit(X_train, y_train, eval_set=evaluation,
                **xgb_para['fit_params'])
    

pred = clf.predict(X_test)

### COMPUTE METRICS ERROR FOR LGBM ###
print('R2:',r2_score(real_tdee/1400, pred/1400))
print('MAE:',mean_absolute_error(real_tdee/1400, pred/1400))
print('MAPE: ', mean_absolute_percentage_error(real_tdee/1400, pred/1400))
print('MSE:',mean_squared_error(real_tdee/1400, pred/1400, squared=True))
print('RMSE:',mean_squared_error(real_tdee/1400, pred/1400, squared=False))



### Aggregate LGBM predictions with CNN-LSTM predictions

In [None]:
os.chdir(r"filepath")
data_convLSTM = pd.read_excel('filename_results_CNNLSTM.xlsx')
pred_convLstm = data_convLSTM.predicted_TDEE

#computation of the mean
data = np.array([pred_convLstm, pred/1400])
mean_signal = np.mean(data, axis = 0)

#Print the error metrics of the LSTM-LGBM model
print('R2:',r2_score(real_tdee/1400, mean_signal))
print('MAE:',mean_absolute_error(real_tdee/1400, mean_signal))
print('MAPE: ', mean_absolute_percentage_error(real_tdee/1400, mean_signal))
print('MSE:',mean_squared_error(real_tdee/1400, mean_signal, squared=True))
print('RMSE:',mean_squared_error(real_tdee/1400, mean_signal, squared=False))


In [None]:
#save predictions in a xlsx file
df = pd.DataFrame({'Real_TDEE':real_tdee.values.flatten()/1400,'Mean_signal':mean_signal,'Signal_LGBM':pred/1400,'Signal_LSTM':pred_convLstm.values})

df.to_excel(r'filename.xlsx')