In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from numpy import set_printoptions
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
import joblib

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

### Data processing 

In [None]:
# Import data.
# DATA_DIR is the folder containing the workbook; leave it empty to read from the
# current working directory (os.chdir("") would raise FileNotFoundError).
DATA_DIR = r""  # insert path
if DATA_DIR:
    os.chdir(DATA_DIR)
dataset = pd.read_excel('Data_Protocol2_TS3.xlsx')  # file name

# Data split: first 75 % of the rows for training, remaining 25 % for testing.
# Positional .iloc slicing is used on purpose: label-based .loc slices include BOTH
# endpoints, which put the boundary row in the training set and the test set at once.
split_row = int(0.75 * len(dataset))
dataset_train = dataset.iloc[:split_row]
dataset_test = dataset.iloc[split_row:]

In [None]:
# Feature definition. EE ML-0: all features except the COSMED features; EE ML-1: ten best features;
# EE ML-2: best features from wearables only (n<10).
# The columns listed here are removed from the model inputs; 'TDEE_avg' is the regression target.
excluded_columns = ['RR', 'VO2', 'VCO2', 'TDEE', 'PRO', 'FAT', 'CHO', 'npRQ', 'timestep', 'TDEE_avg']

# Extract the target before the frame is reduced to its feature columns.
y = dataset_train[['TDEE_avg']].values
dataset_train = dataset_train.drop(columns=excluded_columns)
X = dataset_train.values

In [None]:
# Data scaling: features and target each get their own min-max scaler mapping to [0, 1].
# sc_y is kept separate so predictions can later be mapped back with inverse_transform.
unit_range = (0, 1)
sc_x = MinMaxScaler(feature_range=unit_range)
sc_y = MinMaxScaler(feature_range=unit_range)

X_scaled = sc_x.fit(X).transform(X)
y_scaled = sc_y.fit(y).transform(y)

In [None]:
# Arrange the scaled training data as the 3D input used by the LSTM model.
# Sliding window: each sample holds the prvs_timesteps rows that precede a target row,
# and the label is the scaled target of that row.
prvs_timesteps = 200  # number of previous steps to look at
window_ends = range(prvs_timesteps, len(X_scaled))

X_train = np.array([X_scaled[end - prvs_timesteps:end, :] for end in window_ends])
y_train = np.array([y_scaled[end, 0] for end in window_ends])

# Shape is unchanged by this call; it only makes the 3 axes of the array explicit.
n_samples, n_steps, n_features = X_train.shape[0], X_train.shape[1], X_train.shape[2]
X_train = np.reshape(X_train, (n_samples, n_steps, n_features))

In [None]:
# Build the test data with the same feature columns as the training data and
# arrange it in the same 3D windowed format.
test_excluded_columns = ['RR', 'VO2', 'VCO2', 'TDEE', 'PRO', 'FAT', 'CHO', 'npRQ', 'timestep', 'TDEE_avg']

# Ground truth is taken before the target column is dropped from the frame.
real_tdee = dataset_test[["TDEE_avg"]].values
dataset_test = dataset_test.drop(columns=test_excluded_columns)

# Keep the last prvs_timesteps training rows in front of the test rows so that the
# first test row already has a full look-back window.
dataset_total = pd.concat((dataset_train, dataset_test), axis=0)
first_input_row = len(dataset_total) - len(dataset_test) - prvs_timesteps
inputs = dataset_total[first_input_row:].values
inputs = inputs.reshape(-1, X_train.shape[2])
inputs = sc_x.transform(inputs)  # reuse the scaler fitted on the training features

X_test = np.array(
    [inputs[stop - prvs_timesteps:stop, :] for stop in range(prvs_timesteps, len(inputs))]
)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2]))

### CNN LSTM modeling

In [None]:
 
# Train the CNN-LSTM n times and report the test metrics averaged over the runs.
# Accumulators for the per-run metrics (divided by n when printed below).
rsquared = 0
MAE = 0
MAPE = 0
MSE = 0
RMSE = 0
prediction = []  # one array of predictions (original target scale) per training run
n = 10  # number of times the model is trained

# NOTE(review): both real and predicted values are divided by 1400 before scoring. This looks
# like a unit conversion (a per-day -> per-minute conversion would be 1440) -- confirm the constant.
# The real values do not change between runs, so they are scaled once here.
real_scaled = real_tdee / 1400

for k in range(0, n):
    # CNN LSTM definition
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    # The flattened CNN features are fed to the LSTM stack as a sequence of length 1.
    model.add(RepeatVector(1))
    model.add(LSTM(200, activation='relu', return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100))
    model.add(Dropout(0.2))
    model.add((Dense(100, activation='relu')))
    model.add((Dense(1)))
    model.compile(loss='mse', optimizer='adam')

    # fitting
    model.fit(X_train, y_train, epochs=100, batch_size=64)

    # prediction, mapped back from the [0, 1] range to the original target scale
    predicted_tdee = model.predict(X_test)
    predicted_tdee = sc_y.inverse_transform(predicted_tdee)
    prediction.append(predicted_tdee)

    # metric computation for this run
    predicted_scaled = predicted_tdee / 1400
    rsq = round(r2_score(real_scaled, predicted_scaled), 4)
    mae = round(mean_absolute_error(real_scaled, predicted_scaled), 4)
    mape = round(mean_absolute_percentage_error(real_scaled, predicted_scaled), 4)
    # RMSE is derived from the MSE instead of passing `squared=`, which newer
    # scikit-learn releases no longer accept; for a single target the values are identical.
    raw_mse = mean_squared_error(real_scaled, predicted_scaled)
    mse = round(raw_mse, 4)
    rmse = round(float(np.sqrt(raw_mse)), 4)

    # Add this run to the running totals; without this the means printed below are always 0.
    rsquared += rsq
    MAE += mae
    MAPE += mape
    MSE += mse
    RMSE += rmse


# Mean of the metrics
print('Mean values')
print("R-Squared: "+str(rsquared/n))
print("Mean Absolute Error: "+str(MAE/n))
print("Mean Absolute Percentage Error: "+str(MAPE/n))
print("Mean Squared Error: "+str(MSE/n))
print("Root Mean Squared Error: "+str(RMSE/n))

In [None]:
# Put the results in a dataframe and save it in an xlsx file.
# Columns: sample counter, real values, mean prediction over all runs, then one column
# per training run named '1', '2', ... (all divided by 1400, as in the metric computation).
time = pd.Series(np.arange(1, len(real_tdee) + 1, 1))
results = {
    'time': time,
    'real_TDEE': real_tdee.flatten() / 1400,
    'predicted_TDEE': np.mean(prediction, axis=0).flatten() / 1400,
}
# Built from `prediction` (the list filled by the training loop) so the number of
# run columns follows the number of runs instead of being fixed at ten.
for run_number, run_prediction in enumerate(prediction, start=1):
    results[str(run_number)] = run_prediction.flatten() / 1400
comp = pd.DataFrame(results)

comp.to_excel(r'filename.xlsx')

In [None]:
# Plot the results: every individual run in light grey, the real values in blue and
# the mean prediction over all runs in red (all divided by 1400).
time = pd.Series(np.arange(1, len(real_tdee) + 1, 1))
# real_tdee is already a NumPy array, so it is flattened directly (it has no `.values`).
comp = pd.DataFrame({'time': time, 'real_TDEE': real_tdee.flatten()/1400, 'predicted_TDEE': np.mean(prediction, axis=0).flatten()/1400})


for run_prediction in prediction:
    plt.plot(comp['time'], run_prediction.flatten()/1400, color='lightgrey', alpha=0.8, linestyle='--')
plt.plot(comp['time'], comp['real_TDEE'], color='blue', label='Real EE')
plt.plot(comp['time'], comp['predicted_TDEE'], color='red', label='Mean prediction EE', alpha=0.8)
plt.legend()