# ML elapsed training time

In [30]:
import time
import pandas as pd
import lightgbm as lgb
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [31]:
# Read the prepared train/test CSV into a DataFrame and display it.
# NOTE(review): the rendered frame has an "Unnamed: 0" column, which looks like
# a saved index; it is not part of the feature list used later — confirm
# whether it should be read with index_col=0 instead.
DATA_PATH = "../Fitabase Data 4.12.16-5.12.16/test_train_data.csv"
result_df = pd.read_csv(DATA_PATH)
result_df

Unnamed: 0,Id,Time,HeartRate,Intensity,Calories
0,2022484408,2016-04-12 07:21:00,101.600000,1,3.32064
1,2022484408,2016-04-12 07:22:00,87.888889,1,3.94326
2,2022484408,2016-04-12 07:23:00,58.000000,0,1.34901
3,2022484408,2016-04-12 07:24:00,58.000000,0,1.03770
4,2022484408,2016-04-12 07:25:00,56.777778,0,1.03770
...,...,...,...,...,...
333141,8877689391,2016-05-12 13:55:00,60.666667,0,1.33353
333142,8877689391,2016-05-12 13:56:00,61.875000,0,1.33353
333143,8877689391,2016-05-12 13:57:00,58.142857,0,1.33353
333144,8877689391,2016-05-12 13:58:00,61.200000,0,1.33353


In [32]:
# Parse the timestamp column once, then derive calendar features from it.
timestamps = pd.to_datetime(result_df['Time'])
result_df['Time'] = timestamps
result_df['Hour'] = timestamps.dt.hour
result_df['Minutes'] = timestamps.dt.minute
# 'Weekday' is added to the frame but is not included in `features` below.
result_df['Weekday'] = timestamps.dt.weekday

# Column groups: 'Id' is the only categorical column; the rest are numeric.
categorical_features = ['Id']
numeric_features = ['Hour', 'Minutes', 'Intensity', 'Calories']

# The model inputs are the categorical columns followed by the numeric ones;
# the regression target is the heart rate.
features = categorical_features + numeric_features
target = 'HeartRate'

X = result_df[features]
y = result_df[target]

# Hold out 17% of the rows for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.17,
    random_state=777,
)

# Scales the numeric columns and one-hot encodes 'Id'; any other column is
# passed through unchanged.
# NOTE(review): the training cells visible in this notebook call .fit on
# X_train directly and never reference `preprocessor` — confirm whether it was
# meant to be part of a pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features),
    ],
    remainder='passthrough',
)

# VotingRegressor

In [33]:
# Hyperparameters shared by the boosted models in this notebook. Each model
# only reads the keys it is explicitly given below.
best_params = {'learning_rate': 0.2, 'n_estimators': 200, 'num_leaves': 31, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.8}

# XGBoost uses n_estimators, learning_rate, max_depth, subsample and
# colsample_bytree from best_params.
xgb_model = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_estimators=best_params['n_estimators'],
    learning_rate=best_params['learning_rate'],
    max_depth=best_params['max_depth'],
    subsample=best_params['subsample'],
    colsample_bytree=best_params['colsample_bytree']
)

# LightGBM uses only learning_rate, n_estimators and num_leaves from best_params.
lgb_model = lgb.LGBMRegressor(
    objective='regression',
    random_state=42,
    learning_rate=best_params['learning_rate'],
    n_estimators=best_params['n_estimators'],
    num_leaves=best_params['num_leaves']
)

# Random forest with library defaults apart from the fixed seed.
rf_model = RandomForestRegressor(random_state=42)

# Ensemble of the three regressors above.
voting_regressor = VotingRegressor(
    estimators=[('xgb', xgb_model), ('lgb', lgb_model), ('rf', rf_model)]
)

# Time only the fit call. time.perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() is wall-clock time and
# can jump if the system clock is adjusted during the run.
# NOTE(review): the ensemble is fitted on X_train as-is; the `preprocessor`
# defined earlier is not applied here — confirm that this is intended.
start_time = time.perf_counter()

voting_regressor.fit(X_train, y_train)

end_time = time.perf_counter()

# Elapsed training time in seconds.
final_time = end_time - start_time
print("\033[94;1mTiempo de entrenamiento transcurrido de modelo VotingRegressor(LightGBM, XGBoost y RandomForestRegressor):\033[0m")
print(f"{final_time:.2f} segundos.")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006051 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 358
[LightGBM] [Info] Number of data points in the train set: 276511, number of used features: 5
[LightGBM] [Info] Start training from score 73.705764
[94;1mTiempo de entrenamiento transcurrido de modelo VotingRegressor(LightGBM, XGBoost y RandomForestRegressor):[0m
120.38 segundos.


# LightGBM

In [34]:
# Fresh, unfitted LightGBM regressor configured from best_params (only
# learning_rate, n_estimators and num_leaves are read), so it can be timed on
# its own.
lgb_model = lgb.LGBMRegressor(
    objective='regression',
    random_state=42,
    learning_rate=best_params['learning_rate'],
    n_estimators=best_params['n_estimators'],
    num_leaves=best_params['num_leaves']
)

# Time only the fit call. time.perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() is wall-clock time and
# can jump if the system clock is adjusted during the run.
start_time = time.perf_counter()

lgb_model.fit(X_train, y_train)

end_time = time.perf_counter()

# Elapsed LightGBM training time in seconds.
lgb_time = end_time - start_time
print(f"\033[94;1mTiempo de entrenamiento para LightGBM: {lgb_time:.2f} segundos\033[0m")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006493 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 358
[LightGBM] [Info] Number of data points in the train set: 276511, number of used features: 5
[LightGBM] [Info] Start training from score 73.705764
[94;1mTiempo de entrenamiento para LightGBM: 2.86 segundos[0m


# XGBoost

In [35]:

# Fresh, unfitted XGBoost regressor configured from best_params
# (n_estimators, learning_rate, max_depth, subsample, colsample_bytree), so it
# can be timed on its own.
xgb_model = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_estimators=best_params['n_estimators'],
    learning_rate=best_params['learning_rate'],
    max_depth=best_params['max_depth'],
    subsample=best_params['subsample'],
    colsample_bytree=best_params['colsample_bytree']
)

# Time only the fit call. time.perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() is wall-clock time and
# can jump if the system clock is adjusted during the run.
start_time = time.perf_counter()

xgb_model.fit(X_train, y_train)

end_time = time.perf_counter()

# Elapsed XGBoost training time in seconds.
xgb_time = end_time - start_time
print(f"\033[94;1mTiempo de entrenamiento para XGBoost: {xgb_time:.2f} segundos\033[0m")

[94;1mTiempo de entrenamiento para XGBoost: 8.65 segundos[0m
