In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [2]:
# Load data_processed.csv into a DataFrame and show it as the cell output.
data = pd.read_csv(filepath_or_buffer='data_processed.csv')
data

Unnamed: 0,Gender,GenderCode,Age,Height,Weight,HeartRate,BodyTemp,ExerciseNameENG,ExerciseNameUKR,ExerciseID,Duration,Calories
0,male,0,68,190,94,105,40.8,"Cycling, 16-19 mph, very fast, racing","Велоспорт, 25-30 км/год, дуже швидко",13,29,231
1,female,1,20,166,60,94,40.3,"Stationary cycling, light","Стаціонарний велоспорт, легкий режим",24,14,66
2,male,0,69,179,79,88,38.7,Watering lawn or garden,Полив газону або саду,22,5,26
3,female,1,34,179,71,100,40.5,Unicycling,Їзда на одноколісному велосипеді,17,13,71
4,female,1,27,154,58,81,39.8,"Stationary cycling, very light","Стаціонарний велоспорт, дуже легкий режим",23,10,35
...,...,...,...,...,...,...,...,...,...,...,...,...
14995,female,1,20,193,86,92,40.4,Mild stretching,Легка розтяжка,19,11,45
14996,female,1,27,165,65,85,39.2,Watering lawn or garden,Полив газону або саду,22,6,23
14997,female,1,43,159,58,90,40.1,Walking 4.5 mph,"Ходьба, 7 км/год, дуже швидкий темп",33,16,75
14998,male,0,78,193,97,84,38.3,Watering lawn or garden,Полив газону або саду,22,2,11


In [3]:
# Print a per-column summary of the frame: non-null counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Gender           15000 non-null  object 
 1   GenderCode       15000 non-null  int64  
 2   Age              15000 non-null  int64  
 3   Height           15000 non-null  int64  
 4   Weight           15000 non-null  int64  
 5   HeartRate        15000 non-null  int64  
 6   BodyTemp         15000 non-null  float64
 7   ExerciseNameENG  15000 non-null  object 
 8   ExerciseNameUKR  15000 non-null  object 
 9   ExerciseID       15000 non-null  int64  
 10  Duration         15000 non-null  int64  
 11  Calories         15000 non-null  int64  
dtypes: float64(1), int64(8), object(3)
memory usage: 1.4+ MB


In [4]:
# Count the missing values in each column.
data.isna().sum()

Gender             0
GenderCode         0
Age                0
Height             0
Weight             0
HeartRate          0
BodyTemp           0
ExerciseNameENG    0
ExerciseNameUKR    0
ExerciseID         0
Duration           0
Calories           0
dtype: int64

In [5]:
# Remove the columns that are not used as model inputs: the text label columns
# (Gender, ExerciseNameENG, ExerciseNameUKR), which appear to be mirrored by the
# numeric GenderCode / ExerciseID columns, plus HeartRate and BodyTemp.
#
# The frame is rebound rather than mutated with `inplace=True`, and
# errors='ignore' skips columns that are already absent, so this cell can be
# re-run on an already-trimmed frame without raising KeyError.
data = data.drop(
    columns=['Gender', 'HeartRate', 'BodyTemp', 'ExerciseNameENG', 'ExerciseNameUKR'],
    errors='ignore',
)
data

Unnamed: 0,GenderCode,Age,Height,Weight,ExerciseID,Duration,Calories
0,0,68,190,94,13,29,231
1,1,20,166,60,24,14,66
2,0,69,179,79,22,5,26
3,1,34,179,71,17,13,71
4,1,27,154,58,23,10,35
...,...,...,...,...,...,...,...
14995,1,20,193,86,19,11,45
14996,1,27,165,65,22,6,23
14997,1,43,159,58,33,16,75
14998,0,78,193,97,22,2,11


In [6]:
# Target is the Calories column; the features are every other remaining column.
Y = data['Calories']
X = data.drop(columns=['Calories'])

# Hold out 20 % of the rows for evaluation, with a fixed seed so the split is repeatable.
(X_train, X_test,
 Y_train, Y_test) = train_test_split(X, Y, test_size=0.2, random_state=30)

In [7]:
# Fit a random forest on the training split (fit() returns the estimator itself).
model = RandomForestRegressor(
    n_estimators=200,
    max_depth=20,
    max_features='sqrt',
    min_samples_leaf=4,
    random_state=30,
).fit(X_train, Y_train)

# Score the fitted model on the held-out split.
Y_pred = model.predict(X_test)
r2s, mses, maes = (
    r2_score(Y_test, Y_pred),
    mean_squared_error(Y_test, Y_pred),
    mean_absolute_error(Y_test, Y_pred),
)

print('R2 of Random Forest Regressor Model =', r2s)
print('Mean Squared Error of Random Forest Regressor Model =', mses)
print('Mean Absolute Error of Random Forest Regressor Model =', maes)

R2 of Random Forest Regressor Model = 0.9849033935126035
Mean Squared Error of Random Forest Regressor Model = 59.05235690828764
Mean Absolute Error of Random Forest Regressor Model = 5.149983579631845


In [8]:
# Persist the fitted model to disk; the list of written file names is the cell output.
joblib.dump(value=model, filename='RFR_Model.pkl')

['RFR_Model.pkl']

In [9]:
# Two hand-written example rows; the columns are listed in the same order as the
# feature columns used for training.
X_specific = pd.DataFrame(
    [
        [0, 63, 173, 79, 10, 18],
        [1, 23, 179, 75, 33, 30],
    ],
    columns=['GenderCode', 'Age', 'Height', 'Weight', 'ExerciseID', 'Duration'],
)

# Predict with the in-memory model.
pred_1 = model.predict(X_specific)
print(pred_1)

[110.91494508 149.77587923]


In [10]:
# Reload the saved model from disk and predict on the same example rows, so the
# printed values can be compared with the in-memory model's predictions.
local_model = joblib.load(filename='RFR_Model.pkl')
pred_2 = local_model.predict(X_specific)
print(pred_2)

[110.91494508 149.77587923]
