In [38]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [39]:
# Read fitness.csv (path is relative to the kernel's working directory)
# into a DataFrame.
df = pd.read_csv("fitness.csv")

In [53]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,User ID,Age,Activity Level (Steps),Sleep (round off Hours),Heart Rate (BPM),Weight (kg),Height(cm),Fitness Goals (Steps),Unnamed: 9,Unnamed: 10,Gender_Female,Gender_Male
0,1,23,4853,2,146,75,240,17318,,,1.0,0.0
1,2,36,21409,10,132,117,215,25660,,,1.0,0.0
2,3,33,1290,8,76,72,238,6842,,,1.0,0.0
3,4,19,30318,4,62,78,136,29678,,,1.0,0.0
4,5,30,33151,10,126,58,145,34157,,Gender,1.0,0.0


In [40]:
# One-hot encode the categorical "Gender" column and replace it in `df` with
# one indicator column per category.
# NOTE: `df` is reassigned here and "Gender" is dropped, so re-running this
# cell without reloading the data raises KeyError.
encoder = OneHotEncoder()
gender_encoded = encoder.fit_transform(df[["Gender"]]).toarray()

# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer its replacement and fall back only on installs that predate it.
try:
    gender_columns = encoder.get_feature_names_out(["Gender"])
except AttributeError:
    gender_columns = encoder.get_feature_names(["Gender"])

gender_df = pd.DataFrame(
    gender_encoded,
    columns=gender_columns,
    index=df.index,  # align on df's own labels so concat cannot create NaN rows
)
df = pd.concat([df, gender_df], axis=1)
df = df.drop("Gender", axis=1)




In [42]:
# Build the model inputs (X) and the regression target (y) from the frame.
X = df[
    [
        "Age",
        "Activity Level (Steps)",
        "Sleep (round off Hours)",
        "Heart Rate (BPM)",
        "Weight (kg)",
        "Height(cm)",
    ]
]
y = df["Fitness Goals (Steps)"]

In [43]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [44]:
# Train the linear regression baseline on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(X_train, y_train)
lin_reg

LinearRegression()

In [45]:
# Train a 100-tree random forest on the training split (seeded for
# reproducibility). `fit` returns the estimator itself, so the calls are chained.
rf_reg = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
rf_reg

RandomForestRegressor(random_state=42)

In [46]:
# Score the held-out rows with both fitted models (linear first, then forest).
lin_reg_pred, rf_reg_pred = (
    fitted.predict(X_test) for fitted in (lin_reg, rf_reg)
)

In [47]:
# Mean squared error of each model's predictions against the held-out targets.
lin_reg_mse, rf_reg_mse = (
    mean_squared_error(y_test, predicted)
    for predicted in (lin_reg_pred, rf_reg_pred)
)

In [48]:
# Keep whichever model has the strictly lower test MSE; a tie goes to the
# random forest.
final_model = lin_reg if lin_reg_mse < rf_reg_mse else rf_reg

In [49]:
# Persist the selected model to disk so it can be reloaded for later predictions.
# joblib is imported with the other dependencies in the notebook's import cell
# rather than here, so a fresh "Restart & Run All" surfaces every dependency up front.
joblib.dump(final_model, "final_model.pkl")

['final_model.pkl']

In [51]:
# Reload the persisted model and predict the fitness goal for one new sample.
# NOTE: joblib.load unpickles arbitrary Python objects; only load files that
# were produced by this notebook or come from a trusted source.
loaded_model = joblib.load("final_model.pkl")

# The models were fitted on a DataFrame, so pass the sample as a DataFrame with
# the same column names in the same order. A bare ndarray makes scikit-learn
# warn that X has no valid feature names and hides what each number means.
new_data = pd.DataFrame(
    {
        "Age": [40],
        "Activity Level (Steps)": [10000],
        "Sleep (round off Hours)": [8],
        "Heart Rate (BPM)": [90],
        "Weight (kg)": [89],
        "Height(cm)": [200],
    }
)
new_fitness_goal = loaded_model.predict(new_data)
print("Predicted Fitness Goal:", new_fitness_goal[0])

Predicted Fitness Goal: 26172.693332790685


