# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the exercise features and the calorie targets from their CSV files.
calories = pd.read_csv('/content/calories.csv')
exercise_data = pd.read_csv('/content/exercise.csv')

# Attach the target column to the features.  When the targets file carries
# User_ID, join on it so every row gets its own label even if the two files are
# ordered differently; a purely positional concat would silently mislabel rows.
# An inner merge keeps the left (exercise) row order, so for files that are
# already aligned the result matches the positional concat.
if 'User_ID' in calories.columns:
    calories_data = exercise_data.merge(
        calories[['User_ID', 'Calories']],
        on='User_ID',
        how='inner',
        validate='one_to_one',  # fail loudly on duplicated ids
    )
else:
    # No join key available: fall back to positional pairing, but refuse
    # inputs whose lengths differ (that would introduce NaN labels).
    if len(exercise_data) != len(calories):
        raise ValueError(
            "exercise.csv and calories.csv have different row counts and "
            "no User_ID column to join on"
        )
    calories_data = pd.concat([exercise_data, calories['Calories']], axis=1)

# Preprocessing: encode Gender numerically.  An explicit map avoids relying on
# the object->int downcasting of DataFrame.replace (deprecated in pandas 2.2),
# which would otherwise leave an object column the model cannot consume.
gender_codes = {'male': 0, 'female': 1}
gender_raw = calories_data['Gender']
calories_data['Gender'] = gender_raw.map(gender_codes)

# Values that were present but not covered by the mapping become NaN after
# map(); report them instead of feeding them to the model as "missing".
unmapped_mask = calories_data['Gender'].isna() & gender_raw.notna()
if unmapped_mask.any():
    raise ValueError(
        f"Unexpected Gender values: {sorted(map(str, gender_raw[unmapped_mask].unique()))}"
    )

# Features are every column except the identifier and the target.
X = calories_data.drop(columns=['User_ID', 'Calories'])
Y = calories_data['Calories']

# Splitting the data into training and test sets (80/20, fixed seed)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

# Loading and training the model (default hyper-parameters)
model = XGBRegressor()
model.fit(X_train, Y_train)

# Predicting the test set results
test_data_prediction = model.predict(X_test)

# Calculate the Mean Absolute Error
mae = metrics.mean_absolute_error(Y_test, test_data_prediction)
print("Mean Absolute Error =", mae)

# Convert the regression task into a binary one for the classification
# metrics: a sample is class 1 when its calorie value is above the threshold.
# NOTE: the threshold is the mean of the test targets, so the class boundary
# is specific to this particular test split.
threshold = Y_test.mean()
y_pred_classes = np.where(test_data_prediction > threshold, 1, 0)
y_true_classes = np.where(Y_test > threshold, 1, 0)

# Calculate classification metrics on the thresholded labels
accuracy = accuracy_score(y_true_classes, y_pred_classes)
precision = precision_score(y_true_classes, y_pred_classes)
recall = recall_score(y_true_classes, y_pred_classes)
f1 = f1_score(y_true_classes, y_pred_classes)

print("Accuracy =", accuracy)
print("Precision =", precision)
print("Recall =", recall)
print("F1 Score =", f1)


# Output:
# Mean Absolute Error = 1.4833678883314132
# Accuracy = 0.9943333333333333
# Precision = 0.9918759231905465
# Recall = 0.9955522609340252
# F1 Score = 0.9937106918238994
