In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the recommendation dataset from the CSV in the working directory.
df = pd.read_csv("GymAndDietRecommendationCleaned.csv")

# Predictor columns, in the order they are handed to the model.
feature_columns = [
    'Sex', 'Hypertension', 'Diabetes', 'Level',
    'Age', 'Height', 'Weight',
]
X = df[feature_columns]
# Target column the classifier is trained to predict.
y = df['Exercises_Label']

# Hold out 20% of the rows for testing. The split is seeded for repeatability
# and stratified on the target so both partitions keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Handle class imbalance with "balanced" class weights:
#     weight(c) = n_samples / (n_classes * count(c))
# Labels and their counts come from a single np.unique call so each weight is
# paired with its own class. np.bincount would instead return one slot per
# integer in 0..max(label), which misaligns with np.unique (and divides by
# zero) whenever the labels are not exactly 0..K-1.
train_labels, train_counts = np.unique(y_train, return_counts=True)
balanced_weights = len(y_train) / (len(train_labels) * train_counts)
class_weights = dict(zip(train_labels.tolist(), balanced_weights.tolist()))
# A label present in y but absent from the training split gets a neutral 1.0.
class_weights = {label: class_weights.get(label, 1.0) for label in np.unique(y).tolist()}

In [3]:
# Multiclass XGBoost classifier evaluated with multiclass log-loss; seeded.
# `use_label_encoder` is intentionally not passed: the installed XGBoost
# ignores it and only emits a "Parameters ... are not used" warning.
xgb = XGBClassifier(eval_metric='mlogloss', random_state=42)

# One weight per training row, looked up from the row's class (1.0 when the
# class has no entry). The loop variable is `label` so it cannot be mistaken
# for the module-level target `y`.
train_sample_weights = [class_weights.get(label, 1.0) for label in y_train]
xgb.fit(X_train, y_train, sample_weight=train_sample_weights)
xgb_pred = xgb.predict(X_test)

# Held-out performance: overall accuracy plus per-class precision/recall/F1.
print("XGBoost Results:")
print("Accuracy:", accuracy_score(y_test, xgb_pred))
print("\nClassification Report:\n", classification_report(y_test, xgb_pred, zero_division=0))

# Feature importances reported by the fitted model, highest first.
xgb_importance = pd.DataFrame({'Feature': X.columns, 'Importance': xgb.feature_importances_}).sort_values(by='Importance', ascending=False)
print("\nXGBoost Feature Importance:\n", xgb_importance)
# Pairwise correlation between the predictor columns (full dataset, not just the training split).
print("\nFeature Correlation:\n" , X.corr())


Parameters: { "use_label_encoder" } are not used.



XGBoost Results:
Accuracy: 0.9979406919275123

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       685
           1       1.00      1.00      1.00       627
           2       1.00      0.99      1.00       560
           3       0.99      1.00      1.00       278
           4       0.99      1.00      1.00       278

    accuracy                           1.00      2428
   macro avg       1.00      1.00      1.00      2428
weighted avg       1.00      1.00      1.00      2428


XGBoost Feature Importance:
         Feature  Importance
2      Diabetes    0.449656
1  Hypertension    0.290078
3         Level    0.256876
6        Weight    0.001310
4           Age    0.001020
5        Height    0.000886
0           Sex    0.000173

Feature Correlation:
                    Sex  Hypertension  Diabetes     Level       Age    Height  \
Sex           1.000000      0.066102  0.065967 -0.007114  0.011540 -0.050633   
Hyp