<a href="https://colab.research.google.com/github/BRV12G/Final_year_Project/blob/main/XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from xgboost import XGBRegressor, XGBClassifier

# Load dataset
data = pd.read_csv('/content/nutrition_dataset_with_fiber_water_intake.csv')

# Drop 'Person ID' if it exists
data = data.drop(columns=['Person ID'], errors='ignore')

# Separate inputs and outputs
inputs = data[['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep',
               'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)',
               'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps']]

classification_outputs = data[['Health Status', 'BMI Class']]
regression_outputs = data[['BMI Values', 'Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)',
                            'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)',
                            'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)',
                            'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)']]

# Preprocessing for inputs
categorical_features = ['Gender', 'Occupation', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Blood Pressure Category']
numerical_features = ['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps']

categorical_transformer = Pipeline(steps=[('encoder', OneHotEncoder(handle_unknown='ignore'))])
numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Encode classification outputs
le_health_status = LabelEncoder()
classification_outputs['Health Status'] = le_health_status.fit_transform(classification_outputs['Health Status'])
le_bmi_class = LabelEncoder()
classification_outputs['BMI Class'] = le_bmi_class.fit_transform(classification_outputs['BMI Class'])

# Train/test split
X_train, X_test, y_train_reg, y_test_reg, y_train_clf, y_test_clf = train_test_split(
    inputs, regression_outputs, classification_outputs, test_size=0.2, random_state=42)

# Regression model
regressor = MultiOutputRegressor(XGBRegressor(n_jobs=-1))

# Classification model
classifier = MultiOutputRegressor(XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_jobs=-1))

# Combine preprocessing and model into a pipeline
regressor_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', regressor)])
classifier_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', classifier)])

# Fit models
regressor_pipeline.fit(X_train, y_train_reg)
classifier_pipeline.fit(X_train, y_train_clf)

# Predictions for evaluation
y_pred_reg = regressor_pipeline.predict(X_test)
y_pred_clf = classifier_pipeline.predict(X_test)

# Evaluate Regression Model
regression_mse = mean_squared_error(y_test_reg, y_pred_reg, multioutput='raw_values')
print(f"Regression MSE for each output: {regression_mse}")

# Evaluate Classification Model
classification_accuracies = []
for i, col in enumerate(y_test_clf.columns):
    accuracy = accuracy_score(y_test_clf.iloc[:, i], np.round(y_pred_clf[:, i]))
    classification_accuracies.append(accuracy)
    print(f"Accuracy for {col}: {accuracy}")

average_classification_accuracy = np.mean(classification_accuracies)
print(f"Average Classification Accuracy: {average_classification_accuracy}")

# Function to process and predict from a runtime sample
def predict_personalized_outputs():
    print("Enter details for the test sample:")
    sample = {
        'Gender': input("Gender (Male/Female): "),
        'Age': int(input("Age: ")),
        'Occupation': input("Occupation (Teacher/Scientist/Manager/Artist/Engineer/Doctor): "),
        'Sleep Duration': int(input("Sleep Duration (hours): ")),
        'Quality of Sleep': input("Quality of Sleep (Excellent/Good/Fair/Poor): "),
        'Activity Level': input("Activity Level (Low/Medium/High): "),
        'Stress Level': input("Stress Level (Low/Medium/High): "),
        'Weight (kg)': float(input("Weight (kg): ")),
        'Height (cm)': float(input("Height (cm): ")),
        'Blood Pressure Category': input("Blood Pressure Category (Normal/Prehypertension/Hypertension): "),
        'Systolic': int(input("Systolic BP: ")),
        'Diastolic': int(input("Diastolic BP: ")),
        'Heart Rate': int(input("Heart Rate: ")),
        'Daily Steps': int(input("Daily Steps: "))
    }

    # Convert input sample into a DataFrame
    sample_df = pd.DataFrame([sample])

    # Predict outputs
    reg_predictions = regressor_pipeline.predict(sample_df)
    clf_predictions = classifier_pipeline.predict(sample_df)

    # Decode classification outputs
    health_status = le_health_status.inverse_transform([int(np.round(clf_predictions[0][0]))])[0]
    bmi_class = le_bmi_class.inverse_transform([int(np.round(clf_predictions[0][1]))])[0]

    print("\nPersonalized Outputs:")
    print(f"Health Status: {health_status}")
    print(f"BMI Class: {bmi_class}")
    print(f"BMI Value: {reg_predictions[0][0]:.2f}")
    print(f"Calories (kcal): {reg_predictions[0][1]:.2f}")
    print(f"Carbohydrates (g): {reg_predictions[0][2]:.2f}")
    print(f"Proteins (g): {reg_predictions[0][3]:.2f}")
    print(f"Fats (g): {reg_predictions[0][4]:.2f}")
    print(f"Vitamin A (mcg): {reg_predictions[0][5]:.2f}")
    print(f"Vitamin C (mg): {reg_predictions[0][6]:.2f}")
    print(f"Vitamin D (mcg): {reg_predictions[0][7]:.2f}")
    print(f"Sodium (mg): {reg_predictions[0][8]:.2f}")
    print(f"Potassium (mg): {reg_predictions[0][9]:.2f}")
    print(f"Magnesium (mg): {reg_predictions[0][10]:.2f}")
    print(f"Iron (mg): {reg_predictions[0][11]:.2f}")
    print(f"Zinc (mg): {reg_predictions[0][12]:.2f}")
    print(f"Fiber Intake (g): {reg_predictions[0][13]:.2f}")
    print(f"Water Intake (L): {reg_predictions[0][14]:.2f}")

# Call the function
predict_personalized_outputs()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  classification_outputs['Health Status'] = le_health_status.fit_transform(classification_outputs['Health Status'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  classification_outputs['BMI Class'] = le_bmi_class.fit_transform(classification_outputs['BMI Class'])
Parameters: { "use_label_encoder" } are not used.



Regression MSE for each output: [3.1750319e-03 1.9335640e+02 4.5690670e+00 6.0417324e-01 1.8647358e-01
 3.7252903e-09 1.6344712e-10 1.7826096e-10 1.4901161e-08 6.0141083e-08
 2.1280722e-09 5.1290954e-11 3.2741809e-11 2.8992536e+00 2.1833363e-03]
Accuracy for Health Status: 0.79675
Accuracy for BMI Class: 0.99675
Average Classification Accuracy: 0.8967499999999999
Enter details for the test sample:
Gender (Male/Female): Female
Age: 21
Occupation (Teacher/Scientist/Manager/Artist/Engineer/Doctor): Teacher
Sleep Duration (hours): 4
Quality of Sleep (Excellent/Good/Fair/Poor): Poor
Activity Level (Low/Medium/High): Low
Stress Level (Low/Medium/High): High
Weight (kg): 50
Height (cm): 157
Blood Pressure Category (Normal/Prehypertension/Hypertension): Hypertension
Systolic BP: 145
Diastolic BP: 96
Heart Rate: 102
Daily Steps: 5000

Personalized Outputs:
Health Status: Unhealthy
BMI Class: Normal
BMI Value: 20.32
Calories (kcal): 1457.55
Carbohydrates (g): 180.83
Proteins (g): 65.76
Fats (g):