<a href="https://colab.research.google.com/github/BRV12G/Final_year_Project/blob/main/random_forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, mean_squared_error, r2_score


In [None]:
# Load dataset from CSV
file_path = '/content/nutrition_dataset_with_fiber_water_intake.csv'  # Replace with the actual path to your dataset
data = pd.read_csv(file_path)

# Display the first few rows
print(data.head())

# Check for missing values
print(data.isnull().sum())


  Person ID  Gender  Age Occupation  Sleep Duration Quality of Sleep  \
0        P1  Female   50    Teacher               5        Excellent   
1        P2  Female   31  Scientist              10             Good   
2        P3    Male   36  Scientist              10        Excellent   
3        P4    Male   44    Manager               9             Fair   
4        P5    Male   64     Artist               6             Fair   

  Activity Level Stress Level  Weight (kg)  Height (cm)  ... Vitamin A (mcg)  \
0         Medium       Medium           65          159  ...             700   
1           High         High           60          164  ...             700   
2            Low          Low           82          183  ...             900   
3            Low         High           72          177  ...             900   
4            Low         High           67          163  ...             900   

   Vitamin C (mg)  Vitamin D (mcg)  Sodium (mg)  Potassium (mg)  \
0              75  

In [None]:
# Step 3.1: Clean Column Names
# Strip any whitespace and standardize column names
data.columns = data.columns.str.strip()
data.columns = data.columns.str.replace(' ', '_')  # Replace spaces with underscores
print("Column Names After Cleaning:", data.columns)

# Step 3.2: Define Input Features and Outputs
input_features = [
    'Gender', 'Age', 'Occupation', 'Sleep_Duration', 'Quality_of_Sleep',
    'Activity_Level', 'Stress_Level', 'Weight_(kg)', 'Height_(cm)',
    'Blood_Pressure_Category', 'Systolic', 'Diastolic', 'Heart_Rate', 'Daily_Steps'
]

classification_output = 'Health_Status'
regression_outputs = [
    'Calories_(kcal)', 'Carbohydrates_(g)', 'Proteins_(g)', 'Fats_(g)',
    'Vitamin_A_(mcg)', 'Vitamin_C_(mg)', 'Vitamin_D_(mcg)', 'Sodium_(mg)',
    'Potassium_(mg)', 'Magnesium_(mg)', 'Iron_(mg)', 'Zinc_(mg)',
    'Fiber_Intake_(g)', 'Water_Intake_(L)'
]

# Step 3.3: Encode Categorical Variables
# One-hot encoding for categorical variables
categorical_columns = ['Gender', 'Occupation', 'Blood_Pressure_Category']
data_encoded = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Display the resulting columns
print("Column Names After Encoding:", data_encoded.columns)

# Step 3.4: Extract Features and Targets
# Ensure all input feature names match the cleaned/encoded column names
X = data_encoded[[col for col in data_encoded.columns if col in input_features]]
y_class = data_encoded[classification_output]  # Classification target
y_regress = data_encoded[regression_outputs]  # Regression targets

# Display the shapes of X, y_class, and y_regress to confirm
print("Shape of X (Input Features):", X.shape)
print("Shape of y_class (Classification Output):", y_class.shape)
print("Shape of y_regress (Regression Outputs):", y_regress.shape)


Column Names After Cleaning: Index(['Person_ID', 'Gender', 'Age', 'Occupation', 'Sleep_Duration',
       'Quality_of_Sleep', 'Activity_Level', 'Stress_Level', 'Weight_(kg)',
       'Height_(cm)', 'Blood_Pressure_Category', 'Systolic', 'Diastolic',
       'Heart_Rate', 'Daily_Steps', 'Health_Status', 'BMI_Values', 'BMI_Class',
       'Calories_(kcal)', 'Carbohydrates_(g)', 'Proteins_(g)', 'Fats_(g)',
       'Vitamin_A_(mcg)', 'Vitamin_C_(mg)', 'Vitamin_D_(mcg)', 'Sodium_(mg)',
       'Potassium_(mg)', 'Magnesium_(mg)', 'Iron_(mg)', 'Zinc_(mg)',
       'Fiber_Intake_(g)', 'Water_Intake_(L)'],
      dtype='object')
Column Names After Encoding: Index(['Person_ID', 'Age', 'Sleep_Duration', 'Quality_of_Sleep',
       'Activity_Level', 'Stress_Level', 'Weight_(kg)', 'Height_(cm)',
       'Systolic', 'Diastolic', 'Heart_Rate', 'Daily_Steps', 'Health_Status',
       'BMI_Values', 'BMI_Class', 'Calories_(kcal)', 'Carbohydrates_(g)',
       'Proteins_(g)', 'Fats_(g)', 'Vitamin_A_(mcg)', 'Vitamin_

In [None]:
# Split data into train and test sets
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_class, test_size=0.2, random_state=42)
_, X_test_reg, _, y_regress_test = train_test_split(X, y_regress, test_size=0.2, random_state=42)  # Regression uses same X_test


In [None]:
# Step 1: Ensure input features are defined correctly
input_features = ['Gender', 'Occupation', 'Blood_Pressure_Category', 'Quality_of_Sleep', 'Activity_Level',
                  'Sleep_Duration', 'Stress_Level', 'Weight_(kg)', 'Height_(cm)', 'Systolic', 'Diastolic',
                  'Heart_Rate', 'Daily_Steps']

# Update column names based on the dataset you provided
classification_output = 'Health_Status'
regression_outputs = ['BMI_Values', 'Calories_(kcal)', 'Carbohydrates_(g)', 'Proteins_(g)', 'Fats_(g)',
                      'Vitamin_A_(mcg)', 'Vitamin_C_(mg)', 'Vitamin_D_(mcg)', 'Sodium_(mg)', 'Potassium_(mg)',
                      'Magnesium_(mg)', 'Iron_(mg)', 'Zinc_(mg)', 'Fiber_Intake_(g)', 'Water_Intake_(L)']

# Step 2: Extract features and target variables
X = data[input_features].copy()
y_class = data[classification_output]
y_regress = data[regression_outputs]

# Step 3: Encode categorical variables using one-hot encoding
categorical_columns = ['Gender', 'Occupation', 'Blood_Pressure_Category', 'Quality_of_Sleep', 'Activity_Level']
X = pd.get_dummies(X, columns=categorical_columns)

# Step 4: Check if there are any NaN values
if X.isnull().any().any():
    X = X.fillna(0)  # Replace NaN values with 0 (or handle appropriately)

# Step 5: Split data into training and testing sets
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X, y_class, test_size=0.2, random_state=42, stratify=y_class
)

X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X, y_regress, test_size=0.2, random_state=42
)

# Step 6: Train Random Forest models
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_class, y_train_class)

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train_reg, y_train_reg)

# Step 7: Evaluate models
y_pred_class = rf_classifier.predict(X_test_class)
print("\nClassification Report:")
print(classification_report(y_test_class, y_pred_class))

y_pred_regress = rf_regressor.predict(X_test_reg)

# Calculate RMSE and R² for regression
rmse = np.sqrt(mean_squared_error(y_test_reg, y_pred_regress))
r2 = r2_score(y_test_reg, y_pred_regress)

print("\nRegression Evaluation:")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R² Score: {r2}")


ValueError: could not convert string to float: 'Low'

In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Step 2: Load Dataset
from google.colab import files
uploaded = files.upload()  # Upload your CSV file

df = pd.read_csv(list(uploaded.keys())[0])

# Step 3: Preprocess Dataset
# Map categorical columns to numerical values
occupation_mapping = {occupation: idx for idx, occupation in enumerate(df['Occupation'].unique())}
quality_mapping = {'Excellent': 3, 'Good': 2, 'Fair': 1}
bp_mapping = {'Normal': 0, 'Prehypertension': 1, 'Hypertension': 2}
activity_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
stress_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

# Apply mappings
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
df['Occupation'] = df['Occupation'].map(occupation_mapping)
df['Quality of Sleep'] = df['Quality of Sleep'].map(quality_mapping)
df['Blood Pressure Category'] = df['Blood Pressure Category'].map(bp_mapping)
df['Activity Level'] = df['Activity Level'].map(activity_mapping)
df['Stress Level'] = df['Stress Level'].map(stress_mapping)

# Step 4: Separate Inputs and Outputs
X = df[['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)', 'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps']]
y_classifier = df['Health Status'].map({'Healthy': 1, 'Unhealthy': 0})
y_regression = df[['Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)', 'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)', 'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)']]

# Step 5: Train-Test Split
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_classifier, test_size=0.2, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X, y_regression, test_size=0.2, random_state=42)

# Step 6: Train Random Forest Classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_class_train)

# Step 7: Train Random Forest Regressor
reg = RandomForestRegressor(random_state=42)
reg.fit(X_reg_train, y_reg_train)

# Step 8: Test Classifier
class_pred = clf.predict(X_test)
accuracy = accuracy_score(y_class_test, class_pred)
print(f"Classifier Accuracy: {accuracy * 100:.2f}%")

# Step 9: Test Regressor
reg_pred = reg.predict(X_reg_test)
mse = mean_squared_error(y_reg_test, reg_pred)
print(f"Regressor Mean Squared Error: {mse:.2f}")

# Step 10: Predict Based on User Input
def user_input_predict():
    print("Enter the following details:")
    gender = int(input("Gender (0 for Male, 1 for Female): "))
    age = int(input("Age: "))
    occupation = input("Occupation: ")
    sleep_duration = int(input("Sleep Duration (hours): "))
    quality_of_sleep = input("Quality of Sleep (Excellent/Good/Fair): ")
    activity_level = input("Activity Level (Low/Medium/High): ")
    stress_level = input("Stress Level (Low/Medium/High): ")
    weight = float(input("Weight (kg): "))
    height = float(input("Height (cm): "))
    bp_category = input("Blood Pressure Category (Normal/Prehypertension/Hypertension): ")
    systolic = int(input("Systolic: "))
    diastolic = int(input("Diastolic: "))
    heart_rate = int(input("Heart Rate: "))
    daily_steps = int(input("Daily Steps: "))

    # Map input values to numerical representations
    occupation_num = occupation_mapping.get(occupation, -1)
    quality_num = quality_mapping.get(quality_of_sleep, -1)
    bp_num = bp_mapping.get(bp_category, -1)
    activity_num = activity_mapping.get(activity_level, -1)
    stress_num = stress_mapping.get(stress_level, -1)

    # Create input array
    input_data = np.array([[gender, age, occupation_num, sleep_duration, quality_num, activity_num, stress_num,
                             weight, height, bp_num, systolic, diastolic, heart_rate, daily_steps]])

    # Predict health status
    health_status = clf.predict(input_data)[0]
    print("Health Status:", "Healthy" if health_status == 1 else "Unhealthy")

    # Predict nutritional needs if unhealthy
    if health_status == 0:
        reg_output = reg.predict(input_data)[0]
        output_columns = y_regression.columns
        print("Predicted Nutritional Needs:")
        for col, value in zip(output_columns, reg_output):
            print(f"{col}: {value:.2f}")

# Step 11: Run User Prediction Function
user_input_predict()


Saving nutrition_dataset_with_fiber_water_intake.csv to nutrition_dataset_with_fiber_water_intake (5).csv
Classifier Accuracy: 80.42%
Regressor Mean Squared Error: 69.70
Enter the following details:
Gender (0 for Male, 1 for Female): 1
Age: 42
Occupation: Manager
Sleep Duration (hours): 8
Quality of Sleep (Excellent/Good/Fair): Fair
Activity Level (Low/Medium/High): Low
Stress Level (Low/Medium/High): Low
Weight (kg): 65
Height (cm): 169
Blood Pressure Category (Normal/Prehypertension/Hypertension): Normal
Systolic: 99
Diastolic: 76
Heart Rate: 80
Daily Steps: 6025
Health Status: Healthy




In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Step 2: Load Dataset
from google.colab import files
uploaded = files.upload()  # Upload your CSV file

df = pd.read_csv(list(uploaded.keys())[0])

# Step 3: Preprocess Dataset
# Map categorical columns to numerical values
occupation_mapping = {occupation: idx for idx, occupation in enumerate(df['Occupation'].unique())}
quality_mapping = {'Excellent': 3, 'Good': 2, 'Fair': 1}
bp_mapping = {'Normal': 0, 'Prehypertension': 1, 'Hypertension': 2}
activity_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
stress_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

# Apply mappings
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
df['Occupation'] = df['Occupation'].map(occupation_mapping)
df['Quality of Sleep'] = df['Quality of Sleep'].map(quality_mapping)
df['Blood Pressure Category'] = df['Blood Pressure Category'].map(bp_mapping)
df['Activity Level'] = df['Activity Level'].map(activity_mapping)
df['Stress Level'] = df['Stress Level'].map(stress_mapping)

# Step 4: Separate Inputs and Outputs
X = df[['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)', 'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps']]
y_classifier = df['Health Status'].map({'Healthy': 1, 'Unhealthy': 0})
y_regression = df[['Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)', 'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)', 'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)']]

# Step 5: Train-Test Split
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_classifier, test_size=0.2, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X, y_regression, test_size=0.2, random_state=42)

# Step 6: Train Random Forest Classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_class_train)

# Step 7: Train Random Forest Regressor
reg = RandomForestRegressor(random_state=42)
reg.fit(X_reg_train, y_reg_train)

# Step 8: Test Classifier
class_pred = clf.predict(X_test)
accuracy = accuracy_score(y_class_test, class_pred)
print(f"Classifier Accuracy: {accuracy * 100:.2f}%")

# Step 9: Test Regressor
reg_pred = reg.predict(X_reg_test)
mse = mean_squared_error(y_reg_test, reg_pred)
print(f"Regressor Mean Squared Error: {mse:.2f}")

# Step 10: Predict Based on User Input
def user_input_predict():
    print("Enter the following details:")
    gender = int(input("Gender (0 for Male, 1 for Female): "))
    age = int(input("Age: "))
    occupation = input("Occupation: ")
    sleep_duration = int(input("Sleep Duration (hours): "))
    quality_of_sleep = input("Quality of Sleep (Excellent/Good/Fair): ")
    activity_level = input("Activity Level (Low/Medium/High): ")
    stress_level = input("Stress Level (Low/Medium/High): ")
    weight = float(input("Weight (kg): "))
    height = float(input("Height (cm): "))
    bp_category = input("Blood Pressure Category (Normal/Prehypertension/Hypertension): ")
    systolic = int(input("Systolic: "))
    diastolic = int(input("Diastolic: "))
    heart_rate = int(input("Heart Rate: "))
    daily_steps = int(input("Daily Steps: "))

    # Calculate BMI and classify
    bmi_value = weight / ((height / 100) ** 2)
    if bmi_value < 18.5:
        bmi_class = "Underweight"
    elif 18.5 <= bmi_value < 24.9:
        bmi_class = "Normal"
    elif 25 <= bmi_value < 29.9:
        bmi_class = "Overweight"
    else:
        bmi_class = "Obese"

    # Suggest healthy weight range
    lower_healthy_weight = 18.5 * ((height / 100) ** 2)
    upper_healthy_weight = 24.9 * ((height / 100) ** 2)
    weight_to_lose = max(0, weight - upper_healthy_weight)
    target_bmi = 24.9 if weight_to_lose > 0 else 18.5

    print(f"Your BMI: {bmi_value:.2f}")
    print(f"BMI Class: {bmi_class}")
    print(f"Healthy weight range for your height: {lower_healthy_weight:.2f} kg to {upper_healthy_weight:.2f} kg")
    if weight_to_lose > 0:
        print(f"You need to lose {weight_to_lose:.2f} kg to achieve a BMI of {target_bmi}.")

    # Map input values to numerical representations
    occupation_num = occupation_mapping.get(occupation, -1)
    quality_num = quality_mapping.get(quality_of_sleep, -1)
    bp_num = bp_mapping.get(bp_category, -1)
    activity_num = activity_mapping.get(activity_level, -1)
    stress_num = stress_mapping.get(stress_level, -1)

    # Create input array
    input_data = np.array([[gender, age, occupation_num, sleep_duration, quality_num, activity_num, stress_num,
                             weight, height, bp_num, systolic, diastolic, heart_rate, daily_steps]])

    # Predict health status
    health_status = clf.predict(input_data)[0]
    print("Health Status:", "Healthy" if health_status == 1 else "Unhealthy")

    # Predict nutritional needs if unhealthy
    if health_status == 0:
        reg_output = reg.predict(input_data)[0]
        output_columns = y_regression.columns
        print("Predicted Nutritional Needs:")
        for col, value in zip(output_columns, reg_output):
            print(f"{col}: {value:.2f}")

# Step 11: Run User Prediction Function
user_input_predict()


Saving nutrition_dataset_with_fiber_water_intake.csv to nutrition_dataset_with_fiber_water_intake (4).csv
Classifier Accuracy: 80.42%
Regressor Mean Squared Error: 69.70
Enter the following details:
Gender (0 for Male, 1 for Female): 1
Age: 42
Occupation: Manager
Sleep Duration (hours): 8
Quality of Sleep (Excellent/Good/Fair): Fair
Activity Level (Low/Medium/High): Low
Stress Level (Low/Medium/High): Low
Weight (kg): 65
Height (cm): 169
Blood Pressure Category (Normal/Prehypertension/Hypertension): Normal
Systolic: 99
Diastolic: 76
Heart Rate: 80
Daily Steps: 6025
Your BMI: 22.76
BMI Class: Normal
Healthy weight range for your height: 52.84 kg to 71.12 kg
Health Status: Healthy




In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Step 2: Load Dataset
from google.colab import files
uploaded = files.upload()  # Upload your CSV file

df = pd.read_csv(list(uploaded.keys())[0])

# Step 3: Preprocess Dataset
# Map categorical columns to numerical values
occupation_mapping = {occupation: idx for idx, occupation in enumerate(df['Occupation'].unique())}
quality_mapping = {'Excellent': 3, 'Good': 2, 'Fair': 1}
bp_mapping = {'Normal': 0, 'Prehypertension': 1, 'Hypertension': 2}
activity_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
stress_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

# Apply mappings
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
df['Occupation'] = df['Occupation'].map(occupation_mapping)
df['Quality of Sleep'] = df['Quality of Sleep'].map(quality_mapping)
df['Blood Pressure Category'] = df['Blood Pressure Category'].map(bp_mapping)
df['Activity Level'] = df['Activity Level'].map(activity_mapping)
df['Stress Level'] = df['Stress Level'].map(stress_mapping)

# Step 4: Separate Inputs and Outputs
X = df[['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)', 'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps']]
y_classifier = df['Health Status'].map({'Healthy': 1, 'Unhealthy': 0})
y_regression = df[['Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)', 'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)', 'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)']]

# Step 5: Train-Test Split
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_classifier, test_size=0.2, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X, y_regression, test_size=0.2, random_state=42)

# Step 6: Train Random Forest Classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_class_train)

# Step 7: Train Random Forest Regressor
reg = RandomForestRegressor(random_state=42)
reg.fit(X_reg_train, y_reg_train)

# Step 8: Test Classifier
class_pred = clf.predict(X_test)
accuracy = accuracy_score(y_class_test, class_pred)
print(f"Classifier Accuracy: {accuracy * 100:.2f}%")

# Step 9: Test Regressor
reg_pred = reg.predict(X_reg_test)
mse = mean_squared_error(y_reg_test, reg_pred)
print(f"Regressor Mean Squared Error: {mse:.2f}")

# Step 10: Predict Based on User Input
def user_input_predict():
    print("Enter the following details:")
    gender = int(input("Gender (0 for Male, 1 for Female): "))
    age = int(input("Age: "))
    occupation = input("Occupation: ")
    sleep_duration = int(input("Sleep Duration (hours): "))
    quality_of_sleep = input("Quality of Sleep (Excellent/Good/Fair): ")
    activity_level = input("Activity Level (Low/Medium/High): ")
    stress_level = input("Stress Level (Low/Medium/High): ")
    weight = float(input("Weight (kg): "))
    height = float(input("Height (cm): "))
    bp_category = input("Blood Pressure Category (Normal/Prehypertension/Hypertension): ")
    systolic = int(input("Systolic: "))
    diastolic = int(input("Diastolic: "))
    heart_rate = int(input("Heart Rate: "))
    daily_steps = int(input("Daily Steps: "))

    # Calculate BMI and classify
    bmi_value = weight / ((height / 100) ** 2)
    if bmi_value < 18.5:
        bmi_class = "Underweight"
    elif 18.5 <= bmi_value < 24.9:
        bmi_class = "Normal"
    elif 25 <= bmi_value < 29.9:
        bmi_class = "Overweight"
    else:
        bmi_class = "Obese"

    # Suggest healthy weight range
    lower_healthy_weight = 18.5 * ((height / 100) ** 2)
    upper_healthy_weight = 24.9 * ((height / 100) ** 2)
    weight_to_lose = max(0, weight - upper_healthy_weight)
    target_bmi = 24.9 if weight_to_lose > 0 else 18.5

    # Display BMI metrics
    print(f"\n--- BMI and Weight Metrics ---")
    print(f"Your BMI: {bmi_value:.2f}")
    print(f"BMI Class: {bmi_class}")
    print(f"Healthy weight range for your height: {lower_healthy_weight:.2f} kg to {upper_healthy_weight:.2f} kg")
    if weight_to_lose > 0:
        print(f"You need to lose {weight_to_lose:.2f} kg to achieve a BMI of {target_bmi}.")
    else:
        print(f"You are within the healthy weight range. Maintain your current weight!")

    # Map input values to numerical representations
    occupation_num = occupation_mapping.get(occupation, -1)
    quality_num = quality_mapping.get(quality_of_sleep, -1)
    bp_num = bp_mapping.get(bp_category, -1)
    activity_num = activity_mapping.get(activity_level, -1)
    stress_num = stress_mapping.get(stress_level, -1)

    # Create input array
    input_data = np.array([[gender, age, occupation_num, sleep_duration, quality_num, activity_num, stress_num,
                             weight, height, bp_num, systolic, diastolic, heart_rate, daily_steps]])

    # Predict health status
    health_status = clf.predict(input_data)[0]
    print("\n--- Health Status ---")
    print("Health Status:", "Healthy" if health_status == 1 else "Unhealthy")

    # Predict nutritional needs
    reg_output = reg.predict(input_data)[0]
    output_columns = y_regression.columns
    print("\n--- Nutritional Needs ---")
    for col, value in zip(output_columns, reg_output):
        print(f"{col}: {value:.2f}")

# Step 11: Run User Prediction Function
user_input_predict()


KeyboardInterrupt: 

In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE

# Step 2: Load Dataset
from google.colab import files
uploaded = files.upload()  # Upload your CSV file

df = pd.read_csv(list(uploaded.keys())[0])

# Step 3: Preprocess Dataset
# Separate categorical and numerical columns
categorical_columns = df.select_dtypes(include=['object']).columns
numerical_columns = df.select_dtypes(include=[np.number]).columns

# Handle missing values for categorical and numerical columns separately
categorical_imputer = SimpleImputer(strategy='most_frequent')  # Use most frequent for categorical data
numerical_imputer = SimpleImputer(strategy='mean')  # Use mean for numerical data

# Apply imputers
df[categorical_columns] = categorical_imputer.fit_transform(df[categorical_columns])
df[numerical_columns] = numerical_imputer.fit_transform(df[numerical_columns])

# Feature Engineering - Calculate BMI
df['BMI'] = df['Weight (kg)'] / ((df['Height (cm)'] / 100) ** 2)

# Classify BMI
df['BMI Class'] = pd.cut(df['BMI'], bins=[0, 18.5, 24.9, 29.9, np.inf], labels=['Underweight', 'Normal', 'Overweight', 'Obese'])

# Map categorical columns to numerical values
occupation_mapping = {occupation: idx for idx, occupation in enumerate(df['Occupation'].unique())}
quality_mapping = {'Excellent': 3, 'Good': 2, 'Fair': 1}
bp_mapping = {'Normal': 0, 'Prehypertension': 1, 'Hypertension': 2}
activity_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
stress_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
bmi_class_mapping = {'Underweight': 0, 'Normal': 1, 'Overweight': 2, 'Obese': 3}

# Apply mappings
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
df['Occupation'] = df['Occupation'].map(occupation_mapping)
df['Quality of Sleep'] = df['Quality of Sleep'].map(quality_mapping)
df['Blood Pressure Category'] = df['Blood Pressure Category'].map(bp_mapping)
df['Activity Level'] = df['Activity Level'].map(activity_mapping)
df['Stress Level'] = df['Stress Level'].map(stress_mapping)
df['BMI Class'] = df['BMI Class'].map(bmi_class_mapping)

# Step 4: Separate Inputs and Outputs
X = df[['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)', 'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']]
y_classifier = df['Health Status'].map({'Healthy': 1, 'Unhealthy': 0})
y_regression = df[['Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)', 'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)', 'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)']]

# Step 5: Train-Test Split
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_classifier, test_size=0.2, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X, y_regression, test_size=0.2, random_state=42)

# Step 6: Handle Class Imbalance (if any)
# Check if the target classes are imbalanced and use SMOTE or undersampling if necessary
if y_class_train.value_counts().min() / y_class_train.value_counts().max() < 0.5:
    X_train, y_class_train = resample(X_train, y_class_train, replace=True, n_samples=len(X_train), random_state=42)

# Step 7: Scale Continuous Features (Standardization)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']])
X_test_scaled = scaler.transform(X_test[['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']])

# Replace the continuous features in the datasets with their scaled versions
X_train[['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']] = X_train_scaled
X_test[['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']] = X_test_scaled

# Step 8: Train Random Forest Classifier with Hyperparameter Tuning
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

clf = RandomForestClassifier(random_state=42)
clf_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')
clf_search.fit(X_train, y_class_train)

# Best Hyperparameters
print(f"Best parameters for classifier: {clf_search.best_params_}")

# Step 9: Train Random Forest Regressor with Hyperparameter Tuning
reg_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

reg = RandomForestRegressor(random_state=42)
reg_search = GridSearchCV(reg, reg_param_grid, cv=5, scoring='neg_mean_squared_error')
reg_search.fit(X_reg_train, y_reg_train)

# Best Hyperparameters
print(f"Best parameters for regressor: {reg_search.best_params_}")

# Step 10: Evaluate Classifier Performance
class_pred = clf_search.best_estimator_.predict(X_test)
accuracy = accuracy_score(y_class_test, class_pred)
print(f"Classifier Accuracy: {accuracy * 100:.2f}%")

# Step 11: Evaluate Regressor Performance
reg_pred = reg_search.best_estimator_.predict(X_reg_test)
mse = mean_squared_error(y_reg_test, reg_pred)
print(f"Regressor Mean Squared Error: {mse:.2f}")

# Step 12: Predict Based on User Input
def user_input_predict():
    print("Enter the following details:")
    gender = int(input("Gender (0 for Male, 1 for Female): "))
    age = int(input("Age: "))
    occupation = input("Occupation: ")
    sleep_duration = int(input("Sleep Duration (hours): "))
    quality_of_sleep = input("Quality of Sleep (Excellent/Good/Fair): ")
    activity_level = input("Activity Level (Low/Medium/High): ")
    stress_level = input("Stress Level (Low/Medium/High): ")
    weight = float(input("Weight (kg): "))
    height = float(input("Height (cm): "))
    bp_category = input("Blood Pressure Category (Normal/Prehypertension/Hypertension): ")
    systolic = int(input("Systolic: "))
    diastolic = int(input("Diastolic: "))
    heart_rate = int(input("Heart Rate: "))
    daily_steps = int(input("Daily Steps: "))

    # Calculate BMI and classify
    bmi_value = weight / ((height / 100) ** 2)
    if bmi_value < 18.5:
        bmi_class = "Underweight"
    elif 18.5 <= bmi_value < 24.9:
        bmi_class = "Normal"
    elif 25 <= bmi_value < 29.9:
        bmi_class = "Overweight"
    else:
        bmi_class = "Obese"

    print(f"Your BMI: {bmi_value:.2f} ({bmi_class})")

    # Feature scaling
    input_data = pd.DataFrame([[gender, age, occupation, sleep_duration, quality_of_sleep, activity_level, stress_level, weight, height, bp_category, systolic, diastolic, heart_rate, daily_steps, bmi_value]],
                              columns=['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)', 'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI'])
    input_data_scaled = scaler.transform(input_data[['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']])
    input_data[['Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI']] = input_data_scaled

    # Make the prediction for Health Status
    predicted_class = clf_search.best_estimator_.predict(input_data)
    if predicted_class == 1:
        print("You are predicted to be Healthy!")
    else:
        print("You are predicted to be Unhealthy!")

# Call the user input function to predict health status
user_input_predict()


Saving nutrition_dataset_with_fiber_water_intake.csv to nutrition_dataset_with_fiber_water_intake (13).csv


270 fits failed out of a total of 810.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
270 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sk

Best parameters for classifier: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}


270 fits failed out of a total of 810.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
270 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sk

Best parameters for regressor: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Classifier Accuracy: 80.77%
Regressor Mean Squared Error: 347.54
Enter the following details:
Gender (0 for Male, 1 for Female): 1
Age: 45
Occupation: Engineer
Sleep Duration (hours): 2
Quality of Sleep (Excellent/Good/Fair): Excellennt
Activity Level (Low/Medium/High): High
Stress Level (Low/Medium/High): Low
Weight (kg): 78
Height (cm): 189
Blood Pressure Category (Normal/Prehypertension/Hypertension): Normal
Systolic: 120
Diastolic: 80
Heart Rate: 67
Daily Steps: 3000
Your BMI: 21.84 (Normal)


ValueError: could not convert string to float: 'Engineer'