<a href="https://colab.research.google.com/github/BRV12G/Final_year_Project/blob/main/random_on_old_diet_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset. The path is defined once and actually used for the read, so the
# variable and the file that gets loaded can no longer disagree.
file_path = '/content/updated_dataset_with_nutritional_needs.csv'
data = pd.read_csv(file_path)

# Display the first few rows to understand the structure.
# A bare `data.head()` in the middle of a cell shows nothing, so print it.
print(data.head())

# Display column names for reference
print("Columns in dataset:", data.columns)

# Check for missing values
print("Missing values in each column:\n", data.isnull().sum())

# Columns that must not be used as model inputs: the row identifier, the label
# itself, and the advice text that accompanies the label.
non_feature_columns = ['Person ID', 'Health Status', 'Nutritional Needs']

# Missing categorical feature values are kept as their own 'Unknown' category
# instead of dropping the row.
# NOTE(review): presumably the missing 'Sleep Disorder' entries mean "no disorder"
# (e.g. a literal "None" in the CSV parsed as NaN by read_csv) - TODO confirm.
categorical_features = (
    data.drop(columns=non_feature_columns).select_dtypes(include='object').columns
)
for column in categorical_features:
    data[column] = data[column].fillna('Unknown')

# Rows that still contain missing values (numeric features, label, advice) are dropped
data = data.dropna()

# Separate features and target
X = data.drop(non_feature_columns, axis=1)  # Input features
y = data['Health Status']  # Target variable

# Encode categorical variables, keeping each fitted encoder by column name
label_encoders = {}
for column in X.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    label_encoders[column] = le

# Encode target variable
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

import pickle

from sklearn.ensemble import RandomForestClassifier

# Build the random forest and fit it on the training split in one step
# (fit() returns the estimator itself).
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Persist the fitted model and the target encoder, one pickle file each.
# NOTE(review): the per-feature encoders in `label_encoders` are not saved here.
artifacts = {
    'random_forest_health_model.pkl': rf,
    'health_status_label_encoder.pkl': target_encoder,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out split
y_pred = rf.predict(X_test)

# Overall accuracy plus a per-class report; the class names shown in the
# report are the original labels stored on the fitted target encoder.
class_names = target_encoder.classes_
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=class_names)

print("Model Accuracy:", accuracy)
print("\nClassification Report:\n", report)


Columns in dataset: Index(['Person ID', 'Gender', 'Age', 'Occupation', 'Sleep Duration',
       'Quality of Sleep', 'Physical Activity Level', 'Stress Level',
       'BMI Category', 'Blood Pressure Category', 'Systolic', 'Diastolic',
       'Heart Rate', 'Daily Steps', 'Sleep Disorder', 'Health Status',
       'Nutritional Needs'],
      dtype='object')
Missing values in each column:
 Person ID                   0
Gender                      0
Age                         0
Occupation                  0
Sleep Duration              0
Quality of Sleep            0
Physical Activity Level     0
Stress Level                0
BMI Category                0
Blood Pressure Category     0
Systolic                    0
Diastolic                   0
Heart Rate                  0
Daily Steps                 0
Sleep Disorder             98
Health Status               0
Nutritional Needs           0
dtype: int64
Model Accuracy: 0.9821428571428571

Classification Report:
               precision    re

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report

# Read the dataset
data = pd.read_csv('/content/updated_dataset_with_nutritional_needs (1).csv')

# The label, the advice text and the row identifier are not model inputs;
# any of them that is absent from the file is simply skipped.
non_feature_columns = ['Health Status', 'Nutritional Needs', 'Person ID']
features = data.drop(columns=non_feature_columns, errors='ignore')
target = data['Health Status']

# Integer-encode every text column. Missing entries become the category
# 'Unknown', and each fitted encoder is kept under its column name.
label_encoders = {}
categorical_columns = features.select_dtypes(include=['object']).columns
for name in categorical_columns:
    encoder = LabelEncoder()
    filled = features[name].fillna('Unknown')
    features[name] = encoder.fit_transform(filled)
    label_encoders[name] = encoder

# Integer-encode the label as well
target_encoder = LabelEncoder()
target = target_encoder.fit_transform(target)

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Random forest with a fixed seed
rf = RandomForestClassifier(random_state=42)

# 5-fold cross-validated accuracy on the training split.
# NOTE(review): the output recorded with this cell shows 1.0 on every fold and
# on the test split - worth confirming that 'Health Status' is not directly
# derivable from one of the feature columns.
cv_scores = cross_val_score(rf, X_train, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores for each fold:", cv_scores)
print("Average cross-validation accuracy:", np.mean(cv_scores))

# Fit on the whole training split
rf.fit(X_train, y_train)
print("\nModel trained successfully on the full training set.")

# Score the held-out rows
y_pred = rf.predict(X_test)

# Accuracy and per-class report, labelled with the original class names
class_names = target_encoder.classes_
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=class_names)
print("\nTest set accuracy:", test_accuracy)
print("\nClassification Report:\n", report)

# Function to encode user input and predict health status
def encode_and_predict(user_input):
    """Encode one person's raw answers, predict their health status and print advice.

    Uses the notebook globals created earlier in this cell: ``label_encoders``
    (fitted encoder per categorical column), ``X_train`` (column order of the
    training data), ``rf`` (the fitted classifier), ``target_encoder`` and ``data``.

    Args:
        user_input: Mapping of feature column name to the raw value entered.
            Values for columns that have no entry in ``label_encoders`` are
            passed to the model unchanged. The mapping is not modified.

    Returns:
        None. The predicted status and the advice are printed.
    """
    import warnings

    # Work on a copy so the caller keeps the raw answers and can call again.
    encoded_input = dict(user_input)

    # Encode categorical features in the user input
    for column, le in label_encoders.items():
        if column not in encoded_input:
            continue

        raw_value = encoded_input[column]
        known_labels = list(le.classes_)

        if raw_value not in known_labels:
            # Answers are typed by hand, so fall back to a match that ignores
            # letter case and surrounding whitespace ("good" -> "Good").
            by_folded = {str(label).strip().casefold(): label for label in known_labels}
            raw_value = by_folded.get(str(raw_value).strip().casefold(), raw_value)

        if raw_value in known_labels:
            encoded_input[column] = le.transform([raw_value])[0]
        else:
            # Value never seen in training: use the next free integer code, but
            # do NOT append it to le.classes_, so the fitted encoder is left
            # exactly as it was trained.
            warnings.warn(
                f"Unseen value {raw_value!r} for column {column!r}; "
                "encoding it as an out-of-vocabulary code.",
                stacklevel=2,
            )
            encoded_input[column] = len(known_labels)

    # Convert the user input to a DataFrame with the same columns as training data
    input_df = pd.DataFrame([encoded_input])
    input_df = input_df.reindex(columns=X_train.columns, fill_value=0)  # Fill missing columns with 0

    # Predict health status and map the integer back to its original label
    prediction = rf.predict(input_df)
    predicted_health_status = target_encoder.inverse_transform(prediction)[0]

    print("\nPredicted Health Status:", predicted_health_status)

    # Provide nutritional advice based on predicted health status
    if predicted_health_status == "Unhealthy":
        # The advice shown is the 'Nutritional Needs' text of the first
        # "Unhealthy" row in the dataset; it does not depend on the answers given.
        nutritional_needs = data[data['Health Status'] == "Unhealthy"]['Nutritional Needs'].iloc[0]
        print("\nSuggested Nutritional Advice:\n", nutritional_needs)
    else:
        print("Keep up the good work to maintain your health!")

# Questions asked in order: (feature column, prompt text, converter applied to
# the typed answer). `str` leaves the answer as typed.
prompts = [
    ("Gender", "Enter Gender (Male/Female): ", str),
    ("Age", "Enter Age: ", int),
    ("Occupation", "Enter Occupation (e.g., Student, Teacher, etc.): ", str),
    ("Sleep Duration", "Enter Sleep Duration in hours: ", float),
    ("Quality of Sleep", "Enter Quality of Sleep (Good/Fair/Poor): ", str),
    ("Physical Activity Level", "Enter Physical Activity Level (Low/Medium/High): ", str),
    ("Stress Level", "Enter Stress Level (Low/Medium/High): ", str),
    ("BMI Category", "Enter BMI Category (Underweight/Normal/Overweight/Obese): ", str),
    ("Blood Pressure Category", "Enter Blood Pressure Category (Normal/Elevated/High): ", str),
    ("Systolic", "Enter Systolic Blood Pressure: ", int),
    ("Diastolic", "Enter Diastolic Blood Pressure: ", int),
    ("Heart Rate", "Enter Heart Rate: ", int),
    ("Daily Steps", "Enter Daily Steps: ", int),
    ("Sleep Disorder", "Enter Sleep Disorder (Yes/No): ", str),
]

# Ask each question in turn and collect the converted answers by column name
user_input = {field: convert(input(prompt)) for field, prompt, convert in prompts}

# Run the prediction and print the advice for these answers
encode_and_predict(user_input)
12

Cross-validation scores for each fold: [1. 1. 1. 1. 1.]
Average cross-validation accuracy: 1.0

Model trained successfully on the full training set.

Test set accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

     Healthy       1.00      1.00      1.00       750
   Unhealthy       1.00      1.00      1.00      3250

    accuracy                           1.00      4000
   macro avg       1.00      1.00      1.00      4000
weighted avg       1.00      1.00      1.00      4000

Enter Gender (Male/Female): Male
Enter Age: 35
Enter Occupation (e.g., Student, Teacher, etc.): Doctor
Enter Sleep Duration in hours: 8
Enter Quality of Sleep (Good/Fair/Poor): good
Enter Physical Activity Level (Low/Medium/High): High
Enter Stress Level (Low/Medium/High): Low
Enter BMI Category (Underweight/Normal/Overweight/Obese): Obese
Enter Blood Pressure Category (Normal/Elevated/High): high
Enter Systolic Blood Pressure: 80
Enter Diastolic Blood Pressure: 100
Ente

12

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Read the dataset
data = pd.read_csv('/content/nutition_updated (1).csv')

# The label, the advice text and the row identifier are not model inputs;
# any of them that is absent from the file is simply skipped.
non_feature_columns = ['Health Status', 'Nutritional Needs', 'Person ID']
features = data.drop(columns=non_feature_columns, errors='ignore')
target = data['Health Status']

# Integer-encode every text column. Missing entries become the category
# 'Unknown', and each fitted encoder is kept under its column name.
label_encoders = {}
categorical_columns = features.select_dtypes(include=['object']).columns
for name in categorical_columns:
    encoder = LabelEncoder()
    filled = features[name].fillna('Unknown')
    features[name] = encoder.fit_transform(filled)
    label_encoders[name] = encoder

# Integer-encode the label as well
target_encoder = LabelEncoder()
target = target_encoder.fit_transform(target)

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Random forest with a fixed seed
rf = RandomForestClassifier(random_state=42)

# 5-fold cross-validated accuracy on the training split.
# NOTE(review): the output recorded with this cell shows 1.0 on every fold and
# on the test split - worth confirming that 'Health Status' is not directly
# derivable from one of the feature columns.
cv_scores = cross_val_score(rf, X_train, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores for each fold:", cv_scores)
print("Average cross-validation accuracy:", np.mean(cv_scores))

# Fit on the whole training split
rf.fit(X_train, y_train)
print("\nModel trained successfully on the full training set.")

# Score the held-out rows
y_pred = rf.predict(X_test)

# Accuracy and per-class report, labelled with the original class names
class_names = target_encoder.classes_
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=class_names)
print("\nTest set accuracy:", test_accuracy)
print("\nClassification Report:\n", report)

# # Plot cross-validation scores
# plt.figure(figsize=(10, 6))
# sns.barplot(x=np.arange(1, 6), y=cv_scores, palette="viridis")
# plt.xlabel('Fold')
# plt.ylabel('Cross-validation Accuracy')
# plt.title('Cross-validation Accuracy for Each Fold')
# plt.ylim(0, 1)
# plt.show()

# # Plot test accuracy
# plt.figure(figsize=(6, 6))
# plt.bar(['Test Set Accuracy'], [test_accuracy], color='skyblue')
# plt.ylim(0, 1)
# plt.ylabel('Accuracy')
# plt.title('Model Accuracy on Test Set')
# plt.show()

# Function to encode user input and predict health status
def encode_and_predict(user_input):
    """Encode one person's raw answers, predict their health status and print advice.

    Uses the notebook globals created earlier in this cell: ``label_encoders``
    (fitted encoder per categorical column), ``X_train`` (column order of the
    training data), ``rf`` (the fitted classifier), ``target_encoder`` and ``data``.

    Args:
        user_input: Mapping of feature column name to the raw value entered.
            Values for columns that have no entry in ``label_encoders`` are
            passed to the model unchanged. The mapping is not modified.

    Returns:
        None. The predicted status and the advice are printed.
    """
    import warnings

    # Work on a copy so the caller keeps the raw answers and can call again.
    encoded_input = dict(user_input)

    # Encode categorical features in the user input
    for column, le in label_encoders.items():
        if column not in encoded_input:
            continue

        raw_value = encoded_input[column]
        known_labels = list(le.classes_)

        if raw_value not in known_labels:
            # Answers are typed by hand, so fall back to a match that ignores
            # letter case and surrounding whitespace ("fair" -> "Fair").
            by_folded = {str(label).strip().casefold(): label for label in known_labels}
            raw_value = by_folded.get(str(raw_value).strip().casefold(), raw_value)

        if raw_value in known_labels:
            encoded_input[column] = le.transform([raw_value])[0]
        else:
            # Value never seen in training: use the next free integer code, but
            # do NOT append it to le.classes_, so the fitted encoder is left
            # exactly as it was trained.
            warnings.warn(
                f"Unseen value {raw_value!r} for column {column!r}; "
                "encoding it as an out-of-vocabulary code.",
                stacklevel=2,
            )
            encoded_input[column] = len(known_labels)

    # Convert the user input to a DataFrame with the same columns as training data
    input_df = pd.DataFrame([encoded_input])
    input_df = input_df.reindex(columns=X_train.columns, fill_value=0)  # Fill missing columns with 0

    # Predict health status and map the integer back to its original label
    prediction = rf.predict(input_df)
    predicted_health_status = target_encoder.inverse_transform(prediction)[0]

    print("\nPredicted Health Status:", predicted_health_status)

    # Provide nutritional advice based on predicted health status
    if predicted_health_status == "Unhealthy":
        # The advice shown is the 'Nutritional Needs' text of the first
        # "Unhealthy" row in the dataset; it does not depend on the answers given.
        nutritional_needs = data[data['Health Status'] == "Unhealthy"]['Nutritional Needs'].iloc[0]
        print("\nSuggested Nutritional Advice:\n", nutritional_needs)
    else:
        print("Keep up the good work to maintain your health!")

# Questions asked in order: (feature column, prompt text, converter applied to
# the typed answer). `str` leaves the answer as typed.
prompts = [
    ("Gender", "Enter Gender (Male/Female): ", str),
    ("Age", "Enter Age: ", int),
    ("Occupation", "Enter Occupation (e.g., Student, Teacher, etc.): ", str),
    ("Sleep Duration", "Enter Sleep Duration in hours: ", float),
    ("Quality of Sleep", "Enter Quality of Sleep (Good/Fair/Poor): ", str),
    ("Physical Activity Level", "Enter Physical Activity Level (Low/Medium/High): ", str),
    ("Stress Level", "Enter Stress Level (Low/Medium/High): ", str),
    ("BMI Category", "Enter BMI Category (Underweight/Normal/Overweight/Obese): ", str),
    ("Blood Pressure Category", "Enter Blood Pressure Category (Normal/Elevated/High): ", str),
    ("Systolic", "Enter Systolic Blood Pressure: ", int),
    ("Diastolic", "Enter Diastolic Blood Pressure: ", int),
    ("Heart Rate", "Enter Heart Rate: ", int),
    ("Daily Steps", "Enter Daily Steps: ", int),
    ("Sleep Disorder", "Enter Sleep Disorder (Yes/No): ", str),
]

# Ask each question in turn and collect the converted answers by column name
user_input = {field: convert(input(prompt)) for field, prompt, convert in prompts}

# Run the prediction and print the advice for these answers
encode_and_predict(user_input)


Cross-validation scores for each fold: [1. 1. 1. 1. 1.]
Average cross-validation accuracy: 1.0

Model trained successfully on the full training set.

Test set accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

     Healthy       1.00      1.00      1.00       750
   Unhealthy       1.00      1.00      1.00      3250

    accuracy                           1.00      4000
   macro avg       1.00      1.00      1.00      4000
weighted avg       1.00      1.00      1.00      4000

Enter Gender (Male/Female): Male
Enter Age: 40
Enter Occupation (e.g., Student, Teacher, etc.): Teacher
Enter Sleep Duration in hours: 5
Enter Quality of Sleep (Good/Fair/Poor): fair
Enter Physical Activity Level (Low/Medium/High): low
Enter Stress Level (Low/Medium/High): medium
Enter BMI Category (Underweight/Normal/Overweight/Obese): normal
Enter Blood Pressure Category (Normal/Elevated/High): elevated
Enter Systolic Blood Pressure: 120
Enter Diastolic Blood Pressure: