In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import pickle

# Load the raw CSV and work on a copy so `obes` keeps the data exactly as read
lbe = LabelEncoder()  # NOTE(review): never used in this cell — confirm whether it is still needed
obes = pd.read_csv("ObesData.csv")
df = obes.copy()

# Gender -> a single 0/1 column named "gender", taken from the "Male" dummy
categorical = pd.get_dummies(df["Gender"], dtype=int)
df = df.drop(columns="Gender")
df["gender"] = categorical["Male"]

# ObesityCategory -> one 0/1 column per category.
# NOTE(review): this assignment appears to pair the names below with the dummy
# columns by position, not by label — confirm the order matches the categories
# actually present in the CSV.
obesity_columns = ['Normalweight', 'Obese', 'Overweight', 'Underweight']
categorical2 = pd.get_dummies(df["ObesityCategory"], dtype=int)
df[obesity_columns] = categorical2
df = df.drop(columns='ObesityCategory')

# Diet Type -> one 0/1 column per diet (same positional caveat as above — TODO confirm)
diet_columns = ['Gluten Free', 'Standard', 'Vegan', 'Vegetarian']
categorical3 = pd.get_dummies(df['Diet Type'], dtype=int)
df[diet_columns] = categorical3
df = df.drop(columns='Diet Type')

# Predictor columns: the raw numeric/flag fields plus the indicator columns
# created during preprocessing. The target is the assigned breakfast label.
feature_columns = [
    "Age", "Height", "Weight", "BMI", "PhysicalActivityLevel",
    "Assigned_Breakfast_Calories", "Waist", "Diabetes", "Hypertension",
    "Normalweight", "Obese", "Overweight", "Underweight",
    "Gluten Free", "Standard", "Vegan", "Vegetarian",
    "gender",
]
x = df[feature_columns]
y = df["Assigned_Breakfast"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest hyperparameters, gathered in one place so they are easy to tune
forest_params = dict(
    n_estimators=200,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=2,
    random_state=45,
)

# Build the classifier and fit it on the training split
rf = RandomForestClassifier(**forest_params)
model = rf.fit(x_train, y_train)

# Predict on the held-out rows once, then score those same predictions.
# (Previously `y_pred` was computed and discarded while `model.score` ran the
# forest over `x_test` a second time; scoring `y_pred` directly gives the same
# accuracy with a single prediction pass.)
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): the last recorded run of this cell printed 0.0550. A score that
# low suggests the features carry little signal for "Assigned_Breakfast" and/or
# the target has many classes — TODO compare against a majority-class baseline
# before relying on the saved model.
print(f"Model Accuracy: {accuracy:.4f}")

# Serialize the fitted model to disk so it can be reloaded without retraining.
# Only unpickle this file from a trusted location: loading a pickle can run arbitrary code.
with open(file='breakfast_model.pkl', mode='wb') as model_file:
    pickle.Pickler(model_file).dump(model)

print("Model has been saved as 'breakfast_model.pkl'")


Model Accuracy: 0.0550
Model has been saved as 'breakfast_model.pkl'
