In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

# ---------------------------------------------------------------------------
# Train a random-forest obesity classifier and persist it to disk.
#
# Steps: read the dataset, build the feature matrix (integer one-hot Gender
# columns followed by the numeric measurements), hold out 25% of the rows,
# fit the forest, print the hold-out accuracy, and pickle the fitted model.
#
# NOTE(review): the recorded run of this cell printed "Accuracy: 1.0". A
# perfect score is suspicious; presumably ObesityCategory is derived from BMI
# (which is also a feature), i.e. target leakage. Confirm against the data
# before trusting this metric.
# ---------------------------------------------------------------------------

# Read the raw dataset
df = pd.read_csv("ObesData.csv")

# Target: the label column the model learns to predict
y = df["ObesityCategory"]

# Features: one-hot encode Gender as integer columns and place them in front
# of the numeric columns; the resulting column order is what the saved model
# is fitted on.
numeric_columns = [
    "Age",
    "Height",
    "Weight",
    "BMI",
    "PhysicalActivityLevel",
    "Waist_Width",
]
categorical = pd.get_dummies(df["Gender"], dtype=int)
X = pd.concat([categorical, df[numeric_columns]], axis=1)

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
split_options = {"test_size": 0.25, "random_state": 42}
x_train, x_test, y_train, y_test = train_test_split(X, y, **split_options)

# Build a 100-tree forest (seeded) and fit it on the training portion;
# fit() returns the estimator itself, so the chained call yields the model.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    x_train, y_train
)

# Score the fitted model on the held-out rows
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Persist the fitted model next to the notebook in binary pickle format
with open("obesity_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)


Accuracy: 1.0
