In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler


# --- Data cleaning / preprocessing ---
# Read the training table and discard the "id" column in one chained step.
df = pd.read_csv("train.csv").drop(columns=["id"])

# Integer-encode every categorical column (the "NObeyesdad" target included).
# Each fitted encoder is stored under its column name for later reuse.
categorical_cols = [
    "Gender",
    "family_history_with_overweight",
    "FAVC",
    "CAEC",
    "SMOKE",
    "SCC",
    "MTRANS",
    "NObeyesdad",
]
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Standardize the listed numeric columns with a single StandardScaler that is
# fitted on the whole table; the scaled values overwrite the originals in df.
numerical_cols = [
    "Age",
    "Height",
    "Weight",
    "FCVC",
    "NCP",
    "CH2O",
    "FAF",
]
scaler = StandardScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])



In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

def classify_obesity(bmi):
    """Map a body-mass-index value to a weight-category label.

    Each category covers a half-open interval: the lower bound is inclusive
    and the upper bound is exclusive (e.g. 18.5 <= bmi < 25 is
    "Normal weight").

    Args:
        bmi: Numeric BMI value.

    Returns:
        One of "Underweight", "Normal weight", "Overweight",
        "Obesity Type I", "Obesity Type II" or "Obesity Type III".
    """
    # (exclusive upper bound, label), in ascending order; the first bound
    # that the value is strictly below decides the category.
    upper_bounds = (
        (18.5, "Underweight"),
        (25, "Normal weight"),
        (30, "Overweight"),
        (35, "Obesity Type I"),
        (40, "Obesity Type II"),
    )
    for limit, label in upper_bounds:
        if bmi < limit:
            return label
    # Anything not below 40 lands here. That includes NaN, because every
    # "<" comparison against NaN is False.
    return "Obesity Type III"


# Load the raw table and derive the binary target "Obese": 1 when the recorded
# class is one of the three obesity types, 0 otherwise.
df = pd.read_csv("train.csv")
obese_classes = ["Obesity_Type_I", "Obesity_Type_II", "Obesity_Type_III"]
df["Obese"] = df["NObeyesdad"].apply(lambda x: 1 if x in obese_classes else 0)
# Reassign rather than mutate with inplace=True so the step chains cleanly.
df = df.drop(columns=["NObeyesdad", "id"])

# BMI = weight / height^2.
# NOTE(review): assumes Weight is in kg and Height in metres, matching the
# prompts further down - confirm against the dataset description.
df["BMI"] = df["Weight"] / (df["Height"] ** 2)
df["Obesity_Level"] = df["BMI"].apply(classify_obesity)

# One-hot encode the categorical predictors; drop_first=True drops the first
# level of each column.
df_encoded = pd.get_dummies(df, columns=["Gender", "family_history_with_overweight", "FAVC", "CAEC", "SMOKE", "SCC", "MTRANS"], drop_first=True)

# "Obese" is the target and "Obesity_Level" is a text label derived from BMI,
# so neither is used as a feature.
X = df_encoded.drop(columns=["Obese", "Obesity_Level"])
y = df_encoded["Obese"]

# Split BEFORE scaling: the scaler must learn its mean/variance from the
# training rows only, otherwise test-set statistics leak into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Kept so that any later cell that references X_scaled still works; it is the
# full matrix transformed with the train-fitted scaler and is not used below.
X_scaled = scaler.transform(X)

# use_label_encoder is no longer passed: the installed XGBoost ignores it and
# emits a "Parameters: { "use_label_encoder" } are not used." warning.
xgb_model = XGBClassifier(n_estimators=100, eval_metric="logloss", random_state=42)
xgb_model.fit(X_train, y_train)

y_pred = xgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"XGBoost Model Accuracy: {accuracy:.4f}")

# Interactive BMI lookup. This uses only the BMI formula and
# classify_obesity; the trained model is not involved.
weight = float(input("Enter your weight in kg: "))
height = float(input("Enter your height in meters: "))
# Written as "not (... > 0 ...)" so that NaN is rejected as well as zero and
# negative values; a zero height would otherwise raise ZeroDivisionError.
if not (weight > 0 and height > 0):
    raise ValueError("Weight and height must both be positive numbers.")
bmi = weight / (height ** 2)
classification = classify_obesity(bmi)
print(f"Your BMI is {bmi:.2f}, which falls under '{classification}' category.")


Parameters: { "use_label_encoder" } are not used.



XGBoost Model Accuracy: 0.9774
Enter your weight in kg: 68
Enter your height in meters: 1.8
Your BMI is 20.99, which falls under 'Normal weight' category.
