In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Location of the CSV and the columns used as predictors / target
DATA_URL = "https://raw.githubusercontent.com/Kuna1Chauhan/EDA/main/ObesityDataSet_raw_and_data_sinthetic.csv"
FEATURE_COLUMNS = ['Age', 'Gender', 'Height', 'Weight']
TARGET_COLUMN = 'NObeyesdad'

# Read the raw table
data = pd.read_csv(DATA_URL)

# Target labels, and the predictors with 'Gender' expanded into one-hot indicator columns
y = data[TARGET_COLUMN]
X = pd.get_dummies(data[FEATURE_COLUMNS], columns=['Gender'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train four classifiers on the same standardized split and report their
# test-set accuracy plus a per-class classification report.
#
# RandomForestClassifier and GradientBoostingClassifier involve randomness
# during fitting, so they are seeded here (with the same value the train/test
# split uses) to make the printed metrics repeatable from run to run.
MODEL_RANDOM_STATE = 42


def _fit_and_report(label, model):
    """Fit *model* on the training split and print its test-set metrics.

    Uses the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    produced by the data-preparation step.

    Args:
        label: Human-readable model name used as the prefix of the
            "<label> Accuracy:" line.
        model: An unfitted estimator exposing ``fit`` and ``predict``.

    Returns:
        A ``(predictions, accuracy)`` tuple: the predicted labels for
        ``X_test`` and the accuracy score of those predictions against
        ``y_test``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"{label} Accuracy:", accuracy)
    print(classification_report(y_test, predictions))
    return predictions, accuracy


# Logistic Regression
logreg = LogisticRegression()
logreg_predictions, logreg_accuracy = _fit_and_report("Logistic Regression", logreg)

# Random Forest (seeded for reproducible results)
rf = RandomForestClassifier(random_state=MODEL_RANDOM_STATE)
rf_predictions, rf_accuracy = _fit_and_report("Random Forest", rf)

# Support Vector Machine (SVM)
svm = SVC()
svm_predictions, svm_accuracy = _fit_and_report("SVM", svm)

# Gradient Boosting (seeded for reproducible results)
gb = GradientBoostingClassifier(random_state=MODEL_RANDOM_STATE)
gb_predictions, gb_accuracy = _fit_and_report("Gradient Boosting", gb)


Logistic Regression Accuracy: 0.8888888888888888
                     precision    recall  f1-score   support

Insufficient_Weight       0.84      1.00      0.91        56
      Normal_Weight       0.98      0.74      0.84        62
     Obesity_Type_I       0.95      0.91      0.93        78
    Obesity_Type_II       0.91      1.00      0.95        58
   Obesity_Type_III       1.00      1.00      1.00        63
 Overweight_Level_I       0.74      0.86      0.79        56
Overweight_Level_II       0.81      0.68      0.74        50

           accuracy                           0.89       423
          macro avg       0.89      0.88      0.88       423
       weighted avg       0.90      0.89      0.89       423

Random Forest Accuracy: 0.9692671394799054
                     precision    recall  f1-score   support

Insufficient_Weight       0.93      0.98      0.96        56
      Normal_Weight       0.97      0.90      0.93        62
     Obesity_Type_I       0.97      0.99      0.98