In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix

In [None]:
# Load the raw table and key every row by its "id" column.
df = pd.read_csv("dataset.csv").set_index("id")

In [None]:
# Turn the yes/no columns into 1/0 flags: exactly "yes" becomes 1,
# every other value (including missing ones) becomes 0.
for flag_col in ("family_history_with_overweight", "FAVC", "SMOKE", "SCC"):
    df[flag_col] = df[flag_col].eq("yes").astype("int64")


In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

# One-hot encode the nominal columns. The encoder is configured to return
# pandas DataFrames so the generated indicator columns keep readable names
# and the original row index, which lets them be concatenated directly.
# NOTE: the single `ohe` instance is refit for each column, so after this
# cell it only remembers the categories of the last column ('CAEC').
ohe = OneHotEncoder(sparse_output=False).set_output(transform='pandas')
ohe1 = ohe.fit_transform(df[['Gender']])
ohe2 = ohe.fit_transform(df[['MTRANS']])
ohe3 = ohe.fit_transform(df[['CAEC']])

# Append the indicator columns and drop the raw text columns they replace.
# NOTE: this reassigns `df`, so the cell cannot be re-run without first
# re-running the loading cell (the raw columns no longer exist afterwards).
df = pd.concat([df, ohe1, ohe2, ohe3], axis=1).drop(columns=['Gender', 'MTRANS', 'CAEC'])

# 'CALC' is ordinal, so the category list must go from least to most
# frequent for the integer codes to be monotonic:
# no -> 0, Sometimes -> 1, Frequently -> 2, Always -> 3.
# The previous order ('Frequently', 'Sometimes', 'no', 'Always') produced
# codes that did not follow the frequency ranking.
calc = ['no', 'Sometimes', 'Frequently', 'Always']
enc1 = OrdinalEncoder(categories=[calc])
df['CALC'] = enc1.fit_transform(df[['CALC']])

In [None]:
# Integer-encode the class label; `le` is kept so the codes can be mapped
# back to class names later through `le.classes_`.
le = LabelEncoder().fit(df['NObeyesdad'])
df['target'] = le.transform(df['NObeyesdad'])

# Target vector, and the feature matrix: every column except the raw label
# and its encoded copy.
y = df['target']
X = df.drop(columns=['NObeyesdad', 'target'])

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

# Split first, so the hold-out rows never influence any fitted preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=46)

# Standardise the features. Previously the scaler was fitted on the full
# matrix and its output (`X_scaled`) was never used: the split was taken from
# the raw `X`, so the distance- and gradient-based learners were trained on
# unscaled features. The scaler is now fitted on the training rows only and
# the same transformation is applied to the test rows.
# Pandas output keeps the column names and the row index of the splits.
scaler = StandardScaler().set_output(transform="pandas")
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# `X_scaled` is kept (as a NumPy array, as before) in case a later cell reads
# it; it now uses the statistics learned from the training rows only.
X_scaled = scaler.transform(X).to_numpy()


In [None]:
# One seed for every stochastic estimator so that a fresh
# "Restart & Run All" reproduces the reported metrics. 46 matches the seed
# already used for the train/test split and the decision tree.
SEED = 46

# Level-0 learners of the stack: a mix of distance-based, kernel, tree,
# boosting and linear models.
base_learners = [
    ('knn', KNeighborsClassifier(n_neighbors=7, weights='distance', metric='manhattan')),
    # probability=True enables predict_proba; the internal calibration it
    # relies on is randomised, hence the seed.
    ('svc', SVC(probability=True, kernel='rbf', C=20, gamma='auto', random_state=SEED)),
    ('dt', DecisionTreeClassifier(max_depth=4, min_samples_split=2, random_state=SEED)),
    ('rf', RandomForestClassifier(n_estimators=220, max_depth=5, max_features='sqrt',
                                  random_state=SEED)),
    ('gb', GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3,
                                      random_state=SEED)),
    ('xgb', XGBClassifier(eval_metric='mlogloss', random_state=SEED)),
    ('log', LogisticRegression(solver='newton-cg', max_iter=200, C=1)),
]

# The same MLP configuration serves two roles: as the meta-learner of the
# stack and, fitted on its own below, as the baseline for comparison.
model = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', max_iter=500,
                      random_state=SEED)

# passthrough=True feeds the original features to the meta-learner alongside
# the base learners' predictions; cv=2 sets the number of folds used to
# produce those out-of-fold predictions.
stacking_clf = StackingClassifier(
    estimators=base_learners,
    final_estimator=model,
    passthrough=True,
    cv=2
)

stacking_clf.fit(X_train, y_train)
# Baseline: the stand-alone MLP trained directly on the features.
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Hold-out predictions: stacked ensemble vs. the stand-alone MLP baseline.
y_pred = stacking_clf.predict(X_test)
ypred2 = model.predict(X_test)

baseline_accuracy = accuracy_score(y_test, ypred2)
ensemble_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of baseline model :", baseline_accuracy)
print("Accuracy of ensemble method :", ensemble_accuracy)

# Per-class precision/recall/F1 for the ensemble, labelled with the original
# class names recovered from the label encoder.
ensemble_report = classification_report(y_test, y_pred, target_names=le.classes_)
print(ensemble_report)


Accuracy of baseline model : 0.855009633911368
Accuracy of ensemble method : 0.9060693641618497
                     precision    recall  f1-score   support

Insufficient_Weight       0.90      0.96      0.93       497
      Normal_Weight       0.90      0.84      0.87       634
     Obesity_Type_I       0.90      0.90      0.90       594
    Obesity_Type_II       0.97      0.98      0.97       616
   Obesity_Type_III       1.00      1.00      1.00       827
 Overweight_Level_I       0.79      0.78      0.79       475
Overweight_Level_II       0.81      0.82      0.82       509

           accuracy                           0.91      4152
          macro avg       0.90      0.90      0.90      4152
       weighted avg       0.91      0.91      0.91      4152



In [None]:
# Confusion matrix of the stacked ensemble on the hold-out set.
# scikit-learn convention: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[475  19   0   1   0   1   1]
 [ 46 535   0   0   0  44   9]
 [  1   0 533  14   4  12  30]
 [  0   0   9 603   0   1   3]
 [  0   0   1   1 825   0   0]
 [  4  32  13   0   0 372  54]
 [  0   9  36   5   0  40 419]]
