# Análise de Modelos - Obesidade

Neste notebook, vamos comparar diferentes modelos de Machine Learning para prever níveis de obesidade.

In [1]:
import pandas as pd
import numpy as np
import joblib
import sys
import os

# Adicionar pasta raiz ao path para importar utils
sys.path.append(os.path.abspath('..'))
from utils import DropFeatures, OneHotEncodingNames, OrdinalFeature, MinMaxWithFeatNames

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [2]:
# Carregar Dados
try:
    df = pd.read_csv('../Obesity.csv')
except:
    df = pd.read_csv('obesidade/Obesity.csv')

X = df.drop('Obesity', axis=1)
y = df['Obesity']

le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Pipeline
pipeline = Pipeline([
    ('feature_dropper', DropFeatures()),
    ('OneHotEncoding', OneHotEncodingNames()),
    ('ordinal_feature', OrdinalFeature()),
    ('min_max_scaler', MinMaxWithFeatNames()),
])

X_processed = pipeline.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_processed, y_encoded, test_size=0.2, random_state=42)

In [3]:
modelos = {
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
    'Random Forest': RandomForestClassifier(),
    'Decision Tree': DecisionTreeClassifier()
}

for nome, model in modelos.items():
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    print(f'{nome}: {acc:.4f}')

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost: 0.9598
Random Forest: 0.9504
Decision Tree: 0.9409
