In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
class ObesityModel:
    """Random-forest classifier trained on a tabular obesity dataset.

    On construction the CSV at ``data_path`` is loaded and preprocessed in
    place: categorical columns are label-encoded, numeric columns are
    standardized, and the target column is label-encoded. Call
    ``train_model()`` before ``predict()``.
    """

    # Name of the target column in the dataset.
    TARGET_COL = 'NObeyesdad'

    def __init__(self, data_path):
        """Load the CSV at ``data_path``, preprocess it, and create the (unfitted) model."""
        self.data = pd.read_csv(data_path)
        self.preprocessing()
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)

    def preprocessing(self):
        """Encode categorical features and normalize numerical features.

        Overwrites the affected columns of ``self.data`` in place and keeps the
        fitted encoders/scaler on the instance so ``predict()`` can apply the
        same transformations to new samples.

        NOTE(review): the encoders and the scaler are fit on the full dataset,
        i.e. before ``train_model()`` makes its train/test split, so the
        held-out rows have already influenced the scaling statistics.
        """
        self.categorical_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC',
                                 'SMOKE', 'SCC', 'CALC', 'MTRANS']
        self.numeric_cols = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']

        # One encoder per categorical column, kept for reuse at prediction time.
        self.encoders = {col: LabelEncoder() for col in self.categorical_cols}
        for col, encoder in self.encoders.items():
            self.data[col] = encoder.fit_transform(self.data[col])

        self.scaler = StandardScaler()
        self.data[self.numeric_cols] = self.scaler.fit_transform(self.data[self.numeric_cols])

        self.label_encoder = LabelEncoder()
        self.data[self.TARGET_COL] = self.label_encoder.fit_transform(self.data[self.TARGET_COL])

        # Remember the feature column order; predict() must present columns
        # to the model in exactly this order.
        self.feature_cols = [col for col in self.data.columns if col != self.TARGET_COL]

    def train_model(self):
        """Train the Random Forest model on an 80/20 train/test split.

        The accuracy on the held-out 20% is stored in ``self.test_accuracy``.
        Returns ``None``.
        """
        X = self.data.drop(columns=[self.TARGET_COL])
        y = self.data[self.TARGET_COL]
        # Refresh in case self.data was modified after preprocessing().
        self.feature_cols = list(X.columns)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.model.fit(X_train, y_train)
        # Use the held-out split instead of discarding it.
        self.test_accuracy = self.model.score(X_test, y_test)

    def predict(self, input_data):
        """Predict the class of a single sample using the trained model.

        Args:
            input_data: mapping from feature name to raw (un-encoded,
                un-scaled) value. Key order does not matter; keys that are
                not training features are ignored.

        Returns:
            Tuple ``(label, prob)``: the predicted class label decoded back to
            its original value, and the array returned by the model's
            ``predict_proba`` for this sample.

        Raises:
            KeyError: if a required feature is missing from ``input_data``.
            ValueError: if a categorical value was not seen during fitting.
        """
        missing = [col for col in self.feature_cols if col not in input_data]
        if missing:
            raise KeyError(f"input_data is missing required feature(s): {missing}")

        input_df = pd.DataFrame([input_data])
        for col, encoder in self.encoders.items():
            value = input_data[col]
            known_values = list(encoder.classes_)
            if value not in known_values:
                raise ValueError(
                    f"Unknown value {value!r} for feature {col!r}; expected one of {known_values}"
                )
            input_df[col] = encoder.transform([value])
        input_df[self.numeric_cols] = self.scaler.transform(input_df[self.numeric_cols])

        # Present the columns in the training order, whatever order the
        # caller's mapping used.
        input_df = input_df[self.feature_cols]

        pred = self.model.predict(input_df)
        prob = self.model.predict_proba(input_df)
        return self.label_encoder.inverse_transform(pred)[0], prob

    def show_raw_data(self):
        """Return the first rows of ``self.data``.

        Despite the method name, ``preprocessing()`` has already overwritten
        ``self.data`` in place, so the rows returned are the encoded/scaled
        values rather than the original CSV contents.
        """
        return self.data.head()

In [11]:
# Initialize the model: the constructor loads the CSV and preprocesses it,
# then train_model() fits the random forest.
# NOTE(review): the dataset path is an absolute, machine-specific Windows path;
# the notebook will not run on another machine without editing it.
obesity_model = ObesityModel("E:\\ML\\archive (10)\\ObesityDataSet_raw_and_data_sinthetic.csv")
obesity_model.train_model()



In [14]:
# Preview the first rows held by the model. These are the encoded/scaled
# values, because preprocessing overwrites the loaded frame in place.
obesity_model.show_raw_data()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,0,-0.521741,-0.87438,-0.862558,1,0,-0.78481,0.404102,2,0,-0.013141,0,-1.188028,0.562005,3,3,1
1,0,-0.521741,-1.94566,-1.168077,1,0,1.088307,0.404102,2,1,1.618701,1,2.339676,-1.080619,2,3,1
2,1,-0.207057,1.053924,-0.366089,1,0,-0.78481,0.404102,2,0,-0.013141,0,1.163774,0.562005,1,3,1
3,1,0.422312,1.053924,0.015809,0,0,1.088307,0.404102,2,0,-0.013141,0,1.163774,-1.080619,1,4,5
4,1,-0.364399,0.839668,0.122741,0,0,-0.78481,-2.166941,2,0,-0.013141,0,-1.188028,-1.080619,2,3,6


In [16]:
# One raw (un-encoded, un-scaled) sample to classify; keys are the dataset's
# feature column names.
input_data = dict(
    Gender="Male",
    Age=25,
    Height=1.75,
    Weight=80.0,
    family_history_with_overweight="yes",
    FAVC="no",
    FCVC=2.0,
    NCP=3.0,
    CAEC="Sometimes",
    SMOKE="no",
    CH2O=2.0,
    SCC="no",
    FAF=1.5,
    TUE=0.5,
    CALC="Frequently",
    MTRANS="Public_Transportation",
)

In [17]:
# Predict the class label and the per-class probabilities for the sample
# defined in input_data.
obesity_model.predict(input_data)

('Overweight_Level_II', array([[0.  , 0.09, 0.1 , 0.05, 0.  , 0.21, 0.55]]))

In [19]:
# Same prediction again, emitted through print() as plain text.
print(obesity_model.predict(input_data))

('Overweight_Level_II', array([[0.  , 0.09, 0.1 , 0.05, 0.  , 0.21, 0.55]]))


In [20]:
import pickle as pkl

# Persist the trained model object to disk. The context manager ensures the
# file is flushed and closed even if pickling raises.
# NOTE: pickle stores a reference to the class, not its code, so loading this
# file later requires ObesityModel to be defined/importable first.
with open("TugasOOP_MD.pkl", "wb") as model_file:
    pkl.dump(obesity_model, model_file)