In [1]:
import pandas as pd
import numpy as np
import pickle
import sys

def load_pipeline(model_path="best_model.pkl"):
    """Load the pickled pipeline object stored at ``model_path``.

    Args:
        model_path: Path of the pickle file to read (opened in binary mode).

    Returns:
        Whatever object was pickled into the file.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    with open(model_path, "rb") as handle:
        return pickle.load(handle)

def preprocess_input(input_data, pipeline):
    """Build the feature frame expected by the model from raw input.

    Args:
        input_data: A single record as a ``dict`` or ``pd.Series``, one or
            more records as a ``pd.DataFrame``, or any other value the
            ``pd.DataFrame`` constructor accepts (e.g. a list of dicts).
            A DataFrame argument is copied and never modified.
        pipeline: Mapping with the keys ``"feature_names"`` (columns to
            return, in order), ``"numerical"`` (columns handed to the
            scaler) and ``"scaler"`` (object exposing ``transform``).

    Returns:
        A DataFrame containing exactly the ``feature_names`` columns, in
        that order, with the numerical columns scaled.

    Raises:
        KeyError: If a required pipeline key is missing, or a numerical
            column is absent from both the input and ``feature_names``.
    """
    feature_names = list(pipeline["feature_names"])
    numerical = list(pipeline["numerical"])

    # Normalise every supported input shape to a DataFrame we own.
    if isinstance(input_data, pd.DataFrame):
        input_df = input_data.copy()
    elif isinstance(input_data, pd.Series):
        # Go through a dict so each column gets its own inferred dtype.
        input_df = pd.DataFrame([input_data.to_dict()])
    elif isinstance(input_data, dict):
        input_df = pd.DataFrame([input_data])
    else:
        input_df = pd.DataFrame(input_data)

    # Any expected feature that is absent defaults to 0. This is meant for
    # one-hot columns; note that an absent numerical feature also becomes a
    # raw 0 before scaling.
    for col in feature_names:
        if col not in input_df.columns:
            input_df[col] = 0

    # Without numerical columns there is nothing to fill or scale, and
    # scalers generally reject zero-column input.
    if numerical:
        # Nulls are replaced by 0 (not a median/mean); adjust here if the
        # training pipeline used a different imputation.
        input_df[numerical] = input_df[numerical].fillna(0)
        input_df[numerical] = pipeline["scaler"].transform(input_df[numerical])

    # Reorder (and drop extras) so columns match the training layout.
    return input_df[feature_names]

def predict(input_data, model_path="best_model.pkl"):
    """Predict decoded labels for ``input_data`` with the saved pipeline.

    Args:
        input_data: Raw input, passed unchanged to ``preprocess_input``.
        model_path: Pickle file read through ``load_pipeline``.

    Returns:
        The result of the label encoder's ``inverse_transform`` applied to
        the model's predictions.
    """
    bundle = load_pipeline(model_path)
    features = preprocess_input(input_data, bundle)
    encoded = bundle["model"].predict(features)
    # Map the encoded predictions back to their original label values.
    return bundle["label_encoder"].inverse_transform(encoded)

# -------- Example usage CLI ------------
if __name__ == "__main__":
    # Example record: fill in according to the model's numeric features
    # and one-hot encoded columns.
    sample = {
        "Age": 22,
        "Height": 1.74,
        "Weight": 75.0,
        "FCVC": 3,
        "NCP": 3,
        "CH2O": 1,
        "FAF": 1,
        "TUE": 1,
    }
    # One-hot categorical columns used by the model.
    sample.update(
        {
            "Gender_Male": 1,
            "family_history_with_overweight_yes": 1,
            "FAVC_yes": 1,
            "SMOKE_yes": 0,
            "SCC_yes": 0,
            "CAEC_Often": 1,
            "CAEC_Sometimes": 0,
            "CAEC_Never": 0,
            "CALC_Sometimes": 1,
            "CALC_Never": 0,
            "MTRANS_Public_Transportation": 1,
            "MTRANS_Automobile": 0,
            "MTRANS_Other": 0,
        }
    )
    result = predict(sample, "best_model.pkl")
    print("Prediction:", result[0])


Prediction: Normal_Weight
