In [32]:
import pandas as pd
import pickle
import numpy as np
# Load the test set (path is relative to the notebook's working directory).
test_data = pd.read_csv("data/test.csv")

In [33]:
# Keep the original "id" column so it can be written to the submission file later.
# When the file has no "id" column, fall back to positional ids 0..n-1.
test_ids = test_data["id"] if "id" in test_data.columns else range(len(test_data))

In [34]:
# "id" is an identifier, not a feature: remove it before preprocessing.
# errors="ignore" keeps this cell consistent with the cell above, which already
# allows for a test file without an "id" column, and makes the cell safe to
# re-run (a second in-place drop used to raise KeyError).
test_data = test_data.drop(columns=["id"], errors="ignore")

In [35]:
# Load the preprocessing artifacts saved in "preprocessing.pkl".
# pickle.load can execute arbitrary code, so only load files from a trusted source.
with open("data/preprocessing.pkl", "rb") as f:
    preprocessor = pickle.load(f)

In [36]:
# Unpack the artifacts stored in preprocessing.pkl, in this order:
#   "scaler"         -> std_scaler      (scaler applied to Age / Height / Weight)
#   "one_hot_fields" -> one_hot_fields  (one-hot column names produced from train.csv)
#   "CAEC_mapping"   -> CAEC_mapping
#   "CALC_mapping"   -> CALC_mapping
std_scaler, one_hot_fields, CAEC_mapping, CALC_mapping = (
    preprocessor[key]
    for key in ("scaler", "one_hot_fields", "CAEC_mapping", "CALC_mapping")
)

In [37]:
# One-hot encode "Gender". With drop_first=True the first category level is
# dropped, so only the remaining level(s) get an indicator column.
# NOTE(review): get_dummies derives the levels from the values present in
# test.csv, not from train.csv — confirm the resulting column(s) match the
# features the model was trained on.
test_data = pd.get_dummies(test_data, columns=["Gender"], drop_first=True)

In [38]:
# Standardize "Age", "Height" and "Weight" with the already-fitted scaler
# (transform only; nothing is fitted on the test data here).
# Running this cell twice on the same frame scales the values twice.
numeric_columns = ["Age", "Height", "Weight"]
scaled_values = std_scaler.transform(test_data[numeric_columns])
test_data[numeric_columns] = scaled_values

In [39]:
# Convert the "yes"/"no" columns to 1/0 integers.
yes_no_mapping = {"yes": 1, "no": 0}
binary_features = ["family_history_with_overweight", "FAVC", "SMOKE", "SCC"]
# Each listed column is replaced in its existing position by its encoded version.
test_data = test_data.assign(
    **{
        column: test_data[column].map(yes_no_mapping).astype(int)
        for column in binary_features
    }
)

In [40]:
# Label-encode "CAEC" and "CALC" with the mappings stored in preprocessing.pkl.
# Series.map turns every value that is not a key of the mapping into NaN, so a
# category that appears in test.csv but not in the mapping would otherwise
# disappear silently. Report such categories; the encoding itself is unchanged.
# Assumes both mappings are dict-like (keys = raw category values) — TODO confirm.
for column, mapping in (("CAEC", CAEC_mapping), ("CALC", CALC_mapping)):
    raw_values = test_data[column]
    known_values = list(mapping.keys())
    unmapped = raw_values[raw_values.notna() & ~raw_values.isin(known_values)]
    if not unmapped.empty:
        print(
            f'[warning] "{column}": {len(unmapped)} row(s) have values missing from '
            f"the mapping and will become NaN: {unmapped.unique().tolist()}"
        )
    test_data[column] = raw_values.map(mapping)

In [42]:
# One-hot encoding for "MTRANS"
def one_hot_encode(data, field_name, known_values):
    """Add one 0/1 indicator column per entry of ``known_values``.

    The indicator columns are derived from ``known_values`` rather than from the
    values found in ``data``, so the one-hot columns of test.csv line up with
    the ones produced from train.csv.

    Args:
        data: DataFrame containing the column ``field_name``; modified in place.
        field_name: Name of the categorical column to encode.
        known_values: Iterable of category values; each one yields a column
            named ``f"{field_name}_{value}"``.

    Returns:
        The same ``data`` object, with the indicator columns appended. The
        original ``field_name`` column is left in place.
    """
    for category in known_values:
        is_category = data[field_name] == category
        data[f"{field_name}_{category}"] = is_category.astype(int)
    return data

# Recover the MTRANS categories seen in train.csv from the stored one-hot column names.
# Only names that actually start with "MTRANS_" are used, and only that leading
# prefix is stripped: str.replace would also rewrite the text in the middle of a
# name, and a non-MTRANS entry in one_hot_fields would end up as a bogus
# all-zero "MTRANS_<name>" column.
MTRANS_prefix = "MTRANS_"
MTRANS_categories = [
    col[len(MTRANS_prefix):] for col in one_hot_fields if col.startswith(MTRANS_prefix)
]
test_data = one_hot_encode(test_data, "MTRANS", MTRANS_categories)

# Drop the raw MTRANS column now that its indicator columns exist.
test_data = test_data.drop(columns=["MTRANS"])

In [43]:
# Display the preprocessed feature frame for a visual check.
test_data

Unnamed: 0,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,Gender_Male,MTRANS_Public_Transportation,MTRANS_Automobile,MTRANS_Walking,MTRANS_Motorbike,MTRANS_Bike
0,0.537644,1.695675,1.241770,1,1,2.938616,3.000000,1,0,2.825629,0,0.855400,0.000000,1.0,True,1,0,0,0,0
1,-0.499620,-1.148152,-0.829748,1,1,2.000000,1.000000,1,0,3.000000,0,1.000000,0.000000,1.0,False,1,0,0,0,0
2,0.379434,-0.651587,0.898933,1,1,3.000000,3.000000,1,0,2.621877,0,0.000000,0.250502,1.0,False,1,0,0,0,0
3,-0.503267,-1.685011,0.598259,1,1,2.000000,2.977909,1,0,2.786417,0,0.094851,0.000000,1.0,True,1,0,0,0,0
4,0.379434,-0.834373,0.642469,1,1,3.000000,3.000000,1,0,2.653531,0,0.000000,0.741069,1.0,False,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13835,-0.090361,0.242116,-0.373686,1,0,2.813234,3.000000,1,0,1.000000,0,0.807076,0.778632,1.0,True,1,0,0,0,0
13836,0.906866,-1.262687,-0.981385,0,1,3.000000,3.000000,1,0,2.000000,0,0.000000,0.000000,1.0,False,1,0,0,0,0
13837,-0.159318,-1.313689,-1.649473,0,1,3.000000,2.273740,2,0,2.000000,0,1.949840,1.000000,1.0,False,1,0,0,0,0
13838,-0.499620,-0.919083,-1.322568,1,1,2.000000,3.000000,1,0,2.000000,0,3.000000,2.000000,0.0,True,1,0,0,0,0


In [16]:
# Cast every column to float32 so the whole frame has a single numeric dtype.
# NOTE(review): this cell's execution count (16) is out of sequence with its
# neighbours — confirm it still runs in this position on a fresh kernel.
test_data = test_data.astype(np.float32)  # original author's note: XGBoost only accepts float32 or int, though bool also works

In [44]:
# Load the pickled XGBoost model.
# pickle.load can execute arbitrary code, so only load files from a trusted source.
with open("./model/250302_model.pkl", "rb") as f:
    loaded_model = pickle.load(f)

In [45]:
# Run the model on the preprocessed test set.
# NOTE(review): nothing in this notebook checks that test_data's columns (names
# and order) match the features the model was trained on — confirm.
predictions = loaded_model.predict(test_data)

In [46]:
# Show the raw predictions (a numpy array of integer class codes, per the output below).
predictions

array([5, 2, 6, ..., 0, 1, 5], dtype=int64)

In [48]:
# Translate the model's integer class codes back to the NObeyesdad label names.
# NOTE(review): this code -> label table is hard-coded; it has to be the same
# encoding that was applied to the target when the model was trained — confirm
# against the training notebook.
NObeyesdad_mapping = {
    0: "Insufficient_Weight",
    1: "Normal_Weight",
    2: "Overweight_Level_I",
    3: "Overweight_Level_II",
    4: "Obesity_Type_I",
    5: "Obesity_Type_II",
    6: "Obesity_Type_III",
}
# Series.map yields NaN for every value that is not a key of the mapping: an
# unexpected class code, or this cell being run a second time on values that
# were already converted to label names. Fail loudly instead of letting NaN
# labels reach the submission file.
raw_predictions = pd.Series(predictions)
predicted_labels = raw_predictions.map(NObeyesdad_mapping)
unmapped_mask = predicted_labels.isna()
if unmapped_mask.any():
    unexpected_values = raw_predictions[unmapped_mask].unique().tolist()
    raise ValueError(
        f"predictions contain values with no NObeyesdad label: {unexpected_values}"
    )
predictions = predicted_labels.tolist()

In [49]:
# Assemble the final submission (one row per test id with its predicted label)
# and write it to submission.csv without the DataFrame index.
submission = pd.DataFrame({"id": test_ids}).assign(NObeyesdad=predictions)
submission.to_csv("submission.csv", index=False)
print("結果儲存至 submission.csv")