In [1]:
# --- นำเข้าคลังที่จำเป็น ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# --- โหลดและเตรียมข้อมูล ---
df = pd.read_csv('food_menu.csv')

class_counts = df['food_type'].value_counts()
max_count = class_counts.max()
df_balanced = pd.DataFrame()

for food_class, count in class_counts.items():
    class_df = df[df['food_type'] == food_class]
    if count < max_count:
        repeat_factor = int(np.ceil(max_count / count))
        class_df_duplicated = pd.concat([class_df] * repeat_factor, ignore_index=True)
        class_df_balanced = class_df_duplicated.sample(n=max_count, random_state=42)
    else:
        class_df_balanced = class_df
    df_balanced = pd.concat([df_balanced, class_df_balanced], ignore_index=True)

df = df_balanced

df['category_encoded'] = LabelEncoder().fit_transform(df['category'])
df['sugar_to_carb_ratio'] = df['sugar'] / (df['carbohydrate'] + 1e-8)
df['fat_to_calorie_ratio'] = df['fat'] / (df['calories'] + 1e-8)

X = df[['calories', 'protein', 'carbohydrate', 'sugar', 'fat', 'sodium',
        'category_encoded', 'sugar_to_carb_ratio', 'fat_to_calorie_ratio']]
y = df['food_type']

le = LabelEncoder()
y_encoded = le.fit_transform(y)

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42)

# --- สร้างและประเมินโมเดล XGBoost ---
model = DecisionTreeClassifier(criterion='entropy', random_state=42)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=le.classes_)

print(f"XGBoost Accuracy: {acc:.4f}")
print("Classification Report:")
print(report)

XGBoost Accuracy: 0.8077
Classification Report:
              precision    recall  f1-score   support

   น้ำตาลต่ำ       0.86      0.78      0.82        23
       สมดุล       0.69      0.56      0.62        16
  โซเดียมต่ำ       0.90      0.90      0.90        20
    ไขมันต่ำ       0.75      0.95      0.84        19

    accuracy                           0.81        78
   macro avg       0.80      0.80      0.79        78
weighted avg       0.81      0.81      0.80        78

