In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score, classification_report, f1_score, confusion_matrix
from catboost import CatBoostClassifier
import joblib

# Load engineered dataset
df = pd.read_csv("Personalized_Diet_RecommendationsFE.csv")

# Define features
features = [f for f in df.columns if f != 'Recommended_Meal_Plan']
X = df[features]
y = df['Recommended_Meal_Plan']

# Define numeric columns
numeric_cols = features

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train CatBoost Classifier
model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.05,
    depth=6,
    l2_leaf_reg=5,
    verbose=100,
    random_state=42
)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
macro_f1 = f1_score(y_test, y_pred, average='macro')
print("CatBoost Balanced Accuracy:", balanced_acc)
print("Macro F1-Score:", macro_f1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Feature Importance
feature_importance = pd.DataFrame({
    'Feature': features,
    'Importance': model.get_feature_importance()
})
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
print("\nFeature Importance:")
print(feature_importance)

# Save model and scaler
#model.save_model("catboost_engineered_model.cbm")
#joblib.dump(scaler, "scaler_engineered.pkl")
#print("\nModel saved as 'catboost_engineered_model.cbm'")

0:	learn: 1.3847892	total: 26.8ms	remaining: 13.3s
100:	learn: 1.2885844	total: 1.45s	remaining: 5.71s
200:	learn: 1.2116879	total: 2.89s	remaining: 4.29s
300:	learn: 1.1388030	total: 4.32s	remaining: 2.86s
400:	learn: 1.0765153	total: 5.76s	remaining: 1.42s
499:	learn: 1.0179021	total: 7.2s	remaining: 0us
CatBoost Balanced Accuracy: 0.22961752562773702
Macro F1-Score: 0.2298077568569446

Classification Report:
              precision    recall  f1-score   support

           0       0.21      0.21      0.21       248
           1       0.26      0.23      0.24       259
           2       0.30      0.32      0.31       282
           3       0.15      0.17      0.16       211

    accuracy                           0.23      1000
   macro avg       0.23      0.23      0.23      1000
weighted avg       0.24      0.23      0.24      1000


Confusion Matrix:
[[51 64 63 70]
 [60 59 79 61]
 [71 52 90 69]
 [60 49 67 35]]

Feature Importance:
                    Feature  Importance
1        