In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score, classification_report, f1_score, confusion_matrix
from catboost import CatBoostClassifier
import joblib

# Load engineered dataset
df = pd.read_csv("Personalized_Diet_RecommendationsFE.csv")

# Define features
features = [f for f in df.columns if f != 'Recommended_Meal_Plan']
X = df[features]
y = df['Recommended_Meal_Plan']

# Define numeric columns
numeric_cols = features

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train CatBoost Classifier
model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.05,
    depth=6,
    l2_leaf_reg=5,
    verbose=100,
    random_state=42
)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
macro_f1 = f1_score(y_test, y_pred, average='macro')
print("CatBoost Balanced Accuracy:", balanced_acc)
print("Macro F1-Score:", macro_f1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Feature Importance
feature_importance = pd.DataFrame({
    'Feature': features,
    'Importance': model.get_feature_importance()
})
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
print("\nFeature Importance:")
print(feature_importance)

# Save model and scaler
#model.save_model("catboost_engineered_model.cbm")
#joblib.dump(scaler, "scaler_engineered.pkl")
#print("\nModel saved as 'catboost_engineered_model.cbm'")

0:	learn: 1.3853298	total: 171ms	remaining: 1m 25s
100:	learn: 1.2828461	total: 1.36s	remaining: 5.38s
200:	learn: 1.2079227	total: 2.57s	remaining: 3.82s
300:	learn: 1.1376220	total: 3.86s	remaining: 2.55s
400:	learn: 1.0744128	total: 5.04s	remaining: 1.24s
499:	learn: 1.0171442	total: 6.39s	remaining: 0us
CatBoost Balanced Accuracy: 0.22209327257751355
Macro F1-Score: 0.22217419747233402

Classification Report:
              precision    recall  f1-score   support

           0       0.21      0.21      0.21       248
           1       0.26      0.21      0.23       259
           2       0.26      0.28      0.27       282
           3       0.17      0.19      0.18       211

    accuracy                           0.23      1000
   macro avg       0.22      0.22      0.22      1000
weighted avg       0.23      0.23      0.23      1000


Confusion Matrix:
[[53 48 73 74]
 [61 54 85 59]
 [74 61 78 69]
 [60 45 66 40]]

Feature Importance:
                   Feature  Importance
0       