In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# --- Data loading ---
df = pd.read_csv("Personalized_Diet_RecommendationsDC.csv")

# Continuous columns; these are the ones that get standardized later on.
numeric_cols = ['Age', 'Height_cm', 'Weight_kg', 'BMI']

# Model inputs: the 4 numeric columns followed by 6 gender / chronic-disease
# columns, i.e. 10 features in total (order matters for feature importances).
# NOTE(review): the recorded cell output lists feature indices above 20, so an
# earlier run apparently used a longer feature list — confirm which is intended.
features = numeric_cols + [
    'Gender_Female',
    'Gender_Male',
    'Chronic_Disease_Diabetes',
    'Chronic_Disease_Heart_Disease',
    'Chronic_Disease_Hypertension',
    'Chronic_Disease_Obesity',
]
X = df[features]
y = df['Recommended_Meal_Plan']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale numeric features
# Work on explicit copies of the split frames: assigning columns into the
# objects returned by train_test_split can raise pandas'
# SettingWithCopyWarning (depending on the pandas / scikit-learn versions),
# and copying also guarantees the unscaled X is never touched.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same mean/std to
# the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

# Fit a 100-tree random forest on the training split; the fixed seed makes
# the fitted forest reproducible. fit() returns the estimator itself.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows: overall accuracy first, then the per-class
# precision / recall / F1 table.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Random Forest Accuracy:", accuracy)
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

# Pair each input column with the importance the fitted forest assigned to it
# and rank from most to least important.
feature_importance = (
    pd.DataFrame({'Feature': features, 'Importance': model.feature_importances_})
    .sort_values(by='Importance', ascending=False)
)
print("\nFeature Importance (Top 10):")
print(feature_importance.head(10))

# Save model and scaler
# NOTE: persistence is currently disabled — the joblib.dump calls below are
# commented out, so running this cell writes nothing to disk. Uncomment all
# three lines to save the fitted model and the fitted scaler (both are needed
# to score new data, since the model was trained on scaled numeric columns).
#joblib.dump(model, "random_forest_model.pkl")
#joblib.dump(scaler, "scaler_rf.pkl")
#print("\nModel saved as 'random_forest_model.pkl'")

Random Forest Accuracy: 0.261

Classification Report:
              precision    recall  f1-score   support

           0       0.27      0.30      0.28       248
           1       0.28      0.25      0.26       259
           2       0.27      0.29      0.28       282
           3       0.21      0.19      0.20       211

    accuracy                           0.26      1000
   macro avg       0.26      0.26      0.26      1000
weighted avg       0.26      0.26      0.26      1000


Feature Importance (Top 10):
                 Feature  Importance
3                    BMI    0.149169
0                    Age    0.137974
2              Weight_kg    0.131421
1              Height_cm    0.130339
24   Alcohol_Consumption    0.027283
23   Genetic_Risk_Factor    0.026648
5            Gender_Male    0.025426
4          Gender_Female    0.025284
22  Food_Aversions_Sweet    0.023459
21  Food_Aversions_Spicy    0.022778
