In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import joblib

# Load dataset
# The path is relative to the notebook's working directory.
df = pd.read_csv("Personalized_Diet_RecommendationsDC.csv")

# Define features (15) and target
features = ['Age', 'Height_cm', 'Weight_kg', 'BMI', 'Cholesterol_Level', 'Blood_Sugar_Level',
            'Daily_Steps', 'Exercise_Frequency', 'Sleep_Hours', 'Gender_Female',
            'Gender_Male', 'Chronic_Disease_Diabetes', 'Chronic_Disease_Heart_Disease',
            'Chronic_Disease_Hypertension', 'Chronic_Disease_Obesity']
X = df[features]
y = df['Recommended_Meal_Plan']

# Define numeric columns for scaling.
# Columns in `features` that are not listed here reach the model unscaled.
numeric_cols = ['Age', 'Height_cm', 'Weight_kg', 'BMI', 'Cholesterol_Level', 'Blood_Sugar_Level',
                'Daily_Steps', 'Exercise_Frequency', 'Sleep_Hours']

# Split data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The split returns selections derived from `df`; take explicit copies so the
# column assignments below write to independent frames instead of triggering
# pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numeric features.
# The scaler is fitted on the training rows only and then reused on the test
# rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

# Train Logistic Regression.
# `multi_class='multinomial'` is intentionally not passed: the argument is
# deprecated since scikit-learn 1.5, and with the default lbfgs solver a target
# with more than two classes is already fitted as a multinomial model.
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate on the held-out test split.
# NOTE(review): the recorded run scored ~0.255 accuracy over four classes,
# i.e. roughly chance level, so the importances below should be read with care.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Logistic Regression Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature Importance (absolute coefficients)
# model.coef_ holds one row of coefficients per class; averaging the absolute
# values over axis 0 yields a single score per feature, in `features` order.
coef = np.abs(model.coef_).mean(axis=0)  # Average across classes
feature_importance = pd.DataFrame({'Feature': features, 'Importance': coef})
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
print("\nFeature Importance (Top 10):")
print(feature_importance.head(10))

# Save model and scaler.
# Persistence is off by default; set SAVE_ARTIFACTS = True to write both files
# into the current working directory.
SAVE_ARTIFACTS = False
if SAVE_ARTIFACTS:
    joblib.dump(model, "logistic_regression_model.pkl")
    joblib.dump(scaler, "scaler_logistic.pkl")
    print("\nModel saved as 'logistic_regression_model.pkl'")

Logistic Regression Accuracy: 0.255

Classification Report:
              precision    recall  f1-score   support

           0       0.27      0.29      0.28       248
           1       0.25      0.20      0.23       259
           2       0.27      0.32      0.29       282
           3       0.22      0.19      0.20       211

    accuracy                           0.26      1000
   macro avg       0.25      0.25      0.25      1000
weighted avg       0.25      0.26      0.25      1000


Feature Importance (Top 10):
                          Feature  Importance
3                             BMI    0.075335
11       Chronic_Disease_Diabetes    0.074501
2                       Weight_kg    0.073584
12  Chronic_Disease_Heart_Disease    0.065483
9                   Gender_Female    0.060609
1                       Height_cm    0.050404
13   Chronic_Disease_Hypertension    0.044753
10                    Gender_Male    0.031115
0                             Age    0.027391
14        Chron