In [23]:
import joblib  # To save and load preprocessors and label encoders
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.losses import CategoricalCrossentropy, MeanSquaredError
from tensorflow.keras.metrics import CategoricalAccuracy, MeanAbsoluteError
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed, to_categorical

# --- Step 1: Load the dataset ---
# Read 'meals.csv' into a DataFrame and echo its first five rows as a sanity check.
df = pd.read_csv('meals.csv')

print("Dataset loaded successfully. First 5 rows:", df.head(n=5), sep="\n")


Dataset loaded successfully. First 5 rows:
   age  gender  weight  height  disease_type  severity  \
0   56    Male    58.4     160       Obesity  Moderate   
1   69    Male   101.2     169      Diabetes      Mild   
2   46  Female    63.5     173  Hypertension      Mild   
3   32    Male    58.1     164           NaN      Mild   
4   60    Male    79.5     197      Diabetes  Moderate   

  physical_activity_level  cholesterol  blood_pressure  glucose  \
0                Moderate        173.3             133    116.3   
1                Moderate        199.2             120    137.1   
2               Sedentary        181.0             121    109.6   
3                Moderate        168.2             144    159.4   
4               Sedentary        200.4             172    182.3   

  dietary_restrictions  weekly_exercise_hours diet_recommendation  \
0                  NaN                    3.1            Balanced   
1                  NaN                    4.5            Low_Carb  

In [24]:
# --- Step 2: Define features (X) and target variables (y) ---
# Model inputs, one column name per line. The order here is the column order of X.
features = [
    'age',
    'gender',
    'weight',
    'height',
    'disease_type',
    'severity',
    'physical_activity_level',
    'cholesterol',
    'blood_pressure',
    'glucose',
    'dietary_restrictions',
    'dietary_habits',
    'meal_type',
    'budget',
]
# Columns the model is asked to predict.
targets = [
    'diet_recommendation',
    'weekly_exercise_hours',
    'meal',
    'ingredients',
]

X, y = df[features], df[targets]

# --- Step 3: Identify categorical and numerical features in X ---
# Split the feature columns by dtype so each group can get its own preprocessing:
# object-typed columns are treated as categorical, numeric-typed ones as numerical.
numerical_features_X = X.select_dtypes(include='number').columns
categorical_features_X = X.select_dtypes(include='object').columns

print("\nFeatures (X) and Targets (y) defined.")
print(f"Numerical features in X: {list(numerical_features_X)}")
print(f"Categorical features in X: {list(categorical_features_X)}")



Features (X) and Targets (y) defined.
Numerical features in X: ['age', 'weight', 'height', 'cholesterol', 'blood_pressure', 'glucose', 'budget']
Categorical features in X: ['gender', 'disease_type', 'severity', 'physical_activity_level', 'dietary_restrictions', 'dietary_habits', 'meal_type']


In [25]:
# --- Step 4: Create a column transformer for preprocessing features (X) ---
# Numerical columns are standardised (zero mean, unit variance) with StandardScaler.
# Categorical columns are one-hot encoded; handle_unknown='ignore' makes categories
# that were not seen during fit encode as all zeros instead of raising at predict time.
# sparse_threshold=0.0 forces a dense ndarray output. With the default (0.3) the
# transformer switches to a SciPy sparse matrix as soon as the one-hot block makes
# the overall density drop below 30%, which would silently change the input type
# handed to the neural network.
preprocessor_X = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features_X),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_X)
    ],
    sparse_threshold=0.0,
)

# --- Step 5: Preprocess target variables (y) ---
# 'weekly_exercise_hours' is numerical (regression target).
# 'diet_recommendation', 'meal', 'ingredients' are categorical (classification targets).
# Each categorical target is mapped string -> integer with a LabelEncoder and then
# integer -> one-hot row with to_categorical.
# The encoders are fitted on the full target frame `y` (before any split), so every
# label that occurs anywhere in the dataset gets an index.

label_encoders = {}  # target name -> fitted LabelEncoder (needed for inverse_transform)
y_processed = {}  # target name -> preprocessed target array for the whole dataset

# Regression target: plain float32 vector.
y_processed['weekly_exercise_hours'] = y['weekly_exercise_hours'].values.astype(np.float32)

# Classification targets: label-encode, then one-hot encode.
categorical_targets = ['diet_recommendation', 'meal', 'ingredients']
for target_name in categorical_targets:
    le = LabelEncoder()
    integer_encoded = le.fit_transform(y[target_name])
    onehot_encoded = to_categorical(integer_encoded)
    y_processed[target_name] = onehot_encoded
    label_encoders[target_name] = le  # keep the fitted encoder for later decoding

print("\nPreprocessing transformers and label encoders initialized.")



Preprocessing transformers and label encoders initialized.


In [26]:
# --- Step 6: Split data into training and testing sets ---
# 80% of the rows go to training and 20% to testing; 'random_state' makes the split
# reproducible. X and the original y DataFrame are split in one call so that their
# row indices stay aligned.
X_train, X_test, y_train_orig, y_test_orig = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Step 7: Apply preprocessing to the training and test data ---
# The feature transformer is fitted on the training rows only and then reused,
# unchanged, on the test rows.
X_train_processed = preprocessor_X.fit_transform(X_train)
X_test_processed = preprocessor_X.transform(X_test)

# Number of classes per categorical target, read from the fitted label encoders.
# These values size the softmax output layers of the model.
num_diet_recommendations = len(label_encoders['diet_recommendation'].classes_)
num_meals = len(label_encoders['meal'].classes_)
num_ingredients = len(label_encoders['ingredients'].classes_)


def _encode_targets_for_keras(y_frame):
    """Convert a target DataFrame into the list of arrays the multi-output model expects.

    The returned list is ordered as: weekly_exercise_hours (float32 vector),
    diet_recommendation, meal, ingredients (each one-hot encoded).

    `num_classes` is passed explicitly to `to_categorical`. Without it the one-hot
    width is inferred from the largest label present in `y_frame`, so a split that
    happens not to contain the highest-indexed class would produce an array that is
    narrower than the corresponding softmax output.
    """
    encoded = [y_frame['weekly_exercise_hours'].values.astype(np.float32)]
    for name in ('diet_recommendation', 'meal', 'ingredients'):
        encoder = label_encoders[name]
        encoded.append(
            to_categorical(
                encoder.transform(y_frame[name]),
                num_classes=len(encoder.classes_),
            )
        )
    return encoded


# Prepare y_train and y_test for the Keras model (list of arrays, one per output).
y_train_for_keras = _encode_targets_for_keras(y_train_orig)
y_test_for_keras = _encode_targets_for_keras(y_test_orig)

print("\nData split into training and testing sets, and preprocessed.")
print(f"Shape of processed training features: {X_train_processed.shape}")
print(f"Number of unique diet recommendations: {num_diet_recommendations}")



Data split into training and testing sets, and preprocessed.
Shape of processed training features: (800, 28)
Number of unique diet recommendations: 3


In [27]:
# --- Step 8: Build the Neural Network Model (Keras Functional API for Multi-Output) ---
# The functional API is used because the network has one input and four outputs.

# Seed the random number generators before any layer is created so that weight
# initialisation and batch shuffling are repeatable across "Restart & Run All".
# 42 is the same value used as random_state for the train/test split.
set_random_seed(42)

# Input layer: one value per preprocessed feature column.
input_layer = Input(shape=(X_train_processed.shape[1],), name='input_features')

# Shared trunk: two dense layers whose output feeds every head.
x = Dense(128, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)

# Regression head for 'weekly_exercise_hours' (single linear unit).
weekly_exercise_hours_output = Dense(1, activation='linear', name='weekly_exercise_hours_output')(x)

# Classification head for 'diet_recommendation' (softmax over its classes).
diet_recommendation_output = Dense(num_diet_recommendations, activation='softmax', name='diet_recommendation_output')(x)

# Classification head for 'meal'.
meal_output = Dense(num_meals, activation='softmax', name='meal_output')(x)

# Classification head for 'ingredients'.
ingredients_output = Dense(num_ingredients, activation='softmax', name='ingredients_output')(x)

# Assemble the model. The order of `outputs` must match the order of the target
# arrays passed to fit(): exercise hours, diet, meal, ingredients.
model = Model(
    inputs=input_layer,
    outputs=[
        weekly_exercise_hours_output,
        diet_recommendation_output,
        meal_output,
        ingredients_output
    ]
)

# --- Step 9: Compile the Model ---
# Losses and metrics are keyed by output-layer name:
#   regression head      -> mean squared error loss, mean absolute error metric
#   classification heads -> categorical cross-entropy loss (one-hot targets),
#                           categorical accuracy metric
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss={
        'weekly_exercise_hours_output': MeanSquaredError(),
        'diet_recommendation_output': CategoricalCrossentropy(),
        'meal_output': CategoricalCrossentropy(),
        'ingredients_output': CategoricalCrossentropy()
    },
    metrics={
        'weekly_exercise_hours_output': MeanAbsoluteError(),
        'diet_recommendation_output': CategoricalAccuracy(),
        'meal_output': CategoricalAccuracy(),
        'ingredients_output': CategoricalAccuracy()
    }
)

# Display model summary
model.summary()
print("\nNeural Network model built and compiled.")



Neural Network model built and compiled.


In [28]:
# --- Step 10: Train the Model ---
# Fit the multi-output network on the preprocessed training split.
#   batch_size       -> samples per gradient update
#   epochs           -> full passes over the training data (tune as needed)
#   validation_split -> fraction of the training data held out for validation
#   verbose          -> 1 prints a progress bar per epoch
print("\nTraining the Neural Network model...")
history = model.fit(
    x=X_train_processed,
    y=y_train_for_keras,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)
print("Training complete.")



Training the Neural Network model...
Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 96ms/step - diet_recommendation_output_categorical_accuracy: 0.3339 - diet_recommendation_output_loss: 1.1290 - ingredients_output_categorical_accuracy: 0.1246 - ingredients_output_loss: 1.8754 - loss: 35.1054 - meal_output_categorical_accuracy: 0.1937 - meal_output_loss: 1.8449 - weekly_exercise_hours_output_loss: 30.2561 - weekly_exercise_hours_output_mean_absolute_error: 4.6848 - val_diet_recommendation_output_categorical_accuracy: 0.4125 - val_diet_recommendation_output_loss: 1.0825 - val_ingredients_output_categorical_accuracy: 0.2188 - val_ingredients_output_loss: 1.8543 - val_loss: 18.4589 - val_meal_output_categorical_accuracy: 0.2125 - val_meal_output_loss: 1.8430 - val_weekly_exercise_hours_output_loss: 13.6790 - val_weekly_exercise_hours_output_mean_absolute_error: 3.0765
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - diet

In [29]:
# --- Predict on the test set ---
# The prediction arrays used below were not defined by any cell in this notebook,
# so this cell only worked when they were left over in the kernel. Compute them here.
# model.predict returns one array per output head, in the order the heads were
# passed to Model(outputs=[...]): exercise hours, diet, meal, ingredients.
test_predictions = model.predict(X_test_processed)
# The regression head outputs one column per row; flatten it to a 1-D vector so it
# can be used as a DataFrame column.
y_pred_weekly_exercise_hours = test_predictions[0].ravel()
y_pred_diet_recommendation_onehot = test_predictions[1]
y_pred_meal_onehot = test_predictions[2]
y_pred_ingredients_onehot = test_predictions[3]

# --- Step 13: Inverse Transform Predictions for Categorical Targets ---
# Take the most probable class per row (argmax over the class axis) and map the
# integer index back to the original string label.
y_pred_diet_recommendation = label_encoders['diet_recommendation'].inverse_transform(np.argmax(y_pred_diet_recommendation_onehot, axis=1))
y_pred_meal = label_encoders['meal'].inverse_transform(np.argmax(y_pred_meal_onehot, axis=1))
y_pred_ingredients = label_encoders['ingredients'].inverse_transform(np.argmax(y_pred_ingredients_onehot, axis=1))

# Collect the predictions in a DataFrame that shares the test set's index, so
# actual and predicted rows line up when compared.
y_pred_df = pd.DataFrame({
    'weekly_exercise_hours': y_pred_weekly_exercise_hours,
    'diet_recommendation': y_pred_diet_recommendation,
    'meal': y_pred_meal,
    'ingredients': y_pred_ingredients
}, index=y_test_orig.index)

# --- Step 14: Calculate and Print Final Metrics (using scikit-learn for clarity) ---
print("\n--- FINAL MODEL ACCURACY AND METRICS ---")

# Weekly Exercise Hours (Regression)
mae_weekly = mean_absolute_error(y_test_orig['weekly_exercise_hours'], y_pred_df['weekly_exercise_hours'])
r2_weekly = r2_score(y_test_orig['weekly_exercise_hours'], y_pred_df['weekly_exercise_hours'])
print(f"  Weekly Exercise Hours (Regression):")
print(f"    Mean Absolute Error (MAE): {mae_weekly:.4f}")
print(f"    R-squared (R2): {r2_weekly:.4f}")

# Categorical Targets (Classification Accuracy)
accuracy_diet = accuracy_score(y_test_orig['diet_recommendation'], y_pred_df['diet_recommendation'])
accuracy_meal = accuracy_score(y_test_orig['meal'], y_pred_df['meal'])
accuracy_ingredients = accuracy_score(y_test_orig['ingredients'], y_pred_df['ingredients'])

print(f"  Diet Recommendation (Classification):")
print(f"    Accuracy: {accuracy_diet:.4f}")
print(f"  Meal (Classification):")
print(f"    Accuracy: {accuracy_meal:.4f}")
print(f"  Ingredients (Classification):")
print(f"    Accuracy: {accuracy_ingredients:.4f}")

print("\n--- Example Predictions vs. Actual (first 5 test samples) ---")
print("Actual:")
print(y_test_orig.head())
print("\nPredicted:")
print(y_pred_df.head())



--- FINAL MODEL ACCURACY AND METRICS ---
  Weekly Exercise Hours (Regression):
    Mean Absolute Error (MAE): 2.8935
    R-squared (R2): -0.5332
  Diet Recommendation (Classification):
    Accuracy: 1.0000
  Meal (Classification):
    Accuracy: 0.9950
  Ingredients (Classification):
    Accuracy: 0.9950

--- Example Predictions vs. Actual (first 5 test samples) ---
Actual:
    diet_recommendation  weekly_exercise_hours                          meal  \
521          Low_Sodium                    3.4           Rice and Fish Curry   
737            Balanced                    9.5         Grilled Chicken Salad   
740            Balanced                    6.8  Vegetable Stir Fry with Tofu   
660          Low_Sodium                    5.8   Lentil Soup with Brown Rice   
411            Low_Carb                    2.9  Vegetable Stir Fry with Tofu   

                                           ingredients  
521       White Rice, Fish, Coconut Milk, Curry Spices  
737  Grilled Chicken, Lettuc

In [30]:
# --- Step 15: Save the trained model and preprocessors ---
# Persist the network as a single '.keras' file (the native Keras archive format,
# selected by the file extension).
model.save('diet_recommendation_nn_model.keras')

# Persist the fitted feature transformer and the target label encoders with joblib
# so the same preprocessing and label decoding can be reused at prediction time.
joblib.dump(value=preprocessor_X, filename='preprocessor_X_nn.pkl')
joblib.dump(value=label_encoders, filename='label_encoders_nn.pkl')
print("\nNeural Network model, preprocessor, and label encoders saved successfully.")



Neural Network model, preprocessor, and label encoders saved successfully.
