<a href="https://colab.research.google.com/github/BRV12G/Final_year_Project/blob/main/DEEP_LEARNING_ON_DIET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load dataset
# Replace the path below with your actual dataset file path
data = pd.read_csv('/content/nutrition_dataset_with_fiber_water_intake (1).csv')

# 1. Drop 'Person ID' and 'Health Status' columns
data = data.drop(columns=['Person ID', 'Health Status'])

# 2. Check for missing values and handle them
# Fill missing numerical values with the mean, and categorical values with the mode.
# The filled column is assigned back explicitly: calling fillna(..., inplace=True)
# on `data[column]` operates on an intermediate object and stops updating `data`
# under pandas copy-on-write (the default from pandas 3.0).
for column in data.columns:
    if pd.api.types.is_numeric_dtype(data[column]):
        fill_value = data[column].mean()
    else:
        modes = data[column].mode()
        if modes.empty:
            # Column has no non-missing values, so there is no mode to fill with.
            continue
        fill_value = modes[0]
    data[column] = data[column].fillna(fill_value)

# 3. Handle outliers using the IQR method
def handle_outliers(df, columns):
    """Clip each listed column of ``df`` to its 1.5 * IQR fences.

    For every column name in ``columns`` the 25th and 75th percentiles are
    computed, and values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are
    replaced by the nearest fence.

    The columns are overwritten on ``df`` itself, and that same object is
    returned.
    """
    for name in columns:
        first_quartile, third_quartile = df[name].quantile([0.25, 0.75])
        spread = third_quartile - first_quartile
        df[name] = df[name].clip(
            lower=first_quartile - 1.5 * spread,
            upper=third_quartile + 1.5 * spread,
        )
    return df

# Continuous features: these are the columns that get outlier clipping
numerical_columns = [
    'Age', 'Sleep Duration', 'Weight (kg)', 'Height (cm)', 'Systolic',
    'Diastolic', 'Heart Rate', 'Daily Steps', 'BMI Values'
]
data = handle_outliers(data, numerical_columns)

# Separate inputs and outputs
input_columns = numerical_columns + [
    'Gender', 'Occupation', 'Quality of Sleep', 'Activity Level',
    'Stress Level', 'Blood Pressure Category', 'BMI Class'
]
output_columns = [
    'Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)', 'Fats (g)',
    'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)', 'Sodium (mg)',
    'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)',
    'Fiber Intake (g)', 'Water Intake (L)'
]
# Take explicit copies: column selection returns a slice of `data`, and
# assigning into such a slice later raises SettingWithCopyWarning (and the
# write may not land where intended).
X = data[input_columns].copy()
y = data[output_columns].copy()

# 4. Preprocess categorical and numerical data
# Work on an independent frame so the column assignments below modify X itself
# rather than a slice of another DataFrame.
X = X.copy()

# Binary encode Gender
gender_codes = X['Gender'].map({'Male': 0, 'Female': 1})
if gender_codes.isna().any():
    # Series.map yields NaN for labels missing from the mapping; fail loudly
    # instead of feeding NaN into the network.
    unexpected = sorted(X.loc[gender_codes.isna(), 'Gender'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'Gender' column: {unexpected}")
X['Gender'] = gender_codes

# Label encode categorical columns
# NOTE: LabelEncoder assigns integer codes by sorted label order, which need
# not match any natural ordering of the categories.
label_columns = ['Quality of Sleep', 'Activity Level', 'Stress Level', 'Blood Pressure Category', 'BMI Class']
label_encoders = {col: LabelEncoder() for col in label_columns}
for col in label_columns:
    X[col] = label_encoders[col].fit_transform(X[col])

# One-hot encode Occupation
# dtype=float keeps every feature column numeric (recent pandas versions
# otherwise emit bool dummy columns).
X = pd.get_dummies(X, columns=['Occupation'], drop_first=True, dtype=float)

# 5. Split the data into training and testing sets
# The split happens BEFORE scaling so that the scaler statistics are learned
# from the training rows only and the held-out rows do not leak into them.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize and normalize numerical columns
scaler = Pipeline(steps=[
    ('standardize', StandardScaler()),
    ('normalize', MinMaxScaler())
])
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
# Only transform the test rows; values may fall slightly outside [0, 1] when
# a test value lies beyond the range seen in training.
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# Optimizer used for training; passed explicitly to the model builder below
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)


# 6. Define the Deep Learning model using Keras
def build_deep_learning_model(input_dim, output_dim, optimizer='adam'):
    """Build and compile a fully connected multi-output regression network.

    Architecture: Dense(512) -> Dense(256) -> Dense(128), each with ReLU
    activation and followed by Dropout(0.2), then a linear Dense layer with
    ``output_dim`` units.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of regression targets predicted per sample.
        optimizer: Optimizer name or Keras optimizer instance handed to
            ``model.compile``. Defaults to ``'adam'``.

    Returns:
        The compiled Keras ``Sequential`` model (MSE loss, MAE metric).
    """
    model = Sequential()
    # Declare the input shape with an explicit Input object instead of the
    # `input_dim` argument on the first Dense layer.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(output_dim))

    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
    return model

# Initialize and train the model
input_dim = X_train.shape[1]
output_dim = y_train.shape[1]

# Pass the configured optimizer so the chosen learning rate is actually used
deep_model = build_deep_learning_model(input_dim, output_dim, optimizer=optimizer)
deep_model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=1)

# 7. Evaluate the model
y_pred = deep_model.predict(X_test)

# 8. Evaluate the model using performance metrics
# These scikit-learn metrics average uniformly over all output columns by
# default, so each printed number is a single aggregate across every target.
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error (MSE):", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

# 9. Save the model for later use
# NOTE(review): only the network is saved here; the fitted scaler and label
# encoders are not persisted by this cell — confirm whether inference needs them.
deep_model.save('deep_learning_model.h5')


Epoch 1/100


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[column].fillna(data[column].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[column].fillna(data[column].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 745805.5625 - mae: 426.8336 - val_loss: 44258.8984 - val_mae: 97.4548
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 54903.6367 - mae: 136.1062 - val_loss: 29569.2578 - val_mae: 80.8349
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 40726.4297 - mae: 108.4423 - val_loss: 23632.5898 - val_mae: 73.0580
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 33355.4062 - mae: 93.4321 - val_loss: 19607.5879 - val_mae: 66.9928
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 29029.1934 - mae: 84.2903 - val_loss: 17277.6348 - val_mae: 62.5270
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 26396.8398 - mae: 78.3812 - val_loss: 14666.5186 - val_mae: 58.1579
Epoch 7/100
[1m200/200[0m [



Mean Absolute Error (MAE): 17.507478713989258
Mean Squared Error (MSE): 1142.9605712890625
R² Score: 0.7112976908683777
