<a href="https://colab.research.google.com/github/CH2-PS020-FitSync/CH2-PS020-ML/blob/main/model/NutritionDataModelTF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Choose where the dataset is read from. The google.colab import acts as an
# environment probe: when it fails with ImportError the local path is used.
try:
    # `drive` is not referenced in the visible cells; the import only serves
    # as the probe described above.
    from google.colab import drive

    # IPython shell escape: download the file with this Google Drive id via
    # gdown (the recorded output shows it landing in /content on Colab).
    !gdown "1N9BApsWeS7dlLIdOV9v2yzZ_xrpE-O5u"

    nutrition_json = './nutrition_data.json'
except ImportError:
    # presumably a local checkout with the dataset under ../data — confirm layout
    nutrition_json = '../data/nutrition_data.json'

Downloading...
From: https://drive.google.com/uc?id=1N9BApsWeS7dlLIdOV9v2yzZ_xrpE-O5u
To: /content/nutrition_data.json
100% 72.9M/72.9M [00:01<00:00, 69.2MB/s]


In [3]:
# Parse the JSON dataset chosen above into the working frame, then show its
# first five rows as the cell output.
df_nutrition = pd.read_json(path_or_buf=nutrition_json)
df_nutrition.head(n=5)

Unnamed: 0,Age,Weight,Gender,Height,Activity_Level,Goal,Estimated_Calories,Estimated_Carbohydrates,Estimated_Protein_Min,Estimated_Protein_Max,Estimated_Fat
0,58,61,m,181,Light,Maintain Weight,1941,291,72,169,32
1,58,61,m,181,Light,Mild Weight Loss,1746,291,65,152,29
2,58,61,m,181,Light,Weight Loss,1533,291,57,134,25
3,58,61,m,181,Light,Mild Weight Gain,2426,291,90,212,40
4,58,61,m,181,Light,Gain Weight,2814,291,105,246,46


In [4]:
# For every column, show its name followed by (at most) the first ten distinct
# values, one column per paragraph.
column_previews = []
for column in df_nutrition.columns:
    first_values = df_nutrition[column].unique()[:10]
    column_previews.append('\t'.join((column, str(first_values))))

print('\n\n'.join(column_previews))

Age	[58 26 37 34 60 41 29 28 48 56]

Weight	[61 46 69 65 93 94 67 48 91 83]

Gender	['m' 'f']

Height	[181 180 156 195 183 157 164 189 161 186]

Activity_Level	['Light' 'Extra Active' 'Active' 'Sedentary' 'Very Active' 'Moderate']

Goal	['Maintain Weight' 'Mild Weight Loss' 'Weight Loss' 'Mild Weight Gain'
 'Gain Weight']

Estimated_Calories	[1941 1746 1533 2426 2814 2485 2236 1963 3106 3603]

Estimated_Carbohydrates	[291 372 357 263 446 323 327 292 575 295]

Estimated_Protein_Min	[ 72  65  57  90 105  93  83  73 116 135]

Estimated_Protein_Max	[169 152 134 212 246 217 195 171 271 315]

Estimated_Fat	[32 29 25 40 46 41 37 51 60 39]


# Preprocessing

In [5]:
# Collapse the six raw activity levels into three tiers. The result is assigned
# back to the column: calling `.replace(..., inplace=True)` on
# `df_nutrition['Activity_Level']` mutates an intermediate Series, which pandas
# 2.x warns about and which no longer updates the frame under Copy-on-Write.
# The 'Very Active|Extra Active' pattern is listed first so those values are
# rewritten before the bare 'Active' alternative could match inside them
# (assumes pandas applies the patterns in dict order — TODO confirm).
df_nutrition['Activity_Level'] = df_nutrition['Activity_Level'].replace(
    {
        'Very Active|Extra Active': 'Expert',
        'Moderate|Active': 'Intermediate',
        'Sedentary|Light': 'Beginner'
    },
    regex=True
)

# Replace the min/max protein estimates with their midpoint. The guard keeps
# this cell re-runnable: once the two source columns have been dropped there is
# nothing left to do, whereas the unguarded version raised on a second run.
protein_bounds = ['Estimated_Protein_Min', 'Estimated_Protein_Max']

if set(protein_bounds).issubset(df_nutrition.columns):
    mean_protein = (
        df_nutrition['Estimated_Protein_Min'] + df_nutrition['Estimated_Protein_Max']
    ) / 2.

    # Insert the mean at the position of the min column (index 8 in the frame
    # as loaded) so the column order stays calories, carbs, protein, fat.
    # Duplicate column names are deliberately not allowed: a second
    # 'Estimated_Protein_Mean' would corrupt any selection by that name.
    df_nutrition.insert(
        df_nutrition.columns.get_loc('Estimated_Protein_Min'),
        'Estimated_Protein_Mean',
        mean_protein
    )

    df_nutrition = df_nutrition.drop(columns=protein_bounds)

df_nutrition.head()

Unnamed: 0,Age,Weight,Gender,Height,Activity_Level,Goal,Estimated_Calories,Estimated_Carbohydrates,Estimated_Protein_Mean,Estimated_Fat
0,58,61,m,181,Beginner,Maintain Weight,1941,291,120.5,32
1,58,61,m,181,Beginner,Mild Weight Loss,1746,291,108.5,29
2,58,61,m,181,Beginner,Weight Loss,1533,291,95.5,25
3,58,61,m,181,Beginner,Mild Weight Gain,2426,291,151.0,40
4,58,61,m,181,Beginner,Gain Weight,2814,291,175.5,46


In [6]:
# Input columns fed to the models, in the order they are selected from the frame.
FEATURES = [
    'Age',
    'Weight',
    'Gender',
    'Height',
    'Activity_Level',
    'Goal',
]

# Regression outputs; the network's last layer has one unit per entry.
TARGET = [
    'Estimated_Calories',
    'Estimated_Carbohydrates',
    'Estimated_Protein_Mean',
    'Estimated_Fat',
]

# Maps a categorical column name to its LabelEncoder; populated by the
# encoding cell.
LABEL_ENCODER = {}

In [7]:
categorical_cols = ['Gender', 'Activity_Level', 'Goal']

# Integer-encode each categorical column and keep the fitted encoder so the
# same label -> code mapping can be applied to new inputs later.
#
# Columns that are already numeric are skipped. Refitting on the integer codes
# (for example when this cell is executed twice) would replace the label -> code
# encoders with code -> code ones and persist those to disk, after which string
# labels such as 'f' could no longer be transformed.
for col in categorical_cols:
    if pd.api.types.is_numeric_dtype(df_nutrition[col]):
        if col not in LABEL_ENCODER:
            # Encoded data without its encoder cannot be recovered from here.
            raise RuntimeError(
                f"Column '{col}' is already encoded but its LabelEncoder is missing; "
                "reload the data and re-run the preprocessing cells."
            )
        continue

    encoder = LabelEncoder()
    df_nutrition[col] = encoder.fit_transform(df_nutrition[col])
    # Register the encoder only once it has been fitted successfully.
    LABEL_ENCODER[col] = encoder

# Persist all encoders in one file; the returned path list is the cell output.
joblib.dump(LABEL_ENCODER, '../nutrition_label.joblib')

['../nutrition_label.joblib']

# Training

In [8]:
# Seed Python, NumPy and TensorFlow, and pin the split, so that the train/test
# partition, the weight initialisation and therefore the reported metrics are
# repeatable on a fresh kernel (up to any nondeterministic device ops).
# 123 matches the random_state already used for the Ridge baseline.
SEED = 123
tf.keras.utils.set_random_seed(SEED)

# Hold out 10% of the rows.
X_train, X_test, y_train, y_test = train_test_split(
    df_nutrition[FEATURES],
    df_nutrition[TARGET],
    train_size=0.9,
    random_state=SEED
)

# One hidden ReLU layer, followed by a linear layer with one unit per target.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(len(FEATURES),)),
    tf.keras.layers.Dense(len(TARGET))
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mae', 'mse']
)

# NOTE(review): the held-out split doubles as validation data here, so the
# per-epoch validation metrics are computed on the same rows that are used for
# the final evaluation — confirm this is intended.
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=1000,
    epochs=20
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x783f7d5d54e0>

In [9]:
# Score the held-out rows with the trained network; each prediction row holds
# one value per entry of TARGET.
prediction = model.predict(X_test)
print(f"Prediction: {prediction!s}")

# evaluate() returns the compiled loss followed by the compiled metrics, so
# `loss` is the list [mse loss, mae, mse] rather than a single number.
loss = model.evaluate(X_test, y_test)
print(f"Loss: {loss!s}")

Prediction: [[2405.3333    364.10446   151.99461    37.62687 ]
 [3349.441     354.95828   218.26846    56.35562 ]
 [3994.4148    443.83936   258.53064    64.84858 ]
 ...
 [2581.6897    310.20108   165.90297    41.885105]
 [3931.9927    424.473     256.07504    65.94976 ]
 [2652.5925    336.97678   169.66077    42.35469 ]]
Loss: [63254.11328125, 115.70964813232422, 63254.11328125]


In [10]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge

# Linear baseline: one Ridge regressor per target column, fitted on the same
# training split as the network and scored on the same held-out rows.
regr = MultiOutputRegressor(estimator=Ridge(random_state=123))
regr.fit(X_train, y_train)
regr.predict(X_test)

array([[2548.47087692,  354.14692436,  158.80218014,   41.98633579],
       [3386.7354663 ,  374.94758053,  211.18746425,   55.95673956],
       [3838.80520123,  437.93126479,  239.44438661,   63.49017121],
       ...,
       [2636.57105625,  318.60131487,  164.3059133 ,   43.4531329 ],
       [3807.9462936 ,  433.60575877,  237.51102404,   62.97714417],
       [2766.37629986,  336.60497138,  172.41749595,   45.6168994 ]])

In [11]:
# Original note, kept for context: weight goals must be transformed from the
# actual goal in kgs to a percentage of body mass to lose or gain.
# NOTE(review): this cell passes Goal as one of the categorical labels instead —
# confirm where that conversion is expected to happen.
new_user = {
    'Age': 17,
    'Weight': 65,
    'Gender': 'f',
    'Height': 160,
    'Activity_Level': 'Beginner',
    'Goal': 'Maintain Weight'
}
user_df = pd.DataFrame([new_user])

# Encode the categorical answers with the encoders fitted during preprocessing,
# so the labels map to the same integer codes the network was trained on.
encoded_columns = {
    name: LABEL_ENCODER[name].transform(user_df[name])
    for name in categorical_cols
}
user_df = user_df.assign(**encoded_columns)

# Select the inputs in training order and run the network on the single row.
X_new = user_df.loc[:, FEATURES]
predict = model.predict(X_new)

print('Predicted Nutritional Needs:')
for target_name, predicted_value in zip(TARGET, predict[0]):
    print(f'{target_name}: {predicted_value}')

Predicted Nutritional Needs:
Estimated_Calories: 2447.214111328125
Estimated_Carbohydrates: 337.5760803222656
Estimated_Protein_Mean: 154.16798400878906
Estimated_Fat: 37.342689514160156
