In [45]:
import json
from google.colab import files
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import (Dense, Embedding, Flatten, Input, concatenate)
from tensorflow.keras.models import Model
from sklearn.preprocessing import StandardScaler

In [60]:
# Resolve the dataset path: on Colab, run the gdown download and use the Colab
# path; if google.colab cannot be imported, fall back to a local relative path.
# NOTE(review): the imports cell already runs `from google.colab import files`
# unconditionally, so outside Colab the notebook would fail before this
# fallback is reached -- confirm whether that import is still needed.
try:
    # `drive` is not used anywhere in the visible notebook; the import only
    # acts as an "are we running on Colab?" probe.
    from google.colab import drive

    # NOTE(review): the recorded output shows this download being saved as
    # /content/nutrition_data.json, which is not the path assigned below --
    # confirm which file the notebook is meant to read.
    !gdown "1Mgb_IhQHroQDuG6lQ039F6NF8Yeuy6QN"

    nutrition_json = '../content/sample_data/nutrition_dataTest.json'
except ImportError:
    nutrition_json = '../sample_data/nutrition_dataTest.json'

Downloading...
From: https://drive.google.com/uc?id=1Mgb_IhQHroQDuG6lQ039F6NF8Yeuy6QN
To: /content/nutrition_data.json
100% 66.4M/66.4M [00:00<00:00, 133MB/s]


In [61]:
# Load the nutrition dataset from the path resolved in the previous cell,
# then print its column names, dtypes and non-null counts.
df_nutrition = pd.read_json(nutrition_json)
df_nutrition.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Age                      200 non-null    int64 
 1   Weight                   200 non-null    int64 
 2   Gender                   200 non-null    object
 3   Height                   200 non-null    int64 
 4   Activity_Level           200 non-null    object
 5   Goal                     200 non-null    object
 6   Estimated_Calories       200 non-null    int64 
 7   Estimated_Carbohydrates  200 non-null    int64 
 8   Estimated_Protein_Min    200 non-null    int64 
 9   Estimated_Protein_Max    200 non-null    int64 
 10  Estimated_Fat            200 non-null    int64 
dtypes: int64(8), object(3)
memory usage: 17.3+ KB


In [62]:
# Collapse the six raw activity levels into three coarser tiers.
# No target tier is itself a key, so applying the mapping in a single pass is
# equivalent to the original one-at-a-time replacements and is safe to re-run.
ACTIVITY_LEVEL_TIERS = {
    'Sedentary': 'Beginner',
    'Light': 'Beginner',
    'Moderate': 'Intermediate',
    'Active': 'Intermediate',
    'Very Active': 'Expert',
    'Extra Active': 'Expert',
}

# Assign the remapped column back explicitly instead of calling
# `df[col].replace(..., inplace=True)`: in-place replacement on a selected
# column is chained assignment, which newer pandas versions warn about and
# which does nothing under Copy-on-Write.
# The later EDA cells read the tiered values from `df_nutrition`, so the
# update is written to it directly rather than relying on `df` sharing memory
# with it.
df_nutrition['Activity_Level'] = df_nutrition['Activity_Level'].replace(ACTIVITY_LEVEL_TIERS)

# Independent working copy, so later edits to `df` cannot leak back.
df = df_nutrition.copy()

print(df)

     Age  Weight Gender  Height Activity_Level              Goal  \
0     19      90      f     174       Beginner   Maintain Weight   
1     19      90      f     174       Beginner  Mild Weight Loss   
2     19      90      f     174       Beginner       Weight Loss   
3     19      90      f     174       Beginner  Mild Weight Gain   
4     19      90      f     174       Beginner       Gain Weight   
..   ...     ...    ...     ...            ...               ...   
195   20      99      m     164       Beginner   Maintain Weight   
196   20      99      m     164       Beginner  Mild Weight Loss   
197   20      99      m     164       Beginner       Weight Loss   
198   20      99      m     164       Beginner  Mild Weight Gain   
199   20      99      m     164       Beginner       Gain Weight   

     Estimated_Calories  Estimated_Carbohydrates  Estimated_Protein_Min  \
0                  2410                      361                     90   
1                  2169          

In [63]:
# Preview the first rows of the nutrition DataFrame.
df_nutrition.head()

Unnamed: 0,Age,Weight,Gender,Height,Activity_Level,Goal,Estimated_Calories,Estimated_Carbohydrates,Estimated_Protein_Min,Estimated_Protein_Max,Estimated_Fat
0,19,90,f,174,Beginner,Maintain Weight,2410,361,90,210,40
1,19,90,f,174,Beginner,Mild Weight Loss,2169,361,81,189,36
2,19,90,f,174,Beginner,Weight Loss,1903,361,71,166,31
3,19,90,f,174,Beginner,Mild Weight Gain,3012,361,112,263,50
4,19,90,f,174,Beginner,Gain Weight,3494,361,131,305,58


In [64]:
# Input columns for modelling, spelled exactly as they appear in the dataset
# ('Activity_Level' and 'Goal' are capitalised there, so lowercase names would
# raise KeyError on df[FEATURES]).
FEATURES = ['Age', 'Weight', 'Gender', 'Height', 'Activity_Level', 'Goal']
# Registry of fitted encoders keyed by column name; populated by get_col_to_encode.
LABEL_ENCODER = dict()

In [65]:
def get_col_to_encode(*dataframes):
    """Return the non-numeric column names of the given frames and register encoders.

    For every non-numeric column other than 'name', a ``LabelEncoder`` is
    fitted on that column and stored in the module-level ``LABEL_ENCODER``
    dict under the column name. A column that already has an entry keeps its
    existing encoder (the first frame that supplies a column wins).

    Args:
        *dataframes: Any number of pandas DataFrames to inspect.

    Returns:
        set: Names of all non-numeric columns found, excluding 'name'.
    """
    cols = set()

    for dataframe in dataframes:
        non_numeric_cols = dataframe.select_dtypes(exclude=[np.number]).columns
        cols.update(non_numeric_cols)

        for col in non_numeric_cols:
            # 'name' is never encoded. Columns that already have an encoder are
            # skipped up front so no throwaway encoder is fitted for them.
            if col == 'name' or col in LABEL_ENCODER:
                continue
            LABEL_ENCODER[col] = LabelEncoder().fit(dataframe[col])

    cols.discard('name')
    return cols

**EDA**

In [66]:
# First look at the data for the EDA section (same call as the earlier preview cell).
df_nutrition.head()

Unnamed: 0,Age,Weight,Gender,Height,Activity_Level,Goal,Estimated_Calories,Estimated_Carbohydrates,Estimated_Protein_Min,Estimated_Protein_Max,Estimated_Fat
0,19,90,f,174,Beginner,Maintain Weight,2410,361,90,210,40
1,19,90,f,174,Beginner,Mild Weight Loss,2169,361,81,189,36
2,19,90,f,174,Beginner,Weight Loss,1903,361,71,166,31
3,19,90,f,174,Beginner,Mild Weight Gain,3012,361,112,263,50
4,19,90,f,174,Beginner,Gain Weight,3494,361,131,305,58


In [67]:
# Column dtypes and non-null counts (same call as in the loading cell).
df_nutrition.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Age                      200 non-null    int64 
 1   Weight                   200 non-null    int64 
 2   Gender                   200 non-null    object
 3   Height                   200 non-null    int64 
 4   Activity_Level           200 non-null    object
 5   Goal                     200 non-null    object
 6   Estimated_Calories       200 non-null    int64 
 7   Estimated_Carbohydrates  200 non-null    int64 
 8   Estimated_Protein_Min    200 non-null    int64 
 9   Estimated_Protein_Max    200 non-null    int64 
 10  Estimated_Fat            200 non-null    int64 
dtypes: int64(8), object(3)
memory usage: 17.3+ KB


In [68]:
# Frequency table for each profile column, printed one after another with a
# blank line between them.
eda_columns = ['Age', 'Weight', 'Gender', 'Height', 'Activity_Level']
frequency_tables = [df_nutrition[column].value_counts() for column in eda_columns]
print(*frequency_tables, sep='\n\n')

40    20
59    15
20    15
26    10
48    10
49    10
39    10
55    10
43    10
31    10
19     5
35     5
54     5
23     5
44     5
33     5
47     5
36     5
27     5
34     5
42     5
46     5
41     5
21     5
45     5
25     5
Name: Age, dtype: int64

99    15
74    15
93    15
48    10
56    10
82    10
53    10
92    10
81    10
97    10
50    10
83    10
85     5
95     5
72     5
73     5
90     5
77     5
57     5
55     5
89     5
91     5
86     5
49     5
46     5
Name: Weight, dtype: int64

m    110
f     90
Name: Gender, dtype: int64

184    15
169    15
164    15
174    10
168    10
172    10
160    10
159    10
176    10
194     5
152     5
195     5
187     5
183     5
171     5
153     5
158     5
182     5
163     5
198     5
155     5
177     5
175     5
186     5
181     5
150     5
185     5
173     5
Name: Height, dtype: int64

Beginner        75
Intermediate    70
Expert          55
Name: Activity_Level, dtype: int64


**Encoding and Model Training**

In [70]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Re-read the raw dataset so the encoders are fitted on the original category
# labels rather than on a frame modified by earlier cells.
# NOTE(review): this path is hard-coded separately from `nutrition_json`
# defined earlier -- confirm both are meant to point at the same file.
data = pd.read_json('sample_data/nutrition_dataTest.json')
# Work on an explicit copy so the integer encoding below cannot write through
# to `data`.
df = data.copy()

# Encode categorical columns to integer codes. The fitted encoders are kept so
# the exact same mapping can be applied to new samples at prediction time.
label_encoders = {}
categorical_cols = ["Gender", "Activity_Level", "Goal"]

for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Define features (model inputs) and labels (regression targets)
features = ["Age", "Weight", "Gender", "Height", "Activity_Level", "Goal"]
labels = ["Estimated_Calories", "Estimated_Carbohydrates", "Estimated_Protein_Min", "Estimated_Protein_Max", "Estimated_Fat"]

# Convert to float32 arrays for Keras.
# NOTE(review): inputs are fed unscaled and the categorical codes are treated
# as ordinal numbers; StandardScaler is imported but never used -- consider
# scaling (the same transform would then be needed at prediction time).
X = df[features].values.astype('float32')
y = df[labels].values.astype('float32')

# Small fully connected regressor: one output unit per target column, with no
# activation on the output layer so it can emit unbounded real values.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(len(features),)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(labels))  # Output layer with the same number of nodes as labels
])

# Compile the model. Accuracy is a classification metric and carries no
# meaning for continuous targets, so mean absolute error is tracked instead.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model on the full dataset (no validation split is held out here).
model.fit(X, y, epochs=20, batch_size=1, verbose=1)

# Predictions on the training inputs themselves: a sanity check, not an
# estimate of generalisation.
predictions = model.predict(X)

# Print predictions
print("Predictions:", predictions)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Predictions: [[3360.5715    446.28006   132.8005    334.17703    48.129326]
 [3143.5964    418.455     124.45406   313.2691     46.014015]
 [3035.1084    404.5424    120.28084   302.81522    44.956367]
 [3252.0837    432.36758   128.62726   323.72305    47.07168 ]
 [3469.0593    460.19266   136.97379   344.63083    49.18702 ]
 [2983.4192    396.2927    117.86537   296.38382    43.66873 ]
 [2766.4438    368.4675    109.51891   275.47592    41.553413]
 [2657.9563    354.55496   105.345695  265.022      40.495735]
 [2874.9316    382.38016   113.6921    285.92987    42.61107 ]
 [3091.907     410.20535   122.03855   306.83768    44.726418]
 [2699.8733    358.86942   106.47883   268.09702    41.517494]
 [2482.8977    331.04425    98.1324    247.1892     39.40217 ]


In [73]:
# A single hypothetical person to score with the trained model.
new_data = {
    "Age": 30,
    "Weight": 65,
    "Gender": "f",
    "Height": 160,
    "Activity_Level": "Very Active",
    "Goal": "Maintain Weight"
}

# Wrap the record in a one-row DataFrame so the column-wise preprocessing
# used for training can be applied unchanged.
new_df = pd.DataFrame([new_data])

# Reuse the encoders fitted during training so each category maps to the
# same integer code the model was trained on.
for col in categorical_cols:
    fitted_encoder = label_encoders[col]
    new_df[col] = fitted_encoder.transform(new_df[col])

# Same feature order and dtype as the training matrix.
X_new = new_df[features].to_numpy().astype('float32')

# One input row in, one row of per-label predictions out.
new_predictions = model.predict(X_new)

# Report each predicted target next to its label.
print("Predicted Nutritional Needs:")
for label, predicted_value in zip(labels, new_predictions[0]):
    print(f"{label}: {predicted_value}")

Predicted Nutritional Needs:
Estimated_Calories: 2652.4453125
Estimated_Carbohydrates: 352.4355163574219
Estimated_Protein_Min: 104.65879821777344
Estimated_Protein_Max: 263.9916076660156
Estimated_Fat: 39.198204040527344
