In [16]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.applications import DenseNet201
import os, numpy as np
from keras.callbacks import ReduceLROnPlateau

# Report how many GPUs TensorFlow can see before any heavy work starts.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Read the metadata table; each row carries an image path, an ingredients
# column, and the numeric nutrition columns used as regression targets.
metadata = pd.read_csv("food_nutrition.csv")

# Paths in the CSV are resolved against the current working directory.
cwd = os.getcwd()

# Spot-check only the first five entries so a wrong working directory is
# noticed early without touching every file.
for path in metadata['images_path'].values[:5]:
    candidate = cwd + f"/{path}"
    if not os.path.exists(candidate):
        print(f"File not found: {path}")

# Absolute image paths, plus the numeric target matrix (every column except
# the image path and the ingredients column).
image_paths = [cwd + f"/{p}" for p in metadata['images_path'].values]
nutrition_values = metadata.drop(columns=['images_path', 'ingredients']).values

Num GPUs Available:  1


In [17]:

# Hold out 10% of the samples, then divide that hold-out 30/70 into the
# validation and test sets. Both splits use a fixed seed for reproducibility.
train_paths, holdout_paths, train_nutrition, holdout_nutrition = train_test_split(
    image_paths, nutrition_values, test_size=0.1, random_state=42)
val_paths, test_paths, val_nutrition, test_nutrition = train_test_split(
    holdout_paths, holdout_nutrition, test_size=0.7, random_state=42)

# Sizes of the (train, validation, test) partitions.
tuple(map(len, (train_paths, val_paths, test_paths)))

(4343, 144, 339)

In [18]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and variance from the training targets only, so no
# statistics leak in from the validation or test sets.
scaler = StandardScaler().fit(train_nutrition)

# Standardise all three target matrices with the training statistics.
# NOTE: the *_nutrition names are overwritten in place, so re-running this cell
# on its own would rescale already-scaled values; re-run the split cell first.
train_nutrition = scaler.transform(train_nutrition)
val_nutrition = scaler.transform(val_nutrition)
test_nutrition = scaler.transform(test_nutrition)

# Per-column mean and standard deviation that were fitted.
scaler.mean_, np.sqrt(scaler.var_)

(array([2.56363430e+02, 7.33971313e+00, 2.61689099e+01, 9.00596559e+00,
        5.73073905e+00, 1.49130057e+01, 2.49028021e-02, 3.62616988e+00,
        3.56952805e-02]),
 array([7.11202240e+01, 3.18900623e+00, 1.02698777e+01, 1.03793033e+01,
        3.77882835e+00, 7.53279707e+00, 2.63631380e-02, 2.63860028e+00,
        1.98664601e-02]))

In [19]:

# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    """Read a JPEG from disk and prepare it as input for the DenseNet backbone.

    Args:
        image_path: Scalar string (or string tensor) holding the path of a
            JPEG file.

    Returns:
        A float32 tensor of shape (224, 224, 3). Pixel values are passed
        through ``tf.keras.applications.densenet.preprocess_input``, which
        scales them to [0, 1] and then normalises each channel with the
        ImageNet statistics the pretrained weights were trained with.

    Note:
        Inference code must apply this same preprocessing; a plain ``/ 255``
        does not match what the ImageNet-pretrained DenseNet weights expect.
    """
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    # Bilinear resize to the backbone's input resolution (returns float32).
    image = tf.image.resize(image, [224, 224])
    # Use the preprocessing that belongs to the pretrained backbone instead of
    # only rescaling to [0, 1].
    image = tf.keras.applications.densenet.preprocess_input(image)
    return image

def preprocess_nutrition_values(nutrition_values):
    """Return the given nutrition target(s) as a float64 tensor.

    Args:
        nutrition_values: Array-like or tensor of numeric nutrition targets.

    Returns:
        A ``tf.Tensor`` with dtype ``tf.float64`` holding the same values.
    """
    target_tensor = tf.convert_to_tensor(nutrition_values, dtype=tf.float64)
    return target_tensor

def create_tf_dataset(image_paths, nutrition_values):
    """Build an unbatched ``tf.data.Dataset`` of (image, nutrition) pairs.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        nutrition_values: Array-like of nutrition targets whose first axis
            lines up with ``image_paths``.

    Returns:
        A dataset that yields ``(image, nutrition)`` tuples, where the image
        comes from ``load_and_preprocess_image`` and the target from
        ``preprocess_nutrition_values``.
    """
    autotune = tf.data.experimental.AUTOTUNE

    # Images: slice the path list, then decode/resize in parallel.
    images = tf.data.Dataset.from_tensor_slices(image_paths).map(
        load_and_preprocess_image, num_parallel_calls=autotune)

    # Targets: slice row by row and convert each row to a tensor.
    targets = tf.data.Dataset.from_tensor_slices(nutrition_values).map(
        preprocess_nutrition_values, num_parallel_calls=autotune)

    # Pair each image with its target row.
    return tf.data.Dataset.zip((images, targets))

# Create the datasets.
# One shared batch size instead of a repeated literal, and prefetching so that
# image decoding for the next batch overlaps with the current training step.
BATCH_SIZE = 8
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_dataset = create_tf_dataset(train_paths, train_nutrition).batch(BATCH_SIZE).prefetch(AUTOTUNE)
val_dataset = create_tf_dataset(val_paths, val_nutrition).batch(BATCH_SIZE).prefetch(AUTOTUNE)
test_dataset = create_tf_dataset(test_paths, test_nutrition).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Load the ImageNet-pretrained DenseNet201 backbone, excluding its top
# classification layer.
base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Shared trunk: pool the backbone's feature map to a vector, project it to
# 1024 units, and batch-normalise. Every regression head branches off `x`.
pooled = layers.GlobalAveragePooling2D()(base_model.output)
trunk_features = layers.Dense(1024, activation='relu')(pooled)
x = layers.BatchNormalization()(trunk_features)

def add_regression_head(x, name):
    """Attach one single-value regression head to the shared features.

    The head is Dense(512) -> BatchNorm -> Dense(256) -> BatchNorm -> Dense(1).

    Args:
        x: Keras tensor with the shared features the head branches from.
        name: Name of the output layer; also the prefix of the head's hidden
            dense layers (``{name}_dense_1``, ``{name}_dense_2``).

    Returns:
        The Keras tensor produced by the final linear ``Dense(1)`` layer.
    """
    hidden = x
    for idx, units in enumerate((512, 256), start=1):
        hidden = layers.Dense(units, activation="relu", name=f"{name}_dense_{idx}")(hidden)
        hidden = layers.BatchNormalization()(hidden)
    return layers.Dense(1, activation='linear', name=f"{name}")(hidden)

# One regression head per nutritional value.
# NOTE(review): this order is assumed to match the column order of
# `nutrition_values` (metadata minus 'images_path' and 'ingredients');
# confirm against the CSV header.
output_names = ["energy", "protein", "carbohydrates", "sugars", "fiber", "fat", "cholesterol", "minerals", "vitamins"]
assert train_nutrition.shape[1] == len(output_names), "target columns must match output heads"
outputs = [add_regression_head(x, name) for name in output_names]

# Halve the learning rate when val_loss has not improved for 2 epochs, never
# going below 1e-5.
annealer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-5)

# Create the model
model = models.Model(inputs=base_model.input, outputs=outputs)
# Freeze the layers of the base model so only the new trunk and heads train
for layer in base_model.layers:
    layer.trainable = False

# Compile the model with a separate MSE loss for each named output
losses = {name: 'mean_squared_error' for name in output_names}


def split_targets_by_output(image, nutrition):
    """Turn a (batch, n_outputs) target tensor into one target per output head.

    The datasets yield a single tensor holding all nutrition columns, while the
    model has one single-unit output per column. If that tensor is passed
    straight to ``fit``, each (batch, 1) prediction is compared against the
    whole (batch, n_outputs) tensor, so every head is trained on all targets
    instead of its own column.

    Args:
        image: Batch of input images; passed through unchanged.
        nutrition: Tensor whose last axis holds one value per entry of
            ``output_names``.

    Returns:
        ``(image, targets)`` where ``targets`` maps each output name to its
        own ``(batch, 1)`` float32 column.
    """
    targets = {
        name: tf.cast(nutrition[..., i:i + 1], tf.float32)
        for i, name in enumerate(output_names)
    }
    return image, targets


train_dataset_named = train_dataset.map(split_targets_by_output)
val_dataset_named = val_dataset.map(split_targets_by_output)

model.compile(optimizer='adam', loss=losses)
# Train the model
model.fit(train_dataset_named, validation_data=val_dataset_named, epochs=30, verbose=1, callbacks=[annealer])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 19: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 20/30
Epoch 21/30
Epoch 21: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 28: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x189e89e5f90>

In [21]:
# Persist the trained network to a single HDF5 file.
# NOTE(review): the targets were standardised with `scaler` before training, so
# predictions are in standardised units; the scaler's mean/variance presumably
# need to be stored alongside this file to recover original units. Confirm.
model_path = "foodNutrition.h5"
model.save(model_path)