In [1]:
from pathlib import Path

import pandas as pd
import tensorflow as tf
from PIL import Image
from tensorflow import keras

In [2]:
class OneHotEncoder:
    """Encode food categories and ingredient lists as one-hot / multi-hot vectors.

    Both vocabularies are sorted alphabetically and every name is mapped to its
    position in the sorted list, so the encoding does not depend on the order
    in which the names were supplied.

    Attributes:
        all_food_categories: Sorted list of category names.
        all_food_categories_integer_encoded: Dict mapping category name -> index.
        all_ingredients: Sorted list of ingredient names.
        all_ingredients_integer_encoded: Dict mapping ingredient name -> index.
    """

    def __init__(self, all_category_list, all_ingredient_list):
        """Build the name -> index tables.

        Args:
            all_category_list: Iterable of category names.
            all_ingredient_list: Iterable of ingredient names.
        """
        # sorted() returns new lists, so the caller's lists are left untouched
        # (list.sort() would silently reorder them).
        self.all_food_categories = sorted(all_category_list)
        self.all_food_categories_integer_encoded = (
            self.__encode_categories_to_integers()
        )
        self.all_ingredients = sorted(all_ingredient_list)
        self.all_ingredients_integer_encoded = self.__encode_ingredients_to_integers()

    def get_category_one_hot_encoding(self, category_name):
        """Return the one-hot vector for ``category_name``.

        Raises:
            KeyError: If ``category_name`` is not a known category.
        """
        index = self.__index_of(
            self.all_food_categories_integer_encoded, category_name
        )
        num_classes = len(self.all_food_categories)
        return keras.utils.to_categorical(index, num_classes)

    def get_ingredients_one_hot_encoding(self, ingredient_list):
        """Return the multi-hot tensor for a list of ingredient names.

        Raises:
            KeyError: If any name in ``ingredient_list`` is not a known ingredient.
        """
        # Resolve every name first so unknown ingredients fail fast, before any
        # TensorFlow object is created.
        ingredient_indices = [
            self.__transform_ingredient_to_integer(name) for name in ingredient_list
        ]
        multi_one_hot_layer = tf.keras.layers.CategoryEncoding(
            num_tokens=len(self.all_ingredients), output_mode="multi_hot"
        )
        return multi_one_hot_layer(ingredient_indices)

    @staticmethod
    def __index_of(mapping, name):
        """Look ``name`` up in ``mapping``, raising a descriptive KeyError if absent."""
        try:
            return mapping[name]
        except KeyError:
            raise KeyError(f"{name} does not have an integer mapping") from None

    def __transform_ingredient_to_integer(self, ingredient_name):
        """Return the integer index of ``ingredient_name``."""
        return self.__index_of(self.all_ingredients_integer_encoded, ingredient_name)

    def __encode_categories_to_integers(self):
        """Map each category name to its position in the sorted category list."""
        return {
            category_name: index
            for index, category_name in enumerate(self.all_food_categories)
        }

    def __encode_ingredients_to_integers(self):
        """Map each ingredient name to its position in the sorted ingredient list."""
        return {
            ingredient_name: index
            for index, ingredient_name in enumerate(self.all_ingredients)
        }

In [26]:
class Recipes5k:
    """Recipes5k images and metadata exposed as batched ``tf.data`` pipelines.

    The constructor reads ``<dir_path>/metadata/recipes5k_metadata.csv``
    (tab-separated), builds the category / ingredient vocabularies, and creates
    the training and validation datasets. Each dataset element is
    ``(image, labels)`` where ``labels`` is a dict with the keys
    ``"category_output"``, ``"nutrition_output"`` and ``"ingredients_output"``.

    Attributes:
        dir_path: Root directory of the dataset.
        metadata: DataFrame with one row per recipe image.
        all_categories: Sorted list of distinct category names.
        all_ingredients: Sorted list of distinct ingredient names.
        one_hot_encoder: Encoder built from the two vocabularies.
        entire_dataset: Unbatched dataset over every metadata row.
        training_dataset: Batched dataset over the first ``training_split`` rows.
        validation_dataset: Batched dataset over the remaining rows.
    """

    def __init__(
        self,
        dir_path="../Food Datasets/final-dataset",
        training_split=0.7,
        batch_size=32,
        image_size=(224, 224),
        shuffle_seed=None,
    ):
        """Load the metadata and build the datasets.

        Args:
            dir_path: Dataset root containing ``metadata/`` and ``images/``.
            training_split: Fraction of rows used for training; must be
                strictly between 0 and 1.
            batch_size: Batch size of the training and validation pipelines.
            image_size: ``(height, width)`` every image is resized to.
            shuffle_seed: If given, the metadata rows are shuffled once with
                this seed before splitting. The default (``None``) keeps the
                file order. NOTE(review): if the CSV is ordered by category, an
                unshuffled split leaves the trailing categories out of
                training - confirm against the data and pass a seed if so.

        Raises:
            ValueError: If ``training_split`` is not in the open interval (0, 1).
        """
        if not 0 < training_split < 1:
            raise ValueError(
                f"training_split must be between 0 and 1, got {training_split}"
            )
        self.dir_path = Path(dir_path)
        self.training_split = training_split
        self.batch_size = batch_size
        self.image_size = tuple(image_size)
        self.metadata = self.load_recipe5k_metadata()
        if shuffle_seed is not None:
            # Shuffle once, deterministically, so the split membership is
            # stable across epochs (a reshuffling tf.data shuffle before
            # take/skip would mix training and validation rows).
            self.metadata = self.metadata.sample(
                frac=1, random_state=shuffle_seed
            ).reset_index(drop=True)
        self.all_categories = self.extract_all_categories()
        self.all_ingredients = self.extract_all_ingredients()
        self.one_hot_encoder = OneHotEncoder(self.all_categories, self.all_ingredients)
        self.entire_dataset = self.get_dataset()
        self.training_dataset, self.validation_dataset = self.split_data()

    def load_image_to_arr(self, path):
        """Load the image at ``path`` and resize it to ``self.image_size``."""
        image = tf.keras.preprocessing.image.load_img(path)
        img_tensor = tf.keras.preprocessing.image.img_to_array(image)
        return tf.image.resize(img_tensor, self.image_size)

    def load_recipe5k_metadata(self):
        """Read the tab-separated metadata CSV into a DataFrame."""
        csv_path = self.dir_path / "metadata" / "recipes5k_metadata.csv"
        return pd.read_csv(csv_path, sep="\t")

    def extract_all_categories(self):
        """Return the sorted distinct values of the ``Category`` column."""
        return sorted(self.metadata["Category"].unique().tolist())

    def extract_all_ingredients(self):
        """Return the sorted distinct ingredients across all rows.

        Each ``Ingredients`` cell is split on ``","``; names are used verbatim
        (no whitespace stripping).
        """
        unique_ingredients = set()
        for ingredient_list in self.metadata["Ingredients"]:
            unique_ingredients.update(ingredient_list.split(","))
        # Sorting makes the result deterministic; bare set order is not.
        return sorted(unique_ingredients)

    def generate_dataset(self, start=0, stop=None):
        """Yield ``(image, labels)`` for metadata rows ``start:stop``.

        Args:
            start: Position of the first row to yield.
            stop: Position one past the last row, or ``None`` for the end.
        """
        img_dir = self.dir_path / "images"
        for _, row in self.metadata.iloc[start:stop].iterrows():
            img_path = img_dir / row["Category"] / (row["ID/File Name"] + ".jpg")
            img_tensor = self.load_image_to_arr(img_path)
            nutrition_tensor = [
                row["Calorie(kcal)"],
                row["Carbohydrate(g)"],
                row["Protein(g)"],
                row["Fat(g)"],
            ]
            one_hot_category_tensor = (
                self.one_hot_encoder.get_category_one_hot_encoding(row["Category"])
            )
            one_hot_ingredient_tensor = (
                self.one_hot_encoder.get_ingredients_one_hot_encoding(
                    row["Ingredients"].split(",")
                )
            )
            yield tf.constant(img_tensor), {
                "category_output": tf.constant(one_hot_category_tensor),
                "nutrition_output": tf.constant(nutrition_tensor),
                "ingredients_output": one_hot_ingredient_tensor,
            }

    def get_dataset(self, start=0, stop=None):
        """Return an unbatched dataset over metadata rows ``start:stop``.

        The label shapes in the output signature are taken from the encoder's
        vocabularies, so they always match what ``generate_dataset`` yields.
        """
        height, width = self.image_size
        num_categories = len(self.one_hot_encoder.all_food_categories)
        num_ingredients = len(self.one_hot_encoder.all_ingredients)
        return tf.data.Dataset.from_generator(
            lambda: self.generate_dataset(start, stop),
            output_signature=(
                tf.TensorSpec(shape=(height, width, 3), dtype=tf.dtypes.float32),
                {
                    "category_output": tf.TensorSpec(
                        shape=(num_categories,), dtype=tf.dtypes.float32
                    ),
                    # calories, carbohydrate, protein, fat
                    "nutrition_output": tf.TensorSpec(
                        shape=(4,), dtype=tf.dtypes.float32
                    ),
                    "ingredients_output": tf.TensorSpec(
                        shape=(num_ingredients,), dtype=tf.dtypes.float32
                    ),
                },
            ),
        )

    def split_data(self):
        """Return ``(training_dataset, validation_dataset)``, both batched.

        The first ``int(len(self) * training_split)`` rows form the training
        set and the rest the validation set. Each part is generated from its
        own row slice, so iterating the validation set does not load and
        discard the training images the way ``Dataset.skip`` on a generator
        dataset would.
        """
        training_samples = int(len(self) * self.training_split)
        training_data = self._batch_and_prefetch(
            self.get_dataset(stop=training_samples)
        )
        validation_data = self._batch_and_prefetch(
            self.get_dataset(start=training_samples)
        )
        return training_data, validation_data

    def _batch_and_prefetch(self, dataset):
        """Batch ``dataset`` and prefetch whole batches (prefetch goes last)."""
        return dataset.batch(self.batch_size).prefetch(tf.data.AUTOTUNE)

    def __len__(self):
        """Number of metadata rows."""
        return len(self.metadata)

In [27]:
# Load dataset: constructing Recipes5k reads the metadata CSV and builds the
# batched training / validation tf.data pipelines used by the cells below.
recipes5k = Recipes5k()

## Building the Model on a MobileNetV2 Convolutional Base

In [30]:
class Model:
    """Builder for a three-headed network on a frozen MobileNetV2 base.

    The graph is: input -> augmentation + MobileNetV2 preprocessing -> frozen
    MobileNetV2 (ImageNet weights, no top) -> shared dense trunk -> three heads
    named ``category_output`` (softmax), ``nutrition_output`` (4 linear units)
    and ``ingredients_output`` (sigmoid).

    This class only holds the layers; ``build_and_compile`` wires them into a
    ``keras.Model``. Calling ``build_and_compile`` more than once on the same
    instance reuses the same layer objects, so the resulting models share
    weights.
    """

    def __init__(
        self,
        input_shape=(224, 224, 3),
        total_food_category=101,
        total_ingredients_category=892,
    ):
        """Create every layer of the network.

        Args:
            input_shape: Shape of one input image, ``(height, width, channels)``.
            total_food_category: Number of units of the category softmax head.
            total_ingredients_category: Number of units of the ingredient
                sigmoid head.
        """
        self.input_shape = input_shape
        self.input_layer = self.get_input_layer()
        self.preprocess_layers = self.get_preprocess_layers()
        self.convolution_block = self.get_mobilenetv2_convolution_block()
        self.shared_layers = self.get_shared_layers()
        self.category_classification_layers = self.get_category_classification_layers(
            total_food_category
        )
        self.nutrition_regression_layers = self.get_nutrition_regression_layers()
        self.ingredients_multilabel_layers = self.get_ingredients_multilabel_layers(
            total_ingredients_category
        )

    def get_input_layer(self):
        """Return the symbolic input tensor for images of ``self.input_shape``."""
        return keras.Input(shape=self.input_shape)

    def get_preprocess_layers(self):
        """Return the augmentation layers followed by MobileNetV2 preprocessing."""
        return [
            # Data augmentation
            keras.layers.RandomFlip("horizontal"),
            keras.layers.RandomRotation(0.2),
            # Function that preprocesses the input for the MobileNetV2 architecture
            keras.applications.mobilenet_v2.preprocess_input,
        ]

    def get_mobilenetv2_convolution_block(self):
        """Return MobileNetV2 without its top, ImageNet weights, frozen."""
        mobilenet_v2_convolution_layers = keras.applications.MobileNetV2(
            input_shape=self.input_shape, include_top=False, weights="imagenet"
        )
        mobilenet_v2_convolution_layers.trainable = False
        return mobilenet_v2_convolution_layers

    def get_shared_layers(self):
        """Return the trunk shared by all three heads."""
        return [
            keras.layers.GlobalAveragePooling2D(),
            keras.layers.Dense(64, activation="relu", name="shared_dense_1"),
            keras.layers.Dense(64, activation="relu", name="shared_dense_2"),
            keras.layers.Dropout(0.2),
        ]

    def get_category_classification_layers(self, total_categories):
        """Return the layers of the single-label category head."""
        return [
            keras.layers.Dense(
                total_categories, activation="softmax", name="category_output"
            ),
        ]

    def get_nutrition_regression_layers(self):
        """Return the layers of the 4-value nutrition regression head."""
        return [
            keras.layers.Dense(64, activation="relu", name="nutrition_dense_1"),
            keras.layers.Dense(4, name="nutrition_output"),
        ]

    def get_ingredients_multilabel_layers(self, total_ingredients):
        """Return the layers of the multi-label ingredient head."""
        return [
            keras.layers.Dense(256, activation="relu", name="ingredients_dense_1"),
            keras.layers.Dense(128, activation="relu", name="ingredients_dense_2"),
            keras.layers.Dense(
                total_ingredients, activation="sigmoid", name="ingredients_output"
            ),
        ]

    @staticmethod
    def _apply_layers(tensor, layers):
        """Feed ``tensor`` through ``layers`` in order and return the result."""
        for layer in layers:
            tensor = layer(tensor)
        return tensor

    def build_and_compile(
        self,
        category_classification_loss=None,
        nutrition_regression_loss=None,
        ingredient_multilabel_loss=None,
        category_classification_metrics=None,
        nutrition_regression_metrics=None,
        ingredient_multilabel_metrics=None,
    ):
        """Wire the layers into a ``keras.Model`` and compile it with Adam.

        Every argument defaults to ``None``, meaning "create a fresh default
        object for this call". Losses and metrics are stateful, so they must
        not be shared between compiled models via default arguments.

        Args:
            category_classification_loss: Loss for ``category_output``.
                Default: ``CategoricalCrossentropy``.
            nutrition_regression_loss: Loss for ``nutrition_output``.
                Default: ``MeanAbsoluteError``.
            ingredient_multilabel_loss: Loss for ``ingredients_output``.
                Default: ``BinaryCrossentropy``, which scores each sigmoid unit
                independently as a multi-hot target requires.
            category_classification_metrics: Metrics for ``category_output``.
                Default: ``[CategoricalAccuracy()]``.
            nutrition_regression_metrics: Metrics for ``nutrition_output``.
                Default: ``[MeanAbsoluteError()]``.
            ingredient_multilabel_metrics: Metrics for ``ingredients_output``.
                Default: ``[BinaryAccuracy()]``.

        Returns:
            The compiled ``keras.Model`` named ``"FoodNet_with_MobileNetv2"``
            with outputs ordered category, nutrition, ingredients.
        """
        if category_classification_loss is None:
            category_classification_loss = keras.losses.CategoricalCrossentropy()
        if nutrition_regression_loss is None:
            nutrition_regression_loss = keras.losses.MeanAbsoluteError()
        if ingredient_multilabel_loss is None:
            ingredient_multilabel_loss = keras.losses.BinaryCrossentropy()
        # keras.metrics.Accuracy tests y_true == y_pred exactly, which is
        # meaningless for probabilities; use the argmax / thresholded variants.
        if category_classification_metrics is None:
            category_classification_metrics = [keras.metrics.CategoricalAccuracy()]
        if nutrition_regression_metrics is None:
            nutrition_regression_metrics = [keras.metrics.MeanAbsoluteError()]
        if ingredient_multilabel_metrics is None:
            ingredient_multilabel_metrics = [keras.metrics.BinaryAccuracy()]

        features = self._apply_layers(self.input_layer, self.preprocess_layers)
        # training=False keeps the frozen base (incl. BatchNorm) in inference mode.
        features = self.convolution_block(features, training=False)
        features = self._apply_layers(features, self.shared_layers)

        category_classification_head = self._apply_layers(
            features, self.category_classification_layers
        )
        nutrition_regression_head = self._apply_layers(
            features, self.nutrition_regression_layers
        )
        ingredients_multilabel_head = self._apply_layers(
            features, self.ingredients_multilabel_layers
        )

        model = keras.Model(
            inputs=self.input_layer,
            outputs=[
                category_classification_head,
                nutrition_regression_head,
                ingredients_multilabel_head,
            ],
            name="FoodNet_with_MobileNetv2",
        )
        # Loss / metric dict keys are the names of each head's final layer.
        model.compile(
            optimizer="adam",
            loss={
                "category_output": category_classification_loss,
                "nutrition_output": nutrition_regression_loss,
                "ingredients_output": ingredient_multilabel_loss,
            },
            metrics={
                "category_output": category_classification_metrics,
                "nutrition_output": nutrition_regression_metrics,
                "ingredients_output": ingredient_multilabel_metrics,
            },
        )
        return model

In [31]:
# Keep the builder, the compiled Keras model and the training history under
# separate names so the trained network stays reachable after fit() returns
# (for evaluate / predict / save) instead of being discarded.
model = Model()
food_net = model.build_and_compile()
history = food_net.fit(
    recipes5k.training_dataset,
    epochs=2,
    verbose=1,
    validation_data=recipes5k.validation_dataset,
)
history

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1fd01b605b0>

In [12]:
# Materialise the first training batch. Despite the name, `image` is a
# one-element list holding an (images, labels-dict) tuple for a whole batch.
image = list(recipes5k.training_dataset.take(1))

In [25]:
# One-hot category label of the 4th sample in the first batch. Shown as the
# cell's last expression: `pprint` is never imported in this notebook, so
# calling it raises NameError on a fresh kernel.
image[0][1]["category_output"].numpy()[3]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)


## Testing 

In [None]:
def load_recipe5k_metadata(dataset_dir="../Food Datasets/final-dataset"):
    """Read the Recipes5k metadata table.

    Args:
        dataset_dir: Dataset root; the file is expected at
            ``<dataset_dir>/metadata/recipes5k_metadata.csv``.

    Returns:
        A DataFrame parsed from the tab-separated file.
    """
    csv_path = Path(dataset_dir) / "metadata" / "recipes5k_metadata.csv"
    return pd.read_csv(csv_path, sep="\t")

In [None]:
# Read the raw metadata table directly, without going through Recipes5k.
data = load_recipe5k_metadata()

In [None]:
# Preview the first rows of the metadata table.
data.head()

In [None]:
# Separate Recipes5k instance used by the test cells below.
test_recipes5k = Recipes5k()

In [None]:
# Number of metadata rows (Recipes5k.__len__ returns len(self.metadata)).
len(test_recipes5k)

In [None]:
# NOTE: generate_dataset contains `yield`, so this call returns a generator
# object (not a function); no image is loaded until it is iterated.
test_gen_func = test_recipes5k.generate_dataset()

In [None]:
# Unbatched tf.data.Dataset built from the generator (one element per row).
test_dataset = test_recipes5k.get_dataset()

In [None]:
# Pull one (image, labels) element from the unbatched test dataset.
# Recipes5k itself has no `take` method; `take` belongs to the tf.data.Dataset.
list(test_dataset.take(1))

In [None]:
# Instantiate the model builder (the notebook's Model class, not keras.Model);
# its constructor creates all layers, including the MobileNetV2 base.
test_model = Model()

In [None]:
# Build from a fresh builder so this cell can be re-run: rebinding
# `test_model` to the result of its own method made a second run fail,
# because a keras.Model has no build_and_compile().
test_model = Model().build_and_compile()

In [None]:
# Print the layer-by-layer summary of the compiled Keras model.
test_model.summary()

In [None]:
# One-epoch smoke test of training on the test instance's batched pipelines.
test_model.fit(
    x=test_recipes5k.training_dataset,
    epochs=1,
    verbose=1,
    validation_data=test_recipes5k.validation_dataset,
)

In [None]:
# Scratch tensor of shape (1, 1, 3) for experimenting with tensor shapes.
test = tf.constant([[[1, 2, 3]]])

In [None]:
# Inspect the shape of the scratch tensor.
test.shape

In [None]:
# Insert a new leading axis; the (1, 1, 3) tensor becomes (1, 1, 1, 3).
tf.expand_dims(test, axis=0)

In [None]:
# Display the training pipeline object itself; this does not iterate any data.
test_recipes5k.training_dataset

In [None]:
# Display the validation pipeline object itself; this does not iterate any data.
test_recipes5k.validation_dataset

In [None]:
# Iterate the whole training pipeline once and count its elements. The
# pipeline is batched, so `row` ends up holding the number of batches.
row = sum(1 for _ in test_recipes5k.training_dataset)
print(row)