In [8]:
from pathlib import Path

import pandas as pd
import tensorflow as tf
from PIL import Image
from tensorflow import keras

In [103]:
class OneHotEncoder:
    """Encode food categories and ingredients as one-hot / multi-hot vectors.

    Each vocabulary is sorted, and a name's position in the sorted vocabulary
    is used as its integer index. Categories are encoded with
    ``keras.utils.to_categorical``; ingredient lists are encoded with a
    ``CategoryEncoding`` layer in ``"multi_hot"`` mode.
    """

    def __init__(self, all_category_list, all_ingredient_list):
        """Build the name -> integer index mappings.

        Args:
            all_category_list: Iterable of every category name.
            all_ingredient_list: Iterable of every ingredient name.
        """
        # sorted() returns a new list, so the caller's sequences are not
        # reordered as a side effect of constructing the encoder.
        self.all_food_categories = sorted(all_category_list)
        self.all_food_categories_integer_encoded = (
            self.__encode_categories_to_integers()
        )
        self.all_ingredients = sorted(all_ingredient_list)
        self.all_ingredients_integer_encoded = self.__encode_ingredients_to_integers()

    def get_category_one_hot_encoding(self, category_name):
        """Return the one-hot vector for ``category_name``.

        The vector length equals the number of known categories.

        Raises:
            KeyError: If ``category_name`` is not in the category vocabulary.
        """
        try:
            index = self.all_food_categories_integer_encoded[category_name]
        except KeyError:
            # A plain dict lookup would raise KeyError without context; keep
            # the exception type but attach the explanatory message.
            raise KeyError(
                f"{category_name} does not have an integer mapping"
            ) from None
        num_classes = len(self.all_food_categories)
        return keras.utils.to_categorical(index, num_classes)

    def get_ingredients_one_hot_encoding(self, ingredient_list):
        """Return the multi-hot encoding of ``ingredient_list``.

        Args:
            ingredient_list: Iterable of ingredient names.

        Raises:
            KeyError: If any ingredient is not in the ingredient vocabulary.
        """
        ingredient_indices = [
            self.__transform_ingredient_to_integer(name) for name in ingredient_list
        ]
        multi_one_hot_layer = tf.keras.layers.CategoryEncoding(
            num_tokens=len(self.all_ingredients), output_mode="multi_hot"
        )
        return multi_one_hot_layer(ingredient_indices)

    def __transform_ingredient_to_integer(self, ingredient_name):
        """Look up the integer index of one ingredient name.

        Raises:
            KeyError: If ``ingredient_name`` is not in the ingredient vocabulary.
        """
        try:
            return self.all_ingredients_integer_encoded[ingredient_name]
        except KeyError:
            raise KeyError(
                f"{ingredient_name} does not have an integer mapping"
            ) from None

    def __encode_categories_to_integers(self):
        """Map each category name to its position in the sorted category list."""
        return {
            category_name: index
            for index, category_name in enumerate(self.all_food_categories)
        }

    def __encode_ingredients_to_integers(self):
        """Map each ingredient name to its position in the sorted ingredient list."""
        return {
            ingredient_name: index
            for index, ingredient_name in enumerate(self.all_ingredients)
        }

In [104]:
class Recipes5k:
    """Load the Recipes5k metadata and stream (image, labels) samples.

    Expected layout under ``dir_path``:
      * ``metadata/recipes5k_metadata.csv`` -- tab-separated metadata file
      * ``images/<Category>/<ID/File Name>.jpg`` -- one image per metadata row
    """

    def __init__(self, dir_path=None):
        """Read the metadata and build the label vocabularies and encoder.

        Args:
            dir_path: Root directory of the dataset. Defaults to
                ``../Food Datasets/final-dataset`` when omitted.
        """
        self.dir_path = (
            Path("../Food Datasets/final-dataset") if dir_path is None else Path(dir_path)
        )
        self.metadata = self.__load_recipe5k_metadata()
        # Sort here so the vocabularies held by this object are in a
        # deterministic order regardless of what the encoder does with them.
        self.all_categories = sorted(self.extract_all_categories())
        self.all_ingredients = sorted(self.extract_all_ingredients())
        self.one_hot_encoder = OneHotEncoder(self.all_categories, self.all_ingredients)

    def load_image_to_arr(self, path, target_size=(224, 224)):
        """Load the image at ``path`` and resize it to ``target_size``.

        Args:
            path: Location of the image file.
            target_size: ``(height, width)`` passed to ``tf.image.resize``.

        Returns:
            The resized image tensor.
        """
        image = tf.keras.preprocessing.image.load_img(path)
        img_tensor = tf.keras.preprocessing.image.img_to_array(image)
        return tf.image.resize(img_tensor, target_size)

    def __normalize_image(self, img_tensor):
        """Scale ``img_tensor`` by its own maximum value.

        ``divide_no_nan`` yields 0 where the divisor is 0, so an all-zero
        image stays all-zero instead of turning into NaNs.
        """
        return tf.math.divide_no_nan(img_tensor, tf.math.reduce_max(img_tensor))

    def __load_recipe5k_metadata(self):
        """Read the tab-separated metadata CSV into a DataFrame."""
        metadata_path = self.dir_path / "metadata" / "recipes5k_metadata.csv"
        return pd.read_csv(metadata_path, sep="\t")

    def extract_all_categories(self):
        """Return the distinct values of the ``Category`` column as a list."""
        return self.metadata["Category"].unique().tolist()

    def extract_all_ingredients(self):
        """Return every distinct ingredient found in the ``Ingredients`` column.

        Each cell is a comma-separated string; the result is an unordered
        list of the unique pieces.
        """
        unique_ingredients = set()
        for ingredient_cell in self.metadata["Ingredients"]:
            unique_ingredients.update(ingredient_cell.split(","))
        return list(unique_ingredients)

    def generate_dataset(self):
        """Yield one training sample per metadata row.

        Yields:
            A 4-tuple ``(image, category_one_hot, nutrition, ingredient_multi_hot)``
            where ``nutrition`` holds the calorie, carbohydrate, protein and
            fat columns in that order.
        """
        img_dir = self.dir_path / "images"
        for _, row in self.metadata.iterrows():
            img_path = img_dir / row["Category"] / f'{row["ID/File Name"]}.jpg'
            img_tensor = self.load_image_to_arr(img_path)
            img_tensor = self.__normalize_image(img_tensor)
            nutrition_tensor = [
                row["Calorie(kcal)"],
                row["Carbohydrate(g)"],
                row["Protein(g)"],
                row["Fat(g)"],
            ]
            one_hot_category_tensor = (
                self.one_hot_encoder.get_category_one_hot_encoding(row["Category"])
            )
            one_hot_ingredient_tensor = (
                self.one_hot_encoder.get_ingredients_one_hot_encoding(
                    row["Ingredients"].split(",")
                )
            )
            # The parentheses are required: without them the trailing comma
            # ends the yield after the first element and the remaining lines
            # become discarded expression statements.
            yield (
                tf.constant(img_tensor),
                tf.constant(one_hot_category_tensor),
                tf.constant(nutrition_tensor),
                one_hot_ingredient_tensor,
            )

In [117]:
# Load dataset
recipes5k = Recipes5k()

# Label-vector widths come from the loaded vocabularies instead of being
# hard-coded (previously 101 and 892), so the signature follows the metadata.
num_categories = len(recipes5k.all_categories)
num_ingredients = len(recipes5k.all_ingredients)

dataset = tf.data.Dataset.from_generator(
    recipes5k.generate_dataset,
    output_signature=(
        tf.TensorSpec(shape=(224, 224, 3), dtype=tf.dtypes.float32),  # image
        tf.TensorSpec(shape=(num_categories,), dtype=tf.dtypes.float32),  # category one-hot
        tf.TensorSpec(shape=(4,), dtype=tf.dtypes.float32),  # kcal, carbs, protein, fat
        tf.TensorSpec(shape=(num_ingredients,), dtype=tf.dtypes.float32),  # ingredient multi-hot
    ),
)

In [28]:
# Load one sample image at its native resolution for inspection.
# NOTE(review): this cell originally called a module-level `load_image_to_arr`
# and a `dataset_dir` variable, neither of which is defined in the visible
# notebook. It is rebuilt here from names that are in scope, assuming
# `dataset_dir` was the same root as `recipes5k.dir_path` -- confirm.
sample_image_path = (
    recipes5k.dir_path
    / "images"
    / "apple_pie"
    / "114542501848791040941317630876042494434.jpg"
)
# img_to_array returns a NumPy array, which the .shape/.max()/.min() cells
# that follow rely on.
img_arr = tf.keras.preprocessing.image.img_to_array(
    tf.keras.preprocessing.image.load_img(sample_image_path)
)

In [119]:
# Pull the first three elements from the dataset; this forces the generator
# to run. Note that the full tensors are echoed as the cell output.
list(dataset.take(3))

[(<tf.Tensor: shape=(224, 224, 3), dtype=float32, numpy=
  array([[[0.9581807 , 0.7057573 , 0.3880127 ],
          [0.9407813 , 0.7043092 , 0.4169968 ],
          [0.9565776 , 0.72282165, 0.5078832 ],
          ...,
          [0.6443882 , 0.6558776 , 0.70307165],
          [0.64169884, 0.6573851 , 0.6998473 ],
          [0.6635027 , 0.67918897, 0.7147632 ]],
  
         [[0.9654062 , 0.7153612 , 0.44116396],
          [0.9490196 , 0.7078807 , 0.46494848],
          [0.9412865 , 0.6956908 , 0.48196033],
          ...,
          [0.67415214, 0.6824155 , 0.7282138 ],
          [0.67411214, 0.68895805, 0.73251545],
          [0.6740146 , 0.68944585, 0.7327106 ]],
  
         [[0.96217984, 0.71140957, 0.47696579],
          [0.9492822 , 0.694888  , 0.5079907 ],
          [0.93989843, 0.67181873, 0.45635006],
          ...,
          [0.7066525 , 0.71769196, 0.75086015],
          [0.70323664, 0.71171755, 0.7568631 ],
          [0.67128897, 0.6836188 , 0.72843426]],
  
         ...,
  
     

In [31]:
# Dimensions of the sample image array (recorded output: (240, 360, 3)).
img_arr.shape

(240, 360, 3)

In [32]:
# Largest pixel value in the sample image (recorded output: 255.0).
img_arr.max()

255.0

In [33]:
# Smallest pixel value in the sample image (recorded output: 0.0).
img_arr.min()

0.0

In [35]:
# Scale the sample image by its maximum value, the same normalisation that
# Recipes5k applies to each image.
# NOTE(review): this cell originally called a module-level `normalize_image`
# that is not defined in the visible notebook; the inline expression assumes
# it divided by the array maximum -- confirm.
img_arr / img_arr.max()

array([[[0.7529412 , 0.77254903, 0.7490196 ],
        [0.7529412 , 0.77254903, 0.7490196 ],
        [0.75686276, 0.7764706 , 0.7529412 ],
        ...,
        [0.41568628, 0.2       , 0.17254902],
        [0.39607844, 0.19607843, 0.18039216],
        [0.3764706 , 0.18431373, 0.16862746]],

       [[0.7490196 , 0.76862746, 0.74509805],
        [0.7529412 , 0.77254903, 0.7490196 ],
        [0.7529412 , 0.77254903, 0.7490196 ],
        ...,
        [0.42352942, 0.21568628, 0.19215687],
        [0.40392157, 0.21176471, 0.19215687],
        [0.37254903, 0.19607843, 0.18431373]],

       [[0.7411765 , 0.7607843 , 0.7372549 ],
        [0.74509805, 0.7647059 , 0.7411765 ],
        [0.74509805, 0.7647059 , 0.7411765 ],
        ...,
        [0.40784314, 0.21568628, 0.1882353 ],
        [0.3882353 , 0.21176471, 0.19215687],
        [0.36862746, 0.19607843, 0.18431373]],

       ...,

       [[0.7411765 , 0.63529414, 0.4509804 ],
        [0.70980394, 0.6       , 0.4117647 ],
        [0.6745098 , 0

## Testing: metadata loading and the dataset generator

In [9]:
def load_recipe5k_metadata(dataset_dir="../Food Datasets/final-dataset"):
    """Read the Recipes5k metadata file into a DataFrame.

    Args:
        dataset_dir: Root directory of the dataset; the file is read from
            ``<dataset_dir>/metadata/recipes5k_metadata.csv``.

    Returns:
        A ``pandas.DataFrame`` parsed from the tab-separated metadata file.
    """
    metadata_path = Path(dataset_dir) / "metadata" / "recipes5k_metadata.csv"
    return pd.read_csv(metadata_path, sep="\t")

In [11]:
# Read the metadata CSV into a DataFrame for manual inspection.
data = load_recipe5k_metadata()

In [17]:
# Preview the first rows to check the column names and the comma-separated
# format of the Ingredients column.
data.head()

Unnamed: 0,ID/File Name,Category,Calorie(kcal),Carbohydrate(g),Protein(g),Fat(g),Ingredients
0,211103814294563038966243131439696432980,apple_pie,3.4393,0.2716,0.0161,0.2603,"flour,salt,oil,water,apple,sugar,cinnamon,butter"
1,263538410746730869383121236069019641366,apple_pie,2.8444,0.3591,0.0293,0.1501,"shells,pie,sugar,flour,cinnamon,apple,lemon,bu..."
2,328597835729663550995342992726274622904,apple_pie,3.2812,0.2658,0.0369,0.2419,"apple,lemon,sugar,flour,cinnamon,nut,butter,sa..."
3,192812446104799022112212906789347819248,apple_pie,2.6394,0.3287,0.0245,0.1409,"pie,apple,sugar,corn starch,cinnamon,lemon,but..."
4,191920265948634303435638687483707715644,apple_pie,3.6433,0.3161,0.0464,0.2601,"apple,brown sugar,butter,cinnamon,pepper,nut,pie"


In [105]:
# Build a separate Recipes5k instance for the manual checks below; the
# constructor reads the metadata CSV and builds the one-hot encoder.
test_recipes5k = Recipes5k()

In [106]:
# generate_dataset() contains a yield, so this call returns a generator
# object (despite the `_func` suffix); no row is processed until next().
test_gen_func = test_recipes5k.generate_dataset()

In [107]:
# Advance the generator once to inspect the first yielded sample.
next(test_gen_func)

(<tf.Tensor: shape=(224, 224, 3), dtype=float32, numpy=
 array([[[0.9581807 , 0.7057573 , 0.3880127 ],
         [0.9407813 , 0.7043092 , 0.4169968 ],
         [0.9565776 , 0.72282165, 0.5078832 ],
         ...,
         [0.6443882 , 0.6558776 , 0.70307165],
         [0.64169884, 0.6573851 , 0.6998473 ],
         [0.6635027 , 0.67918897, 0.7147632 ]],
 
        [[0.9654062 , 0.7153612 , 0.44116396],
         [0.9490196 , 0.7078807 , 0.46494848],
         [0.9412865 , 0.6956908 , 0.48196033],
         ...,
         [0.67415214, 0.6824155 , 0.7282138 ],
         [0.67411214, 0.68895805, 0.73251545],
         [0.6740146 , 0.68944585, 0.7327106 ]],
 
        [[0.96217984, 0.71140957, 0.47696579],
         [0.9492822 , 0.694888  , 0.5079907 ],
         [0.93989843, 0.67181873, 0.45635006],
         ...,
         [0.7066525 , 0.71769196, 0.75086015],
         [0.70323664, 0.71171755, 0.7568631 ],
         [0.67128897, 0.6836188 , 0.72843426]],
 
        ...,
 
        [[0.9264531 , 0.70707285