In [78]:
import random
import shutil
from collections import Counter, defaultdict
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import pandas as pd

## Analyze common ingredients from recipe5k for each 101 categories

In [4]:
cleaned_metadata = pd.read_csv(
    "../final-dataset/metadata/recipes5k_metadata.csv", sep="\t"
)

In [5]:
all_categories = cleaned_metadata["Category"].unique()

In [32]:
category_ingredients_tally = defaultdict()
for category in all_categories:
    all_rows = cleaned_metadata[cleaned_metadata["Category"] == category]
    ingredients_tally = Counter()
    average_ingredients = 0
    for ingredients in all_rows["Ingredients"]:
        ingredients_list = ingredients.split(",")
        average_ingredients += len(ingredients_list)
        ingredients_tally.update(ingredients_list)
    average_ingredients = average_ingredients // len(all_rows)
    print(f"Category : {category}, Average Ingredients : {average_ingredients}")
    category_ingredients_tally[category] = [average_ingredients, ingredients_tally]

Category : apple_pie, Average Ingredients : 8
Category : croque_madame, Average Ingredients : 10
Category : paella, Average Ingredients : 16
Category : gyoza, Average Ingredients : 12
Category : crab_cakes, Average Ingredients : 11
Category : carrot_cake, Average Ingredients : 13
Category : chicken_curry, Average Ingredients : 12
Category : steak, Average Ingredients : 6
Category : cannoli, Average Ingredients : 10
Category : ceviche, Average Ingredients : 10
Category : shrimp_and_grits, Average Ingredients : 13
Category : hummus, Average Ingredients : 8
Category : hot_and_sour_soup, Average Ingredients : 15
Category : nachos, Average Ingredients : 11
Category : creme_brulee, Average Ingredients : 4
Category : hamburger, Average Ingredients : 9
Category : beef_tartare, Average Ingredients : 12
Category : fish_and_chips, Average Ingredients : 10
Category : falafel, Average Ingredients : 13
Category : beet_salad, Average Ingredients : 8
Category : samosa, Average Ingredients : 14
Categor

## Build food101 dataset 

In [37]:
food101_images_dir = Path("./images")
categories_dir = [x for x in food101_images_dir.iterdir()]

In [104]:
def move_image(category, path):
    src_dir = Path("./images")
    parent_dest_dir = Path("../final-dataset/images")
    src_path = src_dir / category / path
    dir_name = category
    # check if dir containing file exists in destination
    dest_dir = parent_dest_dir / dir_name
    if not dest_dir.exists():
        dest_dir.mkdir()
    file_name = src_path.name
    dest_path = dest_dir / file_name
    shutil.copy(src_path, dest_path)
    return (dir_name, file_name)


def get_ingredients(
    category, category_ingredients_tally=category_ingredients_tally, top_n_percent=0.8
):
    # random.seed(999) set the seed outside of this method
    average_ingredients_per_recipe, ingredients_counter = category_ingredients_tally[
        category
    ]
    ingredients_counter = ingredients_counter.most_common()
    top_n = int(average_ingredients * top_n_percent)
    random_n = average_ingredients_per_recipe - top_n
    top_n_ingredients = ingredients_counter[:top_n]
    random_n_ingredients = random.choices(ingredients_counter[top_n:], k=random_n)
    top_n_ingredients.extend(random_n_ingredients)
    ingredients_list = [a for a, b in top_n_ingredients]
    return ingredients_list

In [83]:
category_ingredients_tally["apple_pie"][1].most_common()[:9]

[('apple', 53),
 ('cinnamon', 49),
 ('flour', 40),
 ('sugar', 38),
 ('butter', 30),
 ('salt', 24),
 ('pie', 23),
 ('nut', 22),
 ('lemon', 21)]

In [112]:
get_ingredients("apple_pie")

['apple',
 'cinnamon',
 'flour',
 'sugar',
 'shortening',
 'vanilla ice',
 'gin',
 'egg']