## Imports

In [5]:
import string
from functools import partial
from typing import List, Optional

import altair as alt
import ipywidgets as wid
import numpy as np
import pandas as pd
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

## Load Data From File

In [6]:
# Load the recipe table from CSV (the path is relative to the current working directory).
raw_data = pd.read_csv("data/recipes.csv")

In [7]:
# Preview the first rows to check the column layout before cleaning.
raw_data.head()

Unnamed: 0,Want to make,Recipe Name,Date of Book,Protein,Starch/Grain,Produce,Pantry,Specialty Ingredients,Favorite,Low Points
0,,Acorn Squash Bowls,13-Jan-20,,freekeh,"acorn squash, Meyer lemon, Lucianato kale","walnuts, golden raisins, white balsamic vinega...",,,
1,,Almond Butter Tofu,16-Sep-19,baked tofu,bamboo rice,"ginger, mixed fresh vegetables","rice vinegar, sesame oil, almond butter",,,
2,,Almond Butter Tofu Bowls,web,baked tofu,sushi rice,"ginger, zucchini, bok choy, radish","almond butter, sesame oil",chile garlic sauce,,
3,,Ancient Grain Bowls,17-Feb-20,chickpeas,farro,"leeks, lemon, apple, arugula",pistachios,"French mustard & herb blend, Treeline cashew c...",,yes
4,,Apple Sage Sausage Skillet,21-Oct-19,Apple Sage sausages,farro,"cremini mushrooms, shallot, apple, baby spinach",,French mustard & herb blend,yes,yes


## Clean and Parse Data

In [8]:
def clean_ingredient_names(col):
    """Normalise a column of comma-separated ingredient lists into tokens.

    Spaces become underscores so that a multi-word ingredient forms a single
    token, the underscore left behind after a comma is dropped, every
    character other than ASCII letters, commas and underscores is removed,
    runs of underscores are collapsed to one, and the text is lower-cased.

    Args:
        col: pandas Series of strings, one comma-separated list per row.

    Returns:
        pandas Series of cleaned strings with the same index as ``col``.
    """
    return (
        col
        .str.replace(" ", "_", regex=False)
        .str.replace(",_", ",", regex=False)
        # The next two patterns are regular expressions. pandas 2.0 changed
        # the default of ``regex`` to False, so it must be requested
        # explicitly or these steps silently match nothing.
        .str.replace("[^a-zA-Z,_]", "", regex=True)
        .str.replace("_{2,}", "_", regex=True)
        .str.strip()
        .str.lower()
    )

In [9]:
# Working copy of the raw table: drop the planning flag, give the name and
# source columns identifier-friendly names, and blank out missing cells.
data = (
    raw_data
    .drop(columns=["Want to make"])
    .rename(columns={"Recipe Name": "Recipe_Name", "Date of Book": "Source"})
    .fillna("")
)
# Lookup tables between the row index of raw_data and the recipe name.
recipe_dict = raw_data["Recipe Name"].to_dict()
recipe_id_dict = {name: row_id for row_id, name in recipe_dict.items()}

In [10]:
# Columns that hold comma-separated ingredient lists.
ingredient_columns = ["Protein", "Starch/Grain", "Produce", "Pantry", "Specialty Ingredients"]
# Normalise all of them in one pass; apply() hands each column to the cleaner as a Series.
data[ingredient_columns] = data[ingredient_columns].apply(clean_ingredient_names)

In [11]:
# Long table with one row per non-empty (ingredient column, recipe) cell.
ingredient_type_df = (
    data[ingredient_columns]
    .melt(var_name="Type", value_name="Ingredients")
    .query("Ingredients != ''")
)
# Split every comma-separated cell into one column per ingredient, keep the
# type label next to it, then melt again so each ingredient gets its own row.
ingredient_type_df = (
    ingredient_type_df[["Type"]]
    .join(ingredient_type_df["Ingredients"].str.split(",", expand=True))
    .melt(id_vars="Type")
)
# Ingredient token -> name of the column it was listed under. When a token
# occurs under several columns, the pair that appears last wins.
ingredient_type_map = (
    ingredient_type_df
    .drop(columns="variable")
    .drop_duplicates()
    .set_index("value")["Type"]
    .to_dict()
)

In [12]:
# Join the five cleaned ingredient columns into one comma-separated string per
# recipe; this is the text that CountVectorizer tokenises further down.
data["Ingredients"] = data["Protein"] + "," + data["Starch/Grain"] + "," + data["Produce"]\
     + "," + data["Pantry"] + "," + data["Specialty Ingredients"]
# Tidy the joined string. Empty source columns leave runs of commas behind, so
# collapse them to a single comma and trim any comma left at either end.
# The quantifier patterns are regular expressions: pandas 2.0 made literal
# matching the default, so regex=True has to be passed explicitly.
data["Ingredients"] = (
    data["Ingredients"]
    .str.replace(",_", ",", regex=False)
    .str.replace(" {2,}", " ", regex=True)
    .str.replace(",{2,}", ",", regex=True)
    .str.strip()
    .str.strip(",")
)

In [13]:
# Keep only the identifying columns and the combined ingredient text. The
# meal-plan functions slice with iloc[:, 3:], i.e. they treat everything after
# these three columns as ingredient counts.
data = data[["Recipe_Name", "Source", "Ingredients"]]

## Build function to return meal plans based on passed parameters

In [None]:
# Example call: five-recipe plans seeded from recipe id 1, keeping only the
# single best candidate at each step and returning at most three plans.
# NOTE(review): build_meal_plan is defined in the cell *below* this one, so on
# a fresh kernel (Restart & Run All) this cell raises NameError. Consider
# moving this call after the definition.
build_meal_plan(    
    data = data,
    selected_recipe_id = 1,
    randomization_strength = 0.5,
    recipes_per_meal_plan = 5,
    recipes_to_select_at_each_stage = 1,
    meal_plan_results_to_return = 3
)

In [None]:
def build_meal_plan(
    data: pd.DataFrame,
    selected_recipe_id: int,
    scoring_function: str = "minimize_non_shared_ingredients",
    randomization_strength: float = 0,
    recipes_per_meal_plan: int = 5,
    recipes_to_select_at_each_stage: int = 10,
    meal_plan_results_to_return: int = 5
) -> Optional[pd.DataFrame]:
    """Search for meal plans whose recipes reuse each other's ingredients.

    Starting from ``selected_recipe_id`` the search repeatedly scores every
    recipe that is not yet in the partial plan as

        (occurrences of ingredients already in the plan)
        / (all ingredient occurrences in the candidate) * random factor

    keeps the ``recipes_to_select_at_each_stage`` best candidates, and extends
    the plan with each of them until it holds ``recipes_per_meal_plan``
    recipes. Completed plans are de-duplicated (keeping the best score),
    ranked by score, and the top ones are charted with Altair and returned.

    Reads the module-level ``recipe_dict`` to turn recipe ids into names.

    Args:
        data: Recipe table with an ``Ingredients`` text column. Everything
            after the first three columns is treated as ingredient counts, and
            recipe ids are used both as row positions and as index labels, so
            a default 0..n-1 index is assumed.
        selected_recipe_id: Id of the recipe every plan starts from. ``None``
            short-circuits the function.
        scoring_function: Currently unused; shared ingredients are always
            maximised regardless of the value passed.
        randomization_strength: Half-width of the uniform multiplier applied
            to each score (0 means deterministic scoring).
        recipes_per_meal_plan: Number of recipes in a finished plan.
        recipes_to_select_at_each_stage: Branching factor of the search.
        meal_plan_results_to_return: Maximum number of plans to return.

    Returns:
        ``None`` when ``selected_recipe_id`` is ``None``; otherwise a
        DataFrame with an ``id`` column (0 = best score) and the columns
        ``Recipe_1`` .. ``Recipe_N`` holding recipe names.
    """
    if selected_recipe_id is None:
        return None

    # Generate the recipe x ingredient frequency matrix.
    vectorizer = CountVectorizer()
    count_matrix = vectorizer.fit_transform(data["Ingredients"])
    # get_feature_names() was removed in scikit-learn 1.2; use its replacement
    # and fall back only on releases that predate get_feature_names_out().
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    data = pd.concat(
        [
            data,
            pd.DataFrame(count_matrix.toarray(), columns=feature_names)
        ], axis=1)

    # Build meal plans. Each queue entry is a partial plan (list of recipe ids).
    queue = [[selected_recipe_id]]
    meal_plans = []
    while len(queue) > 0:
        current_recipes = queue.pop()
        # Ingredients that occur at least once in the partial plan.
        selected_recipes = data.iloc[current_recipes, 3:].sum(axis=0)
        selected_recipe_ingredients = list(selected_recipes[selected_recipes >= 1].index)
        recipes_to_consider = data.query("index not in @current_recipes").iloc[:, 3:]

        intersection_weights = recipes_to_consider\
            .loc[:, selected_recipe_ingredients]\
            .sum(axis=1)

        union_weights = recipes_to_consider.sum(axis=1)

        random_weights = np.random.uniform(
            low=1-randomization_strength,
            high=1+randomization_strength,
            size=(len(recipes_to_consider),)
        )

        potential_meal_plans = intersection_weights/union_weights * random_weights

        # Highest score first; keep only the best few candidates.
        results = potential_meal_plans.sort_values(ascending=False)\
            .iloc[:recipes_to_select_at_each_stage].to_dict()

        for result, value in results.items():
            new_meal_plan = current_recipes + [result]
            if len(new_meal_plan) == recipes_per_meal_plan:
                # Sort the ids so the same set of recipes found via different
                # paths collapses into one row in the groupby below.
                meal_plans.append((tuple(sorted(new_meal_plan) + [value])))
            else:
                queue.append(new_meal_plan)

    columns = ["Ingredient_"+str(x+1) for x in range(recipes_per_meal_plan)]
    results = pd.DataFrame(
        meal_plans,
        columns=columns + ["Score"]
    )\
    .groupby(columns).max()\
    .reset_index().sort_values("Score", ascending=False)\
    .iloc[:meal_plan_results_to_return,:recipes_per_meal_plan]

    # Translate recipe ids into recipe names.
    for col in columns:
        results[col] = results[col].map(recipe_dict)

    results.columns = ["Recipe_"+str(x+1) for x in range(recipes_per_meal_plan)]

    meal_plans = results.reset_index(drop=True).reset_index(drop=False).rename({"index": "id"}, axis=1)
    chart = alt.Chart(meal_plans.melt(id_vars = ["id"])).mark_rect().encode(
        y = alt.Y("id:O", axis=alt.Axis( labelAngle=0), title="Meal Plans"),
        x = alt.X("value", axis=alt.Axis(labelAngle=15), title="Recipes"),
    ).properties(
        width = 700
    )
    display(chart)
    return meal_plans

In [None]:
# Checkbox for an inverse-frequency weighting option; it is created here but
# not passed to the interactive call below.
weight_by_selector = wid.Checkbox(
    value=False,
    description="Weight by Inverse Frequency",
)

# Dropdown options are (label, value) pairs: show the recipe name, return its id.
recipe_picklist = [(name, recipe_id) for recipe_id, name in recipe_dict.items()]
starting_recipe_selector = wid.Dropdown(
    value=recipe_picklist[0][1],
    options=recipe_picklist,
    description="Select Base Recipe",
)

scoring_function_selector = wid.Dropdown(
    options=[
        ("Maximize shared ingredients", "maximize_shared_ingredients"),
        ("Minimize other ingredients", "minimize_non_shared_ingredients"),
    ],
    description="What should the meal plan optimize?",
)

randomization_selector = wid.FloatSlider(
    value=0,
    min=0,
    max=1,
    step=0.05,
    description="Degree of randomness",
)

# The plan size, the number of candidates kept per stage and the number of
# plans returned are fixed for now. BoundedIntText selectors for them
# (recipes_per_meal_plan_selector, recipes_per_stage_selector,
# meal_plans_to_return_selector) were prototyped but are disabled.
results = wid.interactive(
    build_meal_plan,
    data=wid.fixed(data),
    selected_recipe_id=starting_recipe_selector,
    scoring_function=scoring_function_selector,
    randomization_strength=randomization_selector,
    recipes_per_meal_plan=wid.fixed(5),
    recipes_to_select_at_each_stage=wid.fixed(10),
    meal_plan_results_to_return=wid.fixed(5),
)
results

In [None]:
def get_meal_plan_ingredients( **args) -> Optional[bool]:
    """Chart which ingredients each selected recipe uses.

    Reads the module-level ``data`` (recipe table) and ``ingredient_type_map``
    (ingredient token -> ingredient column name).

    Args:
        **args: Mapping of recipe name to a truthy/falsy "selected" flag.

    Returns:
        ``True`` when nothing is selected (after displaying ``None``);
        otherwise ``None`` after displaying a recipe x ingredient heat map.
    """
    selected_meal_plan_recipes = [name for name, checked in args.items() if checked]
    if len(selected_meal_plan_recipes) == 0:
        display(None)
        return True
    selected_meal_plan_ingredients = data.query("Recipe_Name in @selected_meal_plan_recipes")
    vectorizer = CountVectorizer()
    count_matrix = vectorizer.fit_transform(selected_meal_plan_ingredients["Ingredients"])
    # get_feature_names() was removed in scikit-learn 1.2; use its replacement
    # and fall back only on releases that predate get_feature_names_out().
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    # Reset the index so the recipe names line up row-for-row with the counts.
    selected_meal_plan_ingredients = pd.concat(
        [
            selected_meal_plan_ingredients["Recipe_Name"].reset_index(drop=True),
            pd.DataFrame(count_matrix.toarray(), columns=feature_names)
        ], axis=1
    )
    chart_data = selected_meal_plan_ingredients.melt(
        id_vars = ["Recipe_Name"],
        var_name = "Ingredient",
        value_name="count"
    )

    # Tokens missing from the map keep their own name as the type.
    chart_data["Ingredient_Type"] = chart_data["Ingredient"].replace(ingredient_type_map)
    # Order the y axis by ingredient type, then by how often the ingredient is used.
    ingredient_sort = chart_data.groupby(["Ingredient_Type", "Ingredient"])["count"].sum()\
    .reset_index().sort_values(["Ingredient_Type", "count"], ascending=False)["Ingredient"].to_list()
    chart = alt.Chart(chart_data).mark_rect().encode(
        x = alt.X(
            "Recipe_Name",
            axis = alt.Axis(orient="top", labelAngle=-15),
            title = None
        ),
        y = alt.Y(
            "Ingredient",
            axis = alt.Axis( labelAngle=0),
            sort = ingredient_sort,
            title = None
        ),
        # Colour every cell whose recipe contains the ingredient. Testing for
        # exactly 1 left a cell blank when the token occurred more than once
        # in the same recipe.
        color = alt.condition(alt.datum.count >= 1, alt.Color("Ingredient_Type:N"), alt.value(None))
    ).properties(
        width = 100 * len(selected_meal_plan_ingredients)
    )
    display(chart)

In [14]:
def multi_checkbox_widget(options_dict):
    """Build a search box stacked above a scrollable list of checkboxes.

    Typing in the search box narrows the list to the checkboxes whose key in
    ``options_dict`` contains the text (case-insensitive, surrounding
    whitespace ignored). Ticked checkboxes are always listed first.

    As a side effect an Output widget is displayed; the callbacks are wrapped
    with its ``capture()`` decorator, so anything they print or raise is
    shown there.

    Args:
        options_dict: Mapping of option name to a checkbox widget whose
            ``description`` is expected to equal that name.

    Returns:
        A VBox holding the search field and the checkbox list.
    """
    search_widget = wid.Text()
    output_widget = wid.Output()
    options = list(options_dict.values())
    options_layout = wid.Layout(
        overflow='auto',
        border='1px solid black',
        width='300px',
        height='300px',
        flex_flow='column',
        display='flex'
    )

    options_widget = wid.VBox(options, layout=options_layout)
    multi_select = wid.VBox([search_widget, options_widget])

    def checked_first(checkboxes):
        # Stable sort: ticked boxes move to the top, relative order is kept.
        return sorted(checkboxes, key=lambda box: box.value, reverse=True)

    @output_widget.capture()
    def on_checkbox_change(change):
        # Re-sort whatever is currently listed (possibly a filtered subset).
        options_widget.children = checked_first(options_widget.children)

    for checkbox in options:
        checkbox.observe(on_checkbox_change, names="value")

    # Wire the search field to the checkboxes
    @output_widget.capture()
    def on_text_change(change):
        # strip() with no argument removes surrounding whitespace; the earlier
        # strip('') removed nothing, so " tofu" never matched "tofu".
        search_input = change['new'].strip().lower()
        if search_input == '':
            # Reset search field
            new_options = checked_first(options)
        else:
            # Case-insensitive substring match against the option names.
            matches = {name for name in options_dict if search_input in str.lower(name)}
            new_options = checked_first(
                [box for box in options if box.description in matches]
            )
        options_widget.children = new_options

    search_widget.observe(on_text_change, names='value')
    display(output_widget)
    return multi_select

In [20]:
def get_meal_plan(
    ** args,
) -> Optional[bool]:
    """Complete a meal plan from the ticked recipes and chart its ingredients.

    The ticked recipes seed the plan. While it holds fewer than five recipes,
    every remaining recipe is scored as

        (occurrences of ingredients already in the plan)
        / (all ingredient occurrences in the candidate) * random factor

    with the random factor drawn uniformly from [0.5, 1.5], and the
    best-scoring recipe is added. The finished plan is displayed as a
    recipe x ingredient heat map coloured by ingredient type.

    Reads the module-level ``data``, ``recipe_id_dict`` and
    ``ingredient_type_map``.

    Args:
        **args: Mapping of recipe name to a truthy/falsy "selected" flag.

    Returns:
        ``True`` when nothing is selected (after displaying an empty string);
        otherwise ``None`` after displaying the chart.
    """
    RANDOMIZATION_STRENGTH = 0.5
    RECIPES_PER_MEAL_PLAN = 5

    initial_recipes = [recipe_id_dict[name] for name, checked in args.items() if checked]

    if len(initial_recipes) == 0:
        display("")
        return True

    # Generate the recipe x ingredient frequency matrix.
    vectorizer = CountVectorizer()
    count_matrix = vectorizer.fit_transform(data["Ingredients"])
    # get_feature_names() was removed in scikit-learn 1.2; use its replacement
    # and fall back only on releases that predate get_feature_names_out().
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    vect_data = pd.concat(
        [
            data,
            pd.DataFrame(count_matrix.toarray(), columns=feature_names)
        ], axis=1)

    # Build the meal plan greedily: add the best-scoring recipe until the plan
    # is full. A selection that is already full or larger is used unchanged.
    new_meal_plan = list(initial_recipes)
    while len(new_meal_plan) < RECIPES_PER_MEAL_PLAN:
        # Ingredients that occur at least once in the plan so far.
        selected_recipes = vect_data.iloc[new_meal_plan, 3:].sum(axis=0)
        selected_recipe_ingredients = list(selected_recipes[selected_recipes >= 1].index)

        recipes_to_consider = vect_data.query("index not in @new_meal_plan").iloc[:, 3:]
        if recipes_to_consider.empty:
            # Fewer recipes exist than the plan size; stop rather than fail
            # on an empty ranking.
            break

        intersection_weights = recipes_to_consider\
            .loc[:, selected_recipe_ingredients]\
            .sum(axis=1)

        union_weights = recipes_to_consider.sum(axis=1)

        random_weights = np.random.uniform(
            low=1-RANDOMIZATION_STRENGTH,
            high=1+RANDOMIZATION_STRENGTH,
            size=(len(recipes_to_consider),)
        )

        potential_meal_plans = intersection_weights/union_weights * random_weights
        result = potential_meal_plans.sort_values(ascending=False)\
            .index.to_list()[0]

        new_meal_plan = new_meal_plan + [result]

    selected_meal_plan_ingredients = vect_data.query("index in @new_meal_plan").drop(["Source", "Ingredients"], axis=1)
    # copy() so the column assignments below act on an independent frame and
    # do not trigger SettingWithCopyWarning on the filtered result.
    chart_data = selected_meal_plan_ingredients.melt(
        id_vars = ["Recipe_Name"],
        var_name = "Ingredient",
        value_name="count"
    ).query("count > 0").copy()

    # Look up the type before prettifying the name: the map is keyed by the
    # raw underscore-separated token.
    chart_data["Ingredient_Type"] = chart_data["Ingredient"].replace(ingredient_type_map)
    chart_data["Ingredient"] = chart_data["Ingredient"].str.replace("_"," ").str.title()

    # Order the y axis by ingredient type, then by how often the ingredient is used.
    ingredient_sort = chart_data.groupby(["Ingredient_Type", "Ingredient"])["count"].sum()\
    .reset_index().sort_values(["Ingredient_Type", "count"], ascending=False)["Ingredient"].to_list()
    chart = alt.Chart(chart_data).mark_rect().encode(
        x = alt.X(
            "Recipe_Name",
            axis = alt.Axis(orient="top", labelAngle=-15),
            title = None
        ),
        y = alt.Y(
            "Ingredient",
            axis = alt.Axis( labelAngle=0),
            sort = ingredient_sort,
            title = None
        ),
        # Colour every remaining cell. Testing for exactly 1 left a cell blank
        # when the token occurred more than once in the same recipe.
        color = alt.condition(alt.datum.count >= 1, alt.Color("Ingredient_Type:N"), alt.value(None))
    ).properties(
        width = 100 * len(selected_meal_plan_ingredients)
    )
    display(chart)

In [21]:
# One unchecked checkbox per recipe, keyed by recipe name. The description
# width is set to 0px in the checkbox style.
recipe_checkbox_dict = dict(
    (
        title,
        wid.Checkbox(description=title, value=False, style={"description_width": "0px"}),
    )
    for title in data["Recipe_Name"].to_list()
)

# Searchable checkbox list on the left, the generated meal-plan chart on the right.
ui = multi_checkbox_widget(recipe_checkbox_dict)
out = wid.interactive_output(get_meal_plan, recipe_checkbox_dict)
display(wid.HBox([ui, out]))

Output()

HBox(children=(VBox(children=(Text(value=''), VBox(children=(Checkbox(value=False, description='Acorn Squash B…

- Add a selector for the number of meal plans to generate
- Add a "Go" button to trigger meal-plan generation
- Clean up the code
- Add an ingredient selector and filter