## Imports

In [1]:
from typing import List
from IPython.display import display
from io import StringIO

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

import altair as alt
import string
import ipywidgets as wid

## Load Data From File

In [70]:
def parse_data(raw_df):
    """Normalise an uploaded recipe table into one row per recipe.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Must provide the recipe name ("Recipe Name" or "Recipe_Name"), the
        source ("Date of Book" or "Source") and the five ingredient columns
        listed in ``ingredient_columns``. Any other column is ignored.

    Returns
    -------
    pandas.DataFrame
        Columns "Recipe_Name", "Source" and "Ingredients". "Ingredients" holds
        the cleaned ingredient names of all five ingredient columns in one
        string.

    Raises
    ------
    KeyError
        If a required column is missing from ``raw_df``.

    Side effects
    ------------
    Publishes two module-level lookups that ``get_meal_plan`` reads as globals:
    ``recipe_id_dict`` (recipe name -> row label) and ``ingredient_type_map``
    (cleaned ingredient name -> ingredient column it was listed under).
    """
    global recipe_id_dict, ingredient_type_map

    def clean_ingredient_names(col):
        # regex= is spelled out on every call: since pandas 2.0 the default is
        # a literal match, which silently disabled the character-class and
        # repetition patterns below.
        return (
            col
            .str.replace(" ", "_", regex=False)
            .str.replace(",_", ",", regex=False)
            .str.replace(r"[^a-zA-Z,_]", "", regex=True)
            .str.replace(r"_{2,}", "_", regex=True)
            .str.strip()
            .str.lower()
        )

    # define needed columns
    ingredient_columns = ["Protein", "Starch/Grain", "Produce", "Pantry", "Specialty Ingredients"]
    cols_to_keep = ["Recipe_Name", "Source"] + ingredient_columns

    # accept the spreadsheet's original headers as well as the internal names
    data = raw_df.rename({"Recipe Name": "Recipe_Name", "Date of Book": "Source"}, axis=1)

    missing_columns = [col for col in cols_to_keep if col not in data.columns]
    if missing_columns:
        raise KeyError(
            "Uploaded table is missing required column(s): {}".format(missing_columns)
        )

    # remove unnecessary columns, if any, and blank out missing cells
    data = data.loc[:, cols_to_keep].fillna("")
    for col in ingredient_columns:
        # astype(str) keeps numeric cells from turning into NaN under .str
        data[col] = clean_ingredient_names(data[col].astype(str))

    # recipe name -> row label (the last row wins for duplicated names)
    recipe_id_dict = {name: row_id for row_id, name in data["Recipe_Name"].items()}

    # cleaned ingredient name -> ingredient column; when an ingredient is listed
    # under several columns, the last column in ingredient_columns wins
    type_lookup = {}
    for col in ingredient_columns:
        for cell in data[col]:
            for ingredient in cell.split(","):
                if ingredient:
                    type_lookup[ingredient] = col
    ingredient_type_map = type_lookup

    # make combined ingredients column
    combined = data[ingredient_columns[0]]
    for col in ingredient_columns[1:]:
        combined = combined + "," + data[col]
    # runs of commas come from empty ingredient columns
    data["Ingredients"] = (
        combined
        .str.replace(",_", ",", regex=False)
        .str.replace(r" {2,}", " ", regex=True)
        .str.replace(r",{2,}", " ", regex=True)
        .str.strip()
        .str.strip(",")
    )

    # remove intermediary columns
    return data.loc[:, ["Recipe_Name", "Source", "Ingredients"]]

In [80]:
def multi_checkbox_widget(options_dict):
    """Build a search field stacked on top of a scrollable list of checkboxes.

    Parameters
    ----------
    options_dict : dict
        Maps a label to the checkbox widget shown for it. The search field
        filters on these labels (case-insensitive substring match). The dict is
        read once, when the widget is built.

    Returns
    -------
    ipywidgets.VBox
        The search field followed by the checkbox list. Ticked checkboxes are
        always listed first.

    Side effects
    ------------
    Displays an ``Output`` widget that captures anything the event handlers
    print or raise.
    """
    default_search_text = "<search by recipe name and ingredients>"
    # The hint is a placeholder rather than the field's value, so the user can
    # type straight away instead of first deleting the hint.
    search_widget = wid.Text(placeholder=default_search_text)
    output_widget = wid.Output()

    labelled_options = list(options_dict.items())
    options = [checkbox for _, checkbox in labelled_options]
    options_layout = wid.Layout(
        overflow='auto',
        border='1px solid black',
        width='300px',
        height='300px',
        flex_flow='column',
        display='flex'
    )

    options_widget = wid.VBox(options, layout=options_layout)
    multi_select = wid.VBox([search_widget, options_widget])

    def checked_first(checkboxes):
        # sorted() is stable, so checkboxes keep their relative order within
        # the ticked and the unticked group.
        return sorted(checkboxes, key=lambda box: box.value, reverse=True)

    @output_widget.capture()
    def on_checkbox_change(change):
        options_widget.children = checked_first(options_widget.children)

    for checkbox in options:
        checkbox.observe(on_checkbox_change, names="value")

    # Wire the search field to the checkboxes
    @output_widget.capture()
    def on_text_change(change):
        search_input = change['new']
        # strip() without arguments removes surrounding whitespace;
        # strip('') removes nothing.
        needle = search_input.strip().lower()
        if needle == '' or search_input == default_search_text:
            # Reset search field
            matches = options
        else:
            matches = [
                checkbox for label, checkbox in labelled_options
                if needle in str(label).lower()
            ]
        options_widget.children = checked_first(matches)

    search_widget.observe(on_text_change, names='value')
    display(output_widget)
    return multi_select

In [81]:
def _extend_meal_plan(counts, seed_ids, target_size, randomization_strength):
    """Greedily grow ``seed_ids`` into a plan of ``target_size`` recipes.

    ``counts`` is a recipes x ingredients count frame. Each round scores every
    recipe not yet in the plan by the share of its ingredient count that the
    plan already uses, multiplies the score by a uniform random factor in
    ``[1 - randomization_strength, 1 + randomization_strength]`` and adds the
    best-scoring recipe. Returns the row labels of the plan; seeds come first.
    """
    plan = list(seed_ids)
    while len(plan) < target_size:
        candidates = counts.loc[~counts.index.isin(plan)]
        if candidates.empty:
            # fewer recipes available than requested
            break

        used = counts.loc[plan].sum(axis=0)
        shared_ingredients = used[used >= 1].index

        overlap = candidates.loc[:, shared_ingredients].sum(axis=1)
        recipe_size = candidates.sum(axis=1)
        jitter = np.random.uniform(
            low=1 - randomization_strength,
            high=1 + randomization_strength,
            size=(len(candidates),)
        )

        # A recipe without ingredients scores NaN; sort_values places NaN last.
        scores = overlap / recipe_size * jitter
        plan.append(scores.sort_values(ascending=False).index[0])
    return plan


def get_meal_plan(
    **args,
) -> List[str]:
    """Pick a meal plan around the ticked recipes and display it as a chart.

    Parameters
    ----------
    **args
        ``recipes_per_meal_plan`` (int) plus one boolean per recipe name;
        recipes whose value is truthy seed the plan.

    Returns
    -------
    List[str]
        Names of the recipes in the plan; empty when no recipe is ticked.

    Notes
    -----
    Reads the module-level names ``data``, ``recipe_id_dict`` and
    ``ingredient_type_map``. The values of ``recipe_id_dict`` are used as row
    labels of ``data``.
    """
    RANDOMIZATION_STRENGTH = 0.5

    recipes_per_meal_plan = args["recipes_per_meal_plan"]
    initial_recipes = [recipe_id_dict[key] for key, value in args.items() if value and key in recipe_id_dict]

    if len(initial_recipes) == 0:
        print("Select a Recipe or Ingredient to See a Meal Plan")
        return []

    # generate ingredient frequency matrix
    vectorizer = CountVectorizer()
    count_matrix = vectorizer.fit_transform(data["Ingredients"])
    # get_feature_names() was removed in scikit-learn 1.2
    if hasattr(vectorizer, "get_feature_names_out"):
        vocabulary = vectorizer.get_feature_names_out()
    else:
        vocabulary = vectorizer.get_feature_names()
    # Sharing data's index keeps the counts aligned with their recipes.
    counts = pd.DataFrame(count_matrix.toarray(), columns=vocabulary, index=data.index)

    # build meal plan
    meal_plan = _extend_meal_plan(
        counts, initial_recipes, recipes_per_meal_plan, RANDOMIZATION_STRENGTH
    )

    in_plan = data.index.isin(meal_plan)
    selected_meal_plan_ingredients = pd.concat(
        [data.loc[in_plan, ["Recipe_Name"]], counts.loc[in_plan]], axis=1
    )
    chart_data = selected_meal_plan_ingredients.melt(
        id_vars = ["Recipe_Name"],
        var_name = "Ingredient",
        value_name="count"
    ).query("count > 0")

    # Both columns are derived from the raw token before it is prettified;
    # tokens missing from the map keep their own name as their type.
    raw_tokens = chart_data["Ingredient"]
    chart_data = chart_data.assign(
        Ingredient_Type = raw_tokens.map(lambda token: ingredient_type_map.get(token, token)),
        Ingredient = raw_tokens.str.replace("_", " ", regex=False).str.title(),
    )

    ingredient_sort = chart_data.groupby(["Ingredient_Type", "Ingredient"])["count"].sum()\
    .reset_index().sort_values(["Ingredient_Type", "count"], ascending=False)["Ingredient"].to_list()
    chart = alt.Chart(chart_data).mark_rect().encode(
        x = alt.X(
            "Recipe_Name",
            axis = alt.Axis(orient="top", labelAngle=-15),
            title = None
        ),
        y = alt.Y(
            "Ingredient",
            axis = alt.Axis( labelAngle=0),
            sort = ingredient_sort,
            title = None
        ),
        # chart_data only holds rows with count > 0, so every cell is coloured;
        # testing count == 1 left ingredients listed twice in a recipe blank.
        color = alt.Color("Ingredient_Type:N")
    ).properties(
        width = 100 * len(selected_meal_plan_ingredients)
    )
    display(chart)

    return selected_meal_plan_ingredients["Recipe_Name"].to_list()

In [82]:
file_output = wid.Output()
file_widget = wid.FileUpload(accept=".csv")

# clear_output=True replaces the previous recipe UI on every new upload instead
# of stacking another one underneath it.
@file_output.capture(clear_output=True)
def get_uploaded_file(change):
    """Parse the uploaded CSV and display the recipe picker and meal-plan view.

    ``change`` is the traitlets change dict of ``file_widget.value``. The parsed
    table is stored in the module-level name ``data`` because ``get_meal_plan``
    reads it from there.
    """
    global data

    uploads = change["new"]
    if not uploads:
        # value was reset; there is nothing to parse
        return

    # The value is a dict keyed by file name in ipywidgets 7 and a sequence of
    # per-file records in ipywidgets 8; take the first file either way.
    if isinstance(uploads, dict):
        first_upload = next(iter(uploads.values()))
    else:
        first_upload = uploads[0]

    # utf-8-sig also drops the byte-order mark some spreadsheet exports add,
    # which would otherwise become part of the first column name.
    csv_text = bytes(first_upload["content"]).decode("utf-8-sig")

    raw_df = pd.read_csv(StringIO(csv_text))
    data = parse_data(raw_df)
    recipe_checkbox_dict = {
        title: wid.Checkbox(
            description=title,
            value=False,
            style={"description_width":"0px"}
        ) for title in data["Recipe_Name"].to_list()
    }

    ui = multi_checkbox_widget(recipe_checkbox_dict)
    num_recipes_selector = wid.ToggleButtons(
        options=[3, 4, 5, 6, 7],
        value = 5,
        description='Number of Recipes:',
        style={"description_width":"120px"},
        layout = wid.Layout(width="100px")
    )
    # get_meal_plan receives one keyword argument per control, so the selector
    # is added under the name it looks up.
    recipe_checkbox_dict["recipes_per_meal_plan"] = num_recipes_selector
    out = wid.interactive_output(get_meal_plan, recipe_checkbox_dict)
    display(wid.HBox([wid.VBox([ui, num_recipes_selector]), out]))

file_widget.observe(get_uploaded_file, names="value")
display(file_widget)
display(file_output)

FileUpload(value={}, accept='.csv', description='Upload')

Output()

- Ingredient filter
- Search recipes by ingredient
- Change file structure and fix different values
- to .py file
- add Markdown
- Binder