In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
from gensim import models
import sys
sys.path.insert(0, "/home/luke/Github/Project/python")
import compute_healthiness as ch
import clean_recipes_datasets as clean

### load data

In [2]:
# Word2Vec model providing the food embeddings queried with `.wv.most_similar`.
embeddings_path = "../generated/food_embeddings.bin"
food_embeddings = models.Word2Vec.load(embeddings_path)

# Conversion table for the units found in 1M
# (presumably the Recipe1M dataset, converting to grams -- TODO confirm).
with open('../generated/convert_gr.json') as json_data:
    convert_gr = json.load(json_data)

# Mapping from ingredient to USDA id
with open('../generated/ing_id_mapping.json') as json_data:
    mapping_usda_id = json.load(json_data)

# Nutrients table
nutrients_mapping = pd.read_hdf("../generated/nut_data.h5", 'table')

# id_repr:  str(ingredient id) -> term used to query the embeddings.
# repr_ids: embedding term -> ingredient ids sharing that term.
# Opened with `with` so the file handles are closed deterministically
# instead of being left to the garbage collector.
with open("../generated/id_repr.json") as json_data:
    id_repr = json.load(json_data)
with open("../generated/repr_ids.json") as json_data:
    repr_ids = json.load(json_data)

### define example recipes

In [3]:
# Each recipe is a list of [quantity, unit, ingredient name] triples.
# An empty unit ('') is used when the quantity is a count of whole items.

# Macaroni salad
recipe1 = [
    [1.0, 'c.', 'elbow macaroni'],
    [1.0, 'c.', 'American cheese'],
    [0.5, 'c.', 'celery'],
    [0.5, 'c.', 'green peppers'],
    [3.0, 'tbsp', 'pimentos'],
    [0.5, 'c.', 'mayonnaise'],
    [1.0, 'tbsp', 'vinegar'],
    [0.75, 'teaspoon', 'salt'],
    [0.5, 'teaspoon', 'dry dill weed'],
]

# Beef with coconut, fruit and cashews
recipe2 = [
    [12.0, 'cup', 'shredded coconut'],
    [1.0, 'lb', 'lean ground beef'],
    [1.5, 'tablespoon', 'fresh garlic'],
    [2.5, 'g', 'kosher salt'],
    [1.0, 'tablespoon', 'lemon juice'],
    [1.0, 'tablespoon', 'soy sauce'],
    [2.0, 'tablespoon', 'cornstarch'],
    [8.0, 'ounce', 'pineapple chunks'],
    [16.0, 'ounce', 'mandarin oranges'],
    [12.0, 'cup', 'cashew nuts'],
]

# Beef tacos
recipe3 = [
    [1.0, 'lb', 'lean ground beef'],
    [1.0, '', 'onion'],
    [1, 'tsp', 'chili powder'],
    [0.5, 'tsp', 'salt'],
    [0.5, 'tsp', 'garlic powder'],
    [8, 'oz', 'tomato sauce'],
    [10, 'oz', 'taco shells'],
    [6, 'oz', 'cheddar cheese'],
    [2, 'c.', 'lettuce'],
    [2, '', 'tomatoes'],
    [0.75, 'c.', 'salsa'],
    [0.75, 'c.', 'sour cream'],
]

### design recommender

We first estimate the healthiness score for a given recipe.

In [4]:
# Pick one of the example recipes and score it. As the cell output shows,
# the call prints a per-ingredient breakdown (fat, sat_fat, sugar, salt)
# and returns a dict of sub-scores plus a 'total'.
input_recipe = recipe2
ch.compute_healthiness(input_recipe, mapping_usda_id, nutrients_mapping, convert_gr)

shredded coconut    : 35.90% =>  content(grams): (fat=346.3680, sat_fat=308.7206, sugar=693.6576, salt=3.7018)
lean ground beef    : 10.60% =>  content(grams): (fat=12.2016, sat_fat= 4.6811, sugar= 0.0000, salt=0.2495)
fresh garlic        :  0.50% =>  content(grams): (fat= 0.1073, sat_fat= 0.0191, sugar= 0.2145, salt=0.0036)
kosher salt         :  0.06% =>  content(grams): (fat= 0.0000, sat_fat= 0.0000, sugar= 0.0000, salt=0.9690)
lemon juice         :  0.33% =>  content(grams): (fat= 0.0343, sat_fat= 0.0057, sugar= 0.3604, salt=0.0001)
soy sauce           :  0.33% =>  content(grams): (fat= 0.0143, sat_fat= 0.0016, sugar= 0.2431, salt=0.7988)
cornstarch          :  0.67% =>  content(grams): (fat= 0.0143, sat_fat= 0.0026, sugar= 0.0000, salt=0.0026)
pineapple chunks    :  5.24% =>  content(grams): (fat= 0.2240, sat_fat= 0.0157, sugar=47.2640, salt=0.0045)
mandarin oranges    : 10.47% =>  content(grams): (fat= 1.3888, sat_fat= 0.1747, sugar=47.3984, salt=0.0090)
cashew nuts         : 35.

{'fat': 35.10995253309952,
 'sat_fat': 57.20625283038799,
 'sugar': 22.567201387697597,
 'salt': 1.384310411798608,
 'total': 116.26771716298371}

We now design a strategy for finding replacements

In [31]:
def health_similarity_score(similarity, health, health_threshold=7):
    """Rank a replacement candidate by similarity and healthiness score.

    Parameters
    ----------
    similarity : float
        Embedding similarity between the original ingredient and the candidate.
    health : float
        Healthiness score of the candidate (lower values rank better here).
    health_threshold : float, optional
        Scores at or below this value are not penalised. Defaults to 7,
        the cut-off that was previously hard-coded.

    Returns
    -------
    float
        ``similarity`` unchanged when ``health <= health_threshold``;
        otherwise ``similarity ** 2 / health``, which shrinks as the
        candidate's score grows.
    """
    if health <= health_threshold:
        return similarity
    return similarity ** 2 / health
        

def _health_total(usda_id):
    """Return the 'total' healthiness score of a single ingredient id."""
    # The profile is computed for the pair (100, id) -- presumably 100 g of
    # the ingredient; confirm against compute_healthiness.compute_profile.
    fat, sat_fat, sugar, salt = ch.compute_profile([(100, usda_id)], nutrients_mapping)
    return ch.score(fat, sat_fat, sugar, salt)['total']


def find_swaps(ing_id, threshold=0.45):
    """Suggest a healthier replacement for the ingredient ``ing_id``.

    The ingredient is mapped to its embedding term, its 50 nearest
    neighbours are fetched, and those with similarity >= ``threshold`` are
    kept. Neighbours whose name does not contain the original term are
    preferred; neighbours that do contain it are only used when no other
    neighbour passes the threshold. Each candidate is scored with the
    lowest 'total' among the ids listed for it in ``repr_ids``, and is kept
    only if that score is strictly lower than the original ingredient's.

    Parameters
    ----------
    ing_id
        Ingredient id; looked up in ``id_repr`` as ``str(ing_id)``.
    threshold : float, optional
        Minimum embedding similarity for a neighbour to be considered.

    Returns
    -------
    tuple or None
        ``(name, similarity, health_score)`` of the candidate with the
        highest ``health_similarity_score``. ``None`` when the id is
        unknown, no neighbour passes the threshold, or no candidate is
        healthier than the original ingredient.
    """
    try:
        ing_ref = id_repr[str(ing_id)]
    except KeyError:
        print("The ingredient with id", ing_id, "has never been seen before")
        return None

    closest_refs = food_embeddings.wv.most_similar(ing_ref, topn=50)
    similar_enough = [ref for ref in closest_refs if ref[1] >= threshold]

    # Prefer genuinely different ingredients over variants of the same one.
    different = [ref for ref in similar_enough if ing_ref not in ref[0]]
    variants = [ref for ref in similar_enough if ing_ref in ref[0]]
    to_consider = different if different else variants
    if not to_consider:
        return None

    base_score = _health_total(ing_id)

    entries = []
    for name, similarity in to_consider:
        # Several ids can share one embedding term; judge the candidate by
        # its healthiest id. A term with no ids yields +inf and is skipped.
        best_score = min(
            (_health_total(collided_id) for collided_id in repr_ids[name]),
            default=float("inf"),
        )
        if best_score < base_score:
            entries.append((name, similarity, best_score))

    # Nothing healthier than the original ingredient was found.
    if not entries:
        return None

    # max() returns the first best entry, matching a stable descending sort.
    return max(entries, key=lambda entry: health_similarity_score(entry[1], entry[2]))

In [None]:
# TODO: unfinished cell. A bare `def` with no name or body was left here,
# which is a SyntaxError and stops Restart & Run All. Define the intended
# helper here or delete this cell.

In [37]:
# Example: take the USDA id mapped to "lamb" and ask for a replacement.
find_swaps(mapping_usda_id["lamb"])

('eggplant', 0.5672319531440735, 2.386)

In [13]:
# Inspect the 50 nearest embedding neighbours of one term, split by whether
# the neighbour's name contains the term itself.
ing_ref = "carrot"
closest_refs = food_embeddings.wv.most_similar(ing_ref, topn=50)

# Neighbours whose name contains the query term.
print([neighbour for neighbour in closest_refs if ing_ref in neighbour[0]])
print("\n\n")
# Neighbours whose name does not contain the query term.
print([neighbour for neighbour in closest_refs if ing_ref not in neighbour[0]])

[]



[('marrow', 0.5013813972473145), ('barley', 0.49812817573547363), ('turnip', 0.4500754475593567), ('vegetable', 0.4114275872707367), ('beef shank', 0.4037729501724243), ('leek', 0.39629676938056946), ('bouillon', 0.3956068158149719), ('kielbasa', 0.3881249725818634), ('fava bean', 0.38158679008483887), ('parsnip', 0.36232221126556396), ('yellow bean', 0.36217039823532104), ('cranberry bean', 0.35601702332496643), ('lamb neck', 0.35586971044540405), ('celeriac', 0.35531941056251526), ('lentil', 0.3484030067920685), ('white potato', 0.34645459055900574), ('rutabaga', 0.3420451283454895), ('lima bean', 0.34195131063461304), ('salt pork', 0.3323369026184082), ('french lentil', 0.33201396465301514), ('rice', 0.32865625619888306), ('stewing chicken', 0.32775914669036865), ('enoki mushroom', 0.3265804350376129), ('broth', 0.3265687823295593), ('fish bone', 0.32590949535369873), ('beef sausage', 0.3233147859573364), ('medium-grain rice', 0.32107189297676086), ('chicken giblet', 0.3209991