In [1]:
import json
import nltk
import inflect
import tqdm
import time
import requests 
from bs4 import BeautifulSoup as bs
import gensim
import matplotlib.pyplot as plt
import pandas as pd
import urllib.request
import numpy as np
import collections
import json
import sys
# NOTE(review): absolute, machine-specific path — the import below only resolves where this
# directory exists; consider a path relative to the notebook or a configurable constant.
sys.path.insert(0, "/Users/Lionel/Documents/Ada/Project/python")
# Project-local helper module; the cells below call clean.clean_ingredient() on ingredient
# names before looking them up.
import clean_recipes_datasets as clean
# inflect engine instance; not referenced by any of the cells visible here.
engine = inflect.engine()

In [2]:
# Load the recipes for which we have a quantity for each ingredient.
# presumably each recipe is a list of [quantity, unit, ingredient name] entries, which is the
# layout recipes_to_usda / median_weight_ingredient index into — TODO confirm against the generator
with open('../generated/1m_quant_usda_recipes.json') as json_data:
    quantities_recipes = json.load(json_data)

# Conversion table for the units found in 1M: looked up by unit string, the value is
# multiplied with the quantity (the comments in this notebook describe the result as grams)
with open('../generated/convert_gr.json') as json_data:
    convert_gr = json.load(json_data)
    
# Mapping from ingredient to USDA id
with open('../generated/ing_usda_mapping_high_score_3d.json') as json_data:
    mapping_usda_id = json.load(json_data)
    # Manual addition on top of the generated mapping.
    # NOTE(review): 2047 is assumed to be the USDA id used for salt — confirm.
    mapping_usda_id['kosher salt'] = 2047
    
# Nutrient table read from the HDF5 key 'table'; the functions below select rows by
# 'food_id' and read nutrient values from the 'nutr_per_100g' columns.
nutrients_mapping = pd.read_hdf("../generated/nut_data_3d.h5", 'table')

In [3]:
def recipes_to_usda(quantities_recipes, mapping_usda_id):
    """Convert recipes to ``(weight, usda_id)`` pairs and save them as JSON.

    For every ingredient ``[quantity, unit, name]`` the unit is looked up in the
    module-level ``convert_gr`` table and multiplied with the quantity, and the
    name (after ``clean.clean_ingredient``) is looked up in ``mapping_usda_id``.
    A recipe containing an unconvertible unit or an unmappable ingredient is
    dropped entirely.

    Unlike the previous implementation the input is NOT modified in place, so
    the cell can be re-run on the same data, and failed recipes are skipped
    directly instead of being filtered out afterwards with a quadratic
    ``recipe not in to_remove`` scan.

    Parameters
    ----------
    quantities_recipes : iterable of recipes, each a sequence of
        ``[quantity, unit, ingredient name]`` entries.
    mapping_usda_id : dict mapping a cleaned ingredient name to its USDA id.

    Returns
    -------
    list of recipes, each a list of ``(quantity * conversion factor, usda_id)``
    tuples. The same list is written to
    ``../generated/recipes_quantities_mapped_usda.json``.
    """
    mapped_recipes = []
    for recipe in tqdm.tqdm_notebook(quantities_recipes):
        try:
            # Resolve every lookup first so that one missing key discards the
            # whole recipe before any weight is computed.
            resolved = [
                (ingr[0],
                 convert_gr[ingr[1]],
                 mapping_usda_id[clean.clean_ingredient(ingr[2])])
                for ingr in recipe
            ]
        except KeyError:
            # Unknown unit or unmapped ingredient: skip this recipe.
            continue
        mapped_recipes.append(
            [(quantity * factor, usda_id) for quantity, factor, usda_id in resolved]
        )

    with open('../generated/recipes_quantities_mapped_usda.json', 'w') as outfile:
        json.dump(mapped_recipes, outfile)
    return mapped_recipes

In [4]:
def median_weight_ingredient(quantities_recipes):
    """Compute the median weight of each ingredient across all recipes.

    For every ingredient ``[quantity, unit, name]`` the unit is looked up in the
    module-level ``convert_gr`` table and multiplied with the quantity; the name
    is passed through ``clean.clean_ingredient``. A recipe for which any of
    these steps raises ``KeyError`` is ignored entirely.

    The input is NOT modified in place (the previous implementation overwrote
    units and names, so a second run discarded every recipe), failed recipes
    are skipped directly rather than via a quadratic ``not in`` scan, and the
    frame is built straight from the records instead of going through a
    mixed-type numpy array that turned the weights into strings.

    Parameters
    ----------
    quantities_recipes : iterable of recipes, each a sequence of
        ``[quantity, unit, ingredient name]`` entries.

    Returns
    -------
    pandas.DataFrame with columns ``ingredient`` and ``weight`` (the median
    weight per cleaned ingredient name). The frame is also written to
    ``../generated/median_weight_ingredient.json``.
    """
    records = []
    for recipe in tqdm.tqdm_notebook(quantities_recipes):
        try:
            # Resolve all lookups before computing weights so a single failure
            # discards the whole recipe.
            resolved = [
                (ingr[0], convert_gr[ingr[1]], clean.clean_ingredient(ingr[2]))
                for ingr in recipe
            ]
        except KeyError:
            continue
        records.extend((quantity * factor, name) for quantity, factor, name in resolved)

    ingredient_weight_df = pd.DataFrame(records, columns=['weight', 'ingredient'])
    ingredient_weight_df = ingredient_weight_df.astype({'weight': float})

    median_weight_by_ingredient_df = ingredient_weight_df.groupby('ingredient').median().reset_index()
    median_weight_by_ingredient_df.to_json('../generated/median_weight_ingredient.json')

    return median_weight_by_ingredient_df

In [5]:
# Sample recipes used to exercise the scoring functions defined below.
# Each ingredient is a [quantity, unit, ingredient name] list: map_one_recipe_usda looks the
# unit up in convert_gr, multiplies the result with the quantity, and maps the cleaned name
# through mapping_usda_id.
reci = [[1.0, 'c.', 'elbow macaroni'],
  [1.0, 'c.', 'American cheese'],
  [0.5, 'c.', 'celery'],
  [0.5, 'c.', 'green peppers'],
  [3.0, 'tbsp', 'pimentos'],
  [0.5, 'c.', 'mayonnaise'],
  [1.0, 'tbsp', 'vinegar'],
  [0.75, 'teaspoon', 'salt'],
  [0.5, 'teaspoon', 'dry dill weed']]

# NOTE(review): 12.0 cups of shredded coconut / cashew nuts looks unusually large next to the
# other quantities — possibly a mis-parsed "1/2 cup"; confirm against the source recipe.
recip2 = [[12.0, 'cup', 'shredded coconut'],
  [1.0, 'lb', 'lean ground beef'],
  [1.5, 'tablespoon', 'fresh garlic'],
  [2.5, 'g', 'kosher salt'],
  [1.0, 'tablespoon', 'lemon juice'],
  [1.0, 'tablespoon', 'soy sauce'],
  [2.0, 'tablespoon', 'cornstarch'],
  [8.0, 'ounce', 'pineapple chunks'],
  [16.0, 'ounce', 'mandarin oranges'],
  [12.0, 'cup', 'cashew nuts']]

def map_one_recipe_usda(recipe, mapping_usda_id):
    """Map one recipe to a list of ``(weight, usda_id)`` tuples.

    Each ingredient ``[quantity, unit, name]`` is copied (the caller's recipe is
    left untouched), its unit is replaced by the factor found in the
    module-level ``convert_gr`` table and its name by the id found in
    ``mapping_usda_id`` for ``clean.clean_ingredient(name)``.

    Failure handling:
    * An ingredient name missing from ``mapping_usda_id`` is printed, the
      failure message is printed, and the name is left in place of the id in
      the returned tuple (unchanged legacy behaviour — callers must not feed
      such a result to the nutrient lookups).
    * A unit missing from ``convert_gr`` makes the weight impossible to compute.
      Previously the code went on to evaluate ``quantity * unit_string``, which
      raised an opaque ``TypeError`` for float quantities and silently produced
      a repeated string for int quantities. A ``ValueError`` naming the
      offending units is raised instead.

    Parameters
    ----------
    recipe : sequence of ``[quantity, unit, ingredient name]`` lists.
    mapping_usda_id : dict mapping a cleaned ingredient name to its USDA id.

    Returns
    -------
    list of ``(quantity * conversion factor, usda_id)`` tuples, in recipe order.

    Raises
    ------
    ValueError
        If at least one unit is not present in ``convert_gr``.
    """
    # Work on copies so the caller's ingredient lists are never modified.
    recipe_copy = [ingr.copy() for ingr in recipe]

    failure = False
    unknown_units = []
    for ingr in recipe_copy:
        try:
            ingr[1] = convert_gr[ingr[1]]
        except KeyError:
            unknown_units.append(ingr[1])
            failure = True
            # As before, the ingredient name is not looked up when the unit failed.
            continue
        try:
            ingr[2] = mapping_usda_id[clean.clean_ingredient(ingr[2])]
        except KeyError:
            print(ingr[2])
            failure = True

    if failure:
        print('Mapping of the recipe has failed.')

    if unknown_units:
        raise ValueError(
            'Cannot convert unit(s) to grams: {0}'.format(
                ', '.join(repr(unit) for unit in unknown_units)))

    return [(ingr[0] * ingr[1], ingr[2]) for ingr in recipe_copy]

def compute_profile(recipe, nutrients_mapping):
    """Return the (fat, sat_fat, sugar, salt) content of a recipe per 100 units of weight.

    ``recipe`` is a sequence of ``(weight, food_id)`` entries. For each entry the
    row of ``nutrients_mapping`` whose ``food_id`` matches is selected, its
    ``nutr_per_100g`` values are scaled by ``weight / 100`` and accumulated.
    The totals are then rescaled by ``100 / total recipe weight``.

    Missing nutrient values are treated as 0.0. The sodium column is divided by
    1000 (presumably mg -> g).
    NOTE(review): the fourth value is derived from 'Sodium, Na' only — confirm
    whether a sodium-to-salt conversion is intended before comparing it with
    salt thresholds.
    """
    table = nutrients_mapping.reset_index().fillna(0.0)

    totals = {'fat': 0, 'sat_fat': 0, 'sugar': 0, 'salt': 0}
    recipe_weight = 0

    for entry in recipe:
        grams, food_id = entry[0], entry[1]
        recipe_weight += grams

        # Nutrient content of this ingredient, scaled from "per 100" to its actual weight.
        matching_rows = table[table['food_id'] == food_id]
        scaled = matching_rows['nutr_per_100g'] * (grams / 100)

        totals['fat'] += scaled['Total lipid (fat)'].values[0]
        totals['sat_fat'] += scaled['Fatty acids, total saturated'].values[0]
        totals['sugar'] += scaled['Sugars, total'].values[0]
        totals['salt'] += scaled['Sodium, Na'].values[0] / 1000

    per_hundred = (100 / recipe_weight)
    return (totals['fat'] * per_hundred,
            totals['sat_fat'] * per_hundred,
            totals['sugar'] * per_hundred,
            totals['salt'] * per_hundred)

def fetch_profile_ingr(ingr, nutrients_mapping):
    """Return the (fat, sat_fat, sugar, salt) content of a single ingredient.

    ``ingr`` is a ``(weight, food_id)`` entry. The row of ``nutrients_mapping``
    whose ``food_id`` matches is selected and its ``nutr_per_100g`` values are
    scaled by ``weight / 100``. Missing nutrient values are treated as 0.0 and
    the sodium column is divided by 1000 (presumably mg -> g).
    """
    table = nutrients_mapping.reset_index().fillna(0.0)

    matching_rows = table[table['food_id'] == ingr[1]]
    scaled = matching_rows['nutr_per_100g'] * (ingr[0] / 100)

    def first_value(column):
        # Value of the first matching row for the given nutrient column.
        return scaled[column].values[0]

    return (first_value('Total lipid (fat)'),
            first_value('Fatty acids, total saturated'),
            first_value('Sugars, total'),
            first_value('Sodium, Na') / 1000)
    

def score(fat, sat_fat, sugar, salt):
    """Score a nutrient profile and return the per-nutrient and total scores.

    Each amount is passed to ``score_cat`` together with a fixed
    (medium, upper) threshold pair. The result is a dict with the keys
    'fat', 'sat_fat', 'sugar', 'salt' and 'total' (the sum of the four).
    The amounts are presumably expressed per 100 g, as produced by
    compute_profile — TODO confirm for other callers.
    """
    # (result key, amount, medium threshold, upper threshold)
    bands = (
        ('fat', fat, 3.0, 17.5),
        ('sat_fat', sat_fat, 1.5, 5.0),
        ('sugar', sugar, 5.0, 22.5),
        ('salt', salt, 0.3, 1.5),
    )

    score_total = {}
    for key, amount, medium, upper in bands:
        score_total[key] = score_cat(amount, medium, upper)

    # Computed before the 'total' key exists, so it only sums the four nutrient scores.
    score_total['total'] = sum(score_total.values())
    return score_total
    
def score_cat(cat, medium, upper):
    """Score one nutrient amount against its two thresholds.

    Returns 0 when ``cat`` is below ``medium``, 4 when it lies in
    ``[medium, upper)`` and 20 otherwise.

    The previous condition ``cat > medium and cat < upper`` left a hole at
    ``cat == medium``: that single value fell through to the final branch and
    was scored 20, although values just below score 0 and values just above
    score 4. The middle band now includes its lower bound so the score is
    monotonic in ``cat``. Behaviour for every other input is unchanged.

    NOTE(review): if the thresholds are meant to follow a published
    traffic-light scheme, check whether its bands are inclusive at the upper
    ends (``<=``) and adjust both comparisons accordingly.
    """
    if cat < medium:
        return 0
    if cat < upper:
        return 4
    return 20
    
def beautiful_print(recipe, mapping_usda_id, nutrients_mapping):
    """Print one line per ingredient with its weight share and nutrient content.

    The recipe is mapped with ``map_one_recipe_usda``, the nutrient content of
    every ingredient is fetched with ``fetch_profile_ingr``, and only then is a
    line printed for each ingredient: its original name, its share of the total
    recipe weight in percent, and its (fat, sat_fat, sugar, salt) content.
    Returns None.
    """
    mapped_recipe = map_one_recipe_usda(recipe, mapping_usda_id)

    # First pass: total weight and per-ingredient profiles (nothing is printed yet).
    grams_total = 0
    profiles = []
    for mapped in mapped_recipe:
        grams_total += mapped[0]
        profiles.append(fetch_profile_ingr(mapped, nutrients_mapping))

    # Second pass: one output line per ingredient, in recipe order.
    for original, mapped, profile in zip(recipe, mapped_recipe, profiles):
        share = (mapped[0] / grams_total) * 100
        pieces = [
            str(original[2]),
            ': ',
            "{0:.2f}".format(share),
            '%',
            ' => content(grams): ',
            "(fat={0:.4f}, sat_fat={1:.4f}, sugar={2:.4f}, salt={3:.4f})".format(
                profile[0], profile[1], profile[2], profile[3]),
        ]
        print(''.join(pieces))
        
        
def compute_healthiness(recipe, mapping_usda_id, nutrients_mapping):
    """Score a raw recipe and print its per-ingredient breakdown.

    The recipe is mapped with ``map_one_recipe_usda``, profiled with
    ``compute_profile`` and scored with ``score``; ``beautiful_print`` is then
    called on the original recipe (which maps it a second time) before the
    score dict is returned.
    """
    usda_recipe = map_one_recipe_usda(recipe, mapping_usda_id)
    profile = compute_profile(usda_recipe, nutrients_mapping)
    # profile is (fat, sat_fat, sugar, salt), in the order score() expects.
    result = score(*profile)
    beautiful_print(recipe, mapping_usda_id, nutrients_mapping)
    return result

In [6]:
# Score the first sample recipe: prints the per-ingredient breakdown, then displays the score dict.
compute_healthiness(reci, mapping_usda_id, nutrients_mapping)

elbow macaroni: 25.08% => content(grams): (fat=1.1904, sat_fat=0.2253, sugar=0.7168, salt=0.0013)
American cheese: 25.08% => content(grams): (fat=17.9200, sat_fat=11.2512, sugar=9.9072, salt=1.7216)
celery: 12.54% => content(grams): (fat=0.1088, sat_fat=0.0269, sugar=0.8576, salt=0.0512)
green peppers: 12.54% => content(grams): (fat=0.1088, sat_fat=0.0371, sugar=1.5360, salt=0.0019)
pimentos: 8.40% => content(grams): (fat=0.1287, sat_fat=0.0193, sugar=1.1626, salt=0.0060)
mayonnaise: 12.54% => content(grams): (fat=49.7920, sat_fat=6.9018, sugar=0.1920, salt=0.3110)
vinegar: 2.80% => content(grams): (fat=0.0000, sat_fat=0.0000, sugar=2.1379, salt=0.0033)
salt: 0.62% => content(grams): (fat=0.0000, sat_fat=0.0000, sugar=0.0000, salt=1.2209)
dry dill weed: 0.41% => content(grams): (fat=0.0235, sat_fat=0.0013, sugar=0.0000, salt=0.0013)


{'fat': 4, 'salt': 4, 'sat_fat': 4, 'sugar': 0, 'total': 12}

In [7]:
# Score the second sample recipe: prints the per-ingredient breakdown, then displays the score dict.
compute_healthiness(recip2, mapping_usda_id, nutrients_mapping)

shredded coconut: 35.90% => content(grams): (fat=346.3680, sat_fat=308.7206, sugar=693.6576, salt=3.7018)
lean ground beef: 10.60% => content(grams): (fat=12.2016, sat_fat=4.6811, sugar=0.0000, salt=0.2495)
fresh garlic: 0.50% => content(grams): (fat=0.1073, sat_fat=0.0191, sugar=0.2145, salt=0.0036)
kosher salt: 0.06% => content(grams): (fat=0.0000, sat_fat=0.0000, sugar=0.0000, salt=0.9690)
lemon juice: 0.33% => content(grams): (fat=0.0343, sat_fat=0.0057, sugar=0.3604, salt=0.0001)
soy sauce: 0.33% => content(grams): (fat=0.0143, sat_fat=0.0016, sugar=0.2431, salt=0.7988)
cornstarch: 0.67% => content(grams): (fat=0.0143, sat_fat=0.0026, sugar=0.0000, salt=0.0026)
pineapple chunks: 5.24% => content(grams): (fat=0.2240, sat_fat=0.0157, sugar=47.2640, salt=0.0045)
mandarin oranges: 10.47% => content(grams): (fat=1.3888, sat_fat=0.1747, sugar=47.3984, salt=0.0090)
cashew nuts: 35.90% => content(grams): (fat=673.5360, sat_fat=119.5469, sugar=90.7776, salt=0.1843)


{'fat': 20, 'salt': 0, 'sat_fat': 20, 'sugar': 4, 'total': 44}