In [1]:
import json
import nltk
import inflect
import tqdm
import time
import requests 
from bs4 import BeautifulSoup as bs
import gensim
import matplotlib.pyplot as plt
import pandas as pd
import urllib.request
import numpy as np
import collections
import json
import sys
# NOTE(review): absolute, machine-specific path (presumably where the
# clean_recipes_datasets module lives) -- it will not exist on another machine;
# consider a path relative to the notebook instead.
sys.path.insert(0, "/Users/Lionel/Documents/Ada/Project/python")
# Project-local helper module; the cells below call clean.clean_ingredient on
# ingredient names before looking them up.
import clean_recipes_datasets as clean
# inflect engine instance (not referenced by any of the cells visible here).
engine = inflect.engine()

In [15]:
# Recipes for which a quantity is available for every ingredient
with open('../generated/1m_quant_usda_recipes.json') as recipes_file:
    quantities_recipes = json.load(recipes_file)

# Conversion table for the units found in 1M (unit -> multiplier applied to the quantity)
with open('../generated/convert_gr.json') as conversion_file:
    convert_gr = json.load(conversion_file)

# Mapping from ingredient to USDA id
with open('../generated/ing_usda_mapping_high_score_3d.json') as mapping_file:
    mapping_usda_id = json.load(mapping_file)

# Nutrient table, stored under the 'table' key of the HDF5 file
nutrients_mapping = pd.read_hdf("../generated/nut_data_3d.h5", key='table')

In [None]:
def map_recipes(recipes, mapping):
    """Build the nutrient profile of every recipe in ``recipes``.

    Parameters
    ----------
    recipes : iterable
        Each recipe is a sequence of ingredients; the second item of an
        ingredient (``ingredient[1]``) is the label used to look the
        ingredient up in ``mapping``'s index.
    mapping : pandas.DataFrame
        Nutrient table: one row per ingredient id, one column per nutrient.

    Returns
    -------
    pandas.DataFrame
        One row per recipe (in input order) with the same columns as
        ``mapping``. Each value is the column-wise sum of the rows of the
        recipe's ingredients; missing nutrient values count as 0 and a recipe
        without ingredients yields a row of zeros.

    Raises
    ------
    KeyError
        If an ingredient id is not present in ``mapping``'s index.
    """
    columns = mapping.columns
    profile = []

    for recipe in tqdm.tqdm(recipes):
        totals = np.zeros(len(columns))
        for ingredient in recipe:
            # Look the ingredient up in the table that was passed in, not in
            # a notebook-level global.
            nutrient_row = mapping.loc[ingredient[1]]
            totals += nutrient_row.fillna(0.0).to_numpy(dtype=float)
        # NOTE(review): the sum is not weighted by the ingredient quantity
        # (ingredient[0]) -- confirm whether a weighted profile is wanted.
        profile.append(totals)

    # Passing the columns to the constructor also works when `recipes` is
    # empty, where assigning `.columns` afterwards would fail.
    return pd.DataFrame(profile, columns=columns)

In [None]:
def recipes_to_usda(quantities_recipes, mapping_usda_id,
                    output_path='../generated/recipes_quantities_mapped_usda.json'):
    """Convert every recipe to a list of ``(weight, usda_id)`` pairs.

    Each ingredient entry ``[quantity, unit, name]`` becomes
    ``(quantity * convert_gr[unit], mapping_usda_id[clean.clean_ingredient(name)])``.
    A recipe containing a unit missing from ``convert_gr`` or an ingredient
    missing from ``mapping_usda_id`` is dropped entirely.

    The input recipes are left untouched, so the function can safely be run
    again on the same data.

    Parameters
    ----------
    quantities_recipes : iterable
        Recipes, each a sequence of ``[quantity, unit, name]`` entries.
    mapping_usda_id : dict
        Cleaned ingredient name -> USDA id.
    output_path : str or None
        Where the result is dumped as JSON; pass ``None`` to skip writing.

    Returns
    -------
    list
        The successfully mapped recipes, in input order.
    """
    mapped_recipes = []
    for recipe in tqdm.tqdm_notebook(quantities_recipes):
        try:
            mapped = [
                (ingr[0] * convert_gr[ingr[1]],
                 mapping_usda_id[clean.clean_ingredient(ingr[2])])
                for ingr in recipe
            ]
        except KeyError:
            # Unconvertible unit or unmappable ingredient: skip the recipe.
            continue
        mapped_recipes.append(mapped)

    if output_path is not None:
        with open(output_path, 'w') as outfile:
            json.dump(mapped_recipes, outfile)
    return mapped_recipes

In [3]:
def median_weight_ingredient(quantities_recipes,
                             output_path='../generated/median_weight_ingredient.json'):
    """Compute the median weight of every ingredient over all recipes.

    Each ingredient entry ``[quantity, unit, name]`` is turned into
    ``(quantity * convert_gr[unit], clean.clean_ingredient(name))``. Recipes
    containing a unit missing from ``convert_gr`` are ignored entirely.

    The input recipes are left untouched, so the function can safely be run
    again on the same data.

    Parameters
    ----------
    quantities_recipes : iterable
        Recipes, each a sequence of ``[quantity, unit, name]`` entries.
    output_path : str or None
        Where the resulting table is written as JSON; pass ``None`` to skip
        writing.

    Returns
    -------
    pandas.DataFrame
        Columns ``ingredient`` and ``weight`` (the median), one row per
        distinct cleaned ingredient name, sorted by ingredient.
    """
    ingredient_weight = []
    for recipe in tqdm.tqdm_notebook(quantities_recipes):
        try:
            converted = [
                (ingr[0] * convert_gr[ingr[1]], clean.clean_ingredient(ingr[2]))
                for ingr in recipe
            ]
        except KeyError:
            # Unconvertible unit somewhere in the recipe: skip all of it.
            continue
        ingredient_weight.extend(converted)

    # Build the frame straight from the (weight, ingredient) pairs; going
    # through a numpy array would turn the weights into strings first.
    ingredient_weight_df = pd.DataFrame(ingredient_weight, columns=['weight', 'ingredient'])
    ingredient_weight_df = ingredient_weight_df.astype({'weight': float})

    median_weight_by_ingredient_df = (
        ingredient_weight_df.groupby('ingredient').median().reset_index()
    )
    if output_path is not None:
        median_weight_by_ingredient_df.to_json(output_path)

    return median_weight_by_ingredient_df

In [None]:
# Median weight of each ingredient, computed inline on `quantities_recipes`.
# NOTE(review): every ingredient entry must already be a (weight, ingredient)
# pair to fit the two column names below -- confirm which cell is expected to
# have produced that shape before this one runs.
ingredient_weight = np.array([
    ingredient
    for recipe in tqdm.tqdm_notebook(quantities_recipes)
    for ingredient in recipe
])

ingredient_weight_df = pd.DataFrame(
    ingredient_weight, columns=['weight', 'ingredient']
).astype({'weight': float})

median_weight_by_ingredient = (
    ingredient_weight_df
    .groupby('ingredient')
    .median()
    .reset_index()
)

In [5]:

# Compute and display the median weight of every ingredient; by default the
# function also writes the resulting table to ../generated/median_weight_ingredient.json.
median_weight_ingredient(quantities_recipes)










Unnamed: 0,ingredient,weight
0,,256.000000
1,& chunky salsa,128.000000
2,'n,3.150000
3,abalone,1000.000000
4,abalone steak,453.592000
5,absinthe,28.000000
6,absolut citron vodka,42.000000
7,absolut kurant vodka,35.000000
8,absolut mandarin vodka,40.000000
9,absolut vodka,28.000000


In [84]:
# Sample recipe used to try out the single-recipe helpers below.
# Each entry is [quantity, unit, ingredient name].
reci = [[1.0, 'c.', 'elbow macaroni'],
  [1.0, 'c.', 'American cheese'],
  [0.5, 'c.', 'celery'],
  [0.5, 'c.', 'green peppers'],
  [3.0, 'tbsp', 'pimentos'],
  [0.5, 'c.', 'mayonnaise'],
  [1.0, 'tbsp', 'vinegar'],
  [0.75, 'teaspoon', 'salt'],
  [0.5, 'teaspoon', 'dry dill weed']]

def map_one_recipe_usda(recipe, mapping_usda_id):
    """Map a single recipe to a list of ``(weight, usda_id)`` pairs.

    Each ingredient entry ``[quantity, unit, name]`` becomes
    ``(quantity * convert_gr[unit], mapping_usda_id[clean.clean_ingredient(name)])``.
    ``recipe`` itself is not modified, so the same recipe can be mapped
    repeatedly (e.g. when the cell is re-run).

    Parameters
    ----------
    recipe : sequence
        ``[quantity, unit, name]`` entries.
    mapping_usda_id : dict
        Cleaned ingredient name -> USDA id.

    Returns
    -------
    list or None
        The mapped recipe, or ``None`` (after printing a message) when a unit
        is missing from ``convert_gr`` or an ingredient is missing from
        ``mapping_usda_id``.
    """
    try:
        return [
            (ingr[0] * convert_gr[ingr[1]],
             mapping_usda_id[clean.clean_ingredient(ingr[2])])
            for ingr in recipe
        ]
    except KeyError:
        # A half-mapped recipe is useless downstream, so report the failure
        # and hand back nothing instead of mixed raw/mapped entries.
        print('Mapping of the recipe has failed.')
        return None

def compute_profile(recipe, nutrients_mapping):
    """Compute the fat, saturated fat, sugar and salt content per 100 units of recipe weight.

    Parameters
    ----------
    recipe : sequence
        ``(weight, food_id)`` entries, as returned by ``map_one_recipe_usda``.
    nutrients_mapping : pandas.DataFrame
        Nutrient table whose index resets to a ``food_id`` column and whose
        ``'nutr_per_100g'`` column group holds the nutrient columns
        ``'Total lipid (fat)'``, ``'Fatty acids, total saturated'``,
        ``'Sugars, total'`` and ``'Sodium, Na'``. Missing values count as 0.

    Returns
    -------
    tuple
        ``(fat, sat_fat, sugar, salt)``, each scaled to 100 units of total
        recipe weight. All zeros when the total weight is 0 (e.g. an empty
        recipe).

    Raises
    ------
    IndexError
        If a ``food_id`` of the recipe is absent from ``nutrients_mapping``.
    """
    table = nutrients_mapping.reset_index().fillna(0.0)
    food_ids = table['food_id']
    per_100g = table['nutr_per_100g']

    fat = 0.0
    sat_fat = 0.0
    sugar = 0.0
    salt = 0.0
    total_weight = 0.0

    for ingr in recipe:
        weight, food_id = ingr[0], ingr[1]
        matches = per_100g[food_ids == food_id]
        if matches.empty:
            raise IndexError('no nutrient data for food_id {!r}'.format(food_id))

        # Table values are per 100g, so scale them by weight / 100.
        nutrients = matches.iloc[0] * (weight / 100)
        total_weight += weight
        fat += nutrients['Total lipid (fat)']
        # Saturated fat has its own column; it is not the total fat.
        sat_fat += nutrients['Fatty acids, total saturated']
        sugar += nutrients['Sugars, total']
        # presumably sodium is stored in mg, hence the division by 1000 -- TODO confirm.
        # NOTE(review): this is the sodium amount, not a salt equivalent -- confirm
        # whether a sodium-to-salt factor is wanted.
        salt += nutrients['Sodium, Na'] / 1000

    if total_weight == 0:
        # Nothing to normalise by; avoid a ZeroDivisionError.
        return 0.0, 0.0, 0.0, 0.0

    ratio = 100 / total_weight
    return fat * ratio, sat_fat * ratio, sugar * ratio, salt * ratio

def compute_healthiness(recipe, mapping_usda_id, nutrients_mapping):
    """Map a raw recipe to USDA ids and return its nutrient profile.

    Parameters
    ----------
    recipe : sequence
        ``[quantity, unit, name]`` entries.
    mapping_usda_id : dict
        Ingredient name -> USDA id, forwarded to ``map_one_recipe_usda``.
    nutrients_mapping : pandas.DataFrame
        Nutrient table, forwarded to ``compute_profile``.

    Returns
    -------
    tuple or None
        ``(fat, sat_fat, sugar, salt)`` as computed by ``compute_profile``,
        or ``None`` when ``map_one_recipe_usda`` returns ``None``.
    """
    mapped_recipe = map_one_recipe_usda(recipe, mapping_usda_id)
    if mapped_recipe is None:
        return None

    # TODO: derive an actual healthiness score from these four values; for
    # now the profile is returned instead of being thrown away.
    fat, sat_fat, sugar, salt = compute_profile(mapped_recipe, nutrients_mapping)
    return fat, sat_fat, sugar, salt
    

In [85]:
# Try the single-recipe mapping on the sample recipe defined above.
test = map_one_recipe_usda(reci, mapping_usda_id)

In [None]:
#Fat: 'Total lipid (fat)'
#Saturated fats: 'Fatty acids, total saturated'
#Sugar: 'Sugars, total'
#Salt: 'Sodium, Na'