In [1]:
import pandas as pd
import requests
import json
import os

from difflib import SequenceMatcher

In [2]:
def save_as_edammam_id_food_info(food_id, information):
    path = '../data_discovery/edamam_nutrients/'
    with open(f'{path}edamam__{food_id}.json', 'w') as f:
        json.dump(information, f)
        
def check_if_food_info_downloaded(food_id):
    all_ids = [
        food_id.replace('edamam__', '').replace('.json', '') 
        for food_id 
        in os.listdir('../data_discovery/edamam_nutrients/')
    ]
    if food_id in all_ids:
        path = f'../data_discovery/edamam_nutrients/edamam__{food_id}.json'
        return path
    return None
    

def get_ingredient_nutritional_info(food_id_list):
    url = 'https://api.edamam.com/api/food-database/v2/nutrients?app_id=0abd8d1d&app_key=1d961d26601c2a91f64b6e594255f8e2'
    save_responses = []
        
    for food_id in food_id_list:
        headers = {
          "ingredients": [
            {
              "quantity": 100,
              "measureURI": "http://www.edamam.com/ontologies/edamam.owl#Measure_gram",
              "foodId": f"{food_id}"
            }
          ]
        }
        downloaded_path = check_if_food_info_downloaded(food_id)
        if downloaded_path is not None:
            with open(downloaded_path, 'r') as f:
                actual_response = json.load(f)
        else:
            response = requests.post(url, json=headers)
            actual_response = response.json()
            save_as_edammam_id_food_info(food_id, actual_response)
        relevant_nutrient_info = actual_response.get('totalNutrients')
        response_df = pd.DataFrame(relevant_nutrient_info)
        save_responses.append(response_df)
    if save_responses:
        reduced_responses = pd.concat(save_responses)
        nutritional_info = process_nutritional_information_ingredient(reduced_responses)
    else:
        nutritional_info = 'NO NUTRITIONAL INFO'
    return nutritional_info

def process_nutritional_information_ingredient(ingredients_df):
    ingredients_df_t = ingredients_df.T
    try:
        ingredients_df_t['real_quantity'] = ingredients_df_t['quantity'].mean(axis=1)
    except ValueError:
        ingredients_df_t['real_quantity'] = ingredients_df_t['quantity']
    except KeyError:
        ingredients_df_t['real_quantity'] = None
    ingredients_df_t = ingredients_df_t.loc[:,~ingredients_df_t.columns.duplicated()].copy()
    try:
        reduced_ingredients_df_t = ingredients_df_t.drop(columns='quantity')
    except:
        reduced_ingredients_df_t = ingredients_df_t
    return reduced_ingredients_df_t.T.to_dict()
    
def delete_files(food_id_list):
    for food_id in food_id_list:
        try:
            path = check_if_food_info_downloaded(food_id)
            if path is not None: 
                os.remove(path)
        except ValueError as e:
            print(e)
            
def get_similarity(x, ingredient):
    return SequenceMatcher(a=ingredient, b=x).ratio()

In [3]:
edammam_flavor_db = pd.read_pickle('../data_discovery/edamam_flavor_dbs.pkl')

In [4]:
edammam_flavor_db.head()

Unnamed: 0,category,entity_id,entity_alias_readable,molecules,search_names,json_path,possible_food_ids
0,animalproduct,0,Egg,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",egg,../data_discovery/edamam/edamam_egg.json,[food_bhpradua77pk16aipcvzeayg732r]
1,bakery,1,Bakery Products,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",bakery-products,../data_discovery/edamam/edamam_bakery-product...,[food_asqoa92bsbr9ehafce252bhzucsr]
2,bakery,2,Bread,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",bread,../data_discovery/edamam/edamam_bread.json,[food_a3049hmbqj5wstaeeb3udaz6uaqv]
3,bakery,3,Rye Bread,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",rye-flour,../data_discovery/edamam/edamam_rye-flour.json,[food_aaunsnzbeyyj8rbkei49wb6nxlii]
4,bakery,4,Wheaten Bread,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",soda-bread,../data_discovery/edamam/edamam_soda-bread.json,[food_aqij0r7bw0uducb4m3uhja52emit]


In [5]:
nutritional_info = edammam_flavor_db.possible_food_ids.apply(get_ingredient_nutritional_info)

In [6]:
edammam_flavor_db['nutritional_info'] = nutritional_info

In [8]:
not_missing_edammam_flavors = edammam_flavor_db[~(edammam_flavor_db.nutritional_info=={})]
not_missing_edammam_flavors

Unnamed: 0,category,entity_id,entity_alias_readable,molecules,search_names,json_path,possible_food_ids,nutritional_info
0,animalproduct,0,Egg,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",egg,../data_discovery/edamam/edamam_egg.json,[food_bhpradua77pk16aipcvzeayg732r],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
1,bakery,1,Bakery Products,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",bakery-products,../data_discovery/edamam/edamam_bakery-product...,[food_asqoa92bsbr9ehafce252bhzucsr],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
2,bakery,2,Bread,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",bread,../data_discovery/edamam/edamam_bread.json,[food_a3049hmbqj5wstaeeb3udaz6uaqv],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
3,bakery,3,Rye Bread,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",rye-flour,../data_discovery/edamam/edamam_rye-flour.json,[food_aaunsnzbeyyj8rbkei49wb6nxlii],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
4,bakery,4,Wheaten Bread,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",soda-bread,../data_discovery/edamam/edamam_soda-bread.json,[food_aqij0r7bw0uducb4m3uhja52emit],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
...,...,...,...,...,...,...,...,...
969,vegetable,969,Yellow zucchini,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",yellow-zucchini,../data_discovery/edamam/edamam_yellow-zucchin...,[food_avpihljbuwpd8ibbmahcabaros5s],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
970,fruit-berry,970,Saskatoon berry,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",saskatoon-berry,../data_discovery/edamam/edamam_saskatoon-berr...,[food_ayfmhzfb476v6aa3a57t0bmhy8zu],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
972,fruit,972,Japanese pumpkin,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",kabocha,../data_discovery/edamam/edamam_kabocha.json,[food_a6aiaf8aokpc4gagdiu88b3cj13t],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."
977,meat,977,Guinea hen,"[{'bond_stereo_count': 0, 'undefined_atom_ster...",guinea-hen,../data_discovery/edamam/edamam_guinea-hen.json,[food_bj8vt3nawa2i11bt8d1reaeyzqrl],"{'ENERC_KCAL': {'label': 'Energy', 'unit': 'kc..."


In [13]:
not_missing_edammam_flavors.to_pickle('../data_discovery/edamam_flavor_dbs_nutrients.pkl')

In [242]:
import re
import string

def clean_ingredient(ing):
    ingredient = ing.strip()
    ingredient = ingredient.translate(str.maketrans('', '', string.punctuation))
    return ingredient

ingredients = "whole grain oats, caramel pieces (cane sugar, palm oil, tapioca syrup, sunflower lecithin, rosemary extract for freshness), maple syrup, praline pecans (pecans, cane sugar, tapioca syrup, coconut oil, salt, rosemary extract for freshness), expeller pressed canola oil, honey, whole oat flour, brown rice syrup, oat bran, cane sugar, ground flax seeds, molasses, sea salt, natural flavor, cinnamon."
ingredients = "enriched flour (wheat flour, niacin, reduced iron, thiamin mononitrate, riboflavin, folic acid), sugar, palm oil, corn starch, salt, natural and artificial flavor, leavening (baking soda, monocalcium phosphate), confectioner's glaze, yellow 5, yellow 6, red 40 lake, red 40, blue 1, soy lecithin, carnauba wax."
find_parenthesis_list = re.findall( r'\(([^()]+)\)', ingredients)
ingredients_without_parenthesis = re.sub( r'\(([^()]+)\)', ',parenthesis,', ingredients)
for parenthesis_element in find_parenthesis_list:
    ingredients_without_parenthesis = ingredients_without_parenthesis.replace('parenthesis', parenthesis_element)

ingredient_list = [clean_ingredient(ingredient) for ingredient in ingredients_without_parenthesis.split(',')]
ingredients

"enriched flour (wheat flour, niacin, reduced iron, thiamin mononitrate, riboflavin, folic acid), sugar, palm oil, corn starch, salt, natural and artificial flavor, leavening (baking soda, monocalcium phosphate), confectioner's glaze, yellow 5, yellow 6, red 40 lake, red 40, blue 1, soy lecithin, carnauba wax."

In [241]:
for ingredient in ingredient_list:
    print(ingredient, get_similarity_to_db(ingredient).entity_alias_readable.to_list())

enriched flour ['Flour', 'Rye Bread']
wheat flour ['Flour', 'Wheat', 'Rye Bread']
niacin ['Icing', 'Hyacinth', 'Acorn', 'Onion', 'Nance', 'Cumin', 'Pistachio']
reduced iron ['Red Currant']
thiamin mononitrate []
riboflavin ['Climbing bean']
folic acid ['Focaccia']
 []
sugar ['Sugar', 'Agar', 'Squab', 'Shark', 'Garlic', 'Bulgur']
palm oil ['Chamomile', 'Salmon', 'Almond', 'Peanut Oil', 'Canola Oil', 'Pink salmon']
corn starch ['Ostrich', 'Conch', 'Corn chip', 'Corn salad', 'Corn']
salt ['Salt', 'Malt', 'Shallot', 'Spelt', 'Smelt', 'Snail', 'Walnut', 'Salmon', 'Scallop', 'Muskrat']
natural and artificial flavor []
leavening ['Leavening agent', 'Beaver', 'Shortening']
wheat flour ['Flour', 'Wheat', 'Rye Bread']
niacin ['Icing', 'Hyacinth', 'Acorn', 'Onion', 'Nance', 'Cumin', 'Pistachio']
reduced iron ['Red Currant']
thiamin mononitrate []
riboflavin ['Climbing bean']
folic acid ['Focaccia']
 []
confectioners glaze []
yellow 5 ['Phyllo dough', 'Yellowfin tuna', 'Yellow zucchini']
yellow 6 

In [231]:
ingredient ='grain oats'

def get_similarity_to_db(ingredient):
    similarity = not_missing_edammam_flavors.search_names.apply(
        get_similarity, ingredient=ingredient
    ).sort_values(ascending=False)

    similarity = similarity[similarity > 0.5]

    return not_missing_edammam_flavors.loc[
        similarity.index
    ]

In [204]:
#not_missing_edammam_flavors.search_names.unique()
