# Integrate FAO data with the non-vegetarian recipes

In [1]:
import pandas as pd
import requests
from pathlib import Path
import json
from pathlib import Path
import sys
import spacy

# Treat the parent of the current working directory as the project root and
# add it to the import path; the `from src.utils import *` line below relies
# on this to resolve the `src` package.
# NOTE(review): this depends on the kernel's working directory being one level
# below the project root — confirm how the notebook is launched.
project_root = Path.cwd().parent
sys.path.append(str(project_root))

from src.utils import *

In [2]:
# Input and output locations for the non-vegetarian pass.
# project_root is already a Path, so it can be joined with "/" directly.
DATA_DIR = project_root / "data"
RECIPES_FILE = DATA_DIR / "recipes_non_vege_with_ecv.json"
FAO_FILE = DATA_DIR / "fao_clean.json"
OUTPUT_FILE = DATA_DIR / "recipes_non_vege_clean.json"

In [3]:
# Read the whole recipe file as UTF-8 text and parse it as JSON.
recipes = json.loads(RECIPES_FILE.read_text(encoding="utf-8"))

# Display how many recipes were loaded.
len(recipes)

1097

In [4]:
# Gather the ingredients referenced by the loaded recipes.
# get_unique_ingredients is not defined in this notebook; presumably it is
# provided by the star import of src.utils — confirm its return type there.
unique_ing = get_unique_ingredients(recipes)

# Display how many entries were returned.
len(unique_ing)

152

In [5]:
# Canonical ingredient name -> list of variant spellings that should collapse
# onto it. The next cell inverts this table into reverse_map (variant ->
# canonical), which is handed to normalize_ingredient.
#
# Several variants are written without accents or in truncated form
# ("cote de porc", "sirop de erabl", "poireal"); presumably they mirror what
# the normalization step produces — TODO confirm before "correcting" them.
# Some groups ("lapin", "beurre", "lentille", "tofu", "pomme", "banane",
# "sirop d'érable") do not list the canonical name itself among the variants.
ingredient_groups = {
    "poulet": ["poulet", "aile de poulet", "cuisse de poulet", "blanc de poulet", "pilons de poulet", "filet de poulet", "escalope de poulet"],
    "canard": ["canard", "magret de canard", "aiguillette de canard", "confit de canard", "foie gras de canard"],
    "veau": ["veau", "escalope de veau", "foie de veau", "rôti de veau", "ris de veau", "langue de veau"],
    # NOTE(review): "e chine de porc" looks like a mangled "échine de porc" — verify against the raw ingredient strings.
    "porc": ["porc", "cote de porc", "rôti de porc", "travers de porc", "e chine de porc", "jambon"],
    "boeuf": ["boeuf", "haché de boeuf", "cote de boeuf", "rôti de boeuf", "jarret de boeuf"],
    "tomate": ["tomate", "coulis de tomate", "purée de tomate", "pulpe de tomate", "concentré de tomate", "sauce tomate"],
    "yaourt": ["yaourt", "yaourts", "yaourt nature", "yaourt à la grecque", "yaourts brassés"],
    "oignon": ["oignon", "oignons nouveaux", "oignon nouveau"],
    "oeuf": ["oeuf", "oeufs", "blancs d'oeuf", "jaunes d'oeuf"],
    "carotte": ["carotte", "carottes"],
    "poireau": ["poireau", "poireaux", "poireal"],
    "courgette": ["courgette", "courgettes"],
    "pomme de terre": ["pomme de terre", "pommes de terre"],
    "riz": ["riz", "riz basmati", "riz thaï", "riz rond"],
    "saumon": ["saumon", "saumon frais", "saumon fumé"],
    "thon": ["thon", "thon rouge"],
    "lapin": ["lapin entier", "rable de lapin", "cuisse de lapin"],
    # NOTE(review): "petit beurre", "beurre de cacahuete" and "beurre de amande" are mapped to butter here — confirm this is intended.
    "beurre": ["beurre sale", "beurre tendre", "petit beurre", "beurre de cacahuete", "beurre doux", "beurre demi-sel", "beurre de amande", "beurre allege", "beurre special cuisson lui et vir"],
    "lentille": ["lentille verte", "lentille corail"],
    "tofu": ["tofu soyeux"],
    # NOTE(review): "pommeau" is mapped to "pomme" — confirm this is intended.
    "pomme": ["pommeau"],
    "banane": ["peau de banane", "jus de banane"],
    "sirop d'érable": ["sirop de erabl"]
}


In [6]:
# French spaCy pipeline handed to normalize_ingredient.
nlp = spacy.load("fr_core_news_sm")

# Invert ingredient_groups into a flat lookup: variant spelling -> canonical
# name. If a variant were listed under two groups, the later group would win.
reverse_map = dict(
    (variant, canonical)
    for canonical, variants in ingredient_groups.items()
    for variant in variants
)

In [7]:
# Run every raw ingredient through normalize_ingredient and keep the distinct
# results (a set, so iteration/print order is not stable between runs).
normalized_ing = set(
    normalize_ingredient(raw_name, nlp, reverse_map) for raw_name in unique_ing
)
print(normalized_ing)

{'lentille', 'saumon', 'boeuf', 'yaourt', 'beurre', 'crevette', 'carotte', 'lapin', 'porc', 'lieu noir', 'potiron', 'thon', 'poulet', 'veau', 'riz', 'mangue', 'oeuf', 'pomme', 'canard', 'courgette', 'poireau', 'banane', 'pomme de terre', 'moule', 'tomate', 'avocat', 'pate avoir pizza', 'bleu de auvergne', 'boulgour', 'crevette rose'}


In [8]:
# Display the number of distinct normalized ingredients.
len(normalized_ing)

30

In [9]:
# Read the cleaned FAO file as UTF-8 text and parse it as JSON.
fao_table = json.loads(FAO_FILE.read_text(encoding="utf-8"))

In [10]:
# Match the normalized ingredients against the FAO table.
# ing_to_fao_match is not defined in this notebook (presumably from
# src.utils); the structure of its result is not visible here.
ing_to_fao = ing_to_fao_match(fao_table, normalized_ing)

In [11]:
# NOTE(review): ing_with_empty_fao is not read by any later cell in this
# notebook; presumably it is kept for manually inspecting which ingredients
# lack FAO data — confirm, or display it here.
ing_with_empty_fao = get_empty_fao(ing_to_fao)


In [12]:
# Hand-entered nutrition figures per gram of ingredient (energy in kcal,
# protein and fat). The table is passed to get_fao_info() for both the
# non-vegetarian and the vegetarian pass; presumably it fills in ingredients
# that have no usable FAO match — confirm in src.utils.
#
# Keys must be unique: in a dict literal Python keeps only the LAST value given
# for a repeated key, so a duplicate row silently discards the earlier figures.
# "courgette" and "pomme de terre" were each listed twice; they are now listed
# once, with the values that were actually in effect (the later rows), so the
# resulting dict is unchanged for those keys.
ing_nutrition = {
    "lapin": {"kcal_per_g": 1.75, "protein_per_g": 0.314, "fat_per_g": 0.071},
    "crevette rose": {"kcal_per_g": 0.88, "protein_per_g": 0.18, "fat_per_g": 0.01},
    # Earlier duplicate row (1.68 / 0.03 / 0.002) was dead and has been removed.
    "pomme de terre": {"kcal_per_g": 0.90, "protein_per_g": 0.02, "fat_per_g": 0},
    "crevette": {"kcal_per_g": 0.88, "protein_per_g": 0.18, "fat_per_g": 0.01},
    "veau": {"kcal_per_g": 1.17, "protein_per_g": 0.2098, "fat_per_g": 0.0308},
    "saumon": {"kcal_per_g": 2.06, "protein_per_g": 0.22, "fat_per_g": 0.13},
    "pate avoir pizza": {"kcal_per_g": 2.91, "protein_per_g": 0.07, "fat_per_g": 0.07},
    "thon": {"kcal_per_g": 2.20, "protein_per_g": 0.33, "fat_per_g": 0.044},
    # Earlier duplicate row (0.17 / 0.012 / 0.003) was dead and has been removed.
    "courgette": {"kcal_per_g": 0.18, "protein_per_g": 0.02, "fat_per_g": 0.004},
    "bleu de auvergne": {"kcal_per_g": 3.53, "protein_per_g": 0.21, "fat_per_g": 0.28},
    "lieu noir": {"kcal_per_g": 1.05, "protein_per_g": 0.23, "fat_per_g": 0.01},
    "sirop d'érable": {"kcal_per_g": 3.67, "protein_per_g": 0, "fat_per_g": 0},
    # NOTE(review): 0.46 g of fat per gram is hard to reconcile with 2.57 kcal/g,
    # and this key does not appear in either printed set of normalized
    # ingredients (they contain "pate avoir pizza") — verify or drop this row.
    "pâte à pizza": {"kcal_per_g": 2.57, "protein_per_g": 0.23, "fat_per_g": 0.46},
    "feta": {"kcal_per_g": 2.64, "protein_per_g": 0.21, "fat_per_g": 0.21},
    "blette": {"kcal_per_g": 0.30, "protein_per_g": 0.02, "fat_per_g": 0.002},
    # Was protein 0.75 / fat 1.04, which cannot be per-gram mass fractions
    # (more than 1 g of fat per gram of food). Aligned with the
    # "bleu de auvergne" row, which has the same energy density.
    # TODO confirm against the original nutrition source.
    "bleu": {"kcal_per_g": 3.53, "protein_per_g": 0.21, "fat_per_g": 0.28},
    "tofu": {"kcal_per_g": 0.83, "protein_per_g": 0.08, "fat_per_g": 0.053},
    "liqueur de pomme": {"kcal_per_g": 3.67, "protein_per_g": 0, "fat_per_g": 0},
    "farine de lentille": {"kcal_per_g": 3.46, "protein_per_g": 0.25, "fat_per_g": 0.02},
    "toblerone": {"kcal_per_g": 5.56, "protein_per_g": 0.06, "fat_per_g": 0.31},
    "baguette": {"kcal_per_g": 2.80, "protein_per_g": 0.09, "fat_per_g": 0.03}
}


In [13]:
# Combine the FAO matches with the hand-entered ing_nutrition table.
# Presumably this fills in ingredients that had no FAO match — confirm in
# src.utils. The call rebinds ing_to_fao, so re-running this cell on its own
# feeds the helper its own previous output.
ing_to_fao = get_fao_info(ing_to_fao, ing_nutrition)

In [14]:
# Pass the mapping through normalize_fao_info (not defined in this notebook;
# what it normalizes is not visible here — see src.utils). Rebinds ing_to_fao.
ing_to_fao = normalize_fao_info(ing_to_fao)

In [15]:
# Compute per-recipe nutrients from the ingredient -> FAO mapping.
# Rebinds `recipes`; whether the helper also mutates the input list in place
# is not visible here — confirm in src.utils.
recipes = calculate_recipe_nutrients(recipes, ing_to_fao)

In [16]:
# Write the recipes returned by calculate_recipe_nutrients to OUTPUT_FILE as
# UTF-8 JSON, keeping non-ASCII characters as-is and indenting by 4 spaces.
with OUTPUT_FILE.open("w", encoding="utf-8") as sink:
    json.dump(recipes, sink, ensure_ascii=False, indent=4)

# Same procedure for the vegetarian recipes

In [17]:
# Point the input/output paths at the vegetarian files.
# This rebinds RECIPES_FILE and OUTPUT_FILE, so re-running an earlier cell
# after this one would read/write the vegetarian files instead.
RECIPES_FILE = DATA_DIR / "recipes_vege_with_ecv.json"
OUTPUT_FILE = DATA_DIR / "recipes_vege_clean.json"

In [18]:
# Read the recipe file currently referenced by RECIPES_FILE as UTF-8 text and
# parse it as JSON; this replaces the previously loaded `recipes`.
recipes = json.loads(RECIPES_FILE.read_text(encoding="utf-8"))

# Display how many recipes were loaded.
len(recipes)

990

In [19]:
# Gather the ingredients referenced by the recipes loaded in the previous cell
# (get_unique_ingredients is presumably provided by src.utils), then display
# how many were returned.
unique_ing = get_unique_ingredients(recipes)
len(unique_ing)

94

In [20]:
# Run every raw ingredient through normalize_ingredient, reusing the nlp
# pipeline and reverse_map built earlier, and keep the distinct results.
normalized_ing = set(
    normalize_ingredient(raw_name, nlp, reverse_map) for raw_name in unique_ing
)
print(normalized_ing)

{'lentille', 'yaourt', 'beurre', 'carotte', "sirop d'érable", 'farine de lentille', 'feta', 'potiron', 'blette', 'riz', 'mangue', 'liqueur de pomme', 'oeuf', 'toblerone', 'pomme', 'courgette', 'poireau', 'banane', 'tofu', 'bleu', 'pomme de terre', 'laitue', 'tomate', 'avocat', 'pate avoir pizza', 'boulgour', 'baguette'}


In [21]:
# Match the current normalized ingredients against the FAO table loaded
# earlier; this replaces the ing_to_fao built in the first pass.
ing_to_fao = ing_to_fao_match(fao_table, normalized_ing)

In [22]:
# NOTE(review): as in the first pass, ing_with_empty_fao is not read by any
# later cell in this notebook; presumably kept for manual inspection — confirm.
ing_with_empty_fao = get_empty_fao(ing_to_fao)

In [23]:
# Combine the FAO matches with the same hand-entered ing_nutrition table used
# in the first pass. Rebinds ing_to_fao.
ing_to_fao = get_fao_info(ing_to_fao, ing_nutrition)

In [24]:
# Pass the mapping through normalize_fao_info (see src.utils for what is
# normalized — not visible here). Rebinds ing_to_fao.
ing_to_fao = normalize_fao_info(ing_to_fao)

In [25]:
# Compute per-recipe nutrients for the currently loaded recipes from the
# ingredient -> FAO mapping. Rebinds `recipes`.
recipes = calculate_recipe_nutrients(recipes, ing_to_fao)

In [26]:
# Write the recipes returned by calculate_recipe_nutrients to OUTPUT_FILE as
# UTF-8 JSON, keeping non-ASCII characters as-is and indenting by 4 spaces.
with OUTPUT_FILE.open("w", encoding="utf-8") as sink:
    json.dump(recipes, sink, ensure_ascii=False, indent=4)

# Combine the datasets

In [27]:
# Inputs: the two cleaned files written by the passes above.
# Output: a single combined file. Note that OUTPUT_FILE is rebound here a
# third time.
RECIPES_VEGE = DATA_DIR / "recipes_vege_clean.json"
RECIPES_NON_VEGE = DATA_DIR / "recipes_non_vege_clean.json"
OUTPUT_FILE = DATA_DIR / "all_recipes_clean.json"

In [28]:
# Reload both cleaned recipe files from disk (UTF-8 JSON).
recipes_vege = json.loads(RECIPES_VEGE.read_text(encoding="utf-8"))
recipes_non_vege = json.loads(RECIPES_NON_VEGE.read_text(encoding="utf-8"))


In [29]:
# Tag every recipe in place with an "is_vege" flag (1 for the vegetarian
# file, 0 for the non-vegetarian file), then build the combined list with
# the vegetarian recipes first.
for flag, group in ((1, recipes_vege), (0, recipes_non_vege)):
    for entry in group:
        entry["is_vege"] = flag

all_recipes = [*recipes_vege, *recipes_non_vege]

In [30]:
# Display the size of the combined list (the sum of both inputs' lengths).
len(all_recipes)

2087

In [31]:
# Write the combined, flagged recipes to OUTPUT_FILE as UTF-8 JSON, keeping
# non-ASCII characters as-is and indenting by 2 spaces.
with OUTPUT_FILE.open("w", encoding="utf-8") as sink:
    json.dump(all_recipes, sink, ensure_ascii=False, indent=2)