Open Food Facts 

In [1]:
import requests
import pandas as pd

In [4]:
def get_openfoodfacts_data(search_term):
    url = f"https://world.openfoodfacts.org/cgi/search.pl?search_terms={search_term}&search_simple=1&action=process&json=1"
    
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        
        if data['products']:
            # Créer un DataFrame avec les informations pertinentes
            products = []
            for product in data['products']:
                products.append({
                    'name': product.get('product_name', ''),
                    'code': product.get('code', ''),
                    'energy_100g': product.get('nutriments', {}).get('energy-kcal_100g', None),
                    'proteins_100g': product.get('nutriments', {}).get('proteins_100g', None),
                    'fat_100g': product.get('nutriments', {}).get('fat_100g', None),
                    'carbohydrates_100g': product.get('nutriments', {}).get('carbohydrates_100g', None),
                    'vitamins': product.get('nutriments', {}).get('vitamins', {}),
                    'minerals': product.get('nutriments', {}).get('minerals', {})
                })
            
            return pd.DataFrame(products)
        else:
            print("Aucun produit trouvé.")
            return pd.DataFrame()
            
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la requête: {e}")
        return pd.DataFrame()

# Exemple d'utilisation
df = get_openfoodfacts_data("pomme")
print(df.head())
len(df)

                                      name           code  energy_100g  \
0        Tartine croustillante Authentique  7300400481595        336.0   
1                Lightly sea salted crisps  5060042641000        520.0   
2                                   FIBRES  7300400481588        333.0   
3  Krisprolls complets sans sucres ajoutés  7311070032611        400.0   
4      Tartines craquantes au sarrasin imp       06175700        388.0   

   proteins_100g  fat_100g  carbohydrates_100g vitamins minerals  
0            9.0       1.5                62.0       {}       {}  
1            7.0      29.3                53.3       {}       {}  
2           13.0       5.0                46.0       {}       {}  
3           13.0       6.6                68.0       {}       {}  
4           13.4       2.8                75.0       {}       {}  


50

Pas intéressante pas de vitamines et de minéraux enregistré

USDA FoodData Central

In [5]:
USDA_API_KEY = "NF6htPXty8W2OCcONW4Su9zALw0QQaIKqfrQXvqf" 

def search_usda_foods(query, max_results=10):
    url = "https://api.nal.usda.gov/fdc/v1/foods/search"
    
    params = {
        'api_key': USDA_API_KEY,
        'query': query,
        'pageSize': max_results,
        'dataType': ["Survey (FNDDS)", "Foundation", "Branded"]
    }
    
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()  # Vérifie les erreurs HTTP
        
        data = response.json()
        return parse_usda_data(data)
        
    except requests.exceptions.RequestException as e:
        print(f"Erreur API: {e}")
        return pd.DataFrame()

def parse_usda_data(api_response):
    foods = []
    
    for food in api_response.get('foods', []):
        # Créer un dictionnaire pour les nutriments
        nutrients = {n['nutrientName']: n['value'] for n in food.get('foodNutrients', [])}
        
        # Structurez les données importantes
        food_data = {
            'id': food['fdcId'],
            'nom': food.get('description', 'Inconnu'),
            'marque': food.get('brandOwner', ''),
            'type': food.get('dataType', ''),
            'calories': nutrients.get('Energy', None),
            'protéines': nutrients.get('Protein', None),
            'lipides': nutrients.get('Total lipid (fat)', None),
            'glucides': nutrients.get('Carbohydrate, by difference', None),
            'fibres': nutrients.get('Fiber, total dietary', None),
            'sucres': nutrients.get('Sugars, total', None),
            'vitamine_c': nutrients.get('Vitamin C, total ascorbic acid', None),
            'calcium': nutrients.get('Calcium, Ca', None),
            'fer': nutrients.get('Iron, Fe', None)
        }
        
        foods.append(food_data)
    
    return pd.DataFrame(foods)

In [None]:
df = search_usda_foods("apple", max_results=5)
df

Unnamed: 0,id,nom,marque,type,calories,protéines,lipides,glucides,fibres,sucres,vitamine_c,calcium,fer
0,454004,APPLE,TREECRISP 2 GO,Branded,52.0,0.0,0.65,14.3,3.2,,3.1,0.0,0.23
1,2117388,APPLE,"Associated Wholesale Grocers, Inc.",Branded,46.0,0.0,0.0,11.7,,,32.5,,
2,2124902,APPLE,Oneonta Trading Corporation,Branded,54.0,0.41,0.0,14.0,2.1,,2.0,8.0,0.15
3,2709294,"Apple, candied",,Survey (FNDDS),134.0,1.34,2.15,29.61,1.8,,3.6,39.0,0.12
4,2709215,"Apple, raw",,Survey (FNDDS),61.0,0.17,0.15,14.8,2.1,,4.6,5.0,0.03


In [9]:
from collections import defaultdict


def get_all_nutrients(fdc_id):
    """Récupère tous les nutriments pour un aliment spécifique"""
    url = f"https://api.nal.usda.gov/fdc/v1/food/{fdc_id}?api_key={USDA_API_KEY}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Erreur pour l'aliment {fdc_id}: {e}")
        return None

def extract_vitamins_minerals(nutrient_data):
    """Extrait spécifiquement vitamines et minéraux"""
    vitamins = []
    minerals = []
    
    for nutrient in nutrient_data.get('foodNutrients', []):
        name = nutrient.get('nutrientName', '')
        unit = nutrient.get('unitName', '')
        value = nutrient.get('value', 0)
        derivation = nutrient.get('derivationDescription', '')
        
        # Filtre pour vitamines
        if 'vitamin' in name.lower() or any(v in name.lower() for v in ['folate', 'retinol']):
            vitamins.append({
                'nutrient': name,
                'amount': value,
                'unit': unit,
                'derivation': derivation
            })
        
        # Filtre pour minéraux
        elif any(m in name.lower() for m in ['mineral', 'calcium', 'iron', 'magnesium', 
                                           'phosphorus', 'potassium', 'sodium', 'zinc',
                                           'copper', 'manganese', 'selenium', 'iodine']):
            minerals.append({
                'nutrient': name,
                'amount': value,
                'unit': unit,
                'derivation': derivation
            })
    
    return vitamins, minerals

def search_foods_with_details(query, max_results=5):
    """Recherche avancée avec détails complets"""
    search_url = "https://api.nal.usda.gov/fdc/v1/foods/search"
    params = {
        'api_key': USDA_API_KEY,
        'query': query,
        'pageSize': max_results,
        'dataType': ["Survey (FNDDS)", "Foundation", "Branded"]
    }
    
    try:
        # Recherche initiale
        response = requests.get(search_url, params=params)
        response.raise_for_status()
        search_data = response.json()
        
        results = []
        
        # Pour chaque résultat, obtenir les détails complets
        for food in search_data.get('foods', []):
            fdc_id = food['fdcId']
            details = get_all_nutrients(fdc_id)
            
            if details:
                vitamins, minerals = extract_vitamins_minerals(details)
                
                food_info = {
                    'id': fdc_id,
                    'nom': details.get('description', 'Inconnu'),
                    'marque': details.get('brandOwner', 'Non spécifié'),
                    'catégorie': details.get('foodCategory', 'Non spécifié'),
                    'vitamines': vitamins,
                    'minéraux': minerals,
                    'source': details.get('dataType', '')
                }
                
                results.append(food_info)
        
        return results
    
    except requests.exceptions.RequestException as e:
        print(f"Erreur de recherche: {e}")
        return []

def format_results_for_display(results):
    """Formate les résultats pour une lecture facile"""
    formatted = []
    
    for item in results:
        # Formatage des vitamines
        vitam_str = "\n".join([
            f"- {v['nutrient']}: {v['amount']} {v['unit']} ({v['derivation']})" 
            for v in item['vitamines']
        ])
        
        # Formatage des minéraux
        miner_str = "\n".join([
            f"- {m['nutrient']}: {m['amount']} {m['unit']} ({m['derivation']})" 
            for m in item['minéraux']
        ])
        
        formatted.append(
            f"\nNom: {item['nom']} ({item['marque']})\n"
            f"Catégorie: {item['catégorie']}\n"
            f"Source: {item['source']}\n\n"
            f"VITAMINES:\n{vitam_str}\n\n"
            f"MINÉRAUX:\n{miner_str}\n"
            f"{'-'*50}"
        )
    
    return "\n".join(formatted)

In [11]:
query = input("Entrez un aliment à rechercher: ")
    
results = search_foods_with_details(query, max_results=3)
    
if results:
    print("\nRÉSULTATS:")
    print(format_results_for_display(results))
        
        # Export en CSV si besoin
    df = pd.DataFrame([{
        'Nom': r['nom'],
        'Marque': r['marque'],
        **{v['nutrient']: f"{v['amount']} {v['unit']}" for v in r['vitamines']},
        **{m['nutrient']: f"{m['amount']} {m['unit']}" for m in r['minéraux']}
    } for r in results])
        
    df.to_csv(f"nutrition_{query}.csv", index=False, encoding='utf-8-sig')
    print(f"\nDonnées sauvegardées dans nutrition_{query}.csv")
else:
    print("Aucun résultat trouvé.")


RÉSULTATS:

Nom: BROCCOLI (Wal-Mart Stores, Inc.)
Catégorie: Non spécifié
Source: Branded

VITAMINES:


MINÉRAUX:

--------------------------------------------------

Nom: BROCCOLI (Curation Foods, Inc.)
Catégorie: Non spécifié
Source: Branded

VITAMINES:


MINÉRAUX:

--------------------------------------------------

Nom: BROCCOLI (OCEAN MIST)
Catégorie: Non spécifié
Source: Branded

VITAMINES:


MINÉRAUX:

--------------------------------------------------

Données sauvegardées dans nutrition_Broccoli.csv


In [13]:
import requests
import pandas as pd
from pprint import pp

def explore_all_fields(api_key, food_name="apple", max_results=1):
    """Fonction pour explorer toutes les colonnes disponibles"""
    # 1. Recherche initiale
    search_url = "https://api.nal.usda.gov/fdc/v1/foods/search"
    params = {
        'api_key': api_key,
        'query': food_name,
        'pageSize': max_results
    }
    
    try:
        # 2. Obtenir les premiers résultats
        response = requests.get(search_url, params=params)
        response.raise_for_status()
        search_data = response.json()
        
        if not search_data['foods']:
            print("Aucun résultat trouvé.")
            return

        # 3. Prendre le premier aliment et obtenir ses données complètes
        fdc_id = search_data['foods'][0]['fdcId']
        detail_url = f"https://api.nal.usda.gov/fdc/v1/food/{fdc_id}"
        detail_response = requests.get(detail_url, params={'api_key': api_key})
        detail_data = detail_response.json()
        
        print("\nSTRUCTURE COMPLÈTE DES DONNÉES:")
        pp(detail_data, depth=2, width=100, compact=True)
        
        # 4. Extraire toutes les colonnes possibles
        print("\n\nCATÉGORIES PRINCIPALES ET SOUS-CATÉGORIES:")
        explore_structure(detail_data)
        
        # 5. Extraire tous les nutriments disponibles
        print("\n\nLISTE COMPLÈTE DES NUTRIMENTS:")
        nutrients = detail_data.get('foodNutrients', [])
        all_nutrients = sorted(list({(n['nutrient']['name'], n['nutrient']['unitName']) for n in nutrients}), key=lambda x: x[0])
        
        for nutrient, unit in all_nutrients:
            print(f"- {nutrient} ({unit})")
            
        return detail_data
        
    except requests.exceptions.RequestException as e:
        print(f"Erreur API: {e}")

def explore_structure(data, prefix=""):
    """Fonction récursive pour explorer la structure des données"""
    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                print(f"{prefix}{key}: {type(value).__name__}")
                explore_structure(value, prefix + "  ")
    elif isinstance(data, list) and data:
        explore_structure(data[0], prefix)

In [18]:
food_to_explore = "broccoli" 
    
print(f"Exploration des données pour: {food_to_explore}")
complete_data = explore_all_fields(USDA_API_KEY, food_to_explore)
    
if complete_data:
    import json
    with open(f"usda_structure_{food_to_explore}.json", "w") as f:
        json.dump(complete_data, f, indent=2)
    print(f"\nDonnées complètes sauvegardées dans usda_structure_{food_to_explore}.json")

Exploration des données pour: broccoli

STRUCTURE COMPLÈTE DES DONNÉES:
{'discontinuedDate': '',
 'foodComponents': [],
 'foodAttributes': [{...}, {...}, {...}],
 'foodPortions': [],
 'fdcId': 2549992,
 'description': 'BROCCOLI',
 'publicationDate': '5/25/2023',
 'foodNutrients': [{...}, {...}, {...}, {...}, {...}, {...}, {...}, {...}, {...}, {...}, {...},
                   {...}, {...}, {...}],
 'dataType': 'Branded',
 'foodClass': 'Branded',
 'shortDescription': '',
 'modifiedDate': '4/3/2023',
 'availableDate': '4/3/2023',
 'brandOwner': 'Wal-Mart Stores, Inc.',
 'brandName': 'GREAT VALUE',
 'dataSource': 'LI',
 'brandedFoodCategory': 'Frozen Vegetables',
 'gtinUpc': '078742237329',
 'householdServingFullText': '1 cup',
 'ingredients': 'BROCCOLI FLORETS.',
 'marketCountry': 'United States',
 'servingSize': 85.0,
 'servingSizeUnit': 'GRM',
 'packageWeight': '12 oz/340 g',
 'foodUpdateLog': [{...}, {...}, {...}, {...}, {...}],
 'labelNutrients': {'fat': {...},
                    'sa

In [19]:
import json
from collections import defaultdict

def lister_tous_nutriments(fichier_json):
    # Charger le fichier JSON
    with open(fichier_json, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    # Initialiser un dictionnaire pour catégoriser les nutriments
    categories = defaultdict(list)
    
    # Parcourir tous les nutriments
    for nutriment in data.get('foodNutrients', []):
        nom = nutriment.get('nutrient', {}).get('name', 'Inconnu')
        unite = nutriment.get('nutrient', {}).get('unitName', '')
        
        # Catégorisation des nutriments
        if 'vitamin' in nom.lower():
            categories['Vitamines'].append(f"{nom} ({unite})")
        elif any(mineral in nom.lower() for mineral in ['sodium', 'calcium', 'iron', 'potassium', 'magnesium', 'zinc', 'phosphorus']):
            categories['Minéraux'].append(f"{nom} ({unite})")
        elif 'fat' in nom.lower() or 'lipid' in nom.lower():
            categories['Lipides'].append(f"{nom} ({unite})")
        elif 'protein' in nom.lower():
            categories['Protéines'].append(f"{nom} ({unite})")
        elif 'carbohydrate' in nom.lower():
            categories['Glucides'].append(f"{nom} ({unite})")
        elif 'acid' in nom.lower():
            categories['Acides Aminés'].append(f"{nom} ({unite})")
        elif 'fiber' in nom.lower():
            categories['Fibres'].append(f"{nom} ({unite})")
        else:
            categories['Autres'].append(f"{nom} ({unite})")
    
    # Afficher les résultats par catégorie
    for categorie, nutriments in categories.items():
        print(f"\n=== {categorie.upper()} ===")
        for nut in sorted(nutriments):
            print(f"- {nut}")

# Utilisation
if __name__ == "__main__":
    fichier_json = "usda_structure_banana.json"  # Remplacez par votre fichier
    lister_tous_nutriments(fichier_json)


=== LIPIDES ===
- Fatty acids, total monounsaturated (g)
- Fatty acids, total polyunsaturated (g)
- Fatty acids, total saturated (g)
- Fatty acids, total trans (g)
- Total lipid (fat) (g)

=== MINÉRAUX ===
- Calcium, Ca (mg)
- Iron, Fe (mg)
- Sodium, Na (mg)

=== PROTÉINES ===
- Protein (g)

=== AUTRES ===
- Cholesterol (mg)
- Energy (kcal)
- Total Sugars (g)

=== VITAMINES ===
- Vitamin A, IU (IU)
- Vitamin C, total ascorbic acid (mg)

=== GLUCIDES ===
- Carbohydrate, by difference (g)

=== FIBRES ===
- Fiber, total dietary (g)


In [20]:
def analyser_nutriments_complet(fichier_json):
    with open(fichier_json, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    # Dictionnaire plus détaillé
    classification = {
        'Vitamines': [],
        'Minéraux': [],
        'Acides Gras': [],
        'Acides Aminés': [],
        'Énergie': [],
        'Macronutriments': [],
        'Composés Phytochimiques': [],
        'Autres': []
    }
    
    # Mapping des motifs pour classification automatique
    motifs = {
        'Vitamines': ['vitamin', 'retinol', 'tocopherol', 'carotene', 'folate', 'niacin', 'riboflavin'],
        'Minéraux': ['calcium', 'iron', 'magnesium', 'phosphorus', 'potassium', 'sodium', 'zinc', 'copper', 'manganese', 'selenium', 'iodine', 'chromium', 'fluoride', 'molybdenum', 'chloride'],
        'Acides Gras': ['fatty acid', 'saturated', 'monounsaturated', 'polyunsaturated', 'omega', 'cholesterol', 'trans fat'],
        'Acides Aminés': ['tryptophan', 'threonine', 'isoleucine', 'leucine', 'lysine', 'methionine', 'cystine', 'phenylalanine', 'tyrosine', 'valine', 'arginine', 'histidine', 'alanine', 'aspartic acid', 'glutamic acid', 'glycine', 'proline', 'serine'],
        'Énergie': ['energy', 'calorie'],
        'Macronutriments': ['protein', 'carbohydrate', 'fiber', 'sugar', 'water'],
        'Composés Phytochimiques': ['phytosterol', 'polyphenol', 'flavonoid', 'anthocyanin', 'carotenoid', 'lycopene']
    }
    
    # Analyse
    for nutriment in data.get('foodNutrients', []):
        nom = nutriment.get('nutrient', {}).get('name', '').lower()
        unite = nutriment.get('nutrient', {}).get('unitName', '')
        valeur = nutriment.get('amount', 0)
        
        trouve = False
        for categorie, termes in motifs.items():
            if any(terme in nom for terme in termes):
                classification[categorie].append({
                    'nom': nutriment['nutrient']['name'],
                    'unite': unite,
                    'valeur': valeur
                })
                trouve = True
                break
        
        if not trouve:
            classification['Autres'].append({
                'nom': nutriment['nutrient']['name'],
                'unite': unite,
                'valeur': valeur
            })
    
    # Génération du rapport
    print("=== RAPPORT COMPLET DES NUTRIMENTS ===")
    print(f"Aliment analysé: {data.get('description', 'Inconnu')}\n")
    
    for categorie, nutriments in classification.items():
        if nutriments:
            print(f"\n◆ {categorie.upper()} ({len(nutriments)} nutriments)")
            for nut in sorted(nutriments, key=lambda x: x['nom']):
                print(f"- {nut['nom']}: {nut['valeur']} {nut['unite']}")
    
    # Statistiques
    total_nutriments = sum(len(v) for v in classification.values())
    print(f"\nTotal de nutriments identifiés: {total_nutriments}")
    print("Répartition par catégorie:")
    for categorie, nutriments in classification.items():
        if nutriments:
            print(f"- {categorie}: {len(nutriments)} ({len(nutriments)/total_nutriments:.1%})")

# Utilisation
analyser_nutriments_complet("usda_structure_banana.json")

=== RAPPORT COMPLET DES NUTRIMENTS ===
Aliment analysé: BANANA


◆ VITAMINES (2 nutriments)
- Vitamin A, IU: 0.0 IU
- Vitamin C, total ascorbic acid: 15.0 mg

◆ MINÉRAUX (3 nutriments)
- Calcium, Ca: 125.0 mg
- Iron, Fe: 1.12 mg
- Sodium, Na: 594.0 mg

◆ ACIDES GRAS (5 nutriments)
- Cholesterol: 0.0 mg
- Fatty acids, total monounsaturated: 3.12 g
- Fatty acids, total polyunsaturated: 3.12 g
- Fatty acids, total saturated: 0.0 g
- Fatty acids, total trans: 0.0 g

◆ ÉNERGIE (1 nutriments)
- Energy: 312.0 kcal

◆ MACRONUTRIMENTS (4 nutriments)
- Carbohydrate, by difference: 40.62 g
- Fiber, total dietary: 6.2 g
- Protein: 12.5 g
- Total Sugars: 6.25 g

◆ AUTRES (1 nutriments)
- Total lipid (fat): 6.25 g

Total de nutriments identifiés: 16
Répartition par catégorie:
- Vitamines: 2 (12.5%)
- Minéraux: 3 (18.8%)
- Acides Gras: 5 (31.2%)
- Énergie: 1 (6.2%)
- Macronutriments: 4 (25.0%)
- Autres: 1 (6.2%)


In [17]:
import pandas as pd

def exporter_vers_csv(fichier_json, output_file):
    with open(fichier_json, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    rows = []
    for nutriment in data.get('foodNutrients', []):
        rows.append({
            'Nutriment': nutriment['nutrient']['name'],
            'Valeur': nutriment.get('amount', 'N/A'),
            'Unité': nutriment['nutrient']['unitName'],
            'Catégorie': 'À classifier'
        })
    
    df = pd.DataFrame(rows)
    df.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"Exporté vers {output_file}")

exporter_vers_csv("usda_structure_banana.json", "banana_nutriments.csv")

Exporté vers banana_nutriments.csv
