In [1]:
import sys
# Installation  des d√©pendances
!{sys.executable} -m pip install -q -r "../requirements.txt"

import pandas as pd
import requests
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Configuration du style des graphiques
sns.set_theme(style="whitegrid", context="talk")

In [2]:
def fetch_openfoodfacts_data(category, page_size=100):
    """
    R√©cup√®re les donn√©es nutritionnelles d'une cat√©gorie donn√©e sur OpenFoodFacts.
    Args:
        category (str): La cat√©gorie de produits (ex: 'biscuits')
        page_size (int): Nombre de produits √† r√©cup√©rer
    """
    url = "https://world.openfoodfacts.org/cgi/search.pl"
    params = {
        "action": "process",
        "tagtype_0": "categories",
        "tag_contains_0": "contains",
        "tag_0": category,
        "page_size": page_size,
        "json": 1,
        "fields": "product_name,brands,nutriscore_grade,nutriments,nova_group" 
    }
    
    print(f"üì° Interrogation de l'API pour : {category}...")
    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status() # V√©rifie les erreurs HTTP
        return response.json().get('products', [])
    except requests.exceptions.RequestException as e:
        print(f"‚ùå Erreur de connexion : {e}")
        return []

# Test de r√©cup√©ration
data_biscuits = fetch_openfoodfacts_data("biscuits", 200)
data_cereals = fetch_openfoodfacts_data("breakfast_cereals", 200)
raw_data = data_biscuits + data_cereals
print(f"‚úÖ Succ√®s : {len(raw_data)} produits bruts r√©cup√©r√©s.")

üì° Interrogation de l'API pour : biscuits...
üì° Interrogation de l'API pour : breakfast_cereals...
‚úÖ Succ√®s : 200 produits bruts r√©cup√©r√©s.


In [3]:
data_biscuits

[{'brands': 'LU',
  'nova_group': 4,
  'nutriments': {'alcohol': 0,
   'alcohol_100g': 0,
   'alcohol_serving': 0,
   'alcohol_unit': '% vol',
   'alcohol_value': 0,
   'carbohydrates': 14,
   'carbohydrates_100g': 70,
   'carbohydrates_prepared_unit': 'g',
   'carbohydrates_serving': 14,
   'carbohydrates_unit': 'g',
   'carbohydrates_value': 14,
   'carbon-footprint-from-known-ingredients_100g': 15,
   'carbon-footprint-from-known-ingredients_product': 45,
   'carbon-footprint-from-known-ingredients_serving': 3,
   'energy': 392,
   'energy-kcal': 93,
   'energy-kcal_100g': 465,
   'energy-kcal_prepared_unit': 'kcal',
   'energy-kcal_serving': 93,
   'energy-kcal_unit': 'kcal',
   'energy-kcal_value': 93,
   'energy-kcal_value_computed': 94.3,
   'energy-kj': 392,
   'energy-kj_100g': 1960,
   'energy-kj_prepared_unit': 'kJ',
   'energy-kj_serving': 392,
   'energy-kj_unit': 'kJ',
   'energy-kj_value': 392,
   'energy-kj_value_computed': 396,
   'energy_100g': 1960,
   'energy_prepar

In [4]:
# Transformation en DataFrame propre
products_list = []
print(raw_data)
for item in raw_data:
    nutriments = item.get('nutriments', {})
    
    products_list.append({
        'Nom': item.get('product_name', 'Inconnu'),
        'Marque': item.get('brands', 'Inconnu'),
        'Nutriscore': item.get('nutriscore_grade', np.nan), # A, B, C, D, E
        'NOVA': item.get('nova_group', np.nan),             # Degr√© de transformation (1-4)
        'Sucre (g/100g)': nutriments.get('sugars_100g', np.nan),
        'Gras (g/100g)': nutriments.get('fat_100g', np.nan),
        'Sel (g/100g)': nutriments.get('salt_100g', np.nan),
        'Energie (kcal)': nutriments.get('energy-kcal_100g', np.nan)
    })

df = pd.DataFrame(products_list)

df['Nutriscore'] = df['Nutriscore'].str.upper()

print("Aper√ßu des donn√©es propres :")
display(df.head())
print(f"Dimensions finales : {df.shape}")

[{'brands': 'LU', 'nova_group': 4, 'nutriments': {'alcohol': 0, 'alcohol_100g': 0, 'alcohol_serving': 0, 'alcohol_unit': '% vol', 'alcohol_value': 0, 'carbohydrates': 14, 'carbohydrates_100g': 70, 'carbohydrates_prepared_unit': 'g', 'carbohydrates_serving': 14, 'carbohydrates_unit': 'g', 'carbohydrates_value': 14, 'carbon-footprint-from-known-ingredients_100g': 15, 'carbon-footprint-from-known-ingredients_product': 45, 'carbon-footprint-from-known-ingredients_serving': 3, 'energy': 392, 'energy-kcal': 93, 'energy-kcal_100g': 465, 'energy-kcal_prepared_unit': 'kcal', 'energy-kcal_serving': 93, 'energy-kcal_unit': 'kcal', 'energy-kcal_value': 93, 'energy-kcal_value_computed': 94.3, 'energy-kj': 392, 'energy-kj_100g': 1960, 'energy-kj_prepared_unit': 'kJ', 'energy-kj_serving': 392, 'energy-kj_unit': 'kJ', 'energy-kj_value': 392, 'energy-kj_value_computed': 396, 'energy_100g': 1960, 'energy_prepared_unit': 'kJ', 'energy_serving': 392, 'energy_unit': 'kJ', 'energy_value': 392, 'fat': 3.5, '

Unnamed: 0,Nom,Marque,Nutriscore,NOVA,Sucre (g/100g),Gras (g/100g),Sel (g/100g),Energie (kcal)
0,Prince Go√ªt Chocolat,LU,E,4.0,31.5,17.5,0.5,465.0
1,Tonik,ÿπÿ±ÿ®Ÿä,E,2.0,40.0,22.2,0.217,504.0
2,S√©same,Gerbl√©,C,4.0,17.0,18.0,0.38,467.0
3,Henry‚Äôs,Henry's,E,4.0,24.2353,10.8235,0.882353,440.0
4,KING COOKIES,Excelo,E,,32.0,24.0,0.27,483.0


Dimensions finales : (200, 8)


In [6]:
# On supprime les lignes o√π il manque des infos critiques
df = df.dropna(subset=['Sucre (g/100g)', 'Gras (g/100g)', 'Nutriscore'])

# On s'assure qu'on a bien que des Nutriscores valides (A, B, C, D, E)
# Parfois l'API renvoie des erreurs ou des valeurs bizarres
valid_scores = ['A', 'B', 'C', 'D', 'E']
df = df[df['Nutriscore'].isin(valid_scores)]

# Tri pour l'ordre alphab√©tique (Esth√©tique pour les graphes)
df = df.sort_values('Nutriscore')

print(f"‚úÖ Donn√©es pr√™tes pour analyse : {len(df)} produits valides.")
display(df.head())

‚úÖ Donn√©es pr√™tes pour analyse : 195 produits valides.


Unnamed: 0,Nom,Marque,Nutriscore,NOVA,Sucre (g/100g),Gras (g/100g),Sel (g/100g),Energie (kcal)
135,Oat so Simple,"Quaker, Quaker Oats",A,1.0,1.11,7.78,0.0,370.0
134,Copos de avena,Br√ºggen,A,1.0,0.7,7.0,0.02,375.0
133,Irish Organic Jumbo Oats,Flahavan's,A,1.0,1.0,5.75,0.0125,370.0
130,Wholegrain Wheat Bixies,Crownfield - Lidl,A,3.0,3.0,2.25,0.225,362.0
129,original shredded wheat,"Nestl√©, Nestl√© UK Ltd",A,1.0,0.7,2.2,0.05,360.0
