## Food101 Nutrition and Diabetes Assessment

This Jupyter Notebook retrieves nutritional information for each food category in the `Food-101` dataset from the Edamam Food Database API and derives two heuristic indicators for individuals with diabetes or prediabetes: an estimated glycemic index (0-100) and a diabetes risk factor (0-10). Both indicators are simplified, educational estimates and are not medically validated. The results are saved to a CSV file.

The Food-101 dataset has already been extracted to the `../data/food101/food101/images` directory.

In [1]:
import os
import requests
import pandas as pd
from tqdm import tqdm

In [2]:
# Filesystem layout; every path is relative to the notebook's working directory.
DATA_DIR = '../data'

# Output: the CSV written by main(), one row per food class.
FOOD101_NUTRITION_CSV = os.path.join(DATA_DIR, 'food101_diabetes_assessment.csv')

# Input: the Food-101 dataset root and the metadata file listing its classes.
FOOD101_DIR = os.path.join(DATA_DIR, 'food101')
META_DIR = os.path.join(FOOD101_DIR, 'food-101', 'meta')
CLASSES_FILE = os.path.join(META_DIR, 'classes.txt')

In [3]:
def get_food_categories(classes_file=None, food101_dir=None):
    """Return the list of Food-101 class names.

    The names are read from the classes file, one per line. When that file
    does not exist, the names of the sub-directories of
    ``<food101_dir>/test`` are used instead.

    Args:
        classes_file: Path of the class list. Defaults to ``CLASSES_FILE``.
        food101_dir: Dataset root used for the directory fallback.
            Defaults to ``FOOD101_DIR``.

    Returns:
        A list of class-name strings. Blank lines in the classes file are
        skipped; names found through the directory fallback are sorted.

    Raises:
        FileNotFoundError: If neither the classes file nor the ``test``
            directory exists.
    """
    if classes_file is None:
        classes_file = CLASSES_FILE

    try:
        with open(classes_file, 'r', encoding='utf-8') as f:
            # Skip blank lines (e.g. a trailing empty line) so they do not
            # turn into empty-string categories that get sent to the API.
            return [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Classes file not found at {classes_file}")
        print("Trying to find classes by scanning directories...")

    # Fallback: derive the class names from the test directory structure
    if food101_dir is None:
        food101_dir = FOOD101_DIR
    test_dir = os.path.join(food101_dir, 'test')
    if not os.path.isdir(test_dir):
        raise FileNotFoundError(f"Could not find classes file or test directory at {test_dir}")

    # os.listdir() order is arbitrary; sort so repeated runs yield the same order
    return sorted(
        entry for entry in os.listdir(test_dir)
        if os.path.isdir(os.path.join(test_dir, entry))
    )

In [4]:
# Edamam Food Database API configuration.
# Credentials are taken from the EDAMAM_APP_ID / EDAMAM_APP_KEY environment
# variables when they are set, so a key does not have to live in the notebook.
# NOTE(review): the fallback literals below are credentials committed to source.
# They are kept only so existing runs keep working; rotate them and remove the
# literals once the environment variables are in place.
NUTRITION_API_URL = "https://api.edamam.com/api/food-database/v2/parser"
APP_ID = os.environ.get("EDAMAM_APP_ID", "39d5405a")
APP_KEY = os.environ.get("EDAMAM_APP_KEY", "720dfe11e2b750f96018f10e149142b9")

In [5]:
def format_to_two_decimals(value):
    """Round *value* to two decimal places.

    ``None`` is passed through unchanged; anything else is converted with
    ``float()`` first, so numeric strings are accepted as well.
    """
    return None if value is None else round(float(value), 2)

In [6]:
def _empty_nutrition_row(food_item):
    """Return a result row for *food_item* with every nutrient field set to None."""
    return {
        'food_class': food_item,
        'calories': None,
        'protein_g': None,
        'fat_g': None,
        'carbs_g': None,
        'fiber_g': None,
        'sugar_g': None,
        'sodium_mg': None,
        'glycemic_index': None,
        'diabetes_risk_factor': None
    }


def get_nutrition_data(food_item, timeout=10):
    """Query the Edamam API for one food class and build its result row.

    The class name is converted to a search phrase with
    ``clean_food_class_name`` and the first hit returned by the API is used.

    Args:
        food_item: Food-101 class name, e.g. ``'apple_pie'``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys ``food_class``, ``calories``, ``protein_g``,
        ``fat_g``, ``carbs_g``, ``fiber_g``, ``sugar_g``, ``sodium_mg``,
        ``glycemic_index`` and ``diabetes_risk_factor``. Nutrients missing
        from the response are reported as 0. When the request fails, the
        status is not 200, or the response has no usable hit, every field
        except ``food_class`` is None.
    """
    # NOTE(review): the sample output in this notebook shows sugar_g and
    # sodium_mg as 0.0 on every displayed row, which suggests this endpoint
    # may not return 'SUGAR' / 'NA' - confirm against the Edamam documentation.
    params = {
        'app_id': APP_ID,
        'app_key': APP_KEY,
        'ingr': clean_food_class_name(food_item),
    }

    try:
        # Without a timeout a stalled connection would block the whole run
        response = requests.get(NUTRITION_API_URL, params=params, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error fetching nutrition data for {food_item}: {e}")
        return _empty_nutrition_row(food_item)

    if response.status_code != 200:
        # Report the failure (bad credentials, rate limiting, ...) instead of
        # silently producing a blank row.
        print(f"Error fetching nutrition data for {food_item}: HTTP {response.status_code}")
        return _empty_nutrition_row(food_item)

    try:
        data = response.json()
        hints = data['hints'] if 'hints' in data else []
        if len(hints) == 0:
            return _empty_nutrition_row(food_item)

        # Use the first (most relevant) result
        food_info = hints[0]['food']
        if 'nutrients' not in food_info:
            return _empty_nutrition_row(food_item)
        nutrients = food_info['nutrients']

        gi = estimate_glycemic_index(
            nutrients.get('CHOCDF', 0),
            nutrients.get('FIBTG', 0),
            nutrients.get('SUGAR', 0)
        )
        risk_factor = calculate_diabetes_risk_factor(nutrients)
        if isinstance(risk_factor, dict):
            # The later definition of calculate_diabetes_risk_factor in this
            # notebook returns {'score': ..., 'breakdown': ...}; keep the score.
            risk_factor = risk_factor.get('score')

        # Format all values to two decimal places
        return {
            'food_class': food_item,
            'calories': format_to_two_decimals(nutrients.get('ENERC_KCAL', 0)),
            'protein_g': format_to_two_decimals(nutrients.get('PROCNT', 0)),
            'fat_g': format_to_two_decimals(nutrients.get('FAT', 0)),
            'carbs_g': format_to_two_decimals(nutrients.get('CHOCDF', 0)),
            'fiber_g': format_to_two_decimals(nutrients.get('FIBTG', 0)),
            'sugar_g': format_to_two_decimals(nutrients.get('SUGAR', 0)),
            'sodium_mg': format_to_two_decimals(nutrients.get('NA', 0)),
            'glycemic_index': format_to_two_decimals(gi),
            'diabetes_risk_factor': format_to_two_decimals(risk_factor)
        }
    except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
        # Malformed or unexpected payload (including a body that is not JSON):
        # stay best-effort and fall back to a blank row.
        print(f"Error fetching nutrition data for {food_item}: {e}")
        return _empty_nutrition_row(food_item)

In [7]:
def estimate_glycemic_index(carbs, fiber, sugar):
    """Return a rough glycemic-index estimate on a 0-100 scale.

    This is a simplified heuristic and not medically accurate: starting
    from 50, the sugar share of the carbohydrates adds up to 30 points and
    the fiber share subtracts up to 40 points per unit of share.

    Returns None when any argument is None, and 0 when carbs is not positive.
    """
    if any(amount is None for amount in (carbs, fiber, sugar)):
        return None

    # Without carbohydrates there is nothing to estimate from
    if not carbs > 0:
        return 0

    sugar_share = sugar / carbs if sugar else 0
    fiber_share = fiber / carbs if fiber else 0

    # More sugar pushes the estimate up, more fiber pulls it down
    raw_estimate = 50 + (sugar_share * 30) - (fiber_share * 40)

    # Keep the result inside the 0-100 scale
    if raw_estimate > 100:
        return 100
    if raw_estimate < 0:
        return 0
    return raw_estimate

In [8]:
def calculate_diabetes_risk_factor(nutrients):
    """Return a simple 0-10 diabetes risk factor for a nutrient dict.

    For educational purposes only; the formula is not medically validated.
    Net carbohydrates, sugar share and fat raise the value, fiber lowers it.

    Reads the keys 'CHOCDF', 'FIBTG', 'SUGAR' and 'FAT'. Returns None when
    *nutrients* is empty or when 'CHOCDF' or 'FIBTG' is missing or None,
    and 0 when there are no carbohydrates.
    """
    if not nutrients:
        return None

    carbs = nutrients.get('CHOCDF')
    fiber = nutrients.get('FIBTG')
    if None in (carbs, fiber):
        return None

    # Rough estimate (30% of the carbohydrates) when no sugar value is given
    sugar = nutrients['SUGAR'] if 'SUGAR' in nutrients else carbs * 0.3
    fat = nutrients.get('FAT', 0)

    if carbs == 0:
        return 0

    carb_floor = max(1, carbs)  # avoids inflating the ratios for tiny carb amounts
    net_carbs = max(0, carbs - fiber)
    sugar_points = sugar / carb_floor * 5  # impact up to 5 points
    fiber_points = min(3, (fiber / carb_floor) * 5)  # benefit up to 3 points
    fat_points = min(2, (fat / 100) * 3)  # fat contributes at most 2 points

    total = (net_carbs / 100 * 10) + sugar_points + fat_points - fiber_points

    # Keep the result inside the 0-10 scale
    if total > 10:
        return 10
    if total < 0:
        return 0
    return total

In [9]:
# Search phrase sent to the Edamam Food Database API for each Food-101 class,
# keyed by the class name with underscores replaced by spaces.
_FOOD101_SEARCH_TERMS = {
    'apple pie': 'apple pie dessert',
    'baby back ribs': 'pork ribs',
    'baklava': 'phyllo pastry dessert',
    'beef carpaccio': 'raw beef slices',
    'beef tartare': 'raw beef',
    'beet salad': 'beetroot salad',
    'beignets': 'fried dough pastry',
    'bibimbap': 'korean mixed rice',
    'bread pudding': 'bread custard dessert',
    'breakfast burrito': 'egg burrito',
    'bruschetta': 'tomato bread appetizer',
    'caesar salad': 'caesar salad romaine',
    'cannoli': 'italian pastry cream',
    'caprese salad': 'tomato mozzarella salad',
    'carrot cake': 'carrot spice cake',
    'ceviche': 'raw fish citrus',
    'cheesecake': 'cream cheese dessert',
    'cheese plate': 'assorted cheese',
    'chicken curry': 'indian chicken curry',
    'chicken quesadilla': 'cheese chicken tortilla',
    'chicken wings': 'buffalo wings',
    'chocolate cake': 'chocolate layer cake',
    'chocolate mousse': 'chocolate cream dessert',
    'churros': 'fried dough sticks',
    'clam chowder': 'creamy clam soup',
    'club sandwich': 'turkey bacon sandwich',
    'crab cakes': 'crab meat patties',
    'creme brulee': 'custard dessert',
    'croque madame': 'ham cheese sandwich egg',
    'cup cakes': 'cupcake dessert',
    'deviled eggs': 'stuffed eggs',
    'donuts': 'doughnut pastry',
    'dumplings': 'steamed dumplings',
    'edamame': 'soybeans steamed',
    'eggs benedict': 'poached egg hollandaise',
    'escargots': 'cooked snails',
    'falafel': 'chickpea fritters',
    'filet mignon': 'beef steak tenderloin',
    'fish and chips': 'fried fish potatoes',
    'foie gras': 'duck liver pate',
    'french fries': 'potato fries',
    'french onion soup': 'caramelized onion soup',
    'french toast': 'egg bread toast',
    'fried calamari': 'fried squid rings',
    'fried rice': 'stir-fried rice',
    'frozen yogurt': 'frozen yogurt dessert',
    'garlic bread': 'garlic butter bread',
    'gnocchi': 'potato dumplings',
    'greek salad': 'feta cucumber salad',
    'grilled cheese sandwich': 'cheese sandwich grilled',
    'grilled salmon': 'salmon fillet grilled',
    'guacamole': 'avocado dip',
    'gyoza': 'japanese dumplings',
    'hamburger': 'beef burger',
    'hot and sour soup': 'spicy chinese soup',
    'hot dog': 'sausage bun',
    'huevos rancheros': 'mexican fried eggs',
    'hummus': 'chickpea dip',
    'ice cream': 'ice cream dessert',
    'lasagna': 'layered pasta bolognese',
    'lobster bisque': 'creamy lobster soup',
    'lobster roll sandwich': 'lobster roll',
    'macaroni and cheese': 'macaroni cheese',
    'macarons': 'french macaron cookies',
    'miso soup': 'miso broth soup',
    'mussels': 'steamed mussels',
    'nachos': 'tortilla chips cheese',
    'omelette': 'egg omelette',
    'onion rings': 'fried onion rings',
    'oysters': 'raw oysters',
    'pad thai': 'thai noodle stir-fry',
    'paella': 'spanish rice seafood',
    'pancakes': 'fluffy pancakes',
    'panna cotta': 'cream gelatin dessert',
    'peking duck': 'roast duck chinese',
    'pho': 'vietnamese noodle soup',
    'pizza': 'pizza margherita',
    'pork chop': 'grilled pork chop',
    'poutine': 'fries cheese curds gravy',
    'prime rib': 'roast beef rib',
    'pulled pork sandwich': 'pulled pork',
    'ramen': 'japanese noodle soup',
    'ravioli': 'stuffed pasta',
    'red velvet cake': 'red velvet cake',
    'risotto': 'creamy rice dish',
    'samosa': 'spiced pastry triangle',
    'sashimi': 'raw fish slices',
    'scallops': 'seared scallops',
    'seaweed salad': 'marinated seaweed',
    'shrimp and grits': 'shrimp grits',
    'spaghetti bolognese': 'bolognese sauce pasta',
    'spaghetti carbonara': 'carbonara sauce pasta',
    'spring rolls': 'fried spring rolls',
    'steak': 'grilled beef steak',
    'strawberry shortcake': 'strawberry cream cake',
    'sushi': 'sushi rolls',
    'tacos': 'mexican tacos',
    'takoyaki': 'octopus balls',
    'tiramisu': 'coffee cream dessert',
    'tuna tartare': 'raw tuna',
    'waffles': 'crispy waffles'
}


def clean_food_class_name(name):
    """Turn a Food-101 class name into the search phrase used for the Edamam API.

    Underscores become spaces, the text is lower-cased and surrounding
    whitespace is removed. If the normalized name has an entry in the
    replacement table that entry is returned; otherwise the normalized
    name itself is returned.
    """
    spaced = name.replace('_', ' ')
    normalized = spaced.lower().strip()

    try:
        return _FOOD101_SEARCH_TERMS[normalized]
    except KeyError:
        # No dedicated search phrase: query with the normalized name as-is
        return normalized

In [10]:
def main():
    """Fetch nutrition data for every food category and write it to CSV.

    Reads the category list with ``get_food_categories``, queries
    ``get_nutrition_data`` once per category, and saves the rows to
    ``FOOD101_NUTRITION_CSV``. Problems are reported with ``print``; the
    function returns None in every case.
    """
    # Create the output directory if it doesn't exist. dirname() is empty for
    # a bare file name, and os.makedirs('') would raise.
    output_dir = os.path.dirname(FOOD101_NUTRITION_CSV)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Get food categories
    try:
        categories = get_food_categories()
    except Exception as e:
        print(f"Error getting food categories: {e}")
        return
    print(f"Found {len(categories)} food categories")

    if not categories:
        # An empty DataFrame would produce a CSV that cannot be read back
        print("No food categories to process; nothing was written")
        return

    # Query the API once per category; every call yields a row, so none are dropped
    nutrition_data = [
        get_nutrition_data(category)
        for category in tqdm(categories, desc="Processing food categories")
    ]

    # Surface lookups that produced no data, so a run where the API rejected
    # the requests is not mistaken for a successful one.
    missing = [row['food_class'] for row in nutrition_data if row.get('calories') is None]
    if missing:
        print(
            f"Warning: no nutrition data for {len(missing)} of "
            f"{len(nutrition_data)} categories: {', '.join(missing)}"
        )

    # Create DataFrame and save to CSV
    df = pd.DataFrame(nutrition_data)
    df.to_csv(FOOD101_NUTRITION_CSV, index=False)
    print(f"Nutritional data saved to {FOOD101_NUTRITION_CSV}")


if __name__ == "__main__":
    main()

Found 101 food categories


Processing food categories: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 101/101 [00:37<00:00,  2.67it/s]

Nutritional data saved to ../data/food101_diabetes_assessment.csv





In [12]:
import numpy as np

In [14]:
# Load the assessment table written by main(). The path constant is reused so
# this cell always reads the same file that was saved.
data = pd.read_csv(FOOD101_NUTRITION_CSV)
data.head()

Unnamed: 0,food_class,calories,protein_g,fat_g,carbs_g,fiber_g,sugar_g,sodium_mg,glycemic_index,diabetes_risk_factor
0,apple_pie,265.0,2.4,12.5,37.1,0.0,0.0,0.0,50.0,
1,baby_back_ribs,277.0,15.5,23.4,0.0,0.0,0.0,0.0,0.0,0.0
2,baklava,299.0,7.1,6.0,52.6,1.9,0.0,0.0,48.56,6.57
3,beef_carpaccio,130.0,21.6,4.81,0.12,0.0,0.0,0.0,50.0,0.34
4,beef_tartare,130.0,21.6,4.81,0.12,0.0,0.0,0.0,50.0,0.34


In [15]:
# List the distinct food classes present in the loaded table
data['food_class'].unique()

array(['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio',
       'beef_tartare', 'beet_salad', 'beignets', 'bibimbap',
       'bread_pudding', 'breakfast_burrito', 'bruschetta', 'caesar_salad',
       'cannoli', 'caprese_salad', 'carrot_cake', 'ceviche', 'cheesecake',
       'cheese_plate', 'chicken_curry', 'chicken_quesadilla',
       'chicken_wings', 'chocolate_cake', 'chocolate_mousse', 'churros',
       'clam_chowder', 'club_sandwich', 'crab_cakes', 'creme_brulee',
       'croque_madame', 'cup_cakes', 'deviled_eggs', 'donuts',
       'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel',
       'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries',
       'french_onion_soup', 'french_toast', 'fried_calamari',
       'fried_rice', 'frozen_yogurt', 'garlic_bread', 'gnocchi',
       'greek_salad', 'grilled_cheese_sandwich', 'grilled_salmon',
       'guacamole', 'gyoza', 'hamburger', 'hot_and_sour_soup', 'hot_dog',
       'huevos_rancheros', 'hummus', 'ice_cr

In [None]:
def calculate_diabetes_risk_factor(nutrients, total_calories=2000):
    """Calculate a diabetes risk factor based on nutritional content.

    For educational purposes only, not medically validated.

    Unlike the earlier single-number version in this notebook, this
    definition returns a dict; callers that need a plain number must read
    the ``"score"`` entry.

    Args:
        nutrients (dict): Nutritional data with keys 'CHOCDF', 'FIBTG', 'SUGAR', 'FAT' (in grams).
            A missing or None 'SUGAR' is estimated as 20% of the
            carbohydrates; a missing or None 'FAT' counts as 0.
        total_calories (float): Total daily calories for normalization (default: 2000).
            Values below 1 are treated as 1.

    Returns:
        dict: ``{"score": <0-10>, "breakdown": {"net_carbs", "sugar", "fiber", "fat"}}``
        where the fiber entry is zero or negative because it lowers the
        score. None if *nutrients* is empty or 'CHOCDF' / 'FIBTG' is
        missing or None.
    """
    if not nutrients or None in [nutrients.get('CHOCDF'), nutrients.get('FIBTG')]:
        return None

    carbs = nutrients.get('CHOCDF', 0)
    fiber = nutrients.get('FIBTG', 0)

    # A key that is present but None must not reach the arithmetic below
    sugar = nutrients.get('SUGAR')
    if sugar is None:
        sugar = carbs * 0.2 if carbs > 0 else 0
    fat = nutrients.get('FAT')
    if fat is None:
        fat = 0

    if carbs == 0 and fat == 0:
        return {"score": 0, "breakdown": {"net_carbs": 0, "sugar": 0, "fiber": 0, "fat": 0}}

    # Normalize by calories
    calorie_base = max(1, total_calories)
    carbs_per_1000kcal = (carbs / calorie_base) * 1000
    fiber_per_1000kcal = (fiber / calorie_base) * 1000
    sugar_per_1000kcal = (sugar / calorie_base) * 1000
    fat_per_1000kcal = (fat / calorie_base) * 1000

    # Calculate contributions; each one is capped
    net_carbs = max(0, carbs_per_1000kcal - fiber_per_1000kcal)
    net_carbs_impact = min(6, (net_carbs / 100) * 8)
    sugar_impact = min(3, (sugar_per_1000kcal / max(1, carbs_per_1000kcal)) * 4)
    fiber_benefit = min(2, (fiber_per_1000kcal / 10) * 2)
    fat_impact = min(1, (fat_per_1000kcal / 50) * 2)

    risk_factor = net_carbs_impact + sugar_impact - fiber_benefit + fat_impact
    score = max(min(risk_factor, 10), 0)

    return {
        "score": round(score, 2),
        "breakdown": {
            "net_carbs": round(net_carbs_impact, 2),
            "sugar": round(sugar_impact, 2),
            # Subtract from 0.0 rather than negate, so a zero benefit is
            # reported as 0.0 and not as -0.0
            "fiber": 0.0 - round(fiber_benefit, 2),
            "fat": round(fat_impact, 2)
        }
    }