# RAG on Nutrition dataset

In [3]:
# LangChain imports for RAG
from langchain_community.document_loaders import UnstructuredExcelLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.faiss import DistanceStrategy
from langchain.schema import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Hybrid search imports
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Ollama LLM
from langchain_ollama import OllamaLLM

# Additional utilities
import pandas as pd
import numpy as np
import re

# Path to dataset
DATA_PATH = "../data/raw/nutrition.xlsx"

## 1. Load Dataset

For RAG with tabular data (Excel), we have two approaches:
1. **UnstructuredExcelLoader** - treats Excel as a document (good for text-heavy sheets)
2. **Pandas + Document conversion** - gives more control over how rows become documents

Since nutrition data is typically tabular, we'll use the pandas approach to create meaningful documents from each row.

In [4]:
# Load the Excel dataset
df = pd.read_excel(DATA_PATH)

# Display basic info
print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
df.head()

Dataset shape: (8789, 77)
Columns: ['Unnamed: 0', 'name', 'serving_size', 'calories', 'total_fat', 'saturated_fat', 'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin', 'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a', 'vitamin_a_rae', 'carotene_alpha', 'carotene_beta', 'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12', 'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha', 'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese', 'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine', 'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine', 'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine', 'methionine', 'phenylalanine', 'proline', 'serine', 'threonine', 'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars', 'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose', 'fat', 'saturated_fatty_acids', 'monounsaturated_fatty_acids', 'polyunsaturated_fatt

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


## 2. Convert Tabular Data to Documents

**Key improvements:**
- Each food item becomes ONE document (no splitting!)
- Natural language format grouped by nutrient category
- Numeric metadata for potential filtering
- Semantically rich text for better embedding matching

In [None]:
from langchain.schema import Document

def extract_numeric(val):
    """Extract numeric value from strings like '9.17 g' or '381'."""
    if pd.isna(val):
        return 0.0
    try:
        # Remove units and extract number
        cleaned = re.sub(r'[a-zA-Z\s]', '', str(val))
        return float(cleaned) if cleaned else 0.0
    except:
        return 0.0

def get_nutrient_tags(row):
    """Generate semantic tags based on nutrient values for better retrieval."""
    tags = []
    
    # Calorie-based tags
    calories = extract_numeric(row.get('calories'))
    if calories < 50:
        tags.append("very low calorie")
    elif calories < 100:
        tags.append("low calorie")
    elif calories > 400:
        tags.append("high calorie")
    
    # Protein tags (per 100g)
    protein = extract_numeric(row.get('protein'))
    if protein >= 20:
        tags.append("high protein")
        tags.append("excellent protein source")
    elif protein >= 10:
        tags.append("good protein source")
    elif protein < 2:
        tags.append("low protein")
    
    # Fiber tags
    fiber = extract_numeric(row.get('fiber'))
    if fiber >= 7:
        tags.append("high fiber")
        tags.append("excellent fiber source")
    elif fiber >= 3:
        tags.append("good fiber source")
    
    # Fat tags
    fat = extract_numeric(row.get('total_fat'))
    sat_fat = extract_numeric(row.get('saturated_fat'))
    if fat < 3:
        tags.append("low fat")
    elif fat > 30:
        tags.append("high fat")
    if sat_fat < 1:
        tags.append("low saturated fat")
    
    # Sugar tags
    sugar = extract_numeric(row.get('sugars'))
    if sugar < 1:
        tags.append("no sugar")
        tags.append("sugar free")
    elif sugar < 5:
        tags.append("low sugar")
    elif sugar > 20:
        tags.append("high sugar")
    
    # Sodium tags
    sodium = extract_numeric(row.get('sodium'))
    if sodium < 50:
        tags.append("low sodium")
        tags.append("heart healthy")
    elif sodium > 500:
        tags.append("high sodium")
    
    # Vitamin C tags
    vit_c = extract_numeric(row.get('vitamin_c'))
    if vit_c >= 60:
        tags.append("high vitamin C")
        tags.append("excellent vitamin C source")
        tags.append("immune support")
        tags.append("antioxidant rich")
    elif vit_c >= 20:
        tags.append("good vitamin C source")
    
    # Vitamin A tags
    vit_a = extract_numeric(row.get('vitamin_a'))
    if vit_a >= 5000:
        tags.append("high vitamin A")
        tags.append("excellent vitamin A source")
        tags.append("good for eye health")
        tags.append("good for vision")
    elif vit_a >= 1000:
        tags.append("good vitamin A source")
    
    # Vitamin D tags
    vit_d = extract_numeric(row.get('vitamin_d'))
    if vit_d >= 100:
        tags.append("high vitamin D")
        tags.append("excellent vitamin D source")
        tags.append("good for bone health")
    elif vit_d >= 40:
        tags.append("good vitamin D source")
    
    # Vitamin E tags
    vit_e = extract_numeric(row.get('vitamin_e'))
    if vit_e >= 7:
        tags.append("high vitamin E")
        tags.append("antioxidant rich")
        tags.append("good for skin health")
    
    # Vitamin K tags
    vit_k = extract_numeric(row.get('vitamin_k'))
    if vit_k >= 80:
        tags.append("high vitamin K")
        tags.append("good for blood clotting")
        tags.append("bone health support")
    
    # B vitamins tags
    vit_b12 = extract_numeric(row.get('vitamin_b12'))
    if vit_b12 >= 2:
        tags.append("high vitamin B12")
        tags.append("good for energy")
        tags.append("nervous system support")
    
    vit_b6 = extract_numeric(row.get('vitamin_b6'))
    if vit_b6 >= 0.5:
        tags.append("good vitamin B6 source")
    
    folate = extract_numeric(row.get('folate'))
    if folate >= 100:
        tags.append("high folate")
        tags.append("good for pregnancy")
        tags.append("cell health support")
    
    # Calcium tags
    calcium = extract_numeric(row.get('calcium'))
    if calcium >= 200:
        tags.append("high calcium")
        tags.append("excellent calcium source")
        tags.append("good for bone health")
        tags.append("good for teeth")
    elif calcium >= 100:
        tags.append("good calcium source")
    
    # Iron tags
    iron = extract_numeric(row.get('irom'))
    if iron >= 5:
        tags.append("high iron")
        tags.append("excellent iron source")
        tags.append("good for anemia")
        tags.append("blood health support")
    elif iron >= 2:
        tags.append("good iron source")
    
    # Potassium tags
    potassium = extract_numeric(row.get('potassium'))
    if potassium >= 400:
        tags.append("high potassium")
        tags.append("excellent potassium source")
        tags.append("heart health support")
        tags.append("blood pressure support")
    elif potassium >= 200:
        tags.append("good potassium source")
    
    # Magnesium tags
    magnesium = extract_numeric(row.get('magnesium'))
    if magnesium >= 80:
        tags.append("high magnesium")
        tags.append("muscle health support")
        tags.append("good for relaxation")
    elif magnesium >= 30:
        tags.append("good magnesium source")
    
    # Zinc tags
    zinc = extract_numeric(row.get('zink'))
    if zinc >= 5:
        tags.append("high zinc")
        tags.append("immune support")
        tags.append("good for wound healing")
    
    # Selenium tags
    selenium = extract_numeric(row.get('selenium'))
    if selenium >= 30:
        tags.append("high selenium")
        tags.append("antioxidant support")
        tags.append("thyroid health support")
    
    # Antioxidant tags (carotenoids)
    beta_carotene = extract_numeric(row.get('carotene_beta'))
    lycopene = extract_numeric(row.get('lucopene'))
    lutein = extract_numeric(row.get('lutein_zeaxanthin'))
    
    if beta_carotene >= 1000:
        tags.append("high beta carotene")
        tags.append("antioxidant rich")
    if lycopene >= 1000:
        tags.append("high lycopene")
        tags.append("antioxidant rich")
        tags.append("prostate health support")
    if lutein >= 1000:
        tags.append("high lutein")
        tags.append("eye health support")
    
    # Cholesterol tags
    cholesterol = extract_numeric(row.get('cholesterol'))
    if cholesterol == 0:
        tags.append("cholesterol free")
    elif cholesterol < 20:
        tags.append("low cholesterol")
    elif cholesterol > 100:
        tags.append("high cholesterol")
    
    # Caffeine tags
    caffeine = extract_numeric(row.get('caffeine'))
    if caffeine > 50:
        tags.append("contains caffeine")
        tags.append("energy boost")
    elif caffeine == 0:
        tags.append("caffeine free")
    
    # Omega fatty acids (approximation from PUFA)
    pufa = extract_numeric(row.get('polyunsaturated_fatty_acids'))
    if pufa >= 5:
        tags.append("omega fatty acids")
        tags.append("healthy fats")
        tags.append("heart healthy fats")
    
    return tags

def get_health_benefits(tags):
    """Generate health benefit summary from tags."""
    benefits = []
    
    if any(t in tags for t in ["good for bone health", "high calcium", "high vitamin D", "high vitamin K"]):
        benefits.append("bone health")
    if any(t in tags for t in ["immune support", "high vitamin C", "high zinc"]):
        benefits.append("immune system support")
    if any(t in tags for t in ["heart healthy", "heart health support", "low sodium", "high potassium"]):
        benefits.append("heart health")
    if any(t in tags for t in ["antioxidant rich", "high vitamin E", "high vitamin C"]):
        benefits.append("antioxidant protection")
    if any(t in tags for t in ["good for eye health", "eye health support", "high lutein"]):
        benefits.append("eye health")
    if any(t in tags for t in ["good for energy", "high vitamin B12"]):
        benefits.append("energy metabolism")
    if any(t in tags for t in ["high protein", "muscle health support"]):
        benefits.append("muscle building and repair")
    if any(t in tags for t in ["high fiber", "good fiber source"]):
        benefits.append("digestive health")
    if any(t in tags for t in ["blood health support", "high iron"]):
        benefits.append("blood health")
    
    return benefits

def create_text_new(row):
    """Create enhanced natural language document text for better RAG retrieval.
    
    Includes:
    - Natural language summary with semantic descriptors
    - Health benefit associations
    - Nutrient tags for semantic matching
    - All original nutrient values organized by category
    """
    name = row.get('name', 'Unknown food')
    serving = row.get('serving_size', '100 g')
    
    # Get semantic tags and health benefits
    tags = get_nutrient_tags(row)
    benefits = get_health_benefits(tags)
    
    # Create natural language summary
    summary_parts = [f"{name} is a food item"]
    
    # Add key characteristics
    calories = extract_numeric(row.get('calories'))
    protein = extract_numeric(row.get('protein'))
    fiber = extract_numeric(row.get('fiber'))
    
    if "high protein" in tags:
        summary_parts.append(f"that is high in protein ({protein}g per {serving})")
    if "high fiber" in tags:
        summary_parts.append(f"rich in dietary fiber ({fiber}g)")
    if "low calorie" in tags or "very low calorie" in tags:
        summary_parts.append(f"with only {calories} calories per serving")
    
    summary = " ".join(summary_parts) + "."
    
    # Build the enhanced text
    text_new = f"""Food: {name}
Serving size: {serving}

SUMMARY:
{summary}

NUTRITIONAL TAGS: {', '.join(tags) if tags else 'None'}

HEALTH BENEFITS: {', '.join(benefits) if benefits else 'General nutrition'}

MACRONUTRIENTS:
- Calories: {row.get('calories', 'N/A')} kcal
- Protein: {row.get('protein', 'N/A')}
- Carbohydrates: {row.get('carbohydrate', 'N/A')}
- Total Fat: {row.get('total_fat', 'N/A')}
- Saturated Fat: {row.get('saturated_fat', 'N/A')}
- Fiber: {row.get('fiber', 'N/A')}
- Sugars: {row.get('sugars', 'N/A')}
- Net Carbs (approx): {max(0, extract_numeric(row.get('carbohydrate')) - extract_numeric(row.get('fiber'))):.1f}g

VITAMINS:
- Vitamin A: {row.get('vitamin_a', 'N/A')} (RAE: {row.get('vitamin_a_rae', 'N/A')})
- Vitamin C: {row.get('vitamin_c', 'N/A')}
- Vitamin D: {row.get('vitamin_d', 'N/A')}
- Vitamin E: {row.get('vitamin_e', 'N/A')}
- Vitamin K: {row.get('vitamin_k', 'N/A')}
- Vitamin B6: {row.get('vitamin_b6', 'N/A')}
- Vitamin B12: {row.get('vitamin_b12', 'N/A')}
- Thiamin (B1): {row.get('thiamin', 'N/A')}
- Riboflavin (B2): {row.get('riboflavin', 'N/A')}
- Niacin (B3): {row.get('niacin', 'N/A')}
- Folate: {row.get('folate', 'N/A')}
- Pantothenic Acid (B5): {row.get('pantothenic_acid', 'N/A')}
- Choline: {row.get('choline', 'N/A')}

MINERALS:
- Calcium: {row.get('calcium', 'N/A')}
- Iron: {row.get('irom', 'N/A')}
- Magnesium: {row.get('magnesium', 'N/A')}
- Phosphorus: {row.get('phosphorous', 'N/A')}
- Potassium: {row.get('potassium', 'N/A')}
- Sodium: {row.get('sodium', 'N/A')}
- Zinc: {row.get('zink', 'N/A')}
- Copper: {row.get('copper', 'N/A')}
- Manganese: {row.get('manganese', 'N/A')}
- Selenium: {row.get('selenium', 'N/A')}

ANTIOXIDANTS & CAROTENOIDS:
- Beta-Carotene: {row.get('carotene_beta', 'N/A')}
- Alpha-Carotene: {row.get('carotene_alpha', 'N/A')}
- Lycopene: {row.get('lucopene', 'N/A')}
- Lutein + Zeaxanthin: {row.get('lutein_zeaxanthin', 'N/A')}
- Cryptoxanthin: {row.get('cryptoxanthin_beta', 'N/A')}

AMINO ACIDS (Protein Quality):
- Essential: Leucine ({row.get('leucine', 'N/A')}), Isoleucine ({row.get('isoleucine', 'N/A')}), Valine ({row.get('valine', 'N/A')}), Lysine ({row.get('lysine', 'N/A')}), Methionine ({row.get('methionine', 'N/A')}), Threonine ({row.get('threonine', 'N/A')}), Tryptophan ({row.get('tryptophan', 'N/A')}), Phenylalanine ({row.get('phenylalanine', 'N/A')}), Histidine ({row.get('histidine', 'N/A')})
- BCAAs (Branched-Chain): Leucine + Isoleucine + Valine = {extract_numeric(row.get('leucine')) + extract_numeric(row.get('isoleucine')) + extract_numeric(row.get('valine')):.2f}g

FATS BREAKDOWN:
- Saturated Fatty Acids: {row.get('saturated_fatty_acids', 'N/A')}
- Monounsaturated Fatty Acids: {row.get('monounsaturated_fatty_acids', 'N/A')} (heart-healthy)
- Polyunsaturated Fatty Acids: {row.get('polyunsaturated_fatty_acids', 'N/A')} (includes omega-3 and omega-6)
- Trans Fat: {row.get('fatty_acids_total_trans', 'N/A')}
- Cholesterol: {row.get('cholesterol', 'N/A')}

OTHER:
- Water: {row.get('water', 'N/A')}
- Alcohol: {row.get('alcohol', 'N/A')}
- Caffeine: {row.get('caffeine', 'N/A')}
- Theobromine: {row.get('theobromine', 'N/A')}
"""
    
    return text_new

def row_to_document(row):
    """Convert a DataFrame row to a semantically rich LangChain Document.
    
    Creates natural language description grouped by nutrient categories
    for better semantic matching during retrieval.
    """
    name = row.get('name', 'Unknown food')
    serving = row.get('serving_size', '100 g')
    
    # Create natural language description with grouped nutrients
    text = f"""Food: {name}
Serving size: {serving}

MACRONUTRIENTS:
- Calories: {row.get('calories', 'N/A')}
- Protein: {row.get('protein', 'N/A')}
- Carbohydrates: {row.get('carbohydrate', 'N/A')}
- Total Fat: {row.get('total_fat', 'N/A')}
- Saturated Fat: {row.get('saturated_fat', 'N/A')}
- Fiber: {row.get('fiber', 'N/A')}
- Sugars: {row.get('sugars', 'N/A')}

VITAMINS:
- Vitamin A: {row.get('vitamin_a', 'N/A')} (RAE: {row.get('vitamin_a_rae', 'N/A')})
- Vitamin C: {row.get('vitamin_c', 'N/A')}
- Vitamin D: {row.get('vitamin_d', 'N/A')}
- Vitamin E: {row.get('vitamin_e', 'N/A')}
- Vitamin K: {row.get('vitamin_k', 'N/A')}
- Vitamin B6: {row.get('vitamin_b6', 'N/A')}
- Vitamin B12: {row.get('vitamin_b12', 'N/A')}
- Thiamin (B1): {row.get('thiamin', 'N/A')}
- Riboflavin (B2): {row.get('riboflavin', 'N/A')}
- Niacin (B3): {row.get('niacin', 'N/A')}
- Folate: {row.get('folate', 'N/A')}
- Pantothenic Acid (B5): {row.get('pantothenic_acid', 'N/A')}
- Choline: {row.get('choline', 'N/A')}

MINERALS:
- Calcium: {row.get('calcium', 'N/A')}
- Iron: {row.get('irom', 'N/A')}
- Magnesium: {row.get('magnesium', 'N/A')}
- Phosphorus: {row.get('phosphorous', 'N/A')}
- Potassium: {row.get('potassium', 'N/A')}
- Sodium: {row.get('sodium', 'N/A')}
- Zinc: {row.get('zink', 'N/A')}
- Copper: {row.get('copper', 'N/A')}
- Manganese: {row.get('manganese', 'N/A')}
- Selenium: {row.get('selenium', 'N/A')}

FATS BREAKDOWN:
- Saturated Fatty Acids: {row.get('saturated_fatty_acids', 'N/A')}
- Monounsaturated Fatty Acids: {row.get('monounsaturated_fatty_acids', 'N/A')}
- Polyunsaturated Fatty Acids: {row.get('polyunsaturated_fatty_acids', 'N/A')}
- Trans Fat: {row.get('fatty_acids_total_trans', 'N/A')}
- Cholesterol: {row.get('cholesterol', 'N/A')}

OTHER:
- Water: {row.get('water', 'N/A')}
- Alcohol: {row.get('alcohol', 'N/A')}
- Caffeine: {row.get('caffeine', 'N/A')}
"""
    
    # Store numeric metadata for filtering capabilities
    # Includes all key nutrients for smart retrieval sorting
    metadata = {
        "name": str(name),
        # Macronutrients
        "calories": extract_numeric(row.get('calories')),
        "protein_g": extract_numeric(row.get('protein')),
        "carbs_g": extract_numeric(row.get('carbohydrate')),
        "fat_g": extract_numeric(row.get('total_fat')),
        "fiber_g": extract_numeric(row.get('fiber')),
        "sugar_g": extract_numeric(row.get('sugars')),
        # Vitamins
        "vitamin_c_mg": extract_numeric(row.get('vitamin_c')),
        "vitamin_a_iu": extract_numeric(row.get('vitamin_a')),
        "vitamin_d_iu": extract_numeric(row.get('vitamin_d')),
        "vitamin_e_mg": extract_numeric(row.get('vitamin_e')),
        "vitamin_k_mcg": extract_numeric(row.get('vitamin_k')),
        "vitamin_b6_mg": extract_numeric(row.get('vitamin_b6')),
        "vitamin_b12_mcg": extract_numeric(row.get('vitamin_b12')),
        "folate_mcg": extract_numeric(row.get('folate')),
        # Minerals
        "calcium_mg": extract_numeric(row.get('calcium')),
        "iron_mg": extract_numeric(row.get('irom')),  # Note: typo in original data
        "magnesium_mg": extract_numeric(row.get('magnesium')),
        "potassium_mg": extract_numeric(row.get('potassium')),
        "sodium_mg": extract_numeric(row.get('sodium')),
        "zinc_mg": extract_numeric(row.get('zink')),  # Note: typo in original data
    }
    
    return Document(page_content=text, metadata=metadata)


def row_to_document_enhanced(row):
    """Convert a DataFrame row to an ENHANCED LangChain Document.
    
    Uses text_new with semantic tags, health benefits, and more nutrients.
    """
    name = row.get('name', 'Unknown food')
    
    # Use the new enhanced text
    text_new = create_text_new(row)
    
    # Get tags for metadata
    tags = get_nutrient_tags(row)
    benefits = get_health_benefits(tags)
    
    # Enhanced metadata with more fields
    metadata = {
        "name": str(name),
        # Macronutrients
        "calories": extract_numeric(row.get('calories')),
        "protein_g": extract_numeric(row.get('protein')),
        "carbs_g": extract_numeric(row.get('carbohydrate')),
        "fat_g": extract_numeric(row.get('total_fat')),
        "fiber_g": extract_numeric(row.get('fiber')),
        "sugar_g": extract_numeric(row.get('sugars')),
        "net_carbs_g": max(0, extract_numeric(row.get('carbohydrate')) - extract_numeric(row.get('fiber'))),
        # Vitamins
        "vitamin_c_mg": extract_numeric(row.get('vitamin_c')),
        "vitamin_a_iu": extract_numeric(row.get('vitamin_a')),
        "vitamin_d_iu": extract_numeric(row.get('vitamin_d')),
        "vitamin_e_mg": extract_numeric(row.get('vitamin_e')),
        "vitamin_k_mcg": extract_numeric(row.get('vitamin_k')),
        "vitamin_b6_mg": extract_numeric(row.get('vitamin_b6')),
        "vitamin_b12_mcg": extract_numeric(row.get('vitamin_b12')),
        "folate_mcg": extract_numeric(row.get('folate')),
        # Minerals
        "calcium_mg": extract_numeric(row.get('calcium')),
        "iron_mg": extract_numeric(row.get('irom')),
        "magnesium_mg": extract_numeric(row.get('magnesium')),
        "potassium_mg": extract_numeric(row.get('potassium')),
        "sodium_mg": extract_numeric(row.get('sodium')),
        "zinc_mg": extract_numeric(row.get('zink')),
        "selenium_mcg": extract_numeric(row.get('selenium')),
        # Antioxidants
        "beta_carotene_mcg": extract_numeric(row.get('carotene_beta')),
        "lycopene_mcg": extract_numeric(row.get('lucopene')),
        "lutein_mcg": extract_numeric(row.get('lutein_zeaxanthin')),
        # Fats detail
        "saturated_fat_g": extract_numeric(row.get('saturated_fatty_acids')),
        "monounsaturated_fat_g": extract_numeric(row.get('monounsaturated_fatty_acids')),
        "polyunsaturated_fat_g": extract_numeric(row.get('polyunsaturated_fatty_acids')),
        "trans_fat_g": extract_numeric(row.get('fatty_acids_total_trans')),
        "cholesterol_mg": extract_numeric(row.get('cholesterol')),
        # Amino acids (BCAAs)
        "bcaa_g": extract_numeric(row.get('leucine')) + extract_numeric(row.get('isoleucine')) + extract_numeric(row.get('valine')),
        # Tags for filtering
        "tags": ", ".join(tags),
        "health_benefits": ", ".join(benefits),
    }
    
    return Document(page_content=text_new, metadata=metadata)


# Convert all rows to documents (original)
# documents = [row_to_document(row) for _, row in df.iterrows()]

# Convert all rows to ENHANCED documents (new)
documents_enhanced = [row_to_document_enhanced(row) for _, row in df.iterrows()]

# print(f"Created {len(documents)} original documents")
print(f"Created {len(documents_enhanced)} enhanced documents")
print("\n--- Example ENHANCED document (text_new) ---")
print(documents_enhanced[1].page_content)
print("\n--- Example ENHANCED metadata ---")
print(documents_enhanced[1].metadata)

Created 8789 enhanced documents

--- Example ENHANCED document (text_new) ---
Food: Nuts, pecans
Serving size: 100 g

SUMMARY:
Nuts, pecans is a food item rich in dietary fiber (9.6g).

NUTRITIONAL TAGS: high calorie, high fiber, excellent fiber source, high fat, low sugar, low sodium, heart healthy, good iron source, high potassium, excellent potassium source, heart health support, blood pressure support, high magnesium, muscle health support, good for relaxation, cholesterol free, caffeine free, omega fatty acids, healthy fats, heart healthy fats

HEALTH BENEFITS: heart health, muscle building and repair, digestive health

MACRONUTRIENTS:
- Calories: 691 kcal
- Protein: 9.17 g
- Carbohydrates: 13.86 g
- Total Fat: 72g
- Saturated Fat: 6.2g
- Fiber: 9.6 g
- Sugars: 3.97 g
- Net Carbs (approx): 4.3g

VITAMINS:
- Vitamin A: 56.00 IU (RAE: 3.00 mcg)
- Vitamin C: 1.1 mg
- Vitamin D: 0.00 IU
- Vitamin E: 1.40 mg
- Vitamin K: 3.5 mcg
- Vitamin B6: 0.210 mg
- Vitamin B12: 0.00 mcg
- Thiamin 

## 3. Document Preparation (No Chunking!)

**Why we DON'T split nutrition documents:**
- Each food item should remain as ONE complete document
- Splitting would separate food names from their nutritional values
- Our structured format (~1200 chars) fits well within embedding model limits
- Keeping documents intact ensures accurate retrieval

**Note:** For text-heavy documents (articles, PDFs), chunking is essential. 
For structured tabular data like this, keep records together.

In [None]:
# For nutrition data, we skip chunking to keep each food as one complete document
# Each document is ~1200 characters which fits well within embedding model limits

# chunks = documents  # No splitting needed!

chunks = documents_enhanced  # No splitting needed!

print(f"Total documents: {len(chunks)}")
print(f"Average document length: {np.mean([len(doc.page_content) for doc in chunks]):.0f} characters")
print(f"Max document length: {max([len(doc.page_content) for doc in chunks])} characters")

Total documents: 8789
Average document length: 969 characters
Max document length: 1079 characters


## 4. Create Embeddings and Vector Store

**Embedding Model Choice:**
We use `BAAI/bge-small-en-v1.5` instead of `all-MiniLM-L6-v2` because:
- Specifically optimized for retrieval tasks
- Better performance on semantic similarity benchmarks
- Same size (384 dimensions) but higher quality embeddings

**Alternative models:**
- `BAAI/bge-base-en-v1.5` - larger, even better quality (768 dim)
- `sentence-transformers/all-mpnet-base-v2` - good general purpose

In [7]:
# Initialize HuggingFace embeddings with a retrieval-optimized model
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  # Optimized for retrieval tasks
    model_kwargs={'device': 'cpu'},  # Use 'cuda' if you have GPU
    encode_kwargs={'normalize_embeddings': True}
)

print("Embedding model loaded: BAAI/bge-small-en-v1.5")
print(f"Embedding dimension: {len(embeddings.embed_query('test'))}")

  from .autonotebook import tqdm as notebook_tqdm


Embedding model loaded: BAAI/bge-small-en-v1.5
Embedding dimension: 384


In [6]:
# Create FAISS vector store from chunks
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
    distance_strategy=DistanceStrategy.COSINE  # Cosine similarity for normalized embeddings
)

print(f"Vector store created with {vectorstore.index.ntotal} vectors")

Vector store created with 8789 vectors


In [8]:
# Optional: Save vector store for later use
VECTORSTORE_PATH = "../data/processed/nutrition_vectorstore"

# # Save
# vectorstore.save_local(VECTORSTORE_PATH)
# print(f"Vector store saved to {VECTORSTORE_PATH}")

# To load later (uncomment when needed):
vectorstore = FAISS.load_local(VECTORSTORE_PATH, embeddings, allow_dangerous_deserialization=True)

## 5. Hybrid Retrieval (BM25 + Vector Search)

**Why Hybrid Search?**
- **Vector search** is great for semantic similarity ("foods high in protein" → finds protein-rich foods)
- **BM25 (keyword)** is great for exact matches ("pecans" → finds pecans exactly)
- **Combined** gives best of both worlds!

**MMR (Maximal Marginal Relevance):**
- Balances relevance with diversity
- Prevents returning 5 very similar foods
- `lambda_mult`: 0 = max diversity, 1 = max relevance

In [None]:
# Create BM25 retriever for keyword matching (good for food names)
bm25_retriever = BM25Retriever.from_documents(documents_enhanced)
bm25_retriever.k = 5

# Create vector retriever with MMR for diverse results
vector_retriever = vectorstore.as_retriever(
    search_type="mmr",  # Maximal Marginal Relevance
    search_kwargs={
        "k": 5,           # Return 5 documents
        "fetch_k": 20,    # Fetch 20 candidates first
        "lambda_mult": 0.7  # Balance: 0.7 relevance, 0.3 diversity
    }
)

# Combine both retrievers with ensemble
retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.4, 0.6]  # 40% keyword, 60% semantic
)

print("Hybrid retriever created (BM25 + Vector with MMR)")

# Test retrieval
test_query = "What foods are high in protein?"
retrieved_docs = retriever.invoke(test_query)

print(f"\nQuery: {test_query}")
print(f"Retrieved {len(retrieved_docs)} documents:\n")
for i, doc in enumerate(retrieved_docs[:3], 1):
    print(f"--- Document {i}: {doc.metadata.get('name', 'Unknown')} ---")
    print(f"Protein: {doc.metadata.get('protein_g', 'N/A')}g")
    print()

Hybrid retriever created (BM25 + Vector with MMR)

Query: What foods are high in protein?
Retrieved 10 documents:

--- Document 1: Fast foods, vegetables and mayonnaise, large patty; with condiments, hamburger; single ---
Protein: 11.34g

--- Document 2: Formulated bar, oats and chocolate, chewy, high fiber ---
Protein: 5.0g

--- Document 3: Fat, beef tallow ---
Protein: 0.0g



## 5.1 Smart Retriever (Query-Aware)

**Problem with pure semantic search:**
- "Foods highest in vitamin C" matches documents mentioning "vitamin C" in text
- But it can't sort by the actual numeric values!

**Solution: Query-aware retrieval**
- Detect if query asks for "highest/most/best" of a nutrient
- If yes → sort all documents by that nutrient's metadata value
- If no → use hybrid semantic search as usual

This gives accurate answers for ranking queries like "top sources of protein".

In [None]:
from typing import List

# Nutrient keyword mapping for query detection
NUTRIENT_KEYWORDS = {
    "vitamin_c": ["vitamin c", "vit c", "ascorbic acid"],
    "vitamin_a": ["vitamin a", "vit a", "retinol"],
    "vitamin_d": ["vitamin d", "vit d"],
    "vitamin_e": ["vitamin e", "vit e"],
    "vitamin_k": ["vitamin k", "vit k"],
    "vitamin_b6": ["vitamin b6", "vit b6", "pyridoxine"],
    "vitamin_b12": ["vitamin b12", "vit b12", "cobalamin"],
    "folate": ["folate", "folic acid"],
    "protein": ["protein"],
    "fiber": ["fiber", "fibre"],
    "calcium": ["calcium"],
    "iron": ["iron"],
    "potassium": ["potassium"],
    "magnesium": ["magnesium"],
    "zinc": ["zinc"],
    "calories": ["calories", "calorie", "energy", "kcal"],
    "carbs": ["carbohydrate", "carbs", "carb"],
    "fat": ["fat", "fats"],
    "sugar": ["sugar", "sugars"],
}

# Map nutrient names to metadata keys
METADATA_KEYS = {
    "vitamin_c": "vitamin_c_mg",
    "vitamin_a": "vitamin_a_iu",
    "vitamin_d": "vitamin_d_iu",
    "vitamin_e": "vitamin_e_mg",
    "vitamin_k": "vitamin_k_mcg",
    "vitamin_b6": "vitamin_b6_mg",
    "vitamin_b12": "vitamin_b12_mcg",
    "folate": "folate_mcg",
    "protein": "protein_g",
    "fiber": "fiber_g",
    "calcium": "calcium_mg",
    "iron": "iron_mg",
    "potassium": "potassium_mg",
    "magnesium": "magnesium_mg",
    "zinc": "zinc_mg",
    "calories": "calories",
    "carbs": "carbs_g",
    "fat": "fat_g",
    "sugar": "sugar_g",
}

def detect_ranking_query(query: str):
    """Detect if query asks for 'highest/most/best' of a nutrient.
    
    Returns:
        tuple: (nutrient_name, metadata_key) or (None, None)
    """
    query_lower = query.lower()
    
    # Check for ranking intent
    ranking_patterns = [
        "highest", "most", "best", "top", "rich in", "high in", 
        "good source", "great source", "excellent source",
        "richest", "maximum", "greatest"
    ]
    has_ranking_intent = any(p in query_lower for p in ranking_patterns)
    
    if not has_ranking_intent:
        return None, None
    
    # Find which nutrient is being asked about
    for nutrient, keywords in NUTRIENT_KEYWORDS.items():
        if any(kw in query_lower for kw in keywords):
            return nutrient, METADATA_KEYS.get(nutrient)
    
    return None, None

def get_top_by_nutrient(documents: List[Document], metadata_key: str, k: int = 5):
    """Sort documents by nutrient value and return top k."""
    sorted_docs = sorted(
        documents,
        key=lambda d: d.metadata.get(metadata_key, 0),
        reverse=True
    )
    return sorted_docs[:k]


class SmartNutritionRetriever:
    """Hybrid retriever that handles both semantic and ranking queries.
    
    For ranking queries ("highest in X"), sorts by metadata values.
    For general queries, uses ensemble (BM25 + vector) search.
    """
    
    def __init__(self, ensemble_retriever, all_documents: List[Document]):
        self.ensemble_retriever = ensemble_retriever
        self.all_documents = all_documents
    
    def invoke(self, query: str) -> List[Document]:
        """Retrieve documents based on query type."""
        nutrient, metadata_key = detect_ranking_query(query)
        
        if metadata_key:
            # Ranking query: sort all docs by nutrient value
            print(f"[Smart Retriever] Ranking query detected - sorting by {metadata_key}")
            results = get_top_by_nutrient(self.all_documents, metadata_key, k=5)
            # Show top values for debugging
            for doc in results[:3]:
                print(f"  - {doc.metadata['name']}: {doc.metadata.get(metadata_key, 0)}")
            return results
        else:
            # Semantic query: use hybrid search
            print("[Smart Retriever] Using hybrid search (BM25 + Vector)")
            return self.ensemble_retriever.invoke(query)


# Create smart retriever
smart_retriever = SmartNutritionRetriever(retriever, documents_enhanced)

# Test the smart retriever
print("Testing smart retriever:\n")

test_queries = [
    "Which foods have the most vitamin C?",
    "Tell me about pecans",
    "What foods are high in protein?",
]

for q in test_queries:
    print(f"Query: {q}")
    results = smart_retriever.invoke(q)
    print(f"Top result: {results[0].metadata['name']}\n")

## 6. Connect to Ollama LLM

**Prerequisites:**
1. Install Ollama: https://ollama.ai/
2. Pull a model: `ollama pull llama3.2` or `ollama pull mistral`
3. Ollama runs locally on port 11434 by default

**Popular models:**
- `llama3.2` - Meta's latest, good balance
- `mistral` - Fast and capable
- `phi3` - Microsoft's small but powerful model
- `gemma2` - Google's open model

In [10]:
# Initialize Ollama LLM
# Make sure Ollama is running: `ollama serve`
llm = OllamaLLM(
    model="llama3.2",  # Change to your preferred model
    temperature=0.7,    # Creativity level (0-1)
    # base_url="http://localhost:11434"  # Default Ollama URL
)

# Test the LLM connection
test_response = llm.invoke("Hello! Say 'Connection successful!' if you can read this.")
print(test_response)

Connection successful!


## 7. Create RAG Chain with Improved Prompt

**Prompt Engineering Tips:**
- Be specific about how to handle comparisons
- Tell the LLM to always cite serving sizes
- Instruct on handling missing information
- Guide formatting for better readability

In [27]:
from langchain_core.prompts import ChatPromptTemplate

# Improved system prompt for recipe ingredients and nutritional breakdown
system_prompt = """You are a nutrition calculator assistant for recipe ingredients.

YOUR ROLE:
Help users understand the nutritional content of food ingredients for recipe planning and meal preparation.

RULES:
1. Use ONLY the nutritional data from the CONTEXT below. Do not use external knowledge.
2. For each ingredient mentioned, provide a structured nutritional breakdown including:
   - Serving size (always mention this first)
   - Calories
   - Macros: protein, carbohydrates, fat, fiber
   - Key vitamins and minerals (only list notable amounts, not zeros)
   
3. When multiple ingredients are asked about:
   - List each ingredient's nutrition separately
   - If quantities are provided, calculate scaled values (e.g., "200g chicken" = 2x the 100g values)
   
4. Format responses as clear, scannable lists - easy to use for recipe tracking.

5. Highlight nutritional benefits using tags like:
   - "High protein", "Good source of fiber", "Rich in Vitamin C", etc.

6. If an ingredient is not found in the database, say:
   "I don't have nutritional data for [ingredient]. Try a similar item or check the exact name."

7. For health-related questions, remind users that values are estimates and to consult a professional for dietary advice.

RESPONSE FORMAT EXAMPLE:
Carrots, raw (100g serving):
- Calories: 41 kcal
- Protein: 0.9g | Carbs: 10g | Fat: 0.2g | Fiber: 2.8g
- Notable nutrients: Vitamin A (835mcg), Vitamin K (13mcg), Potassium (320mg)
- Benefits: High vitamin A, good for eye health, low calorie

CONTEXT:
{context}"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

# Create the RAG chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

print("RAG chain created with recipe-focused prompt!")

RAG chain created with recipe-focused prompt!


In [28]:
# Function to ask questions
def ask_nutrition(question: str) -> str:
    """Ask a question about nutrition data."""
    response = rag_chain.invoke({"input": question})
    return response["answer"]

# Test with diverse sample questions
sample_questions = [
    "What foods are high in protein?",
    "Tell me about the nutritional content of pecans",
    "Which foods have the most vitamin C?",
    "Compare eggplant and cornstarch nutritionally",
    "What are good sources of vitamin C?",
    "How much fiber is in lentils?",
    "List the top 5 foods highest in calcium.",
    "What is the vitamin D content in salmon?",
    "Which food has the highest iron content?",
    "What foods are rich in magnesium?",
]

for question in sample_questions:
    print(f"Q: {question}")
    print(f"A: {ask_nutrition(question)}")
    print("-" * 60)
    print()

Q: What foods are high in protein?
A: According to the data, some of the foods that are high in protein are:

1. Babyfood, dry, with apple and orange (25.40g of protein per serving)
2. Cereals, high fiber, Cinnamon Swirl, Instant Oatmeal, QUAKER (8.79g of protein per serving)
3. Babyfood, prepared with whole milk, high protein, cereal (8.70g of protein per serving)

Note that these values are based on a single serving size and may vary depending on the specific product and serving size.

Other foods that are high in protein include:

* Meat and poultry
* Fish and seafood
* Eggs
* Dairy products
* Legumes
* Nuts and seeds

It's worth noting that the data provided only includes babyfood, cereals, and a few other specific foods. If you're looking for more general information on high-protein foods, I'd be happy to provide some suggestions!
------------------------------------------------------------

Q: Tell me about the nutritional content of pecans
A: Pecans are a nutrient-rich food that

## 8. Interactive Chat

Use this cell to ask your own questions about the nutrition data:

In [29]:
# Ask your own question!
your_question = "What are good sources of vitamin C?"

response = rag_chain.invoke({"input": your_question})

print(f"Question: {your_question}\n")
print(f"Answer: {response['answer']}\n")
print("=" * 60)
print("Retrieved foods:")
for i, doc in enumerate(response['context'], 1):
    name = doc.metadata.get('name', 'Unknown')
    calories = doc.metadata.get('calories', 'N/A')
    protein = doc.metadata.get('protein_g', 'N/A')
    print(f"  {i}. {name} ({calories} cal, {protein}g protein)")

Question: What are good sources of vitamin C?

Answer: According to the information provided, good sources of vitamin C include:

* Milk (fortified with vitamin A and D) - contains about 1.9 mg per 100g serving
* Evaporated milk, canned - contains about 1.9 mg per 100g serving
* Lowfat chocolate milk - contains about 0.4 mg per 100g serving

Note that these values are based on the specific types of milk listed and may vary depending on the brand and type of milk you consume.

Retrieved foods:
  1. Beverages, low calorie, with high vitamin C with other added vitamins, powder, fruit-flavored drink (227.0 cal, 0.25g protein)
  2. Beverages, fortified with vitamin C, unsweetened, lemon, decaffeinated, instant, green, tea (378.0 cal, 0.0g protein)
  3. Spices, red or cayenne, pepper (318.0 cal, 12.01g protein)
  4. Snacks, yogurt covered with vitamin C, candy bits (415.0 cal, 0.0g protein)
  5. Beverages, fortified with vitamin C, diet, citrus, ready-to-drink, green, tea (1.0 cal, 0.0g prot

In [31]:
# Ask your own question!
your_question = "What's the nutrition in 200g chicken breast and 150g broccoli?"

response = rag_chain.invoke({"input": your_question})

print(f"Question: {your_question}\n")
print(f"Answer: {response['answer']}\n")
print("=" * 60)
print("Retrieved foods:")
for i, doc in enumerate(response['context'], 1):
    name = doc.metadata.get('name', 'Unknown')
    calories = doc.metadata.get('calories', 'N/A')
    protein = doc.metadata.get('protein_g', 'N/A')
    print(f"  {i}. {name} ({calories} cal, {protein}g protein)")

Question: What's the nutrition in 200g chicken breast and 150g broccoli?

Answer: To calculate the total nutrition of 200g chicken breast and 150g broccoli, I'll add up the nutrition values from each food item.

**Chicken Breast (200g)**

* Calories: 134
* Protein: 14.59g
* Carbohydrates: 1.79g
* Total Fat: 7.7g
* Saturated Fat: 2.5g
* Fiber: 0g
* Sugars: 0.43g

**Broccoli (150g)**

* Calories: 55
* Protein: 4.17g
* Carbohydrates: 11.84g
* Total Fat: 0.6g
* Saturated Fat: 0.1g
* Fiber: 5.1g
* Sugars: 2.6g

**Total Nutrition**

* Calories: 134 + 55 = 189
* Protein: 14.59g + 4.17g = 18.76g
* Carbohydrates: 1.79g + 11.84g = 13.63g
* Total Fat: 7.7g + 0.6g = 8.3g
* Saturated Fat: 2.5g + 0.1g = 2.6g
* Fiber: 0g + 5.1g = 5.1g
* Sugars: 0.43g + 2.6g = 3.03g

So, the nutrition of 200g chicken breast and 150g broccoli is approximately:

* Calories: 189
* Protein: 18.76g
* Carbohydrates: 13.63g
* Total Fat: 8.3g
* Saturated Fat: 2.6g
* Fiber: 5.1g
* Sugars: 3.03g

Please note that this calculati

In [32]:
# Ask your own question!
your_question = "I'm making a salad with spinach, avocado, and almonds - what are the nutrients?"

response = rag_chain.invoke({"input": your_question})

print(f"Question: {your_question}\n")
print(f"Answer: {response['answer']}\n")
print("=" * 60)
print("Retrieved foods:")
for i, doc in enumerate(response['context'], 1):
    name = doc.metadata.get('name', 'Unknown')
    calories = doc.metadata.get('calories', 'N/A')
    protein = doc.metadata.get('protein_g', 'N/A')
    print(f"  {i}. {name} ({calories} cal, {protein}g protein)")

Question: I'm making a salad with spinach, avocado, and almonds - what are the nutrients?

Answer: Let's calculate the nutrients for your salad:

**Spinach**

* Calories: 7 per 100g serving
* Protein: 3.2g per 100g serving
* Fat: 0.9g per 100g serving (mostly unsaturated)
* Fiber: 4.1g per 100g serving
* Vitamins:
	+ Vitamin A: 213mcg (RAE) per 100g serving
	+ Vitamin C: 28mg (RAE) per 100g serving
	+ Folate: 115mcg (FAO) per 100g serving
* Minerals:
	+ Calcium: 35mg per 100g serving
	+ Iron: 2.7mg per 100g serving
	+ Magnesium: 69mg per 100g serving

**Avocado**

* Calories: 160 per 100g serving
* Protein: 3.4g per 100g serving
* Fat: 14.5g per 100g serving (mostly monounsaturated)
* Fiber: 10.5g per 100g serving
* Vitamins:
	+ Vitamin A: 20mcg (RAE) per 100g serving
	+ Vitamin C: 10mg (RAE) per 100g serving
	+ Folate: 20mcg (FAO) per 100g serving
* Minerals:
	+ Calcium: 17mg per 100g serving
	+ Iron: 0.9mg per 100g serving
	+ Magnesium: 59mg per 100g serving

**Almonds**

* Calories:

In [33]:
# Ask your own question!
your_question = "Give me the nutritional breakdown for eggs and olive oil"

response = rag_chain.invoke({"input": your_question})

print(f"Question: {your_question}\n")
print(f"Answer: {response['answer']}\n")
print("=" * 60)
print("Retrieved foods:")
for i, doc in enumerate(response['context'], 1):
    name = doc.metadata.get('name', 'Unknown')
    calories = doc.metadata.get('calories', 'N/A')
    protein = doc.metadata.get('protein_g', 'N/A')
    print(f"  {i}. {name} ({calories} cal, {protein}g protein)")

Question: Give me the nutritional breakdown for eggs and olive oil

Answer: Here is the nutritional breakdown for eggs and olive oil:

**Eggs**

* Serving size: 1 large egg
* Calories: 70
* Protein: 6 grams
* Fat: 5 grams (1.5 grams saturated)
* Carbohydrates: 0.6 grams
* Fiber: 0 grams
* Sugars: 0.6 grams
* Sodium: 60 milligrams

Vitamins:

* Vitamin A: 6 micrograms (RE)
* Vitamin D: 0.5 micrograms (RE)
* Vitamin E: 0.1 milligram (10% RE)
* Vitamin K: 7.4 micrograms (9% RE)
* Vitamin B12: 0.6 micrograms (10% RE)

Minerals:

* Calcium: 6 milligrams
* Iron: 0.5 milligrams
* Magnesium: 8 milligrams
* Phosphorus: 52 milligrams
* Potassium: 54 milligrams
* Sodium: 60 milligrams

**Olive Oil**

* Serving size: 1 tablespoon (14 grams)
* Calories: 120
* Fat: 14 grams (mostly monounsaturated)
* Saturated fat: 2 grams
* Carbohydrates: 0 grams
* Fiber: 0 grams
* Sugars: 0 grams
* Sodium: 0 milligrams

Vitamins:

* Vitamin A: 0.1 micrograms (RE)
* Vitamin D: 0 micrograms (RE)
* Vitamin E: 0.4 mil