**Barcode scanning to look up nutritional information (OpenFoodFacts, with USDA FoodData Central as a fallback).**

In [2]:
import json
import os
import time

import cv2
import requests
from PIL import Image
from pyzbar.pyzbar import decode
from tabulate import tabulate

# =============== CONFIGURATION ===============
# The USDA key is read from the USDA_API_KEY environment variable so that no
# credential is stored in the notebook itself.
# Get a free key from https://fdc.nal.usda.gov/api-guide.html
# When the variable is unset the placeholder below is used, and
# fetch_usda_nutrition() treats that placeholder as "no key" and skips USDA.
# NOTE(review): a real key used to be hard-coded on this line; it should be
# revoked, since it has already been shared with the source.
USDA_API_KEY = os.environ.get("USDA_API_KEY", "YOUR_USDA_API_KEY")

# =============== API FUNCTIONS FOR NUTRITION ===============

def fetch_openfoodfacts_nutrition(barcode):
    """
    Fetch product name, ingredients and per-100g nutrition from OpenFoodFacts.

    Args:
        barcode: Barcode text identifying the product.

    Returns:
        A dict with the keys "source", "barcode", "name", "ingredients" and
        "nutrition_per_100g" (only the nutrients the API actually reported),
        or None when the product is not found or the request/response fails.
    """
    from urllib.parse import quote  # local import: only this function needs it

    # Percent-encode the scanned text so unexpected characters (e.g. "/" or "?")
    # cannot change the request path.
    encoded_barcode = quote(str(barcode), safe="")
    url = f"https://world.openfoodfacts.org/api/v2/product/{encoded_barcode}.json"
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"[-] OpenFoodFacts API error: {e}")
        return None

    if not isinstance(data, dict) or data.get("status") != 1 or "product" not in data:
        return None

    product = data["product"]
    nutriments = product.get("nutriments") or {}

    # Only convert sodium when a number was reported; a missing value must stay
    # missing instead of being shown as 0 mg, and a null must not crash.
    sodium_g = nutriments.get("sodium_100g")
    sodium_mg = sodium_g * 1000 if isinstance(sodium_g, (int, float)) else None

    nutrition_info = {
        "Calories (kcal)": nutriments.get("energy-kcal_100g"),
        "Fat (g)": nutriments.get("fat_100g"),
        "Saturated Fat (g)": nutriments.get("saturated-fat_100g"),
        "Carbohydrates (g)": nutriments.get("carbohydrates_100g"),
        "Sugars (g)": nutriments.get("sugars_100g"),
        "Protein (g)": nutriments.get("proteins_100g"),
        "Salt (g)": nutriments.get("salt_100g"),
        "Sodium (mg)": sodium_mg,  # converted from g to mg
    }

    # Filter out any keys with None values for a cleaner JSON
    nutrition_info = {k: v for k, v in nutrition_info.items() if v is not None}

    return {
        "source": "OpenFoodFacts",
        "barcode": barcode,
        # `or` also replaces an empty string, not just a missing key.
        "name": product.get("product_name") or "Unknown Product",
        # Fall back to the language-neutral field when no English text exists.
        "ingredients": (
            product.get("ingredients_text_en")
            or product.get("ingredients_text")
            or "Not specified"
        ),
        "nutrition_per_100g": nutrition_info,
    }

def fetch_usda_nutrition(barcode):
    """
    Fallback API: fetch nutrition details from USDA FoodData Central.

    The search endpoint does free-text matching, so the result whose
    "gtinUpc" equals the scanned barcode (ignoring leading zeros) is preferred.
    If no result carries a matching code, the top hit is used as before and a
    warning is printed.

    Args:
        barcode: Barcode text used as the search query.

    Returns:
        A dict with the keys "source", "barcode", "name", "ingredients" and
        "nutrition_per_100g", or None when no API key is configured, nothing
        is found, or the request/response fails.
    """
    if not USDA_API_KEY or USDA_API_KEY == "YOUR_USDA_API_KEY":
        return None  # Skip if no API key

    url = "https://api.nal.usda.gov/fdc/v1/foods/search"
    try:
        # `params` lets requests do the URL encoding of the query values.
        res = requests.get(
            url,
            params={"query": barcode, "api_key": USDA_API_KEY},
            timeout=10,
        )
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"[-] USDA API error: {e}")
        return None

    foods = data.get("foods") if isinstance(data, dict) else None
    if not foods:
        return None

    wanted_code = str(barcode).lstrip("0")
    food = next(
        (
            candidate
            for candidate in foods
            if str(candidate.get("gtinUpc", "")).lstrip("0") == wanted_code
        ),
        None,
    )
    if food is None:
        print("[!] USDA: no result with a matching UPC; using the top search hit.")
        food = foods[0]

    nutrients_map = {}
    for n in food.get("foodNutrients", []):
        nutrient_name = n.get("nutrientName")
        if nutrient_name is None:
            continue  # malformed entry; nothing to key it by
        # An "Energy" row expressed in kJ must not overwrite the kcal value.
        if nutrient_name == "Energy" and str(n.get("unitName", "")).upper() == "KJ":
            continue
        nutrients_map[nutrient_name] = n.get("value", "N/A")

    nutrition_info = {
        "Calories (kcal)": nutrients_map.get("Energy"),
        "Fat (g)": nutrients_map.get("Total lipid (fat)"),
        "Carbohydrates (g)": nutrients_map.get("Carbohydrate, by difference"),
        # Some records label sugars "Total Sugars" instead of the NLEA name.
        "Sugars (g)": nutrients_map.get(
            "Sugars, total including NLEA", nutrients_map.get("Total Sugars")
        ),
        "Protein (g)": nutrients_map.get("Protein"),
        "Sodium (mg)": nutrients_map.get("Sodium, Na"),
    }
    # Filter out any keys with None values
    nutrition_info = {k: v for k, v in nutrition_info.items() if v is not None}

    return {
        "source": "USDA FoodData Central",
        "barcode": barcode,
        "name": food.get("description", "Unknown Product"),
        "ingredients": food.get("ingredients", "Not specified"),
        "nutrition_per_100g": nutrition_info,
    }

# =============== CAMERA + BARCODE (UNCHANGED) ===============
class FoodLabelScanner:
    """Reads a barcode from a live camera feed using OpenCV and pyzbar."""

    def capture_from_camera(self, camera_index=1):
        """
        Show the camera feed until a barcode is decoded or 'q' is pressed.

        Args:
            camera_index: Device index passed to cv2.VideoCapture. Defaults to
                1, the value that was previously hard-coded.

        Returns:
            The decoded barcode text, or None if the camera could not be
            opened, a frame could not be read, or the user quit first.
        """
        cap = cv2.VideoCapture(camera_index)
        if not cap.isOpened():
            print("Error: Could not open camera.")
            return None
        print("👉 Point camera at barcode... (Press 'q' to quit)")
        barcode = None
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                decoded_objects = decode(Image.fromarray(frame))
                if decoded_objects:
                    # errors="replace" keeps a non-UTF-8 payload from aborting the scan.
                    barcode = decoded_objects[0].data.decode("utf-8", errors="replace")
                    cv2.putText(frame, f"Detected: {barcode}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                cv2.imshow("Scanning...", frame)

                if barcode:
                    print(f"✅ Barcode Detected: {barcode}")
                    # The window is only redrawn while waitKey runs, so wait
                    # there (not in time.sleep) to actually show the
                    # annotated frame for two seconds.
                    cv2.waitKey(2000)
                    break
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera and close the window, even if decoding raised.
            cap.release()
            cv2.destroyAllWindows()
        return barcode

# =============== MAIN PIPELINE ===============
if __name__ == "__main__":
    # Scan a barcode, look it up (OpenFoodFacts first, USDA as fallback),
    # print the result as a table and save it as "<barcode>_data.json".
    scanner = FoodLabelScanner()
    barcode = scanner.capture_from_camera()

    if not barcode:
        print("\n❌ No barcode detected. Exiting.")
    else:
        print(f"\n🚀 Starting nutrition search for barcode: {barcode}\n")

        print("[1/2] Checking OpenFoodFacts API...")
        result = fetch_openfoodfacts_nutrition(barcode)

        if not result:
            print("[2/2] Checking USDA FoodData Central API...")
            result = fetch_usda_nutrition(barcode)

        print("\n" + "="*50)
        if result and result.get("nutrition_per_100g"):
            print(f"✅ Success! Found data from: {result['source']}\n")

            print(f"📦 Product: {result['name']}")
            print(f"🌿 Ingredients: {result['ingredients']}\n")

            nutrition_data = result["nutrition_per_100g"]
            table_data = [[key, value] for key, value in nutrition_data.items()]

            print("--- Nutrition Facts (per 100g) ---")
            print(tabulate(table_data, headers=["Nutrient", "Value"], tablefmt="grid"))

            # --- SAVE OUTPUT TO JSON FILE ---
            # The scanned text is untrusted (a QR code can hold "/" or ".."),
            # so keep only filename-safe characters. Purely alphanumeric
            # barcodes produce the same filename as before.
            safe_barcode = "".join(
                ch if ch.isalnum() or ch in "-_" else "_" for ch in barcode
            )
            filename = f"{safe_barcode}_data.json"
            try:
                with open(filename, 'w', encoding="utf-8") as json_file:
                    json.dump(result, json_file, indent=4)
            except (OSError, TypeError, ValueError) as e:
                # OSError: the file could not be written;
                # TypeError/ValueError: the result is not JSON-serializable.
                print(f"\n❌ Error saving data to JSON file: {e}")
            else:
                print(f"\n💾 Data successfully saved to: {filename}")

        else:
            print("❌ Failure: Could not retrieve nutritional information from any API.")
        print("="*50)

👉 Point camera at barcode... (Press 'q' to quit)

❌ No barcode detected. Exiting.


**Normalization** 

In [5]:
import json
import re

# --- Step 1: Load the Data from the Previous Step ---

# Barcode whose saved JSON file should be normalized.
# Replace this with the barcode you actually scanned, or make it dynamic.
barcode = "8904004402827"
filename = "{}_data.json".format(barcode)

# Start from "nothing loaded"; only a fully successful load replaces it.
product_data = None
try:
    with open(filename, 'r') as f:
        _loaded = json.load(f)
    print(f"✅ Successfully loaded '{filename}' for normalization.")
    product_data = _loaded
except FileNotFoundError:
    # Exit or handle the error appropriately in a larger application
    print(f"❌ Error: The file {filename} was not found. Please run the first script.")
except Exception as e:
    print(f"An error occurred: {e}")

# --- Step 2: Define Normalization Maps ---

# Label used in the 'nutrition_per_100g' dictionary -> standardized key.
nutrient_mapping = dict([
    ("Calories (kcal)", "calories"),
    ("Fat (g)", "fat"),
    ("Saturated Fat (g)", "saturated_fat"),
    ("Carbohydrates (g)", "carbohydrates"),
    ("Sugars (g)", "sugar"),
    ("Protein (g)", "protein"),
    ("Salt (g)", "salt"),
    ("Sodium (mg)", "sodium"),
])

# Canonical ingredient name -> the terms that identify it in ingredient text.
_canonical_terms = {
    # Sugars
    "sugar": (
        "sugar",
        "sucrose",
        "glucose",
        "fructose",
        "dextrose",
        "corn syrup",
        "high fructose corn syrup",
        "cane sugar",
        "invert sugar",
    ),
    # Salts
    "salt": ("salt", "sodium chloride"),
    # Fats
    "palm_oil": ("palm oil", "palmolein"),
    "hydrogenated_fat": ("hydrogenated vegetable fat",),
}

# Flattened lookup: the key is the term to search for, the value is the
# canonical name.
ingredient_mapping = {
    term: canonical
    for canonical, terms in _canonical_terms.items()
    for term in terms
}


# --- Step 3: Perform Normalization ---

normalized_data = {}

if product_data:
    # A. Rename the nutrition keys to their standardized names. Keys that have
    # no entry in nutrient_mapping are dropped.
    raw_nutrition = product_data.get("nutrition_per_100g") or {}
    normalized_nutrition = {
        nutrient_mapping[key]: value
        for key, value in raw_nutrition.items()
        if nutrient_mapping.get(key)
    }

    # B. Normalize ingredients by finding canonical terms in the ingredient string.
    # `or ""` also covers a JSON null, which would otherwise break .lower().
    ingredients_text = (product_data.get("ingredients") or "").lower()

    # Split ingredients by common delimiters like commas, brackets or periods
    individual_ingredients = re.split(r'[,\(\)\[\]\.]+', ingredients_text)

    # A term must start at a word boundary, so "unsalted" is not counted as
    # "salt", while endings such as "sugars" or "salted" still match.
    term_patterns = [
        (re.compile(r"\b" + re.escape(search_term)), canonical_name)
        for search_term, canonical_name in ingredient_mapping.items()
    ]
    # A set avoids duplicates when several terms map to the same name.
    found_standard_ingredients = {
        canonical_name
        for ingredient_chunk in individual_ingredients
        for pattern, canonical_name in term_patterns
        if pattern.search(ingredient_chunk)
    }

    # C. Assemble the final normalized data object
    normalized_data = {
        "barcode": product_data.get("barcode"),
        "name": product_data.get("name"),
        "source": product_data.get("source"),
        "normalized_nutrition": normalized_nutrition,
        # Sorted so the JSON output is the same on every run (set order is not).
        "found_standard_ingredients": sorted(found_standard_ingredients),
        "raw_ingredients_text": product_data.get("ingredients")
    }

    print("\n✅ Normalization Complete!")
    # Pretty print the final normalized data
    print(json.dumps(normalized_data, indent=4))

✅ Successfully loaded '8904004402827_data.json' for normalization.

✅ Normalization Complete!
{
    "barcode": "8904004402827",
    "name": "Panchratan mix",
    "source": "OpenFoodFacts",
    "normalized_nutrition": {
        "calories": 526,
        "fat": 32.13,
        "saturated_fat": 7.13,
        "carbohydrates": 52.61,
        "sugar": 10.3,
        "protein": 6.61,
        "salt": 1200,
        "sodium": 480000
    },
    "found_standard_ingredients": [
        "palm_oil",
        "salt",
        "sugar"
    ],
    "raw_ingredients_text": "Potato, Refined Palmolein Oil, Almond. Refined Sugar, Raisins (7%), Cashew Nuts (7%), Curry Leaves, Sesame Seed, Rock Salt, Red Chilli Powder, Poppy Seed, Acidity Regulator NS 330), Cumin Powder & Black Salt. ergen: Contains Cashews, Almonds & Sesame Seeds. May Contains Peanuts, Other Tree Nuts, Milk & Sulphite. 8904004 4028271 Please call consumer care executive no. +91-9209109999 For any questions, comments or complainis on quality +91-750