In [None]:
from transformers import pipeline
import google.generativeai as genai
from dotenv import load_dotenv
import os
import re
from fractions import Fraction
import json

# Load environment variables (python-dotenv reads a local .env file when one
# exists) so the Gemini API key does not have to be hard-coded here.
load_dotenv()
# NOTE(review): os.getenv returns None when GEMINI_API_KEY is unset and nothing
# below checks for that -- confirm the key is always provided.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Initialize models
# food_ner: Hugging Face token-classification pipeline used by
# extract_ingredients to pick out entities whose entity_group is 'FOOD'.
food_ner = pipeline(
    "token-classification",
    model="Dizex/InstaFoodRoBERTa-NER",
    aggregation_strategy="simple",  # presumably merges sub-word tokens into entity spans -- see transformers docs
    device=-1  # -1 selects the CPU (the pipeline logs "Device set to use cpu")
)
# gemini: generative model client used by convert_with_gemini.
genai.configure(api_key=GEMINI_API_KEY)
gemini = genai.GenerativeModel("gemini-2.0-flash")

def parse_quantity(qty_str):
    """Convert a textual quantity into a float.

    Accepts whole numbers ("2"), decimals ("1.5"), simple fractions ("3/4")
    and mixed numbers ("2 1/4"). Leading/trailing whitespace is ignored and
    any run of whitespace may separate the parts of a mixed number.

    Args:
        qty_str: The quantity text to parse.

    Returns:
        The quantity as a float, or None when the input is not a string or
        cannot be interpreted as a number (malformed text, zero denominator,
        value too large for a float).
    """
    try:
        # split() with no argument both trims the ends and treats tabs or
        # repeated spaces as one separator, so " 3/4 " and "2\t1/4" parse.
        parts = qty_str.split()
        if len(parts) == 2 and '/' in parts[1]:
            whole, fraction = parts
            return float(whole) + float(Fraction(fraction))
        if len(parts) == 1:
            return float(Fraction(parts[0]))
        return None
    except (AttributeError, TypeError, ValueError,
            ZeroDivisionError, OverflowError):
        # Only parse failures are swallowed; KeyboardInterrupt/SystemExit
        # now propagate instead of being hidden by a bare except.
        return None

# A hyphen followed by the remainder of that line (captured without the hyphen).
_INGREDIENT_LINE_RE = re.compile(r'-\s*(.*?)(?=\n|$)')

# Groups: quantity, unit (singular form), remaining text. The \b after the
# optional plural "s" stops a unit from matching as the prefix of a longer
# word, e.g. "2 cupcakes" is no longer read as 2 cup of "cakes".
_MEASUREMENT_RE = re.compile(
    r'(\d+/\d+|\d+\.\d+|\d+\s\d+/\d+|\d+)\s*'
    r'(cup|tbsp|tsp|oz|lb|teaspoon|tablespoon)s?\b\s*(.*)',
    re.IGNORECASE,
)


def extract_ingredients(text):
    """Extract measured ingredients from hyphen-bulleted recipe text.

    Each candidate line must contain a quantity, a known unit (cup, tbsp,
    tsp, oz, lb, teaspoon, tablespoon; optional plural "s") and trailing
    text in which the module-level ``food_ner`` pipeline finds at least one
    entity whose ``entity_group`` is ``'FOOD'``.

    Args:
        text: Recipe text. Empty or None yields an empty list.

    Returns:
        A list of dicts with keys ``"ingredient"`` (lower-cased name built
        from the FOOD entities), ``"quantity"`` (float) and ``"unit"``
        (lower-cased unit as matched, without the plural "s"). Lines that do
        not match, whose quantity is unparseable or zero, or that contain no
        FOOD entity are skipped.
    """
    if not text:
        return []

    ingredients = []
    for line in _INGREDIENT_LINE_RE.findall(text):
        # Step 1: pull quantity / unit / remainder out of the line.
        match = _MEASUREMENT_RE.search(line)
        if match is None:
            continue

        qty, unit, ingredient_part = match.groups()
        quantity = parse_quantity(qty)
        # Check cheap conditions before paying for model inference.
        if not quantity or not ingredient_part.strip():
            continue

        # Step 2: validate the remainder with NER and keep FOOD entities.
        food_items = [
            entity['word'].strip()
            for entity in food_ner(ingredient_part)
            if entity['entity_group'] == 'FOOD'
        ]
        if not food_items:
            continue

        # Step 3: rebuild the name; hyphenated words arrive as separate
        # entities ("all", "-", "purpose flour"), so re-glue " - " to "-".
        full_ingredient = ' '.join(food_items).replace(' - ', '-')
        ingredients.append({
            "ingredient": full_ingredient.lower(),
            "quantity": quantity,
            "unit": unit.lower(),
        })

    return ingredients

def convert_with_gemini(ingredients):
    """Ask Gemini to convert ingredient measurements to grams.

    Builds a prompt containing a few reference conversions plus the
    JSON-serialised ingredients, sends it through the module-level ``gemini``
    model, strips Markdown code fences from the reply and parses it as JSON.

    Args:
        ingredients: JSON-serialisable ingredient records (as produced by
            extract_ingredients).

    Returns:
        The parsed JSON reply (the prompt requests a list of objects with
        "ingredient", "grams" and "notes"), an empty list when there is
        nothing to convert, or None when the request or the parsing fails.
    """
    # Nothing to convert: skip the remote call entirely.
    if not ingredients:
        return []

    prompt = f"""Convert these baking measurements to grams. Return JSON format:
    [{{"ingredient": "...", "grams": number, "notes": "..."}}]
    
    Standard conversions:
    1 cup flour = 125g (spooned & leveled)
    1 cup sugar = 200g
    1 cup butter = 227g
    1 tbsp = 15g
    1 tsp = 5g
    
    Ingredients to convert:
    {json.dumps(ingredients, indent=2)}"""

    try:
        response = gemini.generate_content(prompt)
        # The model may wrap its JSON in ```json ... ``` fences; remove them.
        clean_response = response.text.replace('```json', '').replace('```', '').strip()
        return json.loads(clean_response)
    except Exception as e:
        # Service boundary: report any API or JSON failure and signal it
        # with None rather than crashing the caller.
        print(f"Gemini Error: {str(e)}")
        return None

def process_recipe(text):
    """Run the full pipeline on a recipe and print each stage's outcome.

    Extracts ingredients from ``text``, prints them, requests gram
    conversions via convert_with_gemini and prints those as well. Stops
    after the extraction message when no ingredients are found, and prints
    no conversion section when the conversion result is falsy.

    Args:
        text: Raw recipe text.

    Returns:
        None; all results are reported on stdout.
    """
    print("🔍 Extracting ingredients...")
    detected = extract_ingredients(text)

    # Guard: nothing was recognised, so report it and stop.
    if not detected:
        print("❌ No ingredients detected in:", text)
        return

    print("\n📋 Detected Ingredients:")
    for ing in detected:
        print(f"- {ing['quantity']} {ing['unit']} {ing['ingredient']}")

    print("\n⚡ Converting with Gemini...")
    conversions = convert_with_gemini(detected)

    # Guard: conversion failed or came back empty, so there is nothing to show.
    if not conversions:
        return

    print("\n✅ Precision Conversions:")
    for item in conversions:
        print(f"{item['ingredient']}: {item['grams']}g - {item.get('notes', '')}")

# Demo entry point: when this file is run as a script, push a small sample
# recipe through process_recipe. Each ingredient sits on its own "- " bullet
# line, which is the format extract_ingredients looks for; the title line has
# no bullet.
if __name__ == "__main__":
    recipe = """
    Classic Cookies:
    - 2 1/4 cups all-purpose flour
    - 1 teaspoon baking soda
    - 1 cup unsalted butter
    - 3/4 cup packed brown sugar
    - 2 cups chocolate chips
    """
    process_recipe(recipe)

Device set to use cpu


🔍 Extracting ingredients...
❌ Failed extraction. Raw NER entities:
[
  {
    "entity_group": "FOOD",
    "score": 0.9750493764877319,
    "word": " Cookies",
    "start": 13,
    "end": 20
  },
  {
    "entity_group": "FOOD",
    "score": 0.9996922016143799,
    "word": " all",
    "start": 39,
    "end": 42
  },
  {
    "entity_group": "FOOD",
    "score": 0.9996534585952759,
    "word": "-",
    "start": 42,
    "end": 43
  },
  {
    "entity_group": "FOOD",
    "score": 0.9945042133331299,
    "word": "purpose flour",
    "start": 43,
    "end": 56
  },
  {
    "entity_group": "FOOD",
    "score": 0.9962606430053711,
    "word": " baking soda",
    "start": 74,
    "end": 85
  },
  {
    "entity_group": "FOOD",
    "score": 0.9965425133705139,
    "word": " butter",
    "start": 107,
    "end": 113
  },
  {
    "entity_group": "FOOD",
    "score": 0.9990413188934326,
    "word": " brown sugar",
    "start": 135,
    "end": 146
  },
  {
    "entity_group": "FOOD",
    "score": 0.9995