In [18]:
from transformers import pipeline
import google.generativeai as genai
from dotenv import load_dotenv
import os
import re
from fractions import Fraction
import json

# Load environment variables
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Initialize models
food_ner = pipeline(
    "token-classification",
    model="Dizex/InstaFoodRoBERTa-NER",
    aggregation_strategy="simple",
    device=-1
)
genai.configure(api_key=GEMINI_API_KEY)
gemini = genai.GenerativeModel("gemini-2.0-flash")

def parse_quantity(qty_str):
    try:
        if ' ' in qty_str and '/' in qty_str:
            whole, fraction = qty_str.split()
            return float(whole) + float(Fraction(fraction))
        return float(Fraction(qty_str))
    except:
        return None

def extract_ingredients(text):
    entities = food_ner(text)
    ingredients = []
    
    # Merge hyphenated ingredients and list markers
    merged_entities = []
    buffer = []
    
    for entity in entities:
        word = entity['word'].strip()
        
        if word == '-':
            if buffer:
                buffer[-1]['word'] += '-'
        elif word.startswith('-'):
            if buffer:
                buffer[-1]['word'] += word
            else:
                entity['word'] = word[1:].strip()
                buffer.append(entity)
        else:
            if buffer:
                merged_entities.append({
                    'entity_group': 'FOOD',
                    'word': ' '.join([e['word'] for e in buffer]),
                    'score': sum(e['score'] for e in buffer)/len(buffer),
                    'start': buffer[0]['start'],
                    'end': entity['end']
                })
                buffer = []
            buffer.append(entity)
    
    # Process remaining buffer
    if buffer:
        merged_entities.append({
            'entity_group': 'FOOD',
            'word': ' '.join([e['word'] for e in buffer]),
            'score': sum(e['score'] for e in buffer)/len(buffer),
            'start': buffer[0]['start'],
            'end': buffer[-1]['end']
        })
    
    # Now process merged entities
    for entity in merged_entities:
        if entity['score'] < 0.9:  # Confidence threshold
            continue
            
        # Enhanced pattern for recipe quantities
        match = re.search(
            r'(\d+/\d+|\d+\.\d+|\d+\s\d+/\d+|\d+)\s*(cup|tbsp|tsp|oz|lb|teaspoon|tablespoon)s?\s*(.*)',
            entity['word'],
            re.IGNORECASE
        )
        
        if match:
            qty, unit, ingredient = match.groups()
            quantity = parse_quantity(qty)
            if quantity and unit:
                ingredients.append({
                    "ingredient": ingredient.replace('-', ' ').strip().lower(),
                    "quantity": quantity,
                    "unit": unit.lower()
                })
    
    return ingredients

def process_phrase(entities, ingredients):
    # Reconstruct full text with original spacing
    full_text = " ".join([e['word'].strip() for e in entities]).replace(" - ", "-")
    
    # Enhanced pattern for recipe-style measurements
    match = re.search(
        r'(-?\s*)?(\d+/\d+|\d+\.\d+|\d+\s\d+/\d+|\d+)\s*(cup|tbsp|tsp|oz|lb|teaspoon|tablespoon)s?\s*(.*)',
        full_text,
        re.IGNORECASE
    )
    
    if match:
        _, qty, unit, ingredient = match.groups()
        quantity = parse_quantity(qty)
        if quantity and unit:
            ingredients.append({
                "ingredient": ingredient.strip().lower(),
                "quantity": quantity,
                "unit": unit.lower()
            })

def convert_with_gemini(ingredients):
    prompt = f"""Convert these baking measurements to grams. Return JSON format:
    [{{"ingredient": "...", "grams": number, "notes": "..."}}]
    
    Rules:
    1 cup flour = 125g (spooned & leveled)
    1 cup sugar = 200g
    1 cup butter = 227g
    1 tbsp = 3 teaspoons
    
    Ingredients:
    {json.dumps(ingredients, indent=2)}"""
    
    try:
        response = gemini.generate_content(prompt)
        clean_response = response.text.replace('```json', '').replace('```', '').strip()
        return json.loads(clean_response)
    except Exception as e:
        print(f"Gemini Error: {str(e)}")
        return None

def process_recipe(text):
    print("🔍 Extracting ingredients...")
    ingredients = extract_ingredients(text)
    
    if not ingredients:
        print("❌ Failed extraction. Raw NER entities:")
        # Convert numpy floats to Python floats for JSON serialization
        raw_entities = food_ner(text)
        serializable_entities = [
            {
                "entity_group": e["entity_group"],
                "score": float(e["score"]),  # Convert numpy float32 to Python float
                "word": e["word"],
                "start": e["start"],
                "end": e["end"]
            }
            for e in raw_entities
        ]
        print(json.dumps(serializable_entities, indent=2))
        return
    
    # Rest of the code remains the same...

# Test with your recipe
if __name__ == "__main__":
    recipe = """
    Classic Cookies:
    - 2 1/4 cups all-purpose flour
    - 1 teaspoon baking soda
    - 1 cup unsalted butter
    - 3/4 cup packed brown sugar
    - 2 cups chocolate chips
    """
    process_recipe(recipe)

Device set to use cpu


🔍 Extracting ingredients...
❌ Failed extraction. Raw NER entities:
[
  {
    "entity_group": "FOOD",
    "score": 0.9750493764877319,
    "word": " Cookies",
    "start": 13,
    "end": 20
  },
  {
    "entity_group": "FOOD",
    "score": 0.9996922016143799,
    "word": " all",
    "start": 39,
    "end": 42
  },
  {
    "entity_group": "FOOD",
    "score": 0.9996534585952759,
    "word": "-",
    "start": 42,
    "end": 43
  },
  {
    "entity_group": "FOOD",
    "score": 0.9945042133331299,
    "word": "purpose flour",
    "start": 43,
    "end": 56
  },
  {
    "entity_group": "FOOD",
    "score": 0.9962606430053711,
    "word": " baking soda",
    "start": 74,
    "end": 85
  },
  {
    "entity_group": "FOOD",
    "score": 0.9965425133705139,
    "word": " butter",
    "start": 107,
    "end": 113
  },
  {
    "entity_group": "FOOD",
    "score": 0.9990413188934326,
    "word": " brown sugar",
    "start": 135,
    "end": 146
  },
  {
    "entity_group": "FOOD",
    "score": 0.9995