In [None]:
from transformers import pipeline
import google.generativeai as genai
from dotenv import load_dotenv
import os
import re
from fractions import Fraction
import json

# 1. Load environment variables
# load_dotenv() runs first so that os.getenv below can see any variables it
# adds to the environment (presumably from a local .env file).
load_dotenv()
# os.getenv returns None when the variable is unset; nothing here checks for
# that, so a missing key only surfaces later when Gemini is actually called.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# 2. Initialize models (CPU compatible)
# Token-classification pipeline used by extract_ingredients(). With
# aggregation_strategy="simple" it returns entity-level dicts carrying
# 'entity_group', 'score', 'word', 'start' and 'end' keys.
food_ner = pipeline(
    "token-classification",
    model="Dizex/InstaFoodRoBERTa-NER",
    aggregation_strategy="simple",
    device=-1  # Force CPU usage explicitly
)
# Gemini client used by convert_with_gemini() for the gram conversion.
genai.configure(api_key=GEMINI_API_KEY)
gemini = genai.GenerativeModel("gemini-pro")

# 3. Enhanced quantity parser
def parse_quantity(qty_str):
    """Convert a textual quantity into a float.

    Accepted forms (surrounding whitespace is ignored):
      * whole numbers and decimals: "2", "2.5"
      * simple fractions: "3/4"
      * mixed numbers whose parts are separated by any whitespace: "1 1/2"

    Args:
        qty_str: The quantity text to parse.

    Returns:
        The quantity as a float, or None when the input is not a string or
        cannot be interpreted as one of the forms above.
    """
    if not isinstance(qty_str, str):
        return None

    # split() with no argument both trims the ends and tolerates tabs or
    # repeated spaces between the whole part and the fraction.
    parts = qty_str.split()
    try:
        if len(parts) == 2 and '/' in parts[1]:  # Handle "1 1/2"
            whole, fraction = parts
            return float(whole) + float(Fraction(fraction))
        if len(parts) == 1:
            return float(Fraction(parts[0]))
    except (ValueError, ZeroDivisionError, OverflowError):
        # Malformed number, zero denominator, or a value too large for float.
        return None
    return None

# 4. Fixed NER-based ingredient extractor
def extract_ingredients(text):
    """Extract quantified ingredients from recipe text.

    The NER model tags only the food name (e.g. "baking soda"); the quantity
    and unit sit in the text *before* that span. Each food span is therefore
    used as an anchor: the source text from the start of its line (or from the
    end of the previous food span on the same line) up to the end of the span
    is handed to process_merged_entity(), which looks for the
    "<quantity> <unit> <name>" pattern.

    Args:
        text: Recipe text to analyse.

    Returns:
        A list of the dicts produced by process_merged_entity() for every
        food span that has a recognisable quantity and unit in front of it.
        Spans without one (such as a recipe title) are dropped.
    """
    # Pass 1: merge FOOD entities that belong to the same phrase. Adjacency is
    # judged on the character offsets reported by the pipeline, so pieces such
    # as "all", "-", "purpose flour" collapse into one span.
    spans = []
    for entity in food_ner(text):
        if entity['entity_group'] != 'FOOD':
            continue
        start, end, score = entity['start'], entity['end'], entity['score']
        if spans:
            last = spans[-1]
            gap = text[last['end']:start]
            # Only spaces/hyphens (or nothing) may separate merged pieces; a
            # newline or any other character starts a new span.
            if start >= last['end'] and not gap.strip(' -'):
                last['end'] = end
                last['score_sum'] += score
                last['count'] += 1
                continue
        spans.append({'start': start, 'end': end, 'score_sum': score, 'count': 1})

    # Pass 2: parse the text leading up to (and including) each span.
    ingredients = []
    previous_end = 0
    for span in spans:
        line_start = text.rfind('\n', 0, span['start']) + 1
        # Never look back past the previous food span, so two ingredients on
        # one line do not share the same quantity.
        window_start = max(line_start, previous_end)
        previous_end = span['end']

        match = process_merged_entity(
            text[window_start:span['end']],
            span['score_sum'] / span['count'],
        )
        if match:
            ingredients.append(match)

    return ingredients

# Quantity (mixed number, fraction, decimal or integer), then a unit, then the
# ingredient name. The \b after the optional plural "s" stops the unit from
# matching a prefix of a longer word ("1 cupcake"), and \.? swallows an
# abbreviation period ("2 tbsp. butter") so it does not leak into the name.
_QUANTITY_UNIT_RE = re.compile(
    r'(\d+\s\d+/\d+|\d+/\d+|\d+\.\d+|\d+)\s*'
    r'(cup|tbsp|tsp|oz|lb|teaspoon|tablespoon)s?\b\.?\s*(.*)',
    re.IGNORECASE,
)


def process_merged_entity(full_text, avg_score, min_score=0.9):
    """Parse "<quantity> <unit> <ingredient>" out of a piece of text.

    Args:
        full_text: Text expected to contain a quantity, a unit and an
            ingredient name, in that order.
        avg_score: Average NER confidence for the ingredient.
        min_score: Confidence threshold; the entity is rejected unless
            avg_score is strictly greater than this value.

    Returns:
        A dict with keys "ingredient" (lower-cased name), "quantity" (float)
        and "unit" (lower-cased, singular as written in the pattern), or None
        when the confidence is too low, no quantity/unit pattern is found, the
        quantity is zero or unparseable, or the ingredient name is empty.
    """
    # Written as "not >" so that a NaN score is rejected as well.
    if not avg_score > min_score:  # Confidence threshold
        return None

    match = _QUANTITY_UNIT_RE.search(full_text)
    if match is None:
        return None

    qty, unit, ingredient = match.groups()
    quantity = parse_quantity(qty)
    name = ingredient.strip().lower()
    # A bare "2 cups" with nothing after it is not a usable ingredient.
    if not quantity or not name:
        return None

    return {
        "ingredient": name,
        "quantity": quantity,
        "unit": unit.lower()
    }

# 5. Adjusted Gemini conversion
def convert_with_gemini(ingredients):
    """Ask Gemini to convert the extracted measurements to grams.

    Args:
        ingredients: JSON-serialisable list of ingredient dicts, as produced
            by extract_ingredients().

    Returns:
        The parsed JSON list returned by the model (expected item shape:
        {"ingredient": ..., "grams": ..., "notes": ...}), or None when the
        request fails or the reply cannot be parsed as a JSON list. Failures
        are reported on stdout rather than raised.
    """
    system_prompt = """Convert these baking measurements to grams. Return JSON in format:
    [{"ingredient": "...", "grams": number, "notes": "..."}]"""

    try:
        response = gemini.generate_content(system_prompt + "\nInput:\n" + json.dumps(ingredients))
        raw = response.text.strip()

        # Prefer the contents of a Markdown code fence when the model wraps
        # its answer in one (with or without a "json" language tag).
        fenced = re.search(r'```(?:json)?\s*(.*?)```', raw, re.DOTALL | re.IGNORECASE)
        payload = fenced.group(1).strip() if fenced else raw

        try:
            parsed = json.loads(payload)
        except json.JSONDecodeError:
            # The reply may carry extra prose or an unclosed fence; fall back
            # to the outermost [...] slice before giving up.
            first, last = payload.find('['), payload.rfind(']')
            if first == -1 or last < first:
                raise
            parsed = json.loads(payload[first:last + 1])

        # Callers iterate the result as a list of dicts, so anything else is
        # treated as a failed conversion.
        if not isinstance(parsed, list):
            raise ValueError(f"expected a JSON list, got {type(parsed).__name__}")
        return parsed
    except Exception as e:
        print(f"Gemini Error: {str(e)}")
        return None

# 6. Main processor with debug info
def process_recipe(text):
    """Run the full pipeline on a recipe and print a report to stdout.

    Steps: extract ingredients with the NER model, list them, ask Gemini for
    gram conversions, and print the conversions. When no ingredients are
    found, the raw NER output is printed instead to help debugging.

    Args:
        text: Recipe text to process.

    Returns:
        None. All results are printed.
    """
    print("🔍 Extracting with InstaFoodRoBERTa-NER...")
    ingredients = extract_ingredients(text)

    if not ingredients:
        print("❌ No ingredients detected - here's why:")
        print("Raw NER output:", food_ner(text))
        return

    print("\n📋 Detected Ingredients:")
    for ing in ingredients:
        print(f"- {ing['quantity']} {ing['unit']} {ing['ingredient']}")

    print("\n⚡ Converting with Gemini...")
    result = convert_with_gemini(ingredients)

    # None (the conversion failed) or an empty list: nothing to report.
    if not result:
        return

    print("\n✅ Precision Conversions:")
    for item in result:
        # The entries are model-generated, so their shape is not guaranteed;
        # skip anything unusable instead of aborting the report half-way.
        if not isinstance(item, dict) or 'ingredient' not in item or 'grams' not in item:
            print(f"⚠️ Skipping malformed conversion entry: {item!r}")
            continue
        print(f"{item['ingredient']}: {item['grams']}g ({item.get('notes', '')})")

# Test with your recipe
# Demo entry point: when the file is run as a script, push a small sample
# recipe (one ingredient per "- " bullet line) through process_recipe().
if __name__ == "__main__":
    recipe = """
    Classic Cookies:
    - 2 1/4 cups all-purpose flour
    - 1 teaspoon baking soda
    - 1 cup unsalted butter (softened)
    - 3/4 cup packed brown sugar
    - 2 cups chocolate chips
    """
    process_recipe(recipe)

Device set to use cpu


🔍 Extracting with InstaFoodRoBERTa-NER...
❌ No ingredients detected - here's why:
Raw NER output: [{'entity_group': 'FOOD', 'score': 0.9928342, 'word': ' Cookies', 'start': 13, 'end': 20}, {'entity_group': 'FOOD', 'score': 0.9997056, 'word': ' all', 'start': 39, 'end': 42}, {'entity_group': 'FOOD', 'score': 0.9996468, 'word': '-', 'start': 42, 'end': 43}, {'entity_group': 'FOOD', 'score': 0.99483895, 'word': 'purpose flour', 'start': 43, 'end': 56}, {'entity_group': 'FOOD', 'score': 0.99612004, 'word': ' baking soda', 'start': 74, 'end': 85}, {'entity_group': 'FOOD', 'score': 0.9983456, 'word': ' butter', 'start': 107, 'end': 113}, {'entity_group': 'FOOD', 'score': 0.99849534, 'word': ' brown sugar', 'start': 146, 'end': 157}, {'entity_group': 'FOOD', 'score': 0.99933064, 'word': ' chocolate chips', 'start': 171, 'end': 186}]
