In [23]:
from transformers import pipeline
import google.generativeai as genai
from dotenv import load_dotenv
import os
import re
from fractions import Fraction
import json

# Load environment variables (python-dotenv: typically from a local .env file)
# so the Gemini API key does not have to be hard-coded in this file.
load_dotenv()
# os.getenv returns None when the variable is unset; nothing here validates it.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Initialize models
# Food-entity tagger used by extract_ingredients to keep only food words.
food_ner = pipeline(
    "token-classification",
    model="Dizex/InstaFoodRoBERTa-NER",
    # Results are consumed via their 'entity_group' and 'word' fields below.
    aggregation_strategy="simple",
    # -1 selects the CPU (the run log prints "Device set to use cpu").
    device=-1
)
# Gemini client used by convert_with_gemini for the unit conversion step.
genai.configure(api_key=GEMINI_API_KEY)
gemini = genai.GenerativeModel("gemini-2.0-flash")

def parse_quantity(qty_str):
    """Convert a textual quantity into a float.

    Accepts integers ("2"), decimals ("1.5"), simple fractions ("3/4") and
    mixed numbers ("2 1/4"). The whole and fractional parts of a mixed number
    may be separated by any whitespace, and surrounding whitespace is ignored.

    Args:
        qty_str: The quantity text to parse.

    Returns:
        The quantity as a float, or None when the text cannot be parsed
        (malformed number, zero denominator, non-string input, ...).
    """
    try:
        parts = qty_str.split()
        if len(parts) == 2 and '/' in parts[1]:
            whole, fraction = parts
            whole_value = float(whole)
            fraction_value = float(Fraction(fraction))
            # The sign of a mixed number applies to the fraction as well:
            # "-2 1/4" means -2.25, not -1.75.
            if whole.startswith('-'):
                return whole_value - fraction_value
            return whole_value + fraction_value
        return float(Fraction(qty_str))
    except (ValueError, ArithmeticError, TypeError, AttributeError):
        # ValueError: malformed text; ArithmeticError: zero denominator or a
        # value too large for a float; TypeError/AttributeError: non-string.
        return None

# Quantity (fraction, decimal, mixed number or integer), a known unit with an
# optional plural "s", then the remainder of the line. The word boundary after
# the unit prevents "2 cupcakes" from being read as unit "cup" + "cakes".
_QUANTITY_UNIT_RE = re.compile(
    r'(\d+/\d+|\d+\.\d+|\d+\s\d+/\d+|\d+)\s*(cup|tbsp|tsp|oz|lb|teaspoon|tablespoon)s?\b\s*(.*)',
    re.IGNORECASE,
)


def extract_ingredients(text):
    """Extract structured ingredients from dash-bulleted recipe text.

    Each candidate line is the text following a "-" up to the end of that
    line. A line yields an ingredient only if it contains a quantity and a
    recognised unit, the quantity parses to a non-zero number, and the food
    NER model tags at least one FOOD entity in the remaining text.

    Args:
        text: Raw recipe text, one "- <qty> <unit> <ingredient>" per line.

    Returns:
        A list of dicts with keys "ingredient" (lower-cased food name),
        "quantity" (float) and "unit" (lower-cased unit as written, without
        the plural "s"). Empty when nothing could be extracted.
    """
    ingredients = []

    # NOTE: this matches a "-" anywhere in the text, not only a leading
    # bullet; lines without a quantity and unit are filtered out below.
    for line in re.findall(r'-\s*(.*?)(?=\n|$)', text):
        match = _QUANTITY_UNIT_RE.search(line)
        if not match:
            continue

        qty, unit, ingredient_part = match.groups()
        if not ingredient_part.strip():
            # Nothing after the unit, so there is no ingredient to validate.
            continue

        quantity = parse_quantity(qty)
        if not quantity:
            # Unparseable (None) or zero quantity: skip before running the
            # comparatively expensive NER model.
            continue

        # Keep only the words the NER model tags as food.
        ner_results = food_ner(ingredient_part)
        food_items = [
            entity['word'].strip()
            for entity in ner_results
            if entity['entity_group'] == 'FOOD'
        ]
        if not food_items:
            continue

        # Re-join the pieces; " - " is collapsed so hyphenated names such as
        # "all-purpose" come back in their usual spelling.
        full_ingredient = ' '.join(food_items).replace(' - ', '-')
        ingredients.append({
            "ingredient": full_ingredient.lower(),
            "quantity": quantity,
            "unit": unit.lower()
        })

    return ingredients

def convert_with_gemini(ingredients):
    """Ask Gemini to convert ingredient quantities to grams or millilitres.

    Args:
        ingredients: List of dicts as produced by extract_ingredients
            (keys "ingredient", "quantity", "unit").

    Returns:
        A list of dicts parsed from the model's JSON reply (requested keys:
        "ingredient", "grams", "ml", "notes"). Returns an empty list when
        there is nothing to convert, and None when the request or the JSON
        parsing fails (the error is printed).
    """
    if not ingredients:
        # Nothing to convert: avoid a pointless API round trip.
        return []

    # The reply is consumed as a list of objects carrying a "notes" field,
    # so the prompt asks for exactly that shape.
    prompt = f"""
    Convert these baking ingredients to exact grams or millilitres. Return ONLY JSON:

    Rules:
    - Use professional baking standards.
    - For dry ingredients (flour, sugar), specify if packed/loose in "notes".
    - Return a JSON array with one object per input ingredient, in input order.
    - Each object has this format:
    {{
      "ingredient": string,
      "grams": number or null (set if dry),
      "ml": number or null (set if wet),
      "notes": string
    }}

    Input to convert:
    {json.dumps(ingredients, indent=2)}
    """

    try:
        response = gemini.generate_content(prompt)
        # The model often wraps its answer in a Markdown code fence.
        clean_response = response.text.replace('```json', '').replace('```', '').strip()
        parsed = json.loads(clean_response)
    except Exception as e:
        # Best-effort boundary around a remote call: report and signal failure.
        print(f"Gemini Error: {str(e)}")
        return None

    # A single-object reply is normalised so callers can always iterate.
    return [parsed] if isinstance(parsed, dict) else parsed

def process_recipe(text):
    """Run the full pipeline on recipe text and print each stage's result.

    Extracts ingredients from the text, prints them, sends them to Gemini
    for conversion and prints the converted amounts. Returns None in all
    cases; output is written to stdout only.

    Args:
        text: Raw recipe text with one dash-bulleted ingredient per line.
    """
    print("🔍 Extracting ingredients...")
    ingredients = extract_ingredients(text)

    if not ingredients:
        print("❌ No ingredients detected in:", text)
        return

    print("\n📋 Detected Ingredients:")
    for ing in ingredients:
        print(f"- {ing['quantity']} {ing['unit']} {ing['ingredient']}")

    print("\n⚡ Converting with Gemini...")
    result = convert_with_gemini(ingredients)
    if not result:
        return

    # Tolerate a single-object reply as well as a list of objects.
    if isinstance(result, dict):
        result = [result]

    print("\n✅ Precision Conversions:")
    for item in result:
        if not isinstance(item, dict):
            continue
        name = item.get('ingredient', 'unknown')
        notes = item.get('notes', '')
        # Wet ingredients come back with "ml" instead of "grams"; indexing
        # item['grams'] directly would raise KeyError for them.
        if item.get('grams') is not None:
            amount = f"{item['grams']}g"
        elif item.get('ml') is not None:
            amount = f"{item['ml']}ml"
        else:
            amount = "n/a"
        print(f"{name}: {amount} - {notes}")

# Test with your recipe
# Demo entry point: runs the full pipeline on a small sample recipe.
if __name__ == "__main__":
    # One dash-bulleted ingredient per line; covers a mixed number ("2 1/4"),
    # a simple fraction ("3/4") and whole-number quantities.
    recipe = """
    - 2 1/4 cups all-purpose flour
    - 1 teaspoon baking soda
    - 1 cup unsalted butter
    - 3/4 cup packed brown sugar
    - 2 cups chocolate chips
    """
    # Prints the detected ingredients and the Gemini conversions to stdout.
    process_recipe(recipe)

Device set to use cpu


🔍 Extracting ingredients...

📋 Detected Ingredients:
- 2.25 cup all-purpose flour
- 1.0 teaspoon baking soda
- 1.0 cup butter
- 0.75 cup brown sugar
- 2.0 cup chocolate chips

⚡ Converting with Gemini...

✅ Precision Conversions:
all-purpose flour: 281g - 
baking soda: 4.5g - 
butter: 227g - 
brown sugar: 165g - 
chocolate chips: 340g - 
