### BakingLLM - Structured Recipe Conversion System

### Import required libraries

In [1]:
import torch
from transformers import (
    AutoTokenizer,
    DistilBertForTokenClassification,
    pipeline,
    TrainingArguments,
    Trainer
)
from pydantic import BaseModel
from typing import List, Literal
import google.generativeai as genai
import json
import os
from dotenv import load_dotenv

    ImportError: DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) initialization routine failed.
  warn(message, cls)
    ImportError: DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) initialization routine failed.
  warn(message, cls)





### Configuration Setup

In [2]:
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_client = genai.GenerativeModel("gemini-2.0-flash")

### FoodBERT Model Setup
### Initialize model with food-specific weights

In [3]:
FOODBERT_CHECKPOINT = "chambliss/distilbert-for-food-extraction"
tokenizer = AutoTokenizer.from_pretrained(FOODBERT_CHECKPOINT)
model = DistilBertForTokenClassification.from_pretrained(FOODBERT_CHECKPOINT)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

In [4]:
# Define label mapping (Search Result 2, 8)
LABEL_MAP = {
    0: "B-INGREDIENT",
    1: "I-INGREDIENT",
    2: "B-AMOUNT",
    3: "I-AMOUNT",
    4: "B-UNIT",
    5: "I-UNIT",
    6: "O"
}
## Data Models
class Ingredient(BaseModel):
    name: str
    amount: str
    unit: str
    type: Literal["dry", "liquid"]

class Recipe(BaseModel):
    ingredients: List[Ingredient]

In [5]:
## Enhanced Extraction Pipeline (Search Result 1, 6)
def parse_entities(text: str) -> Recipe:
    """Convert model outputs to structured recipe format"""
    ner_pipeline = pipeline(
        "token-classification",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple"
    )
    
    entities = ner_pipeline(text)
    ingredients = []
    current = {"name": "", "amount": "", "unit": ""}
    
    for entity in entities:
        token = text[entity["start"]:entity["end"]]
        label = LABEL_MAP[entity["entity"][-1]]
        
        if label == "B-INGREDIENT":
            if current["name"]:
                ingredients.append(current)
                current = {"name": "", "amount": "", "unit": ""}
            current["name"] = token
        elif label == "I-INGREDIENT":
            current["name"] += " " + token
        elif label.startswith("B-AMOUNT"):
            current["amount"] = token
        elif label.startswith("B-UNIT"):
            current["unit"] = token
    
    # Add final ingredient and determine types
    if current["name"]:
        ingredients.append(current)
    
    return Recipe(ingredients=[
        Ingredient(
            name=ing["name"].strip(),
            amount=ing["amount"],
            unit=ing["unit"],
            type=ingredient_type_lookup(ing["name"])
        ) for ing in ingredients
    ])


In [6]:
## Ingredient Type Classifier (Search Result 5)
INGREDIENT_TYPE_LOOKUP = {
    # Dry ingredients
    "flour": "dry", "sugar": "dry", "baking powder": "dry",
    "salt": "dry", "cocoa powder": "dry", "spices": "dry",
    
    # Liquid ingredients
    "milk": "liquid", "water": "liquid", "oil": "liquid",
    "vanilla extract": "liquid", "honey": "liquid"
}

def ingredient_type_lookup(name: str) -> str:
    """Determine ingredient type using lookup table"""
    lower_name = name.lower()
    for key, value in INGREDIENT_TYPE_LOOKUP.items():
        if key in lower_name:
            return value
    return "dry"  # Default assumption

# %% [markdown]
## Measurement Conversion Engine
CONVERSION_PROMPT = """... (keep your existing Gemini prompt) ..."""

def convert_measurements(recipe: Recipe) -> dict:
    """Convert ingredients using Gemini API"""
    ingredients_str = ", ".join(
        f"{i.amount} {i.unit} {i.name} ({i.type})"
        for i in recipe.ingredients
    )
    
    response = gemini_client.generate_content(
        CONVERSION_PROMPT + f"\n\nConvert: {ingredients_str}"
    )
    
    try:
        return json.loads(response.text)
    except json.JSONDecodeError:
        print("Conversion failed, returning empty data")
        return {}


In [9]:
## Main Execution Flow
if __name__ == "__main__":
    # Sample input
    recipe_text = "2 cups all-purpose flour, 1.5 cups granulated sugar, 3/4 cup whole milk"
    
    # Process pipeline
    extracted = parse_entities(recipe_text)
    print("Extracted Ingredients:")
    print(extracted.model_dump_json(indent=2))
    
    conversions = convert_measurements(extracted)
    print("\nConverted Measurements:")
    print(json.dumps(conversions, indent=2))
    
    # Save output
    with open("baking_conversions.json", "w") as f:
        json.dump(conversions, f, indent=2)
    print("\nSaved conversions to baking_conversions.json")


KeyError: 'entity'