In [1]:
import torch
from transformers import DistilBertTokenizer, DistilBertForTokenClassification
from pydantic import BaseModel
from typing import List, Literal
import google.generativeai as genai
import json
import os
from dotenv import load_dotenv

# --- Configuration ---
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_client = genai.GenerativeModel("gemini-2.0-flash")

# --- DistilBERT Setup ---
class Ingredient(BaseModel):
    name: str
    amount: str
    unit: str
    type: Literal["dry", "liquid"]  # New field for ingredient type

class Recipe(BaseModel):
    ingredients: List[Ingredient]

# Load pre-trained model
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertForTokenClassification.from_pretrained("distilbert-base-uncased", num_labels=4)  # Added type classification

# --- Enhanced Gemini System Prompt ---
CONVERSION_PROMPT = """You are a precise measurement converter for baking ingredients. 
For DRY ingredients (flour, sugar, etc.), return weights in grams and ounces.
For LIQUID ingredients (milk, oil, etc.), return volumes in milliliters and fluid ounces.

Return ONLY JSON in this format:
{
  "<ingredient_name>": {
    "original": "<amount> <unit>",
    "type": "<dry|liquid>",
    "metric": "<converted> <g|ml>",
    "imperial": "<converted> <oz|fl oz>"
  }
}

Examples:
1. Input: "2 cups flour (dry)"
   Output: {"flour": {"original": "2 cups", "type": "dry", "metric": "240 g", "imperial": "8.47 oz"}}

2. Input: "1 cup milk (liquid)"
   Output: {"milk": {"original": "1 cup", "type": "liquid", "metric": "240 ml", "imperial": "8.12 fl oz"}}"""

def extract_ingredients(text: str) -> Recipe:
    """Enhanced extraction with ingredient type detection"""
    # Mock output - in practice you'd:
    # 1. Fine-tune DistilBERT to detect ingredient types
    # 2. Use a lookup table for common ingredient types
    return Recipe(ingredients=[
        Ingredient(name="flour", amount="2", unit="cups", type="dry"),
        Ingredient(name="sugar", amount="1/2", unit="cup", type="dry"),
        Ingredient(name="milk", amount="1", unit="cup", type="liquid"),
        Ingredient(name="vanilla extract", amount="1", unit="tsp", type="liquid")
    ])

def convert_with_gemini(ingredients: List[Ingredient]) -> dict:
    """Enhanced conversion with type handling"""
    ingredients_str = ", ".join(
        f"{i.amount} {i.unit} {i.name} ({i.type})" 
        for i in ingredients
    )
    
    response = gemini_client.generate_content(
        CONVERSION_PROMPT + f"\n\nConvert: {ingredients_str}\nReturn ONLY JSON."
    )
    
    try:
        return json.loads(response.text)
    except json.JSONDecodeError:
        print("Failed to parse Gemini response")
        return {}

def save_to_json(data: dict, filename: str = "return_recipe.json"):
    """Save with error handling"""
    try:
        with open(filename, 'w') as f:
            json.dump(data, f, indent=2)
        print(f"Successfully saved to {filename}")
    except Exception as e:
        print(f"Error saving file: {e}")

if __name__ == "__main__":
    recipe_text = "12 cups flour, 1/4 cup sugar, 5 cups milk, 7 tsp vanilla extract"
    
    # Extract with type detection
    recipe = extract_ingredients(recipe_text)
    print("Extracted ingredients:", recipe.model_dump_json(indent=2))
    
    # Convert with type-specific handling
    conversions = convert_with_gemini(recipe.ingredients)
    print("\nConverted measurements:")
    print(json.dumps(conversions, indent=2))
    
    # Save to file
    save_to_json(conversions)

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Extracted ingredients: {
  "ingredients": [
    {
      "name": "flour",
      "amount": "2",
      "unit": "cups",
      "type": "dry"
    },
    {
      "name": "sugar",
      "amount": "1/2",
      "unit": "cup",
      "type": "dry"
    },
    {
      "name": "milk",
      "amount": "1",
      "unit": "cup",
      "type": "liquid"
    },
    {
      "name": "vanilla extract",
      "amount": "1",
      "unit": "tsp",
      "type": "liquid"
    }
  ]
}
Failed to parse Gemini response

Converted measurements:
{}
Successfully saved to return_recipe.json
