In [3]:
from dotenv import load_dotenv
import os 
import pymongo

from enum import Enum
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate

from tqdm import tqdm
import threading

# Load environment variables from a .env file, if one can be found
# (MONGO_CONNECTION_STRING is read from the environment further down).
# NOTE(review): the recorded output of this cell is False, which suggests that
# no .env values were loaded on that run — confirm the variable is provided
# some other way.
load_dotenv()

False

In [4]:
# Closed vocabulary of taste labels the model is allowed to return.
# Members are str-valued so they compare equal to (and hash like) their plain
# string value.
# NOTE: documented with comments rather than a class docstring on purpose —
# this enum is used in the structured-output schema, and a docstring could end
# up in the generated schema text.
class DrinkTaste(str, Enum):
    SWEET = "Sweet"
    SOUR = "Sour"
    BITTER = "Bitter"
    SALTY = "Salty"
    UMAMI = "Umami"
    FRUITY = "Fruity"
    FLORAL = "Floral"
    SPICY = "Spicy"
    CREAMY = "Creamy"
    TART = "Tart"
    REFRESHING = "Refreshing"
    RICH = "Rich"
    LIGHT = "Light"
    EARTHY = "Earthy"
    CITRUSY = "Citrusy"
    HERBAL = "Herbal"
    MILKY = "Milky"

# Plain-string view of the allowed tastes, derived from the enum so the two
# can never drift apart when a taste is added or renamed.
VALID_TASTES = {taste.value for taste in DrinkTaste}
    
# Structured answer expected from the model for one drink.
#   think  - free-text reasoning; defaults to "" when the model omits it.
#   tastes - list of DrinkTaste members; defaults to an empty list.
# This class is handed to `llm.with_structured_output(...)` below, so the field
# names (and presumably the Field descriptions) are part of what the model is
# asked to produce — TODO confirm before rewording them.
class TasteAnalysis(BaseModel):
    think: str = Field(default="", description="Detailed reasoning for the taste analysis")
    tastes: list[DrinkTaste] = Field(default_factory=list, description="Final dominant taste characteristics as a list")


# Chat model served by Ollama. `include_raw` was previously passed here, but it
# is an option of `with_structured_output`, not of the model constructor, so it
# only misled readers. It is deliberately NOT enabled below either: the
# processing loop reads `.tastes` directly from the parsed TasteAnalysis
# result, which raw mode would wrap in a dict.
llm = ChatOllama(model="deepseek-r1:1.5b", temperature=0.5)

# Runnable that returns a TasteAnalysis instance built from the model's
# JSON-schema-constrained output.
structured_llm = llm.with_structured_output(TasteAnalysis, method="json_schema")

In [6]:
# The connection string comes from the environment so that credentials never
# live in the notebook.
mongo_uri = os.getenv('MONGO_CONNECTION_STRING')
if mongo_uri is None:
    # MongoClient(None) does not fail: it silently falls back to its default
    # host. Keep that fallback, but make it visible so results are not written
    # to an unexpected database by accident.
    print("Warning: MONGO_CONNECTION_STRING is not set; "
          "MongoClient will use its default host (localhost:27017).")

client = pymongo.MongoClient(mongo_uri)
db = client["monin"]
collection = db["drinks"]

In [None]:
# Prompt rendered once per drink via `prompt.format(recipe=...)`.
# - `{recipe}` is the only placeholder used by the callers in this notebook.
# - The doubled braces (`{{` / `}}`) around the JSON example are escapes so the
#   template engine emits literal braces instead of treating them as variables.
# - The "Taste options" list repeats the members of DrinkTaste by hand; keep
#   the two in sync when the enum changes.
prompt = PromptTemplate.from_template(
    """Analyze the drink recipe and determine the dominant taste characteristics based on the ingredients and their interactions.
Taste options:
SWEET = "Sweet"
SOUR = "Sour"
BITTER = "Bitter"
SALTY = "Salty"
UMAMI = "Umami"
FRUITY = "Fruity"
FLORAL = "Floral"
SPICY = "Spicy"
CREAMY = "Creamy"
TART = "Tart"
REFRESHING = "Refreshing"
RICH = "Rich"
LIGHT = "Light"
EARTHY = "Earthy"
CITRUSY = "Citrusy"
HERBAL = "Herbal"
MILKY = "Milky"

Recipe: {recipe}

Output a JSON object with the following structure:
{{
  "think": "Detailed reasoning for the taste analysis based on the ingredients. Reasoning about it, come up with a logical list of 2-4 flavors, and at the end, in a separate sentence, list them comma-separated and uncluttered.",
  "tastes": ["Pick 2-4 most suitable tastes from "think" part and print out them as a comma-separated list without any other words"]
}}

The "think" part should end with a specific list listed with commas. You get a paycheck for the right list, and a fine for the wrong one. If there is no comma-separated list at the end of the reflection part, you will be fired!

Make sure the "tastes" list contains only valid options from the provided list. And be careful, all tastes in "tastes" should be the same as in ending of "think" part.
AI:"""
)

def invoke_with_timeout(recipe, timeout=30):
    """Run the structured LLM on one recipe, waiting at most `timeout` seconds.

    The model call runs in a worker thread so the caller can stop waiting when
    the model hangs.

    Args:
        recipe: Text substituted into the `{recipe}` slot of `prompt`.
        timeout: Maximum number of seconds to wait for the model.

    Returns:
        Whatever `structured_llm.invoke` returns, or None when the call timed
        out or raised. Failures are printed, not returned: handing an exception
        object back as if it were a result made callers crash later when they
        accessed `.tastes` on it.
    """
    outcome = {}

    def target():
        try:
            outcome["value"] = structured_llm.invoke(prompt.format(recipe=recipe))
        except Exception as exc:
            outcome["error"] = exc

    # A timed-out call cannot be cancelled, only abandoned. Marking the worker
    # as a daemon keeps an abandoned call from blocking interpreter shutdown.
    worker = threading.Thread(target=target, daemon=True)
    worker.start()
    worker.join(timeout)

    if worker.is_alive():
        return None
    if "error" in outcome:
        print(f"LLM call failed: {outcome['error']!r}")
        return None
    return outcome.get("value")


def get_tastes_result(recipe, timeout=30):
    """Ask the model for a taste analysis, retrying once if the first try fails.

    A try counts as failed when `invoke_with_timeout` returns None (timeout) or
    an exception object (the model call raised).

    Args:
        recipe: Recipe text forwarded to `invoke_with_timeout`.
        timeout: Per-attempt timeout in seconds.

    Returns:
        The result of the first successful attempt, or None when both attempts
        failed.
    """
    tastes_result = invoke_with_timeout(recipe, timeout)

    if isinstance(tastes_result, Exception):
        # Treat an error like a timeout: it is a failed attempt, not a result.
        print(f"Error ({tastes_result!r}): Retrying after first attempt...")
        tastes_result = None
    elif tastes_result is None:
        print("Timeout: Retrying after first attempt...")

    if tastes_result is None:
        tastes_result = invoke_with_timeout(recipe, timeout)
        if isinstance(tastes_result, Exception):
            print(f"Error ({tastes_result!r}): Giving up after second attempt")
            tastes_result = None

    return tastes_result


def _is_usable_result(tastes_result, drink_name):
    """Return True for a real analysis; otherwise print why the drink is skipped."""
    if tastes_result is None:
        print(f"Timeout: Skipping {drink_name} after second attempt")
        return False
    if isinstance(tastes_result, Exception):
        # Guard against an exception object being handed back as a result;
        # accessing `.tastes` on it would abort the whole run.
        print(f"Error: Skipping {drink_name}: {tastes_result!r}")
        return False
    return True


def _valid_tastes_of(tastes_result):
    """Return the entries of `tastes_result.tastes` that are in VALID_TASTES."""
    return [taste for taste in (tastes_result.tastes or []) if taste in VALID_TASTES]


# Fetch every drink that has no "taste" field yet. The result is materialised
# up front because each drink can take tens of seconds to analyse: a lazy
# cursor held open that long risks a server-side idle timeout. It also makes
# the progress-bar total match exactly what is iterated, with one query
# instead of two.
drinks_to_process = list(collection.find({"taste": {"$exists": False}}))
total_drinks = len(drinks_to_process)

for drink in tqdm(drinks_to_process, total=total_drinks, desc="Processing drinks", leave=False):
    # NOTE(review): 'recipie' is the key as spelled in this code; presumably it
    # matches the stored documents and holds a list of strings — confirm.
    recipe = drink['name'] + "\n" + " ".join(drink['recipie'])

    tastes_result = get_tastes_result(recipe)
    if not _is_usable_result(tastes_result, drink['name']):
        continue

    valid_tastes = _valid_tastes_of(tastes_result)

    if not valid_tastes:
        # Covers an empty `tastes` list as well as a list with no valid entry.
        # Previously the empty case was skipped silently and never retried.
        print(f"Invalid tastes generated for {drink['name']}, retrying...")
        tastes_result = get_tastes_result(recipe)
        if not _is_usable_result(tastes_result, drink['name']):
            continue
        valid_tastes = _valid_tastes_of(tastes_result)

    if valid_tastes:
        collection.update_one(
            {"_id": drink["_id"]},
            {"$set": {"taste": valid_tastes}}
        )
    else:
        print(f"Invalid tastes generated for {drink['name']}: {tastes_result.tastes}")

Processing drinks:  57%|█████▋    | 95/166 [02:03<01:11,  1.01s/it]

Timeout: Retrying after first attempt...


Processing drinks:  93%|█████████▎| 154/166 [05:02<00:38,  3.18s/it]

Timeout: Retrying after first attempt...


                                                                    