In [1]:
# 1. Imports and Setup section

import os
from ultralytics import YOLO
from PIL import Image, ImageDraw
from IPython.display import display, Markdown
import ipywidgets as widgets
from llama_cpp import Llama

# Locations of the GGUF language model and of the folder holding ingredient photos.
# NOTE(review): the LLM is read from ./Models while the YOLO weights below use
# models/ — on a case-sensitive filesystem these are different directories; confirm.
MODEL_PATH = "./Models/phi-2.Q4_K_M.gguf"
AVAILABLE_INGREDIENTS_DIR = "./Available_Ingredients"

# Text-generation model loaded through llama_cpp, with memory-mapping switched off.
llm = Llama(use_mmap=False, model_path=MODEL_PATH)

# YOLOv8 detectors keyed by size name; every weights file is read from models/.
yolo_models = dict(
    nano=YOLO("models/yolov8n.pt"),
    small=YOLO("models/yolov8s.pt"),
    medium=YOLO("models/yolov8m.pt"),
    large=YOLO("models/yolov8l.pt"),
    xlarge=YOLO("models/yolov8x.pt"),
)

# Detector used by the rest of the notebook.
yolo_model = yolo_models["nano"]

llama_model_loader: loaded meta data with 20 key-value pairs and 325 tensors from ./Models/phi-2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = phi2
llama_model_loader: - kv   1:                               general.name str              = Phi2
llama_model_loader: - kv   2:                        phi2.context_length u32              = 2048
llama_model_loader: - kv   3:                      phi2.embedding_length u32              = 2560
llama_model_loader: - kv   4:                   phi2.feed_forward_length u32              = 10240
llama_model_loader: - kv   5:                           phi2.block_count u32              = 32
llama_model_loader: - kv   6:                  phi2.attention.head_count u32              = 32
llama_model_loader: - kv   7:               phi2.attention.head_count_kv u32              

In [6]:
# 2. Ingredient Detection + Image Annotation using yolo

def detect_ingredients_and_annotate(folder_path=AVAILABLE_INGREDIENTS_DIR, model=None):
    """Run object detection on every image in a folder and draw the detections.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for ``.jpg``, ``.jpeg`` and ``.png``
        files; the extension check is case-insensitive.
    model : callable, optional
        Detector invoked with each image path. When omitted, the notebook-level
        ``yolo_model`` is used.

    Returns
    -------
    tuple[list[str], list]
        The sorted unique class labels found across all images, and one
        annotated RGB PIL image per input file, in sorted file-name order.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` / ``Image.open`` when the folder or an
        image cannot be read.
    """
    detector = yolo_model if model is None else model
    detected_ingredients = set()
    annotated_images = []

    # os.listdir gives entries in arbitrary order; sort so the displayed
    # images come out in the same order on every run.
    for file in sorted(os.listdir(folder_path)):
        if not file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = os.path.join(folder_path, file)
        results = detector(image_path)

        # convert() hands back a separate image object, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(image_path) as source:
            img = source.convert("RGB")
        draw = ImageDraw.Draw(img)

        for r in results:
            names = r.names
            boxes = r.boxes.xyxy.cpu().numpy()
            classes = r.boxes.cls.cpu().numpy()

            for box, cls in zip(boxes, classes):
                x1, y1, x2, y2 = map(int, box)
                label = names[int(cls)]
                detected_ingredients.add(label)
                draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
                # Place the label just above the box, but never above the
                # top edge of the image where it would be cut off.
                draw.text((x1, max(y1 - 10, 0)), label, fill="red")

        annotated_images.append(img)

    return sorted(detected_ingredients), annotated_images

In [14]:
# 3. Recipe generation with the local llama.cpp model (Phi-2)

def generate_recipe_llama(ingredients, dish_type="main course", max_tokens=512, temperature=0.7):
    """Ask the local language model for a recipe built from the given ingredients.

    Parameters
    ----------
    ingredients : str or iterable of str
        Either a ready-made, human-readable ingredient list (inserted into the
        prompt unchanged) or an iterable of items, which are stripped, filtered
        for blanks and joined with ``", "``.
    dish_type : str
        Kind of dish requested in the prompt.
    max_tokens : int
        Forwarded to the model call as ``max_tokens``.
    temperature : float
        Forwarded to the model call as ``temperature``.

    Returns
    -------
    str
        The generated text with surrounding whitespace removed; an empty string
        when the model returns a dict without any usable choice.
    """
    # Accept a list/tuple/set too, so callers need not pre-join the items and
    # the prompt never contains a Python repr such as "['egg', 'rice']".
    if not isinstance(ingredients, str):
        ingredients = ", ".join(
            item for item in (str(raw).strip() for raw in ingredients) if item
        )

    prompt = (
        f"You are a professional chef AI. Create a detailed {dish_type} recipe using the following ingredients:\n"
        f"{ingredients}.\nInclude clear steps and assume basic kitchen tools are available."
    )

    result = llm(prompt=prompt, max_tokens=max_tokens, temperature=temperature)

    if isinstance(result, dict):
        # Completion-style payload: take the first choice, tolerating a
        # missing/empty "choices" list or a missing/None "text" field.
        choices = result.get("choices") or []
        text = (choices[0].get("text") or "") if choices else ""
    else:
        # Any other return value is rendered as text rather than crashing.
        text = str(result)

    return text.strip()


In [15]:
# 4. UI elements for combining detected and manually entered ingredients
# Free-text box where the user lists items that were not detected in the photos.
manual_input = widgets.Textarea(
    description="Extra Items:",
    placeholder="Add undetected items here (e.g. salt, pepper, spices)...",
    layout=widgets.Layout(height="60px", width="100%"),
)

# Selector for the kind of dish the recipe should be.
dish_dropdown = widgets.Dropdown(
    description="Dish Type:",
    options=["main course", "dessert", "salad", "vegan", "quick meal"],
    value="main course",
)

# Output pane plus the button that starts a run.
output_area = widgets.Output()
generate_btn = widgets.Button(button_style="success", description="Generate Recipe")


In [16]:
# 5. Button callback: run detection, show the annotated images, then generate a recipe from the combined ingredients

def on_generate(b):
    """Click handler: detect ingredients, merge manual extras, show a recipe.

    Everything is rendered inside ``output_area``, which is cleared first.
    Detected labels and the manually typed items (separated by commas or line
    breaks) are stripped, de-duplicated case-insensitively in first-seen order,
    and passed to ``generate_recipe_llama`` together with the selected dish
    type. When no ingredient remains, a hint is shown and no recipe is
    generated.

    Parameters
    ----------
    b : widget or None
        The clicked button. If given, it is disabled for the duration of the
        run and re-enabled afterwards.
    """
    # Intended to keep additional clicks from piling up while a slow
    # detection/generation run is still in progress.
    if b is not None:
        b.disabled = True
    try:
        with output_area:
            output_area.clear_output()
            display(Markdown("### 🧑‍🍳 Detecting ingredients from images..."))

            detected, images = detect_ingredients_and_annotate()

            for img in images:
                display(img)

            # The text area is multi-line, so treat line breaks like commas.
            manual_items = manual_input.value.replace("\n", ",").split(",")

            cleaned = []
            seen = set()
            for item in list(detected) + manual_items:
                item = item.strip()
                key = item.lower()
                if item and key not in seen:
                    seen.add(key)
                    cleaned.append(item)

            if not cleaned:
                display(Markdown(
                    "**No ingredients found.** Add images to the ingredients "
                    "folder or list items manually, then try again."
                ))
                return

            final_ingredient_list = ", ".join(cleaned)

            display(Markdown(f"**Ingredients Used:** {final_ingredient_list}"))
            display(Markdown("### 📜 Generating recipe locally using LLaMA..."))

            recipe = generate_recipe_llama(final_ingredient_list, dish_dropdown.value)
            display(Markdown(recipe))
    finally:
        if b is not None:
            b.disabled = False

# Register on_generate as the click callback of the "Generate Recipe" button.
generate_btn.on_click(on_generate)

In [17]:
# 6. Assemble the widgets and display the application UI

# Stack the title, the source-folder note, both inputs, the button and the
# output pane vertically, then render the whole layout below this cell.
ui = widgets.VBox(
    children=[
        widgets.HTML(value="<h2>🧠 AI Cooking Assistant (Prototype)</h2>"),
        widgets.HTML(
            value=f"<p><b>Detected from:</b> <code>{AVAILABLE_INGREDIENTS_DIR}/</code></p>"
        ),
        manual_input,
        dish_dropdown,
        generate_btn,
        output_area,
    ]
)

display(ui)


VBox(children=(HTML(value='<h2>🧠 AI Cooking Assistant (Prototype)</h2>'), HTML(value='<p><b>Detected from:</b>…