In [None]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-8k5ikxzf
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-8k5ikxzf
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... done
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.8/44.8 kB 2.0 MB/s eta 0:00:00
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... done
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=bc5ca2842a422e6840f61dd5d38f587cc9f516d981d77538d05cd1de3ea56863
  Stored in directory: /tmp/pip-ephem-wheel-cache-p4z9ne01/wheels/da/2b/4c/d6691fa9597aac8bb

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there are reachable
# through the regular filesystem.
drive.mount('/content/drive')

# Drive folder that holds the test images. The trailing slash is required:
# image paths are later built by plain string concatenation onto this prefix.
base_path = "/content/drive/My Drive/Final Project DL/images/"

Mounted at /content/drive


In [None]:
import torch
import clip
from PIL import Image
from torchvision.datasets import Food101
from sklearn.metrics.pairwise import cosine_similarity

# Load CLIP Model
def load_clip_model(device='cpu', backbone='ViT-B/16'):
    """Load a CLIP checkpoint together with its image preprocessing transform.

    Args:
        device: Device handed through to ``clip.load``.
        backbone: Name of the CLIP architecture to load.

    Returns:
        The ``(model, preprocess)`` pair produced by ``clip.load``.
    """
    return clip.load(backbone, device=device)

# Process Image with CLIP
def process_image_with_clip(image_path, clip_model, preprocess, device='cpu'):
    """Encode a single image file into a CLIP image embedding.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        clip_model: Model exposing ``encode_image``.
        preprocess: Callable that turns a PIL image into a tensor.
        device: Device the image tensor is moved to before encoding.

    Returns:
        Whatever ``clip_model.encode_image`` returns for a batch holding this
        one image. Gradients are disabled during the call.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager guarantees the handle is released even if preprocessing raises.
    with Image.open(image_path) as image:
        # unsqueeze(0) adds the leading batch axis before the device transfer.
        image_input = preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        return clip_model.encode_image(image_input)

# Predict the food class from CLIP
def predict_food_class(image_features, clip_model, dataset, device='cpu',
                       prompt_template="a photo of {}, a type of food."):
    """Pick the dataset class whose text embedding best matches an image.

    Args:
        image_features: Image embedding(s) as returned by
            ``clip_model.encode_image``; only the first row is classified.
        clip_model: Model exposing ``encode_text``.
        dataset: Object with a ``classes`` sequence of label strings.
        device: Device the tokenized prompts are moved to.
        prompt_template: ``str.format`` template wrapped around each label
            (with underscores turned into spaces) before tokenization. Pass
            ``"{}"`` to use the label text on its own.

    Returns:
        The entry of ``dataset.classes`` whose prompt has the highest cosine
        similarity with the image embedding.

    Raises:
        ValueError: If ``dataset.classes`` is empty.
    """
    class_names = list(dataset.classes)
    if not class_names:
        raise ValueError("dataset.classes is empty; nothing to predict")

    # Labels such as "spaghetti_bolognese" are identifiers, not natural
    # language. CLIP's text encoder is meant to be compared against captions,
    # so each label is turned into a short sentence. The untouched label is
    # still what gets returned to the caller.
    prompts = [prompt_template.format(name.replace("_", " ")) for name in class_names]
    text_inputs = clip.tokenize(prompts).to(device)

    with torch.no_grad():
        text_features = clip_model.encode_text(text_inputs)

    similarity = cosine_similarity(image_features.cpu().numpy(), text_features.cpu().numpy())
    # Index the first row explicitly: a bare argmax() flattens the matrix, so
    # a multi-row input could yield an index outside class_names.
    predicted_class_index = int(similarity[0].argmax())
    return class_names[predicted_class_index]

# Main function for CLIP-based prediction
def main(image_path, clip_model, preprocess, dataset, device='cpu'):
    """Run the CLIP pipeline end to end: embed the image, then classify it.

    Args:
        image_path: Image file to classify.
        clip_model: CLIP model used for both image and text encoding.
        preprocess: Image transform paired with ``clip_model``.
        dataset: Object whose ``classes`` are the candidate labels.
        device: Device on which inputs are placed.

    Returns:
        The class name chosen by ``predict_food_class``.
    """
    return predict_food_class(
        process_image_with_clip(image_path, clip_model, preprocess, device),
        clip_model,
        dataset,
        device,
    )


In [None]:
import re
from transformers import FlaxAutoModelForSeq2SeqLM, AutoTokenizer

# Load the T5 model and tokenizer
# Checkpoint identifier shared by the tokenizer and the model below.
MODEL_NAME_OR_PATH = "flax-community/t5-recipe-generation"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH, use_fast=True)
model = FlaxAutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)

# Text prepended to every prompt before it is tokenized.
prefix = "items: "

# Keyword arguments forwarded verbatim to model.generate().
generation_kwargs = dict(
    max_length=512,
    min_length=128,
    no_repeat_ngram_size=3,
    do_sample=True,
    top_k=60,
    top_p=0.95,
)

def generation_function(texts):
    """Generate text from the recipe model for a prompt or a list of prompts.

    Every prompt gets the module-level ``prefix`` prepended, is tokenized
    (padded and truncated to 256 tokens) and is passed to ``model.generate``
    together with ``generation_kwargs``.

    Args:
        texts: A single prompt string or a list of prompt strings.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        skipped. Note that only the first sequence is decoded, even when
        ``texts`` is a list with several entries.
    """
    if isinstance(texts, list):
        batch = texts
    else:
        batch = [texts]
    prompts = [prefix + item for item in batch]

    encoded = tokenizer(
        prompts,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_tensors="jax",
    )

    result = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        **generation_kwargs,
    )
    first_sequence = result.sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def process_recipe_output(recipe_text):
    """Split generated recipe text into title, ingredients and directions.

    The text is searched for ``title:``, ``ingredients:`` and ``directions:``
    markers (case-insensitive). The title marker must be the first thing in
    the text once leading whitespace is ignored. Sections may span several
    lines.

    Args:
        recipe_text: Raw generated text. ``None`` or an empty string yields
            three empty strings.

    Returns:
        A ``(title, ingredients, directions)`` tuple of stripped strings. A
        section whose marker is not found is returned as ``""``.
    """
    if not recipe_text:
        return "", "", ""

    # Leading whitespace would otherwise defeat the ^ anchor on "title:".
    text = recipe_text.lstrip()

    # DOTALL lets "." cross newlines. Without it a line break anywhere before
    # the next marker makes the lazy match fail and the section come back
    # empty, and the directions stop at the end of their first line.
    flags = re.IGNORECASE | re.DOTALL

    # Each section runs from its own marker up to the next marker (or the end
    # of the text); directions always run to the end.
    section_patterns = (
        r"(?<=^title:)(.*?)(?=ingredients:|directions:|$)",
        r"(?<=ingredients:)(.*?)(?=directions:|$)",
        r"(?<=directions:)(.*)",
    )

    sections = []
    for pattern in section_patterns:
        match = re.search(pattern, text, flags)
        sections.append(match.group(1).strip() if match else "")

    title, ingredients, directions = sections
    return title, ingredients, directions

# Function to generate recipe from the dish name and separate it into title, ingredients, and directions
def generate_recipe_from_dish(dish_name):
    """Generate a recipe for ``dish_name`` and print it section by section.

    The raw model output is split by ``process_recipe_output`` and written to
    stdout as a header line followed by title, ingredients and directions.
    Nothing is returned.

    Args:
        dish_name: Dish name used as the generation prompt.
    """
    # Generation runs before anything is printed, so a failure leaves no
    # dangling header in the output.
    raw_recipe = generation_function(dish_name)
    print(f"Generated Recipe for {dish_name}:\n")

    title, ingredients, directions = process_recipe_output(raw_recipe)
    print(
        f"Title: {title}",
        f"Ingredients: {ingredients}",
        f"Directions: {directions}",
        sep="\n",
    )



In [None]:
def generate_recipe_from_image(image_path, clip_model, preprocess, dataset, device='cpu'):
    """Go from a dish photo to a printed recipe.

    The image is first classified with CLIP via ``main``; the resulting class
    name is then used as the prompt for ``generate_recipe_from_dish``, which
    prints the recipe. Nothing is returned.

    Args:
        image_path: Image file showing the dish.
        clip_model: CLIP model used for classification.
        preprocess: Image transform paired with ``clip_model``.
        dataset: Object whose ``classes`` are the candidate dish names.
        device: Device on which CLIP inputs are placed.
    """
    dish = main(image_path, clip_model, preprocess, dataset, device)
    generate_recipe_from_dish(dish)

# Usage example: run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

clip_model, preprocess = load_clip_model(device=device)
# The dataset object supplies the candidate class names for classification.
dataset = Food101(root='/content/food101', split='train', download=True)

# Photo of a dish, stored under base_path, to push through the whole pipeline.
image_path = base_path + "bolognese.jpg"
generate_recipe_from_image(
    image_path=image_path,
    clip_model=clip_model,
    preprocess=preprocess,
    dataset=dataset,
    device=device,
)


Generated Recipe for spaghetti_bolognese:

Dish Name: spaghetti with bolognese and pecorino romano
Ingredients: spaghetti from ibarra spaghetti parma spaghetti from your favorite pasta recipe
Directions: prepare the pasta and serve. cook the pasta in plenty of salted water. drain and serve with a little extra pecorine romano. you can also do this in a pot with lots of garlic and olive oil and steam for a moment or two. serve with an italian bread. a la cartera ripe pecoriola is made from the same pasta from cippolini, such as la chianti. see the notes.
