In [1]:
import os
os.environ['CUDA_DEVICE_ORDER']="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES']='0'

import torch

print(torch.__version__)
print(torch.version.cuda)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name)

2.1.0
11.8
cuda
True
0
<function get_device_name at 0x7f27d34bc670>


In [None]:
import pandas as pd
import ast
from tqdm import tqdm
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

# Load the recipes data
data_path = "../../data/jeehoshin/allrecipe_dataset/"
recipes = pd.read_csv(data_path + "core-data_recipe.csv")

def extract_directions(cell):
    try:
        d = ast.literal_eval(cell)
        return d.get('directions', "")
    except:
        return ""

# Preprocess nutrition data (Use 7 major nutritions and format it as a string)
for i in range(len(recipes)):
    nutri = recipes.iloc[i]['nutritions']
    nutri_dict = ast.literal_eval(nutri)

    selected_keys = ['sugars', 'sodium', 'carbohydrates', 'calories', 'fat', 'saturatedFat', 'fiber']
    filtered_nutri = {key: nutri_dict[key] for key in selected_keys if key in nutri_dict}

    nutrition_summary = ""
    for key in selected_keys:
        value = filtered_nutri.get(key, {})
        amount = value.get('displayValue', 0)
        unit = value.get('unit', '')
        nutrition_summary += f"{key}: {amount}{unit}, "
    
    recipes.at[i, 'nutrition_summary'] = nutrition_summary

recipes['cooking_directions'] = recipes['cooking_directions'].apply(extract_directions)

recipes.drop(columns=['nutritions'], inplace=True)

print("nutrition data processing completed")
print(recipes.head())

nutrition data processing completed
   recipe_id                              recipe_name  \
0     240488        Pork Loin, Apples, and Sauerkraut   
1     218939         Foolproof Rosemary Chicken Wings   
2      87211                    Chicken Pesto Paninis   
3     245714                       Potato Bacon Pizza   
4     218545  Latin-Inspired Spicy Cream Chicken Stew   

                                           image_url  \
0  https://images.media-allrecipes.com/userphotos...   
1  https://images.media-allrecipes.com/userphotos...   
2  https://images.media-allrecipes.com/userphotos...   
3  https://images.media-allrecipes.com/userphotos...   
4  https://images.media-allrecipes.com/userphotos...   

                                         ingredients  \
0  sauerkraut drained^Granny Smith apples sliced^...   
1  chicken wings^sprigs rosemary^head garlic^oliv...   
2  focaccia bread quartered^prepared basil pesto^...   
3  red potatoes^strips bacon^Sauce:^heavy whippin...   
4  s

In [3]:
row = recipes.iloc[1]
print(row['recipe_name'])
print(row['ingredients'])
print(row['cooking_directions'])
print(row['nutrition_summary'])

Foolproof Rosemary Chicken Wings
chicken wings^sprigs rosemary^head garlic^olive oil^lemon pepper^seasoned salt
Prep
20 m
Cook
40 m
Ready In
1 h
Preheat an oven to 350 degrees F (175 degrees C).
Arrange the chicken wings, rosemary, and garlic cloves on a broiler-proof baking sheet, making sure the chicken pieces are not touching. Drizzle the olive oil over the chicken and garlic. Season the wings on all sides with the lemon pepper and seasoned salt.
Bake in the preheated oven until chicken meat is no longer pink at the bone and the juices run clear, 35 to 40 minutes, turning the wings once about halfway through the cooking time. An instant-read thermometer inserted into the thickest part of a wing should read at least 160 degrees F (70 degrees C).
Take the baking sheet from the oven and set the oven's broiler to High. Remove the garlic and rosemary from the sheet and set aside. Turn the wings again.
Broil the wings until golden brown, about 5 minutes. Garnish with the rosemary sprigs a

In [None]:
import re

cache_dir = "../../data/jeehoshin/huggingface"
model_id = "llava-hf/llava-1.5-7b-hf"
model = LlavaForConditionalGeneration.from_pretrained(
    model_id, 
    cache_dir=cache_dir,
    torch_dtype=torch.float16, 
    low_cpu_mem_usage=True, 
).to(0)

processor = AutoProcessor.from_pretrained(
    model_id,
    cache_dir=cache_dir
)

def extract_summary_with_tags(generated_text):
    matches = re.search(r"<summary>(.*)", generated_text, re.DOTALL | re.IGNORECASE)
    if matches:
        return matches.group(1).strip()
    else:
        return generated_text.strip()  # fallback

def generate_batch_summary(model, processor, data_batch, device):
    images, prompts = [], []

    for recipe_id, recipe_name, ingredients, directions, nutritions in data_batch:
        image_path = data_path + f"core-data-images/core-data-images/{recipe_id}.jpg"
        
        try:
            image = Image.open(image_path).convert("RGB")  # Load local image
        except Exception as e:
            print(f"Failed to load image: {image_path}")
            image = Image.new("RGB", (224, 224), (255, 255, 255))  # fallback blank image
        
        images.append(image)

        prompt = f"""<image>
You are a professional culinary analyst. Based on the provided image and the detailed recipe information below, write a rich and comprehensive summary of the dish.

Your summary should thoroughly describe:
1. The visual presentation of the dish (color, texture, portioning, plating, etc.)
2. The core ingredients and any notable combinations or preparation methods
3. The step-by-step impression of the cooking process or any special techniques
4. The dish’s flavor profile, aroma, and expected texture
5. Relevant nutritional highlights (e.g., high fiber, protein-rich, low in saturated fat)
6. The type of person or dietary preference this dish may appeal to (e.g., health-conscious eaters, comfort food lovers)

Name: {recipe_name}  
Ingredients: {ingredients}  
Directions: {directions}  
Nutrition: {nutritions}

<summary>"""
        
        prompts.append(prompt)
  
    inputs = processor(text = prompts, images = images, return_tensors="pt", padding=True).to(device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=200)
    decoded = processor.batch_decode(output, skip_special_tokens=True)
    
    summaries = [extract_summary_with_tags(txt) for txt in decoded]

    return summaries

# Example usage
batch_size = 8
results = []
for i in tqdm(range(0, len(recipes), batch_size)):  
    batch = recipes.iloc[i:i+batch_size][['recipe_id', 'recipe_name', 'ingredients', 'cooking_directions', 'nutrition_summary']].values.tolist()
    batch_results = generate_batch_summary(model, processor, batch, device=torch.device("cuda"))

    results.extend(batch_results)

recipes['summary'] = results

recipes.drop(columns=['image_url', 'ingredients', 'cooking_directions', 'nutrition_summary'], inplace=True)

recipes.to_csv(data_path + "detailed_recipe_summaries.csv", index=False)

print("Summaries generated and saved.")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
100%|█████████████████████████████████████████████████████████████████████| 5704/5704 [20:36:16<00:00, 13.00s/it]


Summaries generated and saved.


In [4]:
import pandas as pd

summaries = pd.read_csv(data_path + "detailed_recipe_summaries.csv")
for i in range(10):
    print(f"Recipe id : {summaries.iloc[i]['recipe_id']}")
    print(f"Recipe name : {summaries.iloc[i]['recipe_name']}")
    print(f"Recipe summary : {summaries.iloc[i]['summary']}")
    print()

Recipe id : 240488
Recipe name : Pork Loin, Apples, and Sauerkraut
Recipe summary : The dish is a pork loin roast with apples and sauerkraut, served with a brown sugar and apple cider glaze. The pork is seasoned with Thai spices, salt, garlic powder, and black pepper. The roast is placed in a roasting pan with the sauerkraut, apples, onion, and caraway seeds, and the glaze is poured over the top. The dish is cooked in the oven for 2 hours and 30 minutes, or until the pork is cooked through and reaches an internal temperature of 145 degrees Fahrenheit. The nutritional highlights of the dish include a good amount of fiber and protein, but it is also relatively high in sugar and saturated fat. This dish may appeal to comfort food lovers and those who enjoy flavorful, hearty meals.

Recipe id : 218939
Recipe name : Foolproof Rosemary Chicken Wings
Recipe summary : The dish is a delicious and flavorful roasted chicken wing with a hint of rosemary and garlic. The chicken wings are seasoned w