In [2]:
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor, BitsAndBytesConfig
import torch
from qwen_vl_utils import process_vision_info
import nltk

from rouge_score import rouge_scorer

In [3]:
# bitsandbytes quantization settings handed to from_pretrained().
# Weights are loaded in 4-bit; float16 is the compute dtype for the 4-bit layers.
# To use 8-bit instead, replace `load_in_4bit=True` with `load_in_8bit=True`:
# turning `load_in_4bit` off on its own does NOT select 8-bit.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

In [4]:
# Load the Qwen2.5-VL 3B instruct checkpoint with the quantization settings
# from `bnb_config`; dtype and device placement are left to "auto".
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Processor for the same checkpoint id that `model` was loaded from.
processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct"
)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.


In [90]:
# Sample under test: `filename` is the shared stem for the test image (and is
# reused later for the reference summary); `dish` is the label put in prompts.
filename = "FriedRice"
dish = "Rice"
image_path = "./Test_Images/" + filename + ".jpeg"

In [91]:
# Single user turn for the description step: the dish photo followed by the
# text instruction that names the dish.
photo_entry = {"type": "image", "image": image_path}
instruction_entry = {
    "type": "text",
    "text": f"Describe the food image with ingredients, texture etc present. Dish: {dish}",
}
messages = [
    {"role": "user", "content": [photo_entry, instruction_entry]},
]

In [92]:
# Render the chat messages to a prompt string (not tokenized yet), with the
# generation prompt appended.
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Pull the image/video payloads referenced by the messages.
image_inputs, video_inputs = process_vision_info(messages)

# Build the padded PyTorch tensors for a batch containing this one prompt.
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)

# The model was placed with device_map="auto", so follow its actual device
# instead of hard-coding "cuda".
inputs = inputs.to(model.device)

In [93]:
# Generate up to 128 new tokens for the description prompt.
generated_ids = model.generate(**inputs, max_new_tokens=128)

# Slice the first len(prompt) tokens off every output sequence so that only the
# tokens after the prompt are decoded.
generated_ids_trimmed = [
    sequence[len(prompt_ids):]
    for prompt_ids, sequence in zip(inputs.input_ids, generated_ids)
]

# Decode the batch and keep its single entry as the description string.
description_text = processor.batch_decode(
    generated_ids_trimmed,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)[0]
print(description_text)

The image shows a plate of fried rice. The rice is golden brown and appears to be mixed with various ingredients. Some of the visible ingredients include:

1. Shredded carrots
2. Pieces of shrimp or prawns
3. Green onions or scallions
4. Red chili peppers or chili flakes

The dish is garnished with red chili peppers on top. The rice looks slightly crispy on the outside while maintaining a soft texture inside. The ingredients are evenly distributed throughout the rice, creating a colorful and appetizing presentation.

The plate is set on a light gray surface, with a white napkin placed next to it.


In [94]:
# Few-shot prompt for the recipe step. It interpolates `dish` and the
# model-written `description_text`, asks for a recipe as three numbered steps,
# and then lists three worked examples (Mustard Fish, Butter Chicken, Cheesy Bake).
# NOTE(review): the examples label their answers "Summary:" while the format
# section does not, and the prompt ends on the last example rather than on a cue
# for the target dish. The model may therefore echo "Dish:"/"Summary:" header
# lines in its answer; confirm whether those should be stripped before scoring.
recipe_generator = f"""You are a professional recipe generator.
Dish: {dish}
Description of the food : {description_text}

Format: The recipe should be in "3-steps" in new lines.
1. [steps]
2. [steps]
3. [steps]

Examples: 
Dish: Mustard Fish
Summary: 1. Clean and slice the fish. Soak mustard seeds for 2 hours, then grind with salt and green chillies to a paste. Mix in grated coconut, yoghurt, sugar, turmeric, and mustard oil to make a marinade.
2. Coat the fish pieces with the mixture, place them in a tiffin box with halved green chillies on top, fasten the lid, and marinate for at least 15 minutes.
3. Steam the sealed tiffin in a covered pan with hot water for 15 minutes. Let it rest for 5 minutes before serving.

Dish: Butter Chicken
Summary: 1. Marinate boneless chicken pieces in yogurt, lemon juice, ginger-garlic paste, chili powder, and garam masala for at least 1 hour. Then grill, bake, or pan-fry until cooked and slightly charred.
2. In a pan, heat butter and sauté ginger, garlic, and tomatoes until soft. Blend this into a smooth puree, then return to the pan and add cream, kasuri methi (dried fenugreek), chili powder, and salt. Simmer until rich and creamy.
3. Add the cooked chicken to the sauce, simmer for 10–15 minutes, adjust seasoning, and serve hot with naan or rice.

Dish: Cheesy Bake
Summary: 1. Boil or steam your choice of veggies (e.g., broccoli, cauliflower, carrots, corn) or cooked pasta/chicken. Lightly season with salt, pepper, and herbs.
2. In a pan, melt butter, add flour, and cook for a minute. Slowly whisk in milk to make a smooth white sauce. Add grated cheese (like cheddar or mozzarella) and stir until melted and creamy.
3. Mix the base with the cheese sauce, pour into a greased baking dish, top with more cheese, and bake at 180°C (350°F) for 20–25 minutes until golden and bubbly. Serve hot!"""

In [95]:
# Single user turn for the recipe step: the same dish photo plus the few-shot
# recipe prompt held in `recipe_generator`.
recipe_photo_entry = {"type": "image", "image": image_path}
recipe_prompt_entry = {"type": "text", "text": recipe_generator}
messages = [
    {"role": "user", "content": [recipe_photo_entry, recipe_prompt_entry]},
]

In [96]:
# Render the recipe chat messages to a prompt string (not tokenized yet), with
# the generation prompt appended.
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Pull the image/video payloads referenced by the messages.
image_inputs, video_inputs = process_vision_info(messages)

# Build the padded PyTorch tensors and move them to the device the model
# actually lives on (it was placed with device_map="auto"), rather than a
# hard-coded "cuda".
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to(model.device)

In [97]:
# Generate up to 300 new tokens for the recipe prompt and move the ids to CPU.
generated_ids = model.generate(**inputs, max_new_tokens=300).to("cpu")

# Slice the first len(prompt) tokens off every output sequence so that only the
# tokens after the prompt are decoded.
generated_ids_trimmed = [
    sequence[len(prompt_ids):]
    for prompt_ids, sequence in zip(inputs.input_ids, generated_ids)
]

recipe_text = processor.batch_decode(
    generated_ids_trimmed,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)

# The batch holds one prompt, so its first entry is the recipe.
generated_recipe = recipe_text[0]
print(generated_recipe)

Dish: Fried Rice
Summary:
1. Cook the rice according to package instructions. While the rice is cooking, prepare the vegetables by cutting them into small pieces. Shred carrots, chop green onions, and dice red chili peppers. Peel and devein shrimp or prawns.

2. Heat a large skillet over medium-high heat. Add the rice and cook for 2-3 minutes until heated through. Add the vegetables and cook for another 2-3 minutes until they are heated through.

3. Stir in the shrimp or prawns, green onions, and chili peppers. Cook for 1-2 minutes until the shrimp or prawns are cooked. Season with salt and pepper to taste. Garnish with additional green onions and chili peppers if desired. Serve hot.


In [98]:
# The reference recipe is stored under Test_Summaries with the same stem as the
# test image; read it as UTF-8 and drop surrounding whitespace.
reference_summary_path = "./Test_Summaries/" + filename + ".txt"
with open(reference_summary_path, encoding="utf-8") as ref_file:
    reference_summary = ref_file.read()
reference_summary = reference_summary.strip()

In [99]:
# ROUGE-1, ROUGE-2 and ROUGE-L scorer with stemming turned on.
scorer = rouge_scorer.RougeScorer(
    rouge_types=['rouge1', 'rouge2', 'rougeL'],
    use_stemmer=True,
)

In [100]:
# Score the generated recipe (prediction) against the reference text (target).
scores = scorer.score(
    target=reference_summary,
    prediction=generated_recipe,
)

In [101]:
# Print precision / recall / F1 for each ROUGE variant, one blank line after
# every metric.
print("ROUGE Scores:")
for metric, score in scores.items():
    print(
        f"{metric.upper()}:",
        f"  Precision: {score.precision:.4f}",
        f"  Recall:    {score.recall:.4f}",
        f"  F1:        {score.fmeasure:.4f}",
        "",
        sep="\n",
    )

ROUGE Scores:
ROUGE1:
  Precision: 0.3884
  Recall:    0.4845
  F1:        0.4312

ROUGE2:
  Precision: 0.1250
  Recall:    0.1562
  F1:        0.1389

ROUGEL:
  Precision: 0.2314
  Recall:    0.2887
  F1:        0.2569

