In [None]:
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration


In [None]:
# The processor and the captioning model are both restored from the same
# local checkpoint directory, so the location is named once and reused.
MODEL_DIR = "./blip_image_captioning"

processor = BlipProcessor.from_pretrained(MODEL_DIR)
model = BlipForConditionalGeneration.from_pretrained(MODEL_DIR)

In [None]:
# Load a local image.
# The path uses a forward slash so the same cell runs on Windows, Linux and
# macOS; a backslash is only treated as a path separator on Windows.
local_image_path = "test/img1.webp"  # Replace with your local image path

# Image.open keeps the underlying file open, so the `with` block closes it as
# soon as the pixels have been converted into a standalone RGB image.
with Image.open(local_image_path) as source_image:
    raw_image = source_image.convert("RGB")

In [None]:
# Conditional image captioning: the caption is generated from the image
# together with the text prompt below, using length-controlled beam search.
text = "wallpaper of"
inputs = processor(images=raw_image, text=text, return_tensors="pt")

# Decoding settings, gathered in one place so they are easy to tune.
conditional_generation_kwargs = {
    "num_beams": 4,         # number of beams for beam search (better quality)
    "length_penalty": 2.0,  # length penalty to encourage longer sentences
    "min_length": 5,        # minimum length of the generated caption
    "max_length": 30,       # maximum length of the generated caption
}
out = model.generate(**inputs, **conditional_generation_kwargs)

# Only the first returned sequence is decoded; special tokens are dropped.
conditional_caption = processor.decode(out[0], skip_special_tokens=True)
print("Conditional Caption:", conditional_caption)

In [None]:
# Unconditional image captioning: no text prompt is supplied, so the caption
# is generated from the image alone, using length-controlled beam search.
inputs = processor(images=raw_image, return_tensors="pt")

# Decoding settings, gathered in one place so they are easy to tune.
unconditional_generation_kwargs = {
    "num_beams": 4,         # number of beams for beam search (better quality)
    "length_penalty": 2.0,  # length penalty to encourage longer sentences
    "min_length": 5,        # minimum length of the generated caption
    "max_length": 30,       # maximum length of the generated caption
}
out = model.generate(**inputs, **unconditional_generation_kwargs)

# Only the first returned sequence is decoded; special tokens are dropped.
unconditional_caption = processor.decode(out[0], skip_special_tokens=True)
print("Unconditional Caption:", unconditional_caption)