In [2]:
import os
import random
import shutil



# Copy a reproducible random sample of swan images into a separate folder.
swam_path = "dataset/swan"
selected_path = "dataset/selected_swan_images"

NUM_SWAN_IMAGES = 40  # how many images to sample
SWAN_SAMPLE_SEED = 42  # fixed seed so a re-run picks the same files
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# List all entries in the directory. os.listdir returns names in arbitrary
# order, so sort them: a seeded sample is only repeatable when the population
# is presented in a stable order.
all_files = sorted(os.listdir(swam_path))

# Keep regular files with an image extension. The comparison is done on the
# lower-cased name so that e.g. "IMG_001.JPG" is not skipped.
image_files = [
    f for f in all_files
    if f.lower().endswith(IMAGE_EXTENSIONS)
    and os.path.isfile(os.path.join(swam_path, f))
]

# random.sample raises ValueError when asked for more items than the
# population holds, so clamp the request to what is actually available.
sample_size = min(NUM_SWAN_IMAGES, len(image_files))
if sample_size < NUM_SWAN_IMAGES:
    print(f"Only {len(image_files)} image(s) found in {swam_path}; selecting all of them")

# A private Random instance keeps the selection deterministic without
# reseeding the global `random` module used elsewhere in the notebook.
selected_images = random.Random(SWAN_SAMPLE_SEED).sample(image_files, sample_size)

# Create the destination directory if it doesn't exist
os.makedirs(selected_path, exist_ok=True)

# Copy the selected images to the new directory
for image in selected_images:
    shutil.copy(os.path.join(swam_path, image), os.path.join(selected_path, image))

print(f"Selected {len(selected_images)} images and copied them to {selected_path}")



In [31]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration



# The processor and the captioning model are loaded from the same pretrained
# checkpoint, so the identifier is named once and reused.
blip_checkpoint = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(blip_checkpoint)
model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)

# Open the image from a local file and convert it to RGB mode.
image_path = "cat1.jpg"  # Replace with your image path
raw_image = Image.open(image_path).convert(mode="RGB")

# Produce two captions for the same image with default generation settings:
#   * conditional   - the processor receives the image together with a text prompt
#   * unconditional - the processor receives the image only
text = "a photography of"

captions = {}
for label, processor_args in (
    ("Conditional Caption:", (raw_image, text)),
    ("Unconditional Caption:", (raw_image,)),
):
    # Encode -> generate -> decode; `inputs` and `out` are left holding the
    # values of the last (unconditional) pass once the loop finishes.
    inputs = processor(*processor_args, return_tensors="pt")
    out = model.generate(**inputs)
    captions[label] = processor.decode(out[0], skip_special_tokens=True)
    print(label, captions[label])

conditional_caption = captions["Conditional Caption:"]
unconditional_caption = captions["Unconditional Caption:"]

# Longer unconditional caption: beam search with explicit length bounds,
# then normalised to exactly 20 whitespace-separated words.
inputs = processor(raw_image, return_tensors="pt")

generation_options = dict(
    min_length=25,            # lower bound on generated length, to encourage longer captions
    max_length=100,           # upper bound on generated length
    num_beams=5,              # beam search with 5 beams
    no_repeat_ngram_size=2,   # no 2-gram may appear twice in the output
    early_stopping=True,      # beam search stops once enough finished candidates exist
)
generated_ids = model.generate(**inputs, **generation_options)

caption = processor.decode(generated_ids[0], skip_special_tokens=True)

# Bring the caption to exactly 20 words: extra words are cut off, missing
# words are filled with the literal string '<pad>'. A caption that already has
# 20 words is left untouched.
words = caption.split()
shortfall = 20 - len(words)
if shortfall < 0:
    caption = ' '.join(words[:20])
elif shortfall > 0:
    caption = ' '.join(words + ['<pad>'] * shortfall)

print("Caption:", caption)


Conditional Caption: a photography of a cat yawning with its mouth open
Unconditional Caption: there is a cat that is yawning and looking at the camera
Caption: there is an orange and white cat yawning with its mouth wide open while sitting on top of a window
