In [1]:
pip install --upgrade dataset-tools

Collecting dataset-tools
  Downloading dataset_tools-0.1.4-py3-none-any.whl.metadata (2.0 kB)
Collecting supervisely>=6.72.28 (from dataset-tools)
  Downloading supervisely-6.73.233-py3-none-any.whl.metadata (33 kB)
Collecting requests-toolbelt<1.0.0,>=0.9.1 (from dataset-tools)
  Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl.metadata (14 kB)
Collecting pandas<=1.5.2,>=1.1.3 (from dataset-tools)
  Downloading pandas-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scikit-image<1.0.0,>=0.17.1 (from dataset-tools)
  Downloading scikit_image-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting dataframe-image<1.0.0,>=0.1.11 (from dataset-tools)
  Downloading dataframe_image-0.2.6-py3-none-any.whl.metadata (9.1 kB)
Collecting inflect>=6.0.0 (from dataset-tools)
  Downloading inflect-7.4.0-py3-none-any.whl.metadata (21 kB)
Collecting gdown>=4.7.1 (from dataset-tools)
  Downloading gdown-5.2.0-py

In [2]:
import dataset_tools as dtools

# Fetch the dataset by name into the user's dataset-ninja folder. The call is
# kept as the cell's last expression so its return value is still displayed.
download_kwargs = {"dataset": "PASCAL Context", "dst_dir": "~/dataset-ninja/"}
dtools.download(**download_kwargs)

Downloading 'PASCAL Context': 100%|██████████| 1.21G/1.21G [00:44<00:00, 29.2MB/s]
Unpacking 'pascal-context.tar': 100%|██████████| 20209/20209 [00:10<00:00, 1874.65file/s]


'/teamspace/studios/this_studio/dataset-ninja/pascal-context'

In [6]:
import os
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

# Load the BLIP model and processor (both from the same checkpoint id)
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Define the root directory for Pascal Context.
# The download cell unpacks into "~/dataset-ninja/", so expand "~" here rather
# than hardcoding one machine's home directory; on the original machine this
# resolves to the same absolute path as before.
root_dir = os.path.expanduser("~/dataset-ninja/pascal-context/")

# Define output files for prompts (written relative to the working directory)
train_output_file = "train_prompts.txt"
val_output_file = "val_prompts.txt"

def generate_caption(image_path):
    """Generate a caption for a given image using the BLIP model.

    Uses the module-level ``processor`` and ``model`` objects.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string, or ``None`` if opening, preprocessing,
        generation or decoding raised (the error is printed, not re-raised).
    """
    try:
        # The context manager closes the source file deterministically, even
        # when decoding fails part-way; convert() returns an independent image
        # that stays usable after the source is closed.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
        inputs = processor(image, return_tensors="pt")
        output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberately best-effort: one unreadable image must not abort a
        # whole-folder run, so report the failure and let the caller skip it.
        print(f"Error processing {image_path}: {e}")
        return None

def process_folder(folder_path, output_file):
    """Caption every image in ``<folder_path>/img`` and save the results.

    Each ``.jpeg``/``.jpg``/``.png`` file (case-insensitive) is passed to
    ``generate_caption``; images for which no caption comes back are skipped.
    The collected ``"<file name>: <caption>"`` lines are written to
    ``output_file``, overwriting any existing file.

    Args:
        folder_path: Dataset split directory that contains an ``img`` folder.
        output_file: Path of the text file to write, one prompt per line.

    Returns:
        None. If the ``img`` folder is missing, a message is printed and no
        output file is written.
    """
    img_folder = os.path.join(folder_path, "img")

    # isdir rather than exists: a plain file named "img" would otherwise pass
    # the check and make os.listdir raise.
    if not os.path.isdir(img_folder):
        print(f"Image folder not found: {img_folder}")
        return

    prompts = []
    # os.listdir returns entries in arbitrary order; sorting makes the prompt
    # file identical from run to run.
    for image_file in sorted(os.listdir(img_folder)):
        if not image_file.lower().endswith((".jpeg", ".jpg", ".png")):
            continue
        caption = generate_caption(os.path.join(img_folder, image_file))
        if caption:
            prompts.append(f"{image_file}: {caption}")
            print(f"Generated caption for {image_file}: {caption}")

    # Save prompts to the output file; explicit UTF-8 so the result does not
    # depend on the platform's default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(f"{prompt}\n" for prompt in prompts)
    print(f"Saved prompts to {output_file}")

# Caption both dataset splits, pairing each split folder with its prompt file
for split_name, prompts_file in (("train", train_output_file), ("val", val_output_file)):
    process_folder(os.path.join(root_dir, split_name), prompts_file)



Generated caption for 2008_003283.jpeg: a woman sitting on a donkey
Generated caption for 2009_001412.jpeg: a train on the tracks
Generated caption for 2008_007261.jpeg: a table with a bunch of food on it
Generated caption for 2009_000882.jpeg: a red motorcycle parked on the sidewalk
Generated caption for 2010_003816.jpeg: a plane taking off
Generated caption for 2009_005133.jpeg: a street with houses and cars parked on it
Generated caption for 2008_006370.jpeg: a man sitting on a couch
Generated caption for 2010_002498.jpeg: a man and woman in a boat in the ocean
Generated caption for 2009_004397.jpeg: the water is blue
Generated caption for 2008_002834.jpeg: a car driving down a street in a residential area
Generated caption for 2008_006864.jpeg: a man and a little girl sitting on a bed
Generated caption for 2009_004890.jpeg: a group of men
Generated caption for 2010_000492.jpeg: a man sitting on a chair
Generated caption for 2008_006820.jpeg: a man standing outside of a building
Gen