In [1]:
from transformers import BlipForConditionalGeneration, BlipProcessor
from PIL import Image
import os
import requests



In [2]:
# Filesystem locations used by the rest of the notebook.
# cache_dir is handed to `from_pretrained` as the download/cache location for model files.
cache_dir = '/media/RLAB-Disk01/Large-Language-Models-Weights'
# cases_dir is the root folder; each sub-directory in it is treated as one case.
cases_dir = (
    '/media/RLAB-Disk01/'
    '(final)merged_images_with_labels_order_and_folders_mask_normalized/'
)

In [3]:


# Build the work list: every sub-directory of cases_dir that does not yet
# contain a 'blip-response.txt' file (i.e. has not been captioned so far).
cases_to_process = [
    entry
    for entry in os.listdir(cases_dir)
    if os.path.isdir(os.path.join(cases_dir, entry))
    and not os.path.exists(os.path.join(cases_dir, entry, 'blip-response.txt'))
]
print(f"Found {len(cases_to_process)} cases to process.")

Found 40 cases to process.


In [4]:
# Instantiate the BLIP captioning processor and model from the same checkpoint;
# files are downloaded into (or reused from) cache_dir.
blip_checkpoint = "Salesforce/blip-image-captioning-base"
pretrained_kwargs = {"cache_dir": cache_dir}

print("Loading BLIP model and processor...")
processor = BlipProcessor.from_pretrained(blip_checkpoint, **pretrained_kwargs)
model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint, **pretrained_kwargs)
print("BLIP model and processor loaded successfully!\n")

def load_image(image_path):
    """Load an image from disk, fully decoded, as an RGB PIL image.

    `Image.open` is lazy: it only identifies the file and keeps the file
    handle open until the pixel data is actually read. Converting inside the
    `with` block forces the decode here, so a truncated or corrupt file is
    reported by this function instead of failing later in the caller, and the
    underlying file is closed before returning.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A new in-memory RGB image, or None if the file could not be opened
        or decoded (the error is printed).
    """
    try:
        with Image.open(image_path) as img:
            # convert() loads the pixel data and returns an independent copy,
            # so the result stays valid after the file is closed.
            return img.convert("RGB")
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file.
        print(f"Error loading image {image_path}: {e}")
        return None

Loading BLIP model and processor...


preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


BLIP model and processor loaded successfully!



In [5]:

# Generation settings passed to model.generate for every image.
# NOTE(review): 2048 may exceed the number of positions the BLIP text decoder
# supports; captions normally end long before this limit - confirm and lower if needed.
MAX_CAPTION_LENGTH = 2048
NUM_BEAMS = 6


def generate_caption(img):
    """Return the caption generated by the notebook-level BLIP model for one image.

    Uses the global `processor` and `model`. The intermediate tensors are
    locals of this function, so they go out of scope as soon as it returns
    and no manual `del` is required in the calling loop.

    Args:
        img: A PIL image.

    Returns:
        The decoded caption string with special tokens removed.
    """
    inputs = processor(images=img, return_tensors="pt")
    out = model.generate(**inputs, max_length=MAX_CAPTION_LENGTH, num_beams=NUM_BEAMS)
    return processor.decode(out[0], skip_special_tokens=True)


# Cases for which at least one image could not be loaded or captioned,
# or for which no caption could be produced at all.
failed_cases = []

# Iterate through the cases that still need captions
for case in cases_to_process:
    case_dir = os.path.join(cases_dir, case)
    if not os.path.isdir(case_dir):
        continue  # Skip non-directory files

    # os.listdir returns entries in arbitrary order; sort so that the captions
    # file has a stable, reproducible ordering.
    image_files = sorted(f for f in os.listdir(case_dir) if f.lower().endswith('.png'))
    print(f"Found {len(image_files)} image files for case {case}")

    # Collect and process images
    captions = []
    case_failed = False
    for image_file in image_files:
        image_path = os.path.join(case_dir, image_file)
        img = load_image(image_path)
        if img is None:
            print(f"Skipping invalid image: {image_file} in case {case}")
            case_failed = True
            continue

        print(f"Generating caption for image: {image_file} in case {case}")
        try:
            # The context manager releases any file handle held by the image
            # once the caption has been produced.
            with img:
                caption = generate_caption(img)
        except Exception as e:
            # One bad image must not abort the whole run: log it, mark the
            # case as failed, and move on to the next image.
            print(f"Error generating caption for {image_file} in case {case}: {e}")
            case_failed = True
            continue

        captions.append(f"Image: {image_file}\nCaption: {caption}\n")
        print(f"Generated caption for {image_file}: {caption}")

    if captions:
        # Save the captions to a text file
        response_path = os.path.join(case_dir, 'blip-response.txt')
        with open(response_path, 'w', encoding='utf-8') as f:
            f.write("\n".join(captions))
        print(f"Captions saved for case {case}.")
    else:
        print(f"No valid images to process for case {case}.")
        case_failed = True

    if case_failed:
        failed_cases.append(case)

# After processing all cases, save failed cases (one case name per line;
# the file is written even when it is empty).
failed_cases_path = os.path.join(cases_dir, 'failed_cases_blip.txt')
with open(failed_cases_path, 'w', encoding='utf-8') as f:
    for failed_case in failed_cases:
        f.write(f"{failed_case}\n")
print(f"Failed cases logged in {failed_cases_path}")

Found 9 image files for case RHUH-0019
Generating caption for image: RHUH-0019_batch_1.png in case RHUH-0019
Generated caption for RHUH-0019_batch_1.png: a black and white image of a bunch of flowers
Generating caption for image: RHUH-0019_batch_2.png in case RHUH-0019
Generated caption for RHUH-0019_batch_2.png: a black and white image of a number of circles
Generating caption for image: RHUH-0019_batch_3.png in case RHUH-0019
Generated caption for RHUH-0019_batch_3.png: a black and white image of a number of circles
Generating caption for image: RHUH-0019_batch_4.png in case RHUH-0019
Generated caption for RHUH-0019_batch_4.png: a black and white image of a number of different circles
Generating caption for image: RHUH-0019_batch_5.png in case RHUH-0019
Generated caption for RHUH-0019_batch_5.png: a black and white image of a number of circles
Generating caption for image: RHUH-0019_batch_6.png in case RHUH-0019
Generated caption for RHUH-0019_batch_6.png: a black and white image of 