In [2]:
import os
from PIL import Image
import json
import pytesseract  # For OCR
from transformers import BlipProcessor, BlipForConditionalGeneration

# Default install location of the Tesseract binary on Windows.  The override
# is applied only when that file actually exists; otherwise pytesseract keeps
# its own default command, so the code is not tied to one machine's layout.
DEFAULT_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(DEFAULT_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = DEFAULT_TESSERACT_CMD

# Loaded BLIP (processor, model) pairs keyed by checkpoint name, so the
# weights are loaded once per process instead of once per image.
_BLIP_CACHE = {}


def _load_blip(model_name):
    """Return the (processor, model) pair for *model_name*, loading it on first use."""
    if model_name not in _BLIP_CACHE:
        _BLIP_CACHE[model_name] = (
            BlipProcessor.from_pretrained(model_name),
            BlipForConditionalGeneration.from_pretrained(model_name),
        )
    return _BLIP_CACHE[model_name]


def describe_image(image_path, num_captions=3,
                   model_name="Salesforce/blip-image-captioning-large"):
    """Generate several captions for one image with a BLIP captioning model.

    Args:
        image_path: Path of the image file to caption.
        num_captions: Number of captions to return; must be at least 1.
        model_name: Checkpoint name passed to ``from_pretrained``.

    Returns:
        A list of ``num_captions`` decoded caption strings.

    Raises:
        ValueError: If ``num_captions`` is smaller than 1.
    """
    # Fail fast, before the (expensive) model load and image decode.
    if num_captions < 1:
        raise ValueError(f"num_captions must be >= 1, got {num_captions}")

    processor, model = _load_blip(model_name)

    # Convert inside the context manager so the file handle is released as
    # soon as the RGB copy exists.
    with Image.open(image_path) as image_file:
        raw_image = image_file.convert('RGB')

    inputs = processor(raw_image, return_tensors="pt")

    # Beam search: twice as many beams as requested captions, so there are
    # always enough finished beams to return.
    outputs = model.generate(
        **inputs,
        max_length=100,
        num_return_sequences=num_captions,
        num_beams=num_captions * 2,
    )

    return [processor.decode(output, skip_special_tokens=True) for output in outputs]

def extract_text_from_image(image_path):
    """Extract text from an image using Tesseract OCR.

    The image is converted to grayscale and passed to Tesseract with the
    ``eng+hin`` language setting.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognised text with leading/trailing whitespace removed (this
        may be an empty string), or ``None`` if opening the image or running
        OCR failed.  Failures are reported with ``print`` rather than raised.
    """
    try:
        # Close the file handle as soon as the grayscale copy exists; without
        # the context manager the handle is never explicitly released.
        with Image.open(image_path) as image_file:
            grayscale = image_file.convert('L')

        text = pytesseract.image_to_string(grayscale, lang='eng+hin')
    except Exception as e:
        # Deliberately best-effort: one bad image must not abort a batch.
        print(f"Error extracting text from {image_path}: {e}")
        return None
    return text.strip()

def process_images_in_folder(folder_path, output_file):
    """Caption and OCR every image in a folder and write the results as JSON.

    Files are selected by extension (case-insensitively) and processed in
    sorted filename order so the output is deterministic.  A failure on one
    image is printed and that image is skipped; a failure while writing the
    JSON file is printed as well.

    Args:
        folder_path: Directory to scan (not recursive).
        output_file: Path of the JSON file to write.  It receives
            ``{"images": [...]}`` with one entry per processed image holding
            ``filename``, ``path``, ``description`` and ``extracted_text``.

    Returns:
        None.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    outputs = {
        'images': []
    }

    # Lower-case the name before matching so '.PNG', '.Jpeg', ... are found
    # too, and skip directories that merely look like image files.
    photos = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(folder_path, name))
    )

    for photo in photos:
        image_path = os.path.join(folder_path, photo)
        try:
            # Generate image description using BLIP
            captions = describe_image(image_path)
            description = '\n'.join(captions)
            print("this is description", description)

            # Extract text from the image using OCR (None or '' on no text)
            extracted_text = extract_text_from_image(image_path)

            outputs['images'].append({
                'filename': photo,
                'path': image_path,
                'description': description,
                'extracted_text': extracted_text if extracted_text else "No text found",
            })
            print(f"Description and text extracted for {photo}")

        except Exception as e:
            # Best-effort batch: report the failure and continue.
            print(f"Error processing {photo}: {e}")

    print("This is output", outputs)

    try:
        # ensure_ascii=False keeps non-ASCII OCR text readable in the file.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(outputs, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Error saving descriptions to {output_file}: {e}")
        
    
# Driver: where the JSON report is written and where the input images live.
output_file = 'output.json'
folder_path = './images'

# Run the batch.  The final message is printed unconditionally, i.e. it does
# not check whether writing the report succeeded.
print("=== Processing Images ===")
process_images_in_folder(folder_path=folder_path, output_file=output_file)
print(f"Descriptions saved to {output_file}")

=== Processing Images ===
this is description there is a paper with a picture of a star of david on it
there is a paper with a picture of a man and a woman on it
there is a paper with a picture of a star of david written on it
Description and text extracted for test2.jpg
This is output {'images': [{'filename': 'test2.jpg', 'path': './images\\test2.jpg', 'description': 'there is a paper with a picture of a star of david on it\nthere is a paper with a picture of a man and a woman on it\nthere is a paper with a picture of a star of david written on it', 'extracted_text': "ब्रिभुवन विश्वविद्यालय\nwae Tribhuvan University\nCe X इन्जिनियरिड अध्ययन संस्थान Malling Address: Gangalal Marga, Teenkune\n\ny \\ee Institute of Engineering (es 13\nहर Campus Chief @: 977-25-520410\n\nmA\nVe\nwoe E-mail: loepcd@ioe.edu.np\n\nae\n\ny Yalodct क्याम्पस er Fax: 977-25-520405\nPURWANCHAL CAMPUS रन्चल ee ye Bape\nपू.क्\u200d्या.फा.नं. ((य') च.नं. नव७०६. 16621 eo समिति 20501216\n\nfro वि० रजिष्टेशन फारम भर्ने