conda create -n claude-ocr -c conda-forge  
conda activate claude-ocr  
conda install python anthropic pillow -c conda-forge  

Set the `ANTHROPIC_API_KEY` environment variable to your Anthropic API key before running the cells below.  

In [5]:
import base64
import os
from anthropic import Anthropic

In [6]:
def encode_image(image_path):
    """
    Read an image from disk and return its contents as base64 text.

    The file is opened in binary mode, read in full, base64-encoded, and the
    resulting bytes are decoded to a ``str`` so they can be embedded in a
    JSON request body.

    :param image_path: Path to the image file
    :return: Base64 encoded string of the image
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return str(encoded_bytes, "utf-8")

In [7]:
def extract_text_from_image(api_key, image_path, model="claude-3-haiku-20240307",
                            max_tokens=1000, media_type=None):
    """
    Use a Claude model to extract text from an image.

    The image is base64-encoded and sent together with a fixed extraction
    prompt in a single user message. Any exception raised while encoding the
    file or calling the API is printed and reported to the caller as ``None``.

    :param api_key: Your Anthropic API key
    :param image_path: Path to the image file
    :param model: Model identifier to send the request to (defaults to Claude 3 Haiku)
    :param max_tokens: Upper bound on the number of tokens in the response
    :param media_type: Media type declared for the image (e.g. ``"image/png"``);
        when ``None`` it is derived from the file extension of ``image_path``
    :return: Extracted text from the image, or ``None`` if an error occurred
    """
    # File extensions mapped to the image media types listed in Anthropic's
    # vision documentation.
    media_types_by_extension = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    # Initialize the Anthropic client
    client = Anthropic(api_key=api_key)

    try:
        if media_type is None:
            # Declare the media type that matches the file instead of always
            # claiming JPEG; unknown extensions keep the previous JPEG default.
            extension = os.path.splitext(image_path)[1].lower()
            media_type = media_types_by_extension.get(extension, "image/jpeg")

        # Encode the image
        base64_image = encode_image(image_path)

        # Send the image followed by the extraction prompt in one user message
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": base64_image
                            }
                        },
                        {
                            "type": "text",
                            "text": "Please extract all the text from this image. If there are multiple text regions, list them clearly."
                        }
                    ]
                }
            ]
        )

        # Return the text of the first content block in the response
        return response.content[0].text

    except Exception as e:
        # Best-effort: report the problem and signal failure with None so the
        # caller can decide what to show.
        print(f"An error occurred: {e}")
        return None

In [8]:
def main(image_path=r'D:\Documents\Projects\OCR\content\dl1.jpg'):
    """
    Run the OCR demo: read the API key from the environment, extract the text
    from one image, and print the result.

    :param image_path: Path to the image file to process; the default is the
        sample image path this notebook was originally run against
    :return: None
    """
    # The key is read from the environment so it never has to be written
    # into the notebook itself.
    api_key = os.getenv('ANTHROPIC_API_KEY')

    if not api_key:
        print("Please set the ANTHROPIC_API_KEY environment variable.")
        return

    # Extract text from the image
    extracted_text = extract_text_from_image(api_key, image_path)

    # A falsy result (None or an empty string) is reported as a failure
    if extracted_text:
        print("Extracted Text:")
        print(extracted_text)
    else:
        print("Failed to extract text from the image.")

In [9]:
# Entry point: run the demo only when this code executes as the top-level
# module, not when it is imported.
if __name__ == "__main__":
    main()

Extracted Text:
The text extracted from the image includes:

1. Pennsylvania
2. Commercial Driver's License
3. 44 DOB: 99 999 999
4. 3 DOB: 08/04/1975
5. 4b EXP: 08/05/2023
6. 4a ISS: 03/01/2019
7. SAMPLE
8. JANICE ANN
9. 123 MAIN STREET
10. HARRISBURG, PA 17101-0000
11. 15 SEX: F 18 EYES: BRO
12. 16 HGT: 5-06"
13. CLASS: A
14. END: NONE
15. RESTR: NONE
16. DD: 12345678901234
17. 456789012345
18. CDL
19. ORGAN DONOR
