# In [9]:
import torch
import os
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from craft_pytorch.craft import Craft



# Load CRAFT for text detection.
# Enable CUDA only when a GPU is actually available so the script also runs
# unchanged on CPU-only machines instead of failing at model load time.
craft = Craft(output_dir='output', crop_type="poly", cuda=torch.cuda.is_available())

# Load the TrOCR processor (image preprocessing + token decoding) and the
# encoder-decoder model from the same pretrained checkpoint.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Image transformation: resize to 384x384 and convert the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
])

# Path to images
image_folder = "file1_images"
# os.listdir returns entries in arbitrary order, so sort for reproducible
# output; compare extensions case-insensitively so files such as "scan.JPG"
# are not silently skipped.
image_paths = [
    os.path.join(image_folder, f)
    for f in sorted(os.listdir(image_folder))
    if f.lower().endswith((".jpg", ".png"))
]

def _box_bounds(box, width, height):
    """Return the clipped axis-aligned bounds ``(x_min, y_min, x_max, y_max)`` of *box*.

    *box* may be a flat ``[x_min, y_min, x_max, y_max]`` sequence or a polygon
    given as a sequence of ``(x, y)`` points; both are reduced to the smallest
    enclosing rectangle. The result is clamped to ``[0, width]`` x
    ``[0, height]`` so it can be used directly as array slice bounds.
    """
    # Viewing the coordinates as (x, y) pairs handles both layouts: a flat
    # 4-value box becomes its two corner points, a polygon keeps its vertices.
    points = np.asarray(box, dtype=float).reshape(-1, 2)
    x_low, y_low = np.floor(points.min(axis=0))
    x_high, y_high = np.ceil(points.max(axis=0))
    # Clamp to the image: negative indices would otherwise wrap around when
    # slicing a NumPy array and silently select the wrong region.
    x_min = max(0, min(int(x_low), width))
    x_max = max(0, min(int(x_high), width))
    y_min = max(0, min(int(y_low), height))
    y_max = max(0, min(int(y_high), height))
    return x_min, y_min, x_max, y_max


# Run detection + recognition on every image. The CRAFT models are unloaded in
# a ``finally`` clause so they are released even if one image fails.
try:
    for image_path in image_paths:
        # Load the image as an RGB array; the context manager closes the file.
        with Image.open(image_path) as source_image:
            image_np = np.array(source_image.convert("RGB"))
        height, width = image_np.shape[:2]

        # Detect text regions
        prediction = craft.detect_text(image_path)

        # Loop through detected boxes.
        # NOTE(review): detectors exposing this API (e.g. craft-text-detector)
        # return each box as a polygon of (x, y) points rather than a flat
        # 4-tuple -- confirm for craft_pytorch; _box_bounds accepts both.
        for box in prediction["boxes"]:
            x_min, y_min, x_max, y_max = _box_bounds(box, width, height)
            if x_max <= x_min or y_max <= y_min:
                # Degenerate region: an empty crop cannot be turned into an image.
                continue

            text_crop = image_np[y_min:y_max, x_min:x_max]  # Crop detected text region
            text_image = Image.fromarray(text_crop)

            # Preprocess with the TrOCR processor so the crop is resized and
            # normalised the way the checkpoint's own preprocessing config
            # specifies, then decode the generated token ids to text.
            pixel_values = processor(images=text_image, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

            print(f"Detected Text: {generated_text}")
finally:
    # Cleanup CRAFT
    craft.unload_craftnet_model()
    craft.unload_refinenet_model()


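# Cell output: SyntaxError: invalid syntax (craft.py, line 1)
# NOTE(review): presumably raised while importing `craft_pytorch.craft`, i.e. the
# craft.py file itself does not parse -- verify that the package file is intact.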