In [None]:
from PIL import Image
from pathlib import Path
import pytesseract
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import torch

def load_trocr_model():
    processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
    model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
    return processor, model
        
processor, model = load_trocr_model()

In [None]:

def trocr_multiline(processor, model, image_path: str, y1: int = 70, y2: int = 120, y3: int = 165, y4: int = 220):
    """Crop image into 5 lines, run TrOCR on each, combine results."""
    img = Image.open(image_path).convert("RGB")
    width = img.width
    
    crops = [
        img.crop((0, 0, width, y1)),
        img.crop((0, y1, width, y2)),
        img.crop((0, y2, width, y3)),
        img.crop((0, y3, width, y4)),
        img.crop((0, y4, width, img.height))
    ]
    
    lines = []
    for crop in crops:
        pixel_values = processor(images=crop, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        lines.append(text)
    
    return "\n".join(lines)

# def test_ocr_methods(processor, model, image_path: str = "./canvas_output.png"):
#     image_path = Path(image_path)
#     if not image_path.exists():
#         raise FileNotFoundError(f"Image not found at {image_path}")
    
#     image = Image.open(image_path)
    
#     # Convert to RGB if necessary (TrOCR requires RGB)
#     if image.mode != "RGB":
#         image = image.convert("RGB")
    
#     results = {}
    
#     print("Running Tesseract OCR...")
#     try:
#         tesseract_output = pytesseract.image_to_string(image)
#         results["Tesseract"] = {
#             "output": tesseract_output.strip()
#         }
#         print(f"✓ Tesseract completed")
#     except Exception as e:
#         results["Tesseract"] = {
#             "output": f"Error: {str(e)}"
#         }
#         print(f"✗ Tesseract failed: {e}")
    
#     print("\nRunning TrOCR...")
#     try:
#         trocr_output = trocr_multiline(processor, model, image_path)
#         results['TrOCR'] = {
#             "output": trocr_output
#         }

#     except Exception as e:
#         results["TrOCR"] = {
#             "output": f"Error: {str(e)}"
#         }
#         print(f"✗ TrOCR failed: {e}")
    
#     return results
def test_ocr_methods(processor, model, image_path: str, y1: int, y2: int, y3: int, y4: int):
    """
    Run Tesseract and TrOCR (multiline) on an image.
    
    Args:
        y1: First split point (separates line 1 and 2)
        y2: Second split point (separates line 2 and 3)
        y3: Third split point (separates line 3 and 4)
        y4: Fourth split point (separates line 4 and 5)
    """
    image = Image.open(image_path).convert("RGB")
    
    # Tesseract (full image)
    try:
        tesseract_output = pytesseract.image_to_string(image).strip()
    except Exception as e:
        tesseract_output = f"Error: {str(e)}"
    
    # TrOCR (multiline)
    try:
        trocr_output = trocr_multiline(processor, model, image_path)
    except Exception as e:
        trocr_output = f"Error: {str(e)}"
    
    return {
        "Tesseract": {"output": tesseract_output},
        "TrOCR": {"output": trocr_output}
    }

In [15]:
def crop_and_save(image_path: str, y1: int, y2: int, y3: int, y4: int, output_dir: str = "./cropped"):
    """Crop image into 5 lines and save them."""
    img = Image.open(image_path)
    width = img.width
    
    Path(output_dir).mkdir(exist_ok=True)
    
    crops = [
        img.crop((0, 0, width, y1)),           # Line 1
        img.crop((0, y1, width, y2)),          # Line 2
        img.crop((0, y2, width, y3)),          # Line 3
        img.crop((0, y3, width, y4)),          # Line 4
        img.crop((0, y4, width, img.height))   # Line 5
    ]
    
    for i, crop in enumerate(crops, 1):
        crop.save(f"{output_dir}/line_{i}.png")
    
    return crops

crop_and_save("./canvas_output.png", 70, 120, 165, 220)

[<PIL.Image.Image image mode=RGBA size=600x70>,
 <PIL.Image.Image image mode=RGBA size=600x50>,
 <PIL.Image.Image image mode=RGBA size=600x45>,
 <PIL.Image.Image image mode=RGBA size=600x55>,
 <PIL.Image.Image image mode=RGBA size=600x180>]

In [19]:
results = test_ocr_methods(processor, model, "./canvas_output.png")
    
# Display results
print("\n" + "="*50)
print("RESULTS")
print("="*50)

for method, data in results.items():
    print(f"\n{method}:")
    print("-" * 30)
    print(data["output"])

print("\n" + "="*50)
print("FORMATTED OUTPUT")
print("="*50)
print(results)

TypeError: test_ocr_methods() missing 4 required positional arguments: 'y1', 'y2', 'y3', and 'y4'