In [None]:
! pip install easyocr

In [None]:
! pip install torch torchvision torchaudio

In [None]:
! pip install transformers

In [None]:
! pip install pytesseract
! pip install opencv-python pillow

In [None]:
import cv2
import json
import easyocr
import pytesseract
from PIL import Image
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from torchvision.transforms import ToTensor, Resize, Compose
import torchvision.transforms.functional as TF

# Path to the input image read by the OCR steps below.
image_path = "sample template.jpeg"

# -------------------- OpenCV Preprocessing --------------------
# cv2.imread does not raise on a missing or undecodable file; it returns
# None, which would otherwise surface later as an opaque cvtColor error.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")

# Grayscale -> non-local-means denoise (h=30, template window 7, search
# window 21) -> 2x bicubic upscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

# Persist the preprocessed image; the EasyOCR step reads it back from disk.
# cv2.imwrite reports failure through its boolean return value, not an
# exception, so check it explicitly.
processed_path = "processed.jpg"
if not cv2.imwrite(processed_path, gray):
    raise OSError(f"Could not write preprocessed image: {processed_path!r}")

# -------------------- EasyOCR --------------------
# English-only reader, pinned to the CPU (gpu=False).
easy_reader = easyocr.Reader(['en'], gpu=False)

# Run detection + recognition on the preprocessed image written above.
easy_results = easy_reader.readtext(processed_path)

# Each result is unpacked as a 3-tuple; only the middle element (the
# recognised string) is kept, in the order EasyOCR returned it.
easy_texts = [
    recognised
    for _box, recognised, _score in easy_results
]

# -------------------- pytesseract --------------------
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Or Linux path
# pytess_text = pytesseract.image_to_string(Image.open(processed_path))

# -------------------- TrOCR --------------------
# NOTE(review): the TrOCR checkpoints are documented for single text-line
# images; feeding a whole page/template will likely decode poorly. Consider
# cropping to individual lines first -- confirm against the intended input.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Open inside a context manager so the file handle is released promptly;
# convert() returns an independent, fully loaded RGB copy.
with Image.open(image_path) as raw_image:
    img_pil = raw_image.convert("RGB")

# Build the model input with the checkpoint's own processor rather than a
# hand-rolled Resize + ToTensor pipeline: the processor applies the resize,
# rescale AND normalisation the model was trained with, whereas ToTensor
# alone leaves pixels un-normalised in [0, 1].
pixel_values = processor(images=img_pil, return_tensors="pt").pixel_values

# Inference only: no gradients are needed for generation.
with torch.no_grad():
    generated_ids = model.generate(pixel_values)
    trocr_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

# -------------------- Combine and Output --------------------
# Collect each engine's result under its own key, preserving the
# easyocr -> trocr key order in the printed JSON.
output = {}
output["easyocr"] = easy_texts
# output["pytesseract"] = pytess_text.strip()  # re-enable with the pytesseract section
output["trocr"] = trocr_text.strip()

# Pretty-print the combined results as JSON with a two-space indent.
print(json.dumps(output, indent=2))
