<a href="https://colab.research.google.com/github/SaurabhIndi/OCR-project/blob/main/OCR_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# !pip install paddleocr
# !pip install paddlepaddle==2.4.2
import cv2
import numpy as np
from paddleocr import PaddleOCR

# Initialize the CNN-based OCR engine (PaddleOCR) once at module level so that
# cnn_ocr() below can reuse the same instance for every image.
# Configured with use_angle_cls=True and the English ('en') language setting.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

def preprocess_image(image_path):
    """Load an image and derive the grayscale and binarized versions for OCR.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A tuple ``(image, gray, thresh)``: the image as loaded, its grayscale
        conversion, and a binary image produced by a fixed threshold of 150
        (``cv2.THRESH_BINARY`` with a maximum value of 255).

    Raises:
        OSError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an opaque assertion inside cvtColor.
    if image is None:
        raise OSError(f"Could not read image from {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    return image, gray, thresh

def template_matching(gray_image, templates, threshold=0.8):
    """Perform template matching for predictable fonts.

    Each template is searched for in ``gray_image`` with normalized
    cross-correlation (``cv2.TM_CCOEFF_NORMED``). A character counts as
    recognized when its best match score is strictly greater than
    ``threshold``.

    Args:
        gray_image: Single-channel image to search in.
        templates: Mapping of character -> path of that character's template
            image.
        threshold: Minimum best-match score (exclusive) for a character to be
            reported.

    Returns:
        The recognized characters concatenated into one string, or ``""`` when
        nothing matched. NOTE: characters are emitted in the iteration order
        of ``templates``, not in the order they appear in the image, and each
        character is reported at most once.
    """
    matched_chars = []
    for char, template_path in templates.items():
        # Flag 0 loads the template as a single-channel (grayscale) image.
        template = cv2.imread(template_path, 0)

        # cv2.imread returns None when the file cannot be read.
        if template is None:
            print(f"Error: Could not load template from {template_path}")
            continue  # Skip to the next template

        image_h, image_w = gray_image.shape[0], gray_image.shape[1]
        h, w = template.shape

        # matchTemplate requires the template to fit inside the image, so
        # shrink oversized templates while keeping their aspect ratio.
        if h > image_h or w > image_w:
            scale_factor = min(image_h / h, image_w / w)
            # int() truncation can collapse a dimension to 0 for very
            # elongated templates; cv2.resize rejects empty sizes, so clamp
            # each dimension to at least one pixel.
            new_size = (
                max(1, int(w * scale_factor)),
                max(1, int(h * scale_factor)),
            )
            template = cv2.resize(template, new_size)
            print(f"Resized template '{char}' to {new_size}")

        # Only the best score matters here; its location is ignored.
        result = cv2.matchTemplate(gray_image, template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, _ = cv2.minMaxLoc(result)
        if max_val > threshold:
            matched_chars.append(char)

    return "".join(matched_chars)

def cnn_ocr(image_path):
    """Fallback to CNN-based OCR.

    Runs the module-level PaddleOCR instance with detection and recognition
    enabled and joins the recognized text of every detected line.

    Args:
        image_path: Path of the image to run OCR on.

    Returns:
        The recognized line texts separated by single spaces, with leading and
        trailing whitespace stripped; ``""`` when no text was detected.
    """
    result = ocr.ocr(image_path, det=True, rec=True)

    # When nothing is detected the per-image entry can be None (or the result
    # itself empty); iterating it would raise TypeError.
    if not result or not result[0]:
        return ""

    # Each entry is indexed as line[1][0] to obtain the recognized text.
    return " ".join(line[1][0] for line in result[0]).strip()

def hybrid_ocr(image_path, templates):
    """Recognize text using template matching first, then CNN OCR as fallback.

    Args:
        image_path: Path of the image to read.
        templates: Mapping of character -> template image path, forwarded to
            ``template_matching``.

    Returns:
        The template-matching result when it is non-empty; otherwise the
        text returned by ``cnn_ocr`` for the same image.
    """
    # Only the grayscale image (second element) is needed for matching.
    grayscale = preprocess_image(image_path)[1]

    matched_text = template_matching(grayscale, templates)
    if matched_text:
        return matched_text

    # Template matching found nothing: hand the image to the CNN-based OCR.
    return cnn_ocr(image_path)

# Character -> template image path, for fonts regular enough to be matched
# directly (e.g., digital displays).
templates = {
    "0": "templates/0.jpg",
    "1": "templates/1.jpg",
    "2": "templates/2.jpg",
    # Register further digits/characters here as more templates are added.
}

# Example images to run through the hybrid OCR pipeline, in processing order.
image_paths = [
    "display (3).jpg",
    "display (2).jpg",
    "capchas (1).jpeg",
    "display (1).jpg",
    "capchas (2).jpeg",
]

# Print the recognized text for each example image.
for path in image_paths:
    extracted_text = hybrid_ocr(path, templates)
    print(f"Text from {path}: {extracted_text}")

[2024/11/16 19:53:58] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_c