In [1]:
import cv2
#Importing OpenCV for Image Manipulation

In [2]:
import pytesseract
#For extracting Text from Image 

In [3]:
def perform_ocr(image_path, lang='eng', psm=6, oem=3):
    """
    Extract text from an image file with Tesseract after Otsu binarization.

    The image is read as single-channel grayscale, thresholded to pure
    black/white, and the thresholded array is handed to pytesseract.

    Args:
        image_path (str): location of the image on disk.
        lang (str): Tesseract language code, forwarded as ``lang``
            (default 'eng').
        psm (int): value for Tesseract's ``--psm`` (page segmentation
            mode) flag; the default 6 treats the page as a single block
            of text.
        oem (int): value for Tesseract's ``--oem`` (OCR engine mode)
            flag; defaults to 3.

    Returns:
        str: whatever text pytesseract recognised in the image.

    Raises:
        FileNotFoundError: when ``cv2.imread`` returns ``None`` for
            ``image_path`` (missing or unreadable file).
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising.
    if gray is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    # Binarize before OCR; with THRESH_OTSU set, the cutoff is chosen by
    # OpenCV, so the 0 passed as the threshold is only a placeholder.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binarized = cv2.threshold(gray, 0, 255, otsu_flags)[1]

    # Engine and segmentation modes go to Tesseract as CLI-style flags.
    tesseract_config = f'--oem {oem} --psm {psm}'
    return pytesseract.image_to_string(
        binarized,
        lang=lang,
        config=tesseract_config,
    )

In [4]:
def get_contours(image_path, min_area=100):
    """
    Detect external contours (connected regions) in an image.

    The image is loaded as grayscale, binarized with Otsu's method, and
    the outermost contours of the binary image are extracted. Contours
    are then filtered by area and drawn onto a colour copy of the image.

    Args:
        image_path (str): path to input image.
        min_area (int): area threshold; only contours whose
            ``cv2.contourArea`` is strictly greater than this are kept.

    Returns:
        filtered (list): list of contours kept after filtering.
        contour_img (ndarray): BGR copy of the grayscale image with the
            kept contours drawn in green, 2 px thick.

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot load ``image_path``.
    """
    # Load as grayscale; cv2.imread returns None instead of raising.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"Could not load image: {image_path}")

    # Preprocess: binarize (Otsu picks the cutoff, the 0 is a placeholder)
    _, binary = cv2.threshold(img, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Find contours. OpenCV 2.x/4.x return (contours, hierarchy) while
    # 3.x returns (image, contours, hierarchy); the contour list is the
    # second-to-last element in every version, so index from the end
    # instead of unpacking a fixed number of values.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Filter contours by area (strictly greater than min_area)
    filtered = [cnt for cnt in contours if cv2.contourArea(cnt) > min_area]

    # Draw on a 3-channel copy so the green outline is visible
    contour_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(contour_img, filtered, -1, (0, 255, 0), 2)

    return filtered, contour_img
