In [51]:
!pip install opencv-python numpy matplotlib




In [52]:
import getpass
import os
from typing import List, Dict

import cv2
import numpy as np
import matplotlib.pyplot as plt

In [53]:
def load_gray(path: str) -> np.ndarray:
    """Read an image from disk as a single-channel grayscale array.

    Args:
        path: Location of the image file.

    Returns:
        The array produced by ``cv2.imread`` with ``cv2.IMREAD_GRAYSCALE``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``. The message names the
            offending path so failures inside a batch can be traced.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # imread reports a missing file and an undecodable file the same way
        # (by returning None), so the message has to cover both causes.
        raise ValueError(
            f"Invalid image path: {path!r} (missing file or unreadable image)"
        )
    return img


In [54]:
def remove_background(gray: np.ndarray) -> np.ndarray:
    """Even out the page background by dividing the image by a blurred copy.

    A 51x51 Gaussian blur of the input serves as the background estimate.
    The input is then divided by that estimate with a scale factor of 255
    (a division, not a subtraction).

    Args:
        gray: Grayscale image to normalize.

    Returns:
        The result of ``cv2.divide(gray, background, scale=255)``.
    """
    background_estimate = cv2.GaussianBlur(gray, (51, 51), 0)
    return cv2.divide(gray, background_estimate, scale=255)


In [55]:
def enhance_contrast(gray: np.ndarray) -> np.ndarray:
    """Apply CLAHE (contrast-limited adaptive histogram equalization).

    Uses a clip limit of 2.0 and an 8x8 tile grid.

    Args:
        gray: Grayscale image to equalize.

    Returns:
        The output of the CLAHE object's ``apply`` on ``gray``.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(gray)
    return equalized


In [56]:
def denoise_handwriting(gray: np.ndarray) -> np.ndarray:
    """Smooth the image with a median filter of aperture size 3.

    Args:
        gray: Grayscale image to filter.

    Returns:
        The output of ``cv2.medianBlur(gray, 3)``.
    """
    aperture = 3
    filtered = cv2.medianBlur(gray, aperture)
    return filtered


In [57]:
def binarize_handwriting(gray: np.ndarray) -> np.ndarray:
    """Binarize by combining a global Otsu mask with a local adaptive mask.

    Both masks use ``THRESH_BINARY``, so pixels above the threshold become 255
    and the rest 0. The masks are merged with a bitwise AND: a pixel stays 255
    only where both masks are 255, and is 0 wherever either mask is 0.
    Assuming dark ink on light paper, the ink in the result is therefore the
    union of the ink found by the two methods.

    Args:
        gray: Grayscale image to binarize.

    Returns:
        The AND-combined mask.
    """
    # Global threshold; passing 0 lets Otsu choose the level.
    _, global_mask = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )

    # Local threshold: Gaussian-weighted 25x25 neighbourhood, constant 15.
    local_mask = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 25, 15
    )

    return cv2.bitwise_and(global_mask, local_mask)


In [58]:
def deskew(binary: np.ndarray) -> np.ndarray:
    """Rotate a binarized page so that its content is axis-aligned.

    The skew is estimated from the minimum-area rectangle around every pixel
    whose value is below 255, and the image is rotated about its centre by
    that angle. The output has the same size as the input.

    Args:
        binary: 2-D image in which non-255 pixels are treated as ink.

    Returns:
        The rotated image, or ``binary`` itself (same object) when fewer than
        100 ink pixels are present.

    NOTE(review): cubic interpolation can leave values other than 0/255 along
    stroke edges; re-threshold downstream if a strictly two-level image is
    required.
    """
    # np.where yields (row, col) index arrays, while OpenCV points are (x, y),
    # i.e. (col, row). Keep the two apart so the angle has the right sign.
    rows, cols = np.where(binary < 255)
    if rows.size < 100:
        return binary  # too little ink for a meaningful angle estimate

    # float32 is a point type minAreaRect accepts on every platform
    # (np.where produces int64 indices).
    points = np.column_stack((cols, rows)).astype(np.float32)
    angle = cv2.minAreaRect(points)[-1]

    # Depending on the OpenCV version the angle is reported in [-90, 0) or in
    # (0, 90]. A rectangle's orientation is only defined modulo 90 degrees, so
    # fold the value into [-45, 45]; both conventions then agree and an
    # upright page (reported as 90 or -90) maps to no rotation at all.
    if angle > 45:
        angle -= 90
    elif angle < -45:
        angle += 90

    h, w = binary.shape
    # The rectangle angle is clockwise-positive and getRotationMatrix2D is
    # counter-clockwise-positive, so rotating by `angle` undoes the skew.
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)

    return cv2.warpAffine(
        binary, M, (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE
    )


In [59]:
def segment_handwritten_blocks(binary: np.ndarray):
    """Split a binarized page into blocks of connected ink.

    The page is inverted (``255 - binary``) and dilated once with a 15x3
    rectangular kernel. The dilated image is then labelled with 8-connected
    components. Label 0 is skipped, and components whose area is below 600
    are discarded.

    Args:
        binary: Binarized page image.

    Returns:
        A list of dicts sorted by the bounding box's ``y`` and then ``x``.
        Each dict has ``"bbox"``, the ``(x, y, w, h)`` of the dilated
        component, and ``"image"``, the matching slice of ``binary``.
    """
    ink_mask = 255 - binary

    joiner = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
    smeared = cv2.dilate(ink_mask, joiner, iterations=1)

    count, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
        smeared, connectivity=8
    )

    found = []
    # Row 0 of the stats table is skipped, as in the labelling convention
    # where label 0 is the background.
    for left, top, width, height, area in stats[1:count]:
        if area >= 600:
            found.append({
                "bbox": (left, top, width, height),
                "image": binary[top:top + height, left:left + width]
            })

    return sorted(found, key=lambda blk: (blk["bbox"][1], blk["bbox"][0]))


In [60]:
def preprocess_handwritten_document(image_path: str):
    """Run the full preprocessing pipeline on one image file.

    Stages, in order: load as grayscale, remove background, denoise, enhance
    contrast, binarize, deskew. The deskewed result is then segmented into
    blocks.

    Args:
        image_path: Path of the image to process.

    Returns:
        A dict with ``"binary"`` (the deskewed binarized page) and
        ``"blocks"`` (the output of ``segment_handwritten_blocks`` on it).
    """
    page = load_gray(image_path)

    # Each stage consumes the previous stage's output.
    for stage in (
        remove_background,
        denoise_handwriting,
        enhance_contrast,
        binarize_handwriting,
        deskew,
    ):
        page = stage(page)

    return {
        "binary": page,
        "blocks": segment_handwritten_blocks(page)
    }


In [None]:
# Gemini API key
# SECURITY: never paste the key into the notebook. The key that used to be
# hardcoded in this cell has been exposed and should be revoked and replaced.
# Export LLM_API_KEY before starting the kernel, or enter it at the prompt
# below (the input is not echoed and is not stored in the notebook).
if not os.environ.get("LLM_API_KEY"):
    try:
        os.environ["LLM_API_KEY"] = getpass.getpass("Gemini API key: ")
    except (EOFError, NotImplementedError):
        # No interactive input is available (e.g. a headless run). Leave the
        # variable unset so get_llm_key() reports the missing key explicitly.
        pass

def get_llm_key():
    """Return the LLM API key stored in the ``LLM_API_KEY`` environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    value = os.environ.get("LLM_API_KEY")
    if value:
        return value
    raise RuntimeError("LLM API key not set")


In [62]:
def save_image(image: np.ndarray, output_path: str):
    """Write an image to ``output_path``, creating parent directories as needed.

    Args:
        image: Image array to write.
        output_path: Destination file. A bare filename (no directory part) is
            written relative to the current working directory.

    Raises:
        OSError: If ``cv2.imwrite`` reports that nothing was written.
    """
    parent = os.path.dirname(output_path)
    if parent:
        # dirname is "" for a bare filename, and os.makedirs("") raises.
        os.makedirs(parent, exist_ok=True)

    # imwrite signals failure through its return value rather than by raising,
    # so check it instead of letting the caller report a save that never
    # happened.
    if not cv2.imwrite(output_path, image):
        raise OSError(f"Could not write image to {output_path!r}")


In [63]:
def preprocess_handwritten_document_to_image(
    image_path: str,
    output_image_path: str
):
    """Preprocess one image and save the resulting binarized page to disk.

    The image goes through ``preprocess_handwritten_document`` instead of a
    second inline copy of the same steps, so the saved file always matches the
    ``"binary"`` entry that function returns.

    Args:
        image_path: Path of the image to process.
        output_image_path: Where the processed image is written.

    Returns:
        ``output_image_path``, unchanged.

    Raises:
        ValueError: Propagated from loading when the image cannot be read.
    """
    result = preprocess_handwritten_document(image_path)

    # Save final image
    save_image(result["binary"], output_image_path)

    return output_image_path


In [65]:
# Run the pipeline on a single input image and report where the result was written.
output_path = preprocess_handwritten_document_to_image(
    image_path="/content/WhatsApp Image 2026-01-18 at 13.04.00.jpeg",
    output_image_path="/content/output/preprocessed_handwriting.png",
)
print("Saved OCR-ready image at:", output_path)


Saved OCR-ready image at: /content/output/preprocessed_handwriting.png
