# **Project: Document Scanning for Book Pages**

In [None]:
import cv2
import numpy as np
from scipy.interpolate import UnivariateSpline
import matplotlib.pyplot as plt

def detect_text_lines(image, debug=False):
    """
    Detect and extract text line contours with enhanced filtering.

    The image is converted to grayscale, blurred, binarized with an inverted
    adaptive threshold and dilated with a wide rectangular kernel so that
    neighbouring foreground pixels merge into blobs. The external contours of
    those blobs are kept only when their area exceeds 0.05% of the image area
    and their bounding box is more than twice as wide as it is tall.

    Args:
        image (np.ndarray): Input BGR image.
        debug (bool): If True, display intermediate steps.

    Returns:
        List[np.ndarray]: Filtered contours representing text lines.
        np.ndarray: Grayscale image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur to reduce noise before thresholding
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive Thresholding for binarization (inverted: dark ink becomes 255)
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 15, 10
    )

    if debug:
        plt.figure(figsize=(10, 8))
        plt.title('Adaptive Threshold')
        plt.imshow(thresh, cmap='gray')
        plt.axis('off')
        plt.show()

    # Morphological operations to connect text characters into lines
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilated = cv2.dilate(thresh, kernel, iterations=2)

    if debug:
        plt.figure(figsize=(10, 8))
        plt.title('Dilated Image')
        plt.imshow(dilated, cmap='gray')
        plt.axis('off')
        plt.show()

    # Find contours with external retrieval mode.
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); taking the last two items works on all of them.
    contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2:]

    # Filter contours based on size and aspect ratio
    min_area = image.shape[0] * image.shape[1] * 0.0005
    filtered_contours = []
    for cnt in contours:
        # Only the bounding-box size matters here, not its position
        _, _, w, h = cv2.boundingRect(cnt)
        aspect_ratio = w / float(h) if h != 0 else 0
        if cv2.contourArea(cnt) > min_area and aspect_ratio > 2:
            filtered_contours.append(cnt)

    if debug:
        debug_image = image.copy()
        cv2.drawContours(debug_image, filtered_contours, -1, (0, 255, 0), 2)
        plt.figure(figsize=(10, 8))
        plt.title('Filtered Contours')
        plt.imshow(cv2.cvtColor(debug_image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

    return filtered_contours, gray

def fit_text_line_splines(contours, image_shape, debug=False):
    """
    Fit splines to detected text lines.

    Each contour is reduced to one point per distinct y-coordinate (the mean
    of all x-coordinates found at that y) and a smoothing spline modelling
    x as a function of y is fitted through those points. Contours with 10 or
    fewer distinct y-values, or whose fit raises, are skipped.

    Args:
        contours (List[np.ndarray]): List of contours representing text lines,
            indexed as contour[:, 0, 0] for x and contour[:, 0, 1] for y.
        image_shape (tuple): Shape of the grayscale image (height, width).
            Only used to size the blank canvas of the debug plot.
        debug (bool): If True, display spline plots.

    Returns:
        List[UnivariateSpline]: Fitted splines for each text line.
    """
    splines = []

    for contour in contours:
        x = contour[:, 0, 0]
        y = contour[:, 0, 1]

        # Remove duplicate y-values by averaging corresponding x-values.
        # np.unique already returns the y-values sorted ascending, which is
        # the ordering UnivariateSpline requires, so no extra sort is needed.
        y_sorted, inverse, counts = np.unique(
            y, return_inverse=True, return_counts=True
        )
        x_sorted = np.bincount(inverse.ravel(), weights=x) / counts

        if len(y_sorted) > 10:  # Ensure enough points for spline fitting
            try:
                spline = UnivariateSpline(y_sorted, x_sorted, s=len(y_sorted) * 0.5)
            except Exception as e:
                # Best effort: one bad contour must not abort the whole page
                print(f"Skipping a contour due to fitting error: {e}")
            else:
                splines.append(spline)

    if debug:
        plt.figure(figsize=(10, 8))
        plt.imshow(np.ones(image_shape) * 255, cmap='gray')
        for spline in splines:
            # Plot each spline only over the y-range covered by its knots
            knots = spline.get_knots()
            y_vals = np.linspace(knots[0], knots[-1], num=500)
            x_vals = spline(y_vals)
            plt.plot(x_vals, y_vals, 'r', linewidth=1)
        plt.title("Fitted Splines")
        plt.gca().invert_yaxis()
        plt.axis('off')
        plt.show()

    return splines

def create_transformation_map(splines, image_shape):
    """
    Generate transformation maps to flatten the page.

    For every image row, the splines whose knot range contains that row are
    evaluated there. When at least two splines apply, the row samples evenly
    between the smallest and the largest evaluated x-position; otherwise the
    row maps onto itself. The y-map is always the identity.

    Args:
        splines (List[UnivariateSpline]): Fitted splines for each text line.
        image_shape (tuple): Shape of the grayscale image (height, width).

    Returns:
        Tuple[np.ndarray, np.ndarray]: Transformation maps (map_x, map_y),
            both float32 arrays of shape (height, width).
    """
    height, width = image_shape
    map_x = np.zeros((height, width), dtype=np.float32)
    map_y = np.zeros((height, width), dtype=np.float32)

    # Start from the identity mapping; only rows bracketed by at least two
    # splines are overwritten below.
    map_x[:] = np.arange(width)
    map_y[:] = np.arange(height)[:, np.newaxis]

    # The knot range of a spline does not depend on the row, so look it up
    # once per spline instead of twice per spline per row.
    domains = []
    for spline in splines:
        knots = spline.get_knots()
        domains.append((spline, knots[0], knots[-1]))

    for y in range(height):
        line_positions = [
            spline(y) for spline, y_min, y_max in domains if y_min <= y <= y_max
        ]

        if len(line_positions) >= 2:
            left, right = min(line_positions), max(line_positions)
            map_x[y, :] = np.linspace(left, right, width)

    return map_x, map_y

def dewarp_and_visualize(image_path):
    """
    Dewarp the page image and display intermediate steps.

    Loads the image, detects text-line contours, fits splines to them, remaps
    the grayscale image with the resulting transformation maps and binarizes
    the remapped image. One figure then shows the original image, the
    grayscale image, the detected contours and the dewarped, binarized
    result. If the image cannot be loaded, no contours are found or no spline
    can be fitted, a message is printed and nothing is displayed.

    Args:
        image_path (str): Path to the input image.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to load image at path '{image_path}'.")
        return

    # Step 1: Detect text lines and threshold
    contours, gray = detect_text_lines(image, debug=False)
    if not contours:
        print("No text lines detected.")
        return

    # Step 2: Fit splines
    splines = fit_text_line_splines(contours, gray.shape, debug=False)
    if not splines:
        print("No valid splines were fitted.")
        return

    # Step 3: Create transformation maps and dewarp
    map_x, map_y = create_transformation_map(splines, gray.shape)
    dewarped = cv2.remap(
        gray, map_x, map_y, interpolation=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT, borderValue=255
    )

    # Step 4: Binarize dewarped image
    dewarped_binarized = cv2.adaptiveThreshold(
        dewarped, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 15, 10
    )

    # Detected Text Blobs: outline the contours on a copy of the input
    debug_image = image.copy()
    cv2.drawContours(debug_image, contours, -1, (0, 255, 0), 2)

    # Display results. The second panel shows the grayscale image returned by
    # detect_text_lines (it is not thresholded), and the last panel shows the
    # dewarped result that was previously computed but never displayed.
    panels = [
        ("Original Image", cv2.cvtColor(image, cv2.COLOR_BGR2RGB), None),
        ("Grayscale Image", gray, 'gray'),
        ("Detected Text Blobs", cv2.cvtColor(debug_image, cv2.COLOR_BGR2RGB), None),
        ("Dewarped Image", dewarped_binarized, 'gray'),
    ]

    plt.figure(figsize=(24, 6))
    for index, (title, picture, colormap) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), index)
        plt.title(title)
        plt.imshow(picture, cmap=colormap)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Example usage: run the pipeline on a sample page and show the result figure.
# The image path is relative to the current working directory; if the file
# cannot be loaded the function prints an error and returns.
dewarp_and_visualize("linguistics_thesis_a.jpg")


In [None]:
import cv2
import numpy as np
from scipy.interpolate import UnivariateSpline
import matplotlib.pyplot as plt

def detect_text_lines(image, debug=False):
    """
    Detect and extract text line contours with enhanced filtering.

    The image is converted to grayscale, blurred, binarized with an inverted
    adaptive threshold and dilated with a wide rectangular kernel so that
    neighbouring foreground pixels merge into blobs. The external contours of
    those blobs are kept only when their area exceeds 0.05% of the image area
    and their bounding box is more than twice as wide as it is tall.

    Args:
        image (np.ndarray): Input BGR image.
        debug (bool): If True, display intermediate steps.

    Returns:
        List[np.ndarray]: Filtered contours representing text lines.
        np.ndarray: Grayscale image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur to reduce noise before thresholding
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive Thresholding for binarization (inverted: dark ink becomes 255)
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 15, 10
    )

    if debug:
        plt.figure(figsize=(10, 8))
        plt.title('Adaptive Threshold')
        plt.imshow(thresh, cmap='gray')
        plt.axis('off')
        plt.show()

    # Morphological operations to connect text characters into lines
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilated = cv2.dilate(thresh, kernel, iterations=2)

    if debug:
        plt.figure(figsize=(10, 8))
        plt.title('Dilated Image')
        plt.imshow(dilated, cmap='gray')
        plt.axis('off')
        plt.show()

    # Find contours with external retrieval mode.
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); taking the last two items works on all of them.
    contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2:]

    # Filter contours based on size and aspect ratio
    min_area = image.shape[0] * image.shape[1] * 0.0005
    filtered_contours = []
    for cnt in contours:
        # Only the bounding-box size matters here, not its position
        _, _, w, h = cv2.boundingRect(cnt)
        aspect_ratio = w / float(h) if h != 0 else 0
        if cv2.contourArea(cnt) > min_area and aspect_ratio > 2:
            filtered_contours.append(cnt)

    if debug:
        debug_image = image.copy()
        cv2.drawContours(debug_image, filtered_contours, -1, (0, 255, 0), 2)
        plt.figure(figsize=(10, 8))
        plt.title('Filtered Contours')
        plt.imshow(cv2.cvtColor(debug_image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

    return filtered_contours, gray

def fit_text_line_splines(contours, image_shape, debug=False):
    """
    Fit splines to detected text lines.

    Each contour is reduced to one point per distinct y-coordinate (the mean
    of all x-coordinates found at that y) and a smoothing spline modelling
    x as a function of y is fitted through those points. Contours with 10 or
    fewer distinct y-values, or whose fit raises, are skipped.

    Args:
        contours (List[np.ndarray]): List of contours representing text lines,
            indexed as contour[:, 0, 0] for x and contour[:, 0, 1] for y.
        image_shape (tuple): Shape of the grayscale image (height, width).
            Only used to size the blank canvas of the debug plot.
        debug (bool): If True, display spline plots.

    Returns:
        List[UnivariateSpline]: Fitted splines for each text line.
    """
    splines = []

    for contour in contours:
        x = contour[:, 0, 0]
        y = contour[:, 0, 1]

        # Remove duplicate y-values by averaging corresponding x-values.
        # np.unique already returns the y-values sorted ascending, which is
        # the ordering UnivariateSpline requires, so no extra sort is needed.
        y_sorted, inverse, counts = np.unique(
            y, return_inverse=True, return_counts=True
        )
        x_sorted = np.bincount(inverse.ravel(), weights=x) / counts

        if len(y_sorted) > 10:  # Ensure enough points for spline fitting
            try:
                spline = UnivariateSpline(y_sorted, x_sorted, s=len(y_sorted) * 0.5)
            except Exception as e:
                # Best effort: one bad contour must not abort the whole page
                print(f"Skipping a contour due to fitting error: {e}")
            else:
                splines.append(spline)

    if debug:
        plt.figure(figsize=(10, 8))
        plt.imshow(np.ones(image_shape) * 255, cmap='gray')
        for spline in splines:
            # Plot each spline only over the y-range covered by its knots
            knots = spline.get_knots()
            y_vals = np.linspace(knots[0], knots[-1], num=500)
            x_vals = spline(y_vals)
            plt.plot(x_vals, y_vals, 'r', linewidth=1)
        plt.title("Fitted Splines")
        plt.gca().invert_yaxis()
        plt.axis('off')
        plt.show()

    return splines

def create_transformation_map(splines, image_shape):
    """
    Generate transformation maps to flatten the page.

    For every image row, the splines whose knot range contains that row are
    evaluated there. When at least two splines apply, the row samples evenly
    between the smallest and the largest evaluated x-position; otherwise the
    row maps onto itself. The y-map is always the identity.

    Args:
        splines (List[UnivariateSpline]): Fitted splines for each text line.
        image_shape (tuple): Shape of the grayscale image (height, width).

    Returns:
        Tuple[np.ndarray, np.ndarray]: Transformation maps (map_x, map_y),
            both float32 arrays of shape (height, width).
    """
    height, width = image_shape
    map_x = np.zeros((height, width), dtype=np.float32)
    map_y = np.zeros((height, width), dtype=np.float32)

    # Start from the identity mapping; only rows bracketed by at least two
    # splines are overwritten below.
    map_x[:] = np.arange(width)
    map_y[:] = np.arange(height)[:, np.newaxis]

    # The knot range of a spline does not depend on the row, so look it up
    # once per spline instead of twice per spline per row.
    domains = []
    for spline in splines:
        knots = spline.get_knots()
        domains.append((spline, knots[0], knots[-1]))

    for y in range(height):
        line_positions = [
            spline(y) for spline, y_min, y_max in domains if y_min <= y <= y_max
        ]

        if len(line_positions) >= 2:
            left, right = min(line_positions), max(line_positions)
            map_x[y, :] = np.linspace(left, right, width)

    return map_x, map_y

def dewarp_image(image_path, debug=False):
    """
    Flatten a photographed page and return it as a binarized image.

    The pipeline reads the image, finds text-line contours, fits splines to
    them, builds remapping grids from the splines, remaps the grayscale image
    and finally applies an adaptive threshold. Any stage that yields nothing
    prints a message and ends the pipeline early.

    Args:
        image_path (str): Path to the input image.
        debug (bool): If True, the helper stages show their intermediate
            plots and a side-by-side original/dewarped figure is displayed.

    Returns:
        np.ndarray: Dewarped and binarized image, or None when the image
            cannot be loaded, no contours are found or no spline is fitted.
    """
    source = cv2.imread(image_path)
    if source is None:
        print(f"Error: Unable to load image at path '{image_path}'.")
        return None

    line_contours, grayscale = detect_text_lines(source, debug=debug)
    if not line_contours:
        print("No text lines detected. Exiting.")
        return None

    line_splines = fit_text_line_splines(line_contours, grayscale.shape, debug=debug)
    if not line_splines:
        print("No valid splines were fitted. Exiting.")
        return None

    # Remap the grayscale page; pixels sampled outside the image become white
    grid_x, grid_y = create_transformation_map(line_splines, grayscale.shape)
    flattened = cv2.remap(
        grayscale,
        grid_x,
        grid_y,
        interpolation=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=255,
    )

    result = cv2.adaptiveThreshold(
        flattened,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        15,
        10,
    )

    if not debug:
        return result

    # Side-by-side comparison of the input and the final output
    plt.figure(figsize=(15, 10))
    panels = (
        ("Original Image", cv2.cvtColor(source, cv2.COLOR_BGR2RGB), None),
        ("Dewarped Image", result, 'gray'),
    )
    for slot, (caption, picture, colormap) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.title(caption)
        plt.imshow(picture, cmap=colormap)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    return result

# Example usage: dewarp a sample page (showing the debug plots) and save it.
# NOTE: despite its name, `decrypted_image` holds the dewarped, binarized page;
# the name is kept unchanged in case other cells refer to it.
decrypted_image = dewarp_image("linguistics_thesis_a.jpg", debug=True)
if decrypted_image is not None:
    # cv2.imwrite signals a failed write through its boolean return value
    # rather than raising, so check it instead of failing silently.
    if not cv2.imwrite("dewarped_output.jpg", decrypted_image):
        print("Error: Unable to write image to 'dewarped_output.jpg'.")
