# PDF Orientation Fix

-----




In [None]:
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from pdf2image import convert_from_path
from PIL import Image
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from skimage.transform import rotate
import math

# Select the compute device: the first CUDA GPU when available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print(f"Using device: {device}")
    # Report the GPU name and the CUDA version exposed by torch.version.cuda
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
else:
    device = torch.device("cpu")
    print(f"Using device: {device}")


## Define GPU-Accelerated Image Processing Functions


In [None]:
class EdgeDetector(nn.Module):
    """Canny-like edge detector built from PyTorch ops, usable on CPU or GPU.

    Pipeline: Gaussian blur -> Sobel gradients -> gradient magnitude ->
    normalisation by the maximum -> binary threshold. There is no
    non-maximum suppression or hysteresis step.

    Args:
        kernel_size: Kernel size passed to ``TF.gaussian_blur``;
            ``None`` means ``[5, 5]``.
        sigma: Sigma passed to ``TF.gaussian_blur``; ``None`` means
            ``[1.0, 1.0]``.
        threshold: Cut-off applied to the normalised gradient magnitude.
    """

    def __init__(self, kernel_size=None, sigma=None, threshold=0.2):
        super().__init__()

        # Hyperparameters (None sentinels avoid shared mutable default lists)
        self.kernel_size = [5, 5] if kernel_size is None else kernel_size
        self.sigma = [1.0, 1.0] if sigma is None else sigma
        self.threshold = threshold

        # Sobel kernels shaped (out_channels, in_channels, 3, 3) for F.conv2d
        sobel_x = torch.tensor([[-1, 0, 1],
                                [-2, 0, 2],
                                [-1, 0, 1]], dtype=torch.float32)
        sobel_y = torch.tensor([[-1, -2, -1],
                                [0, 0, 0],
                                [1, 2, 1]], dtype=torch.float32)

        # Registered as buffers so they follow the module through .to()/.cuda()
        # instead of being pinned to a module-level device at construction time.
        # Non-persistent keeps state_dict() empty, as it was before.
        self.register_buffer("sobel_x", sobel_x.view(1, 1, 3, 3), persistent=False)
        self.register_buffer("sobel_y", sobel_y.view(1, 1, 3, 3), persistent=False)

    def forward(self, x):
        """Return a binary (0.0 / 1.0) float edge map for ``x``.

        ``x`` must be single-channel, because the Sobel kernels have one
        input channel. The output has the same spatial size as the input
        (3x3 convolution with ``padding=1``).
        """
        # Apply Gaussian blur
        x = TF.gaussian_blur(x, kernel_size=self.kernel_size, sigma=self.sigma)

        # Align the kernels with the input so a module that was never moved
        # with .to(device) still works (no-op when already matching)
        sobel_x = self.sobel_x.to(device=x.device, dtype=x.dtype)
        sobel_y = self.sobel_y.to(device=x.device, dtype=x.dtype)

        # Apply Sobel filters
        grad_x = F.conv2d(x, sobel_x, padding=1)
        grad_y = F.conv2d(x, sobel_y, padding=1)

        # Calculate gradient magnitude
        grad_magnitude = torch.sqrt(grad_x ** 2 + grad_y ** 2)

        # Normalize to 0-1 range; the floor on the divisor keeps a perfectly
        # flat image at 0 instead of producing NaN from 0 / 0
        floor = torch.finfo(grad_magnitude.dtype).tiny
        grad_magnitude = grad_magnitude / grad_magnitude.max().clamp_min(floor)

        # Apply threshold
        return (grad_magnitude > self.threshold).float()

def _fold_angle(angle, min_angle, max_angle):
    """Shift ``angle`` by whole multiples of 90 degrees towards [min_angle, max_angle]."""
    while angle < min_angle:
        angle += 90
    while angle > max_angle:
        angle -= 90
    return angle


def detect_skew_angle_gpu(image_np, kernel_size=None, sigma=None, edge_threshold=0.2,
                          hough_threshold=150, min_line_length=100, max_line_gap=10,
                          min_angle=-45, max_angle=45) -> float:
    """Estimate the skew angle of an image, in degrees.

    Edges are extracted with ``EdgeDetector`` on the module-level ``device``,
    line segments are found with OpenCV's probabilistic Hough transform, and
    the median of the folded segment angles is returned.

    Args:
        image_np: Image array; HxW, HxWx1, HxWx3 (converted with
            ``COLOR_RGB2GRAY``) or HxWx4 (``COLOR_RGBA2GRAY``). Values are
            divided by 255, so 0-255 data is assumed.
        kernel_size: Gaussian kernel size for the edge detector
            (``None`` -> ``[5, 5]``).
        sigma: Gaussian sigma for the edge detector (``None`` -> ``[1.0, 1.0]``).
        edge_threshold: Threshold on the normalised gradient magnitude.
        hough_threshold: Accumulator threshold for ``cv2.HoughLinesP``.
        min_line_length: ``minLineLength`` for ``cv2.HoughLinesP``.
        max_line_gap: ``maxLineGap`` for ``cv2.HoughLinesP``.
        min_angle: Lower bound (degrees) that angles are folded up towards.
        max_angle: Upper bound (degrees) that angles are folded down towards.

    Returns:
        The median folded segment angle, or ``0.0`` when no segment is found.
    """
    if sigma is None:
        sigma = [1.0, 1.0]
    if kernel_size is None:
        kernel_size = [5, 5]

    # Reduce to a single-channel image
    if image_np.ndim == 3 and image_np.shape[2] == 3:  # Color image
        image_gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    elif image_np.ndim == 3 and image_np.shape[2] == 4:  # Color image with alpha
        image_gray = cv2.cvtColor(image_np, cv2.COLOR_RGBA2GRAY)
    elif image_np.ndim == 3 and image_np.shape[2] == 1:  # Grayscale with channel axis
        image_gray = image_np[:, :, 0]
    else:  # Already grayscale
        image_gray = image_np

    # Normalize and convert to a (1, 1, H, W) tensor
    image_tensor = (
        torch.from_numpy(image_gray).float().div(255.0).unsqueeze(0).unsqueeze(0).to(device)
    )

    # Detect edges; no autograd bookkeeping is needed for inference
    edge_detector = EdgeDetector(kernel_size=kernel_size, sigma=sigma,
                                 threshold=edge_threshold).to(device)
    with torch.no_grad():
        edges = edge_detector(image_tensor)

    # Back to an HxW uint8 mask. Only the batch and channel axes are dropped,
    # so an image with a 1-pixel side keeps its two dimensions.
    edges_np = (edges.squeeze(0).squeeze(0).cpu().numpy() * 255).astype(np.uint8)

    # Hough Line Transform to detect lines
    lines = cv2.HoughLinesP(edges_np, 1, np.pi / 180, threshold=hough_threshold,
                            minLineLength=min_line_length, maxLineGap=max_line_gap)

    if lines is None or len(lines) == 0:
        return 0.0

    # Calculate angles of lines
    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        if x2 - x1 == 0:
            # Vertical (or degenerate) segments are reported as +90 degrees
            angle = 90.0
        else:
            angle = math.degrees(math.atan2(y2 - y1, x2 - x1))

        # atan2 spans (-180, 180], so one +/-90 step is not always enough:
        # a segment stored right-to-left at 175 degrees must fold to -5, not 85.
        angles.append(_fold_angle(angle, min_angle, max_angle))

    # Use median as the skew angle (robust against outlier segments)
    return float(np.median(angles))

def correct_skew_gpu(image_np, angle) -> np.ndarray:
    """Rotate an image about its centre by ``angle`` degrees on ``device``.

    Each output pixel at offset ``(u, v)`` from the centre (x right, y down)
    is sampled bilinearly from the input at offset
    ``(u*cos(a) - v*sin(a), u*sin(a) + v*cos(a))``. The canvas size is kept;
    positions that fall outside the input are filled with zeros
    (``grid_sample``'s default padding mode).

    Args:
        image_np: HxW or HxWxC array. Values are divided by 255 on the way in
            and multiplied by 255 on the way out, so 0-255 data is assumed.
        angle: Rotation angle in degrees.

    Returns:
        A ``uint8`` array with the same shape as ``image_np``.
    """
    has_channel_axis = image_np.ndim == 3

    # Convert to a (1, C, H, W) float tensor
    if has_channel_axis:
        image_tensor = torch.from_numpy(image_np.transpose(2, 0, 1))
    else:
        image_tensor = torch.from_numpy(image_np).unsqueeze(0)
    image_tensor = image_tensor.float().div(255.0).unsqueeze(0).to(device)

    height, width = image_tensor.shape[-2:]

    # Convert the angle to radians and take sin/cos on the host
    angle_rad = float(angle) * np.pi / 180
    cos_theta = float(np.cos(angle_rad))
    sin_theta = float(np.sin(angle_rad))

    # affine_grid works in normalised [-1, 1] coordinates on both axes. For a
    # non-square image a plain rotation matrix there is a shear in pixel space,
    # so the off-diagonal terms are rescaled by the aspect ratio.
    rotation_matrix = torch.tensor(
        [[cos_theta, -sin_theta * height / width, 0.0],
         [sin_theta * width / height, cos_theta, 0.0]],
        dtype=torch.float32,
        device=device,
    ).unsqueeze(0)  # add the batch dimension

    # Create sampling grid and apply the transformation
    grid = F.affine_grid(rotation_matrix, image_tensor.size(), align_corners=False)
    rotated_image = F.grid_sample(image_tensor, grid, align_corners=False, mode='bilinear')

    # Round instead of truncating so float error (e.g. 254.9999) does not
    # darken pixels by one level; clamp guards the uint8 cast.
    rotated = rotated_image.mul(255.0).round().clamp(0, 255).squeeze(0)
    rotated_np = rotated.cpu().numpy().astype(np.uint8)

    if has_channel_axis:
        return np.ascontiguousarray(rotated_np.transpose(1, 2, 0))
    return rotated_np[0]


## PDF Processing Functions


In [None]:
def process_pdf(input_path, output_path,
                gaussian_kernel_size=None, gaussian_sigma=None, edge_threshold=0.2,
                hough_threshold=150, min_line_length=100, max_line_gap=10,
                min_angle=-45, max_angle=45, skew_threshold=0.5, dpi=200):
    """Deskew every page of a PDF and write the result as a new PDF.

    Each page is rasterised, its skew angle is estimated with
    ``detect_skew_angle_gpu`` and, when the absolute angle exceeds
    ``skew_threshold``, the page is rotated with ``correct_skew_gpu``.
    The output PDF therefore contains page images, not the original
    vector content.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path of the PDF to write; missing parent directories
            are created.
        gaussian_kernel_size: Gaussian kernel size for edge detection
            (``None`` -> ``[5, 5]``).
        gaussian_sigma: Gaussian sigma for edge detection
            (``None`` -> ``[1.0, 1.0]``).
        edge_threshold: Edge detector threshold.
        hough_threshold: Accumulator threshold for the Hough transform.
        min_line_length: Minimum segment length for the Hough transform.
        max_line_gap: Maximum gap between segment pieces.
        min_angle: Lower bound used when folding segment angles (degrees).
        max_angle: Upper bound used when folding segment angles (degrees).
        skew_threshold: Minimum absolute angle (degrees) that triggers a
            correction.
        dpi: Resolution used both to rasterise the input and to declare the
            output page size, so pages keep their physical dimensions.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If no pages could be read from ``input_path``.
    """
    import os  # local import: only needed to prepare the output directory

    if gaussian_sigma is None:
        gaussian_sigma = [1.0, 1.0]
    if gaussian_kernel_size is None:
        gaussian_kernel_size = [5, 5]
    print(f"Processing PDF: {input_path}")

    # Convert PDF to images
    print("Converting PDF to images...")
    images = convert_from_path(input_path, dpi=dpi)
    print(f"PDF has {len(images)} pages")

    if not images:
        # Fail with a clear message instead of an IndexError when saving
        raise ValueError(f"No pages could be read from {input_path}")

    # Process each page
    corrected_images = []
    for i, img in enumerate(tqdm(images, desc="Processing pages")):
        # Convert PIL image -> numpy array
        img_np = np.array(img)

        # Detect skew angle
        skew_angle = detect_skew_angle_gpu(
            img_np,
            kernel_size=gaussian_kernel_size,
            sigma=gaussian_sigma,
            edge_threshold=edge_threshold,
            hough_threshold=hough_threshold,
            min_line_length=min_line_length,
            max_line_gap=max_line_gap,
            min_angle=min_angle,
            max_angle=max_angle
        )
        print(f"Page {i+1}: Detected skew angle = {skew_angle:.2f} degrees")

        # Correct skew if the angle is significant; otherwise keep the page as is
        if abs(skew_angle) > skew_threshold:
            corrected_img = Image.fromarray(correct_skew_gpu(img_np, skew_angle))
        else:
            corrected_img = img

        corrected_images.append(corrected_img)

    # Make sure the destination directory exists before writing
    output_dir = os.path.dirname(os.fspath(output_path))
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save corrected images -> PDF. The resolution must match the rasterisation
    # dpi, otherwise the physical page size changes (e.g. 200 dpi pixels
    # declared as 100 dpi doubles the page width and height).
    print(f"Saving corrected PDF to {output_path}")
    corrected_images[0].save(
        output_path,
        save_all=True,
        append_images=corrected_images[1:],
        resolution=float(dpi)
    )

    print("PDF processing complete!")
    return output_path


## Process the PDF


In [None]:
# Source document and destination of the deskewed copy
input_pdf = "resource/pdf/twitch.pdf"
output_pdf = "output/twitch_deskewed.pdf"

# ---- Hyperparameters ----

# Edge detection
gaussian_kernel_size = [5, 5]  # Gaussian blur kernel size
gaussian_sigma = [1.0, 1.0]    # Gaussian blur sigma values
edge_threshold = 0.3           # Edge detection threshold (0-1)

# Hough transform
hough_threshold = 256          # Accumulator threshold for Hough lines
min_line_length = 128          # Minimum line length
max_line_gap = 15              # Maximum allowed gap between line segments

# Angles (degrees)
min_angle = -45                # Lowest angle to consider
max_angle = 45                 # Highest angle to consider
skew_threshold = 0.01          # Smallest angle that triggers a correction

# Bundle the tuning values so the call itself stays short
deskew_options = dict(
    gaussian_kernel_size=gaussian_kernel_size,
    gaussian_sigma=gaussian_sigma,
    edge_threshold=edge_threshold,
    hough_threshold=hough_threshold,
    min_line_length=min_line_length,
    max_line_gap=max_line_gap,
    min_angle=min_angle,
    max_angle=max_angle,
    skew_threshold=skew_threshold,
)

# Run the deskew pipeline and report where the result was written
processed_pdf = process_pdf(input_pdf, output_pdf, **deskew_options)
print(f"Processed PDF saved to: {processed_pdf}")


## Visualize Results


In [None]:
def visualize_before_after(input_pdf, output_pdf, page_num=0):
    """Show one page of the original and the corrected PDF side by side.

    Both PDFs are rasterised with ``convert_from_path``. ``page_num`` is a
    zero-based index into the resulting page lists; if either document has
    no such page an error line is printed and nothing is drawn.
    """
    # Convert PDF -> images
    before_pages = convert_from_path(input_pdf)
    after_pages = convert_from_path(output_pdf)

    # Guard: the page must exist in both documents
    if page_num >= min(len(before_pages), len(after_pages)):
        print(f"Error: Page {page_num} does not exist in both PDFs")
        return

    # (title, image) for the left and right panel, in that order
    panels = (
        ("Original Image", before_pages[page_num]),
        ("Corrected Image", after_pages[page_num]),
    )

    plt.figure(figsize=(20, 10))
    for slot, (caption, page_img) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.imshow(page_img)
        plt.title(caption)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Show page 0 (zero-based index) of the original and the deskewed PDF side by side
visualize_before_after(input_pdf, output_pdf, page_num=0)


## Performance Comparison: CPU vs GPU


In [None]:
import time

def benchmark_processing(input_pdf, num_pages=1):
    """Time a CPU deskew pipeline against the GPU one and plot the averages.

    The first ``num_pages`` pages of ``input_pdf`` are rasterised and each
    page is processed twice: once with OpenCV / scikit-image on the CPU and
    once with ``detect_skew_angle_gpu`` / ``correct_skew_gpu``. Per-page
    times and angles are printed, followed by the averages, the speed-up
    and a bar chart.

    NOTE(review): the two paths are not identical (Canny vs. thresholded
    Sobel, different Hough thresholds, and the CPU rotation resizes the
    canvas), so the figures are indicative rather than like-for-like.

    Args:
        input_pdf: Path of the PDF to benchmark on.
        num_pages: Number of leading pages to use.
    """
    images = convert_from_path(input_pdf)[:num_pages]

    def process_image_cpu(img_np):
        """Return ``(skew_angle, corrected_image)`` computed on the CPU."""
        # Convert to grayscale
        if len(img_np.shape) == 3 and img_np.shape[2] == 3:
            img_gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
        else:
            img_gray = img_np

        img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)
        edges = cv2.Canny(img_blur, 50, 150, apertureSize=3)
        lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)

        if lines is None or len(lines) == 0:
            return 0.0, img_np

        angles = []
        for line in lines:
            x1, y1, x2, y2 = line[0]
            if x2 - x1 == 0:
                angle = 90.0
            else:
                angle = math.degrees(math.atan2(y2 - y1, x2 - x1))

            # atan2 spans (-180, 180]; fold repeatedly so that e.g. 175
            # degrees ends up at -5 rather than 85
            while angle < -45:
                angle += 90
            while angle > 45:
                angle -= 90

            angles.append(angle)

        skew_angle = float(np.median(angles))

        if abs(skew_angle) > 0.5:
            corrected = rotate(img_np, -skew_angle, resize=True, preserve_range=True).astype(np.uint8)
        else:
            corrected = img_np

        return skew_angle, corrected

    # perf_counter is the monotonic, high-resolution clock meant for intervals
    cpu_times = []
    for i, img in enumerate(images):
        img_np = np.array(img)

        start_time = time.perf_counter()
        angle, _ = process_image_cpu(img_np)
        cpu_times.append(time.perf_counter() - start_time)

        print(f"CPU - Page {i+1}: {cpu_times[-1]:.2f} seconds, Angle: {angle:.2f} degrees")

    # Untimed warm-up: PyTorch initialises CUDA lazily on first use, which
    # would otherwise be charged to the first measured page
    if images and torch.cuda.is_available():
        detect_skew_angle_gpu(np.array(images[0]))

    gpu_times = []
    for i, img in enumerate(images):
        img_np = np.array(img)

        start_time = time.perf_counter()
        angle = detect_skew_angle_gpu(img_np)
        if abs(angle) > 0.5:
            _ = correct_skew_gpu(img_np, angle)
        gpu_times.append(time.perf_counter() - start_time)

        print(f"GPU - Page {i+1}: {gpu_times[-1]:.2f} seconds, Angle: {angle:.2f} degrees")

    # Averages fall back to 0 when there were no pages to measure
    avg_cpu_time = sum(cpu_times) / len(cpu_times) if cpu_times else 0
    avg_gpu_time = sum(gpu_times) / len(gpu_times) if gpu_times else 0

    speedup = avg_cpu_time / avg_gpu_time if avg_gpu_time > 0 else float('inf')

    print(f"\nAverage CPU time: {avg_cpu_time:.2f} seconds")
    print(f"Average GPU time: {avg_gpu_time:.2f} seconds")
    print(f"GPU Speedup: {speedup:.2f}x")

    # Bar chart of the two averages, each labelled at mid-height
    plt.figure(figsize=(10, 6))
    plt.bar(['CPU', 'GPU'], [avg_cpu_time, avg_gpu_time], color=['blue', 'orange'])
    plt.title('CPU vs GPU Processing Time')
    plt.ylabel('Time (seconds)')
    plt.text(0, avg_cpu_time/2, f"{avg_cpu_time:.2f}s", ha='center', va='center')
    plt.text(1, avg_gpu_time/2, f"{avg_gpu_time:.2f}s", ha='center', va='center')
    plt.show()

# Benchmark CPU vs GPU processing on (at most) the first 3 pages of the input PDF
benchmark_processing(input_pdf, num_pages=3)