# In [1]:
import os
import tempfile

import cv2
import img2pdf
import numpy as np
from pdf2image import convert_from_path
from PIL import Image

def deskew_image(image: np.ndarray) -> np.ndarray:
    """Rotate a page image so that its content is axis-aligned.

    The skew is estimated from the minimum-area rectangle that encloses every
    foreground pixel (dark content on a light background) found by an inverted
    Otsu threshold of the blurred grayscale page.

    Args:
        image: Page as a NumPy array, either 2-D grayscale or 3-D with RGB or
            RGBA channel order.

    Returns:
        A new array of the same size as ``image``, rotated about its centre by
        the estimated correction angle. When no foreground pixels are found
        (for example a blank page) an unrotated copy is returned.
    """
    # Accept grayscale and RGBA pages as well as plain RGB ones.
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # (row, col) coordinates of every foreground pixel.
    coords = np.column_stack(np.where(thresh > 0))
    if coords.size == 0:
        # Blank page: there is nothing to measure and minAreaRect would raise
        # on an empty point set.
        return image.copy()

    raw_angle = cv2.minAreaRect(coords)[-1]

    # A rectangle at angle t with size (w, h) is the same rectangle as one at
    # t + 90 with size (h, w), so only the angle modulo 90 carries skew
    # information. Folding it this way yields the same correction whether
    # OpenCV reports the angle in [-90, 0) (before 4.5.1) or in (0, 90]
    # (4.5.1 and later); the old "angle < -45" test mis-rotated pages by
    # roughly 90 degrees under the newer convention.
    residual = raw_angle % 90.0
    angle = -residual if residual < 45.0 else 90.0 - residual

    # Rotate about the image centre, replicating edge pixels so that no black
    # wedges appear in the corners.
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(
        image,
        rot_mat,
        (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )

    return rotated

def process_pdf(input_pdf_path: str, output_pdf_path: str, dpi: int = 300) -> None:
    """Deskew every page of a PDF and write the result as a new PDF.

    Each page is rasterised at ``dpi``, straightened with ``deskew_image``,
    stored as a temporary JPEG and finally assembled into the output PDF
    with ``img2pdf``.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path of the PDF to write.
        dpi: Resolution used to rasterise the pages; also recorded in each
            intermediate JPEG.
    """
    pages = convert_from_path(input_pdf_path, dpi=dpi)

    # A unique temporary directory avoids clashes with an existing
    # "temp_deskew" folder or a concurrent run, and is removed even when a
    # page fails to process.
    with tempfile.TemporaryDirectory(prefix="deskew_") as temp_dir:
        deskewed_images = []
        for i, page in enumerate(pages):
            deskewed = deskew_image(np.array(page))
            img_path = os.path.join(temp_dir, f"page_{i:03d}.jpg")
            # Record the resolution in the JPEG: without it img2pdf assumes
            # 96 dpi and the output pages come out larger than the originals.
            Image.fromarray(deskewed).save(img_path, "JPEG", dpi=(dpi, dpi))
            deskewed_images.append(img_path)

        # Build the PDF in memory before touching the destination so that a
        # conversion error cannot leave a truncated output file behind.
        pdf_bytes = img2pdf.convert(deskewed_images)

    with open(output_pdf_path, "wb") as f:
        f.write(pdf_bytes)

# Example usage. Guarded so that the conversion runs when this file is
# executed as a script (or notebook cell) but not when it is imported.
if __name__ == "__main__":
    process_pdf(
        "/Users/mason/Desktop/Technical_Projects/PYTHON_Projects/PSAI/raw/STRIKE FROM SPACE.pdf",
        "deskewed_output.pdf",
    )
