In [3]:
import fitz 
import pytesseract
from PIL import Image
import io
import os

# Location of the Tesseract executable used by pytesseract. Set the
# TESSERACT_CMD environment variable to override it on machines where
# Tesseract is installed elsewhere; the default is the original Windows path.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)

def get_next_output_filename(base_name, folder='rotated_files'):
    """Return the first unused ``<base_name>_rotated_<n>.pdf`` path in ``folder``.

    ``folder`` is created (including missing parents) when it does not exist.
    ``n`` starts at 1 and is incremented until a path that does not exist yet
    is found. The file itself is not created.

    Args:
        base_name: File name stem placed before ``_rotated_<n>.pdf``.
        folder: Directory that holds the output files.

    Returns:
        The path of the first non-existing candidate file.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(folder, exist_ok=True)

    counter = 1
    while True:
        new_file = os.path.join(folder, f"{base_name}_rotated_{counter}.pdf")
        if not os.path.exists(new_file):
            return new_file
        counter += 1
        
def ocr_detect_and_rotate_pdf(file_path):
    """Detect each page's orientation with Tesseract OSD and save a corrected copy.

    Every page of the PDF at ``file_path`` is rendered at 150 DPI and passed to
    ``pytesseract.image_to_osd``. When OSD reports a non-zero ``rotate`` value,
    that correction is added to the page's current rotation. The result is
    written to the path returned by ``get_next_output_filename``; the input
    file itself is not overwritten.

    Args:
        file_path: Path of the PDF to process.

    Returns:
        None. Progress, the output path and the list of rotated pages are
        printed. If ``file_path`` does not exist a message is printed and
        nothing else happens.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path} !")
        return

    # File name without directory and extension, used to build the output name.
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    rotated_pages = []

    # The context manager closes the document even if rendering, OCR or
    # saving raises.
    with fitz.open(file_path) as doc:
        for i, page in enumerate(doc):
            pix = page.get_pixmap(dpi=150)
            img = Image.open(io.BytesIO(pix.tobytes("png")))

            # OCR with orientation detection. OSD can fail on individual pages
            # (e.g. pages with too little text); skip those instead of
            # aborting the whole document.
            try:
                osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)
            except pytesseract.TesseractError as exc:
                print(f"Page {i + 1}: orientation could not be detected, left unchanged ({exc})")
                continue
            rotate_angle = int(osd.get("rotate", 0))

            if rotate_angle != 0:
                print(f"Rotating page {i + 1} by {rotate_angle} degrees (based on content)")
                # The rendered image already reflects the page's existing
                # rotation, so the OSD correction is applied on top of it
                # rather than replacing it.
                page.set_rotation((page.rotation + rotate_angle) % 360)
                rotated_pages.append(i + 1)
            else:
                print(f"Page {i + 1} has correct orientation")

        # Save new file
        output_path = get_next_output_filename(base_name)
        doc.save(output_path)

    print(f"\nSaved: {output_path}")
    if rotated_pages:
        print(f"Pages auto-rotated: {rotated_pages}")
    else:
        print("No rotation needed.")

In [4]:
# Surrounding whitespace and quotes (common when a path is pasted or dragged
# into the prompt) are removed so the existence check sees the actual path.
file_path = input("📄 Please input the financial PDF file name: ").strip().strip('"\'')

ocr_detect_and_rotate_pdf(file_path)



Page 1 has correct orientation
Page 2 has correct orientation
Page 3 has correct orientation
Page 4 has correct orientation
Page 5 has correct orientation
Page 6 has correct orientation
Page 7 has correct orientation
Page 8 has correct orientation
Page 9 has correct orientation
Page 10 has correct orientation
Page 11 has correct orientation
Rotating page 12 by 180 degrees (based on content)
Page 13 has correct orientation
Page 14 has correct orientation
Page 15 has correct orientation
Page 16 has correct orientation
Page 17 has correct orientation
Page 18 has correct orientation
Page 19 has correct orientation
Page 20 has correct orientation
Page 21 has correct orientation
Page 22 has correct orientation
Page 23 has correct orientation
Page 24 has correct orientation
Page 25 has correct orientation
Page 26 has correct orientation
Page 27 has correct orientation
Page 28 has correct orientation
Rotating page 29 by 90 degrees (based on content)
Page 30 has correct orientation
Page 31 has 