# In [None]:
import cv2
import pytesseract
import numpy as np
import fitz  # PyMuPDF
import os

# In [None]:
class OCREngine:
    """String identifiers for the OCR back ends this project can refer to."""

    # Historical (misspelled) name and value; kept unchanged because existing
    # code refers to ``OCREngine.Tessaract``.
    Tessaract = "tessaract"
    # Correctly spelled alias for new code; deliberately the same value so
    # both names compare equal.
    Tesseract = Tessaract
    EasyOCR = "easyocr"
    PaddleOCR = "paddleocr"

# In [None]:
class IM_Ops:
    """Thin wrappers around the OpenCV operations used to prepare an image for OCR."""

    def __init__(self) -> None:
        pass

    def image_reader(self, file_path):
        """Read an image from ``file_path`` with ``cv2.imread``.

        Returns:
            The loaded image array, or ``None`` (after printing an error
            message) when OpenCV could not read the file.
        """
        image = cv2.imread(file_path)
        if image is None:
            print(f"Error: Could not read image from {file_path}")
            return None
        print("Image successfully read")
        return image

    def image_grayscale(self, image):
        """Convert ``image`` to a single-channel grayscale image.

        Args:
            image: A 2-D array (already single channel), or a 3-D array whose
                last axis holds 1, 3 (BGR) or 4 (BGRA) channels.

        Returns:
            A 2-D grayscale array. Input that is already 2-D is returned
            unchanged (not copied).
        """
        # cv2.cvtColor(..., COLOR_BGR2GRAY) rejects single-channel input, so
        # pass already-gray images straight through instead of crashing.
        if image.ndim == 2:
            return image
        channels = image.shape[2]
        if channels == 1:
            return image[:, :, 0]
        if channels == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def morpher(self, image, kernel_size=5):
        """Apply a morphological opening with a square kernel of ones.

        Args:
            image: Image array passed to ``cv2.morphologyEx``.
            kernel_size: Side length, in pixels, of the square structuring
                element.

        Returns:
            The result of ``cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)``.
        """
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    def image_thresholding(self, image):
        """Binarise ``image`` with Otsu's thresholding.

        Returns:
            The thresholded image; pixels are set to either 0 or 255.
        """
        # With THRESH_OTSU OpenCV derives the threshold from the histogram;
        # the 128 below is only a placeholder and does not affect the result.
        _, binary_image = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        return binary_image

    def image_sharpen(self, image, center_weight=30):
        """Filter ``image`` with a 3x3 cross-shaped sharpening kernel.

        Args:
            image: Image array passed to ``cv2.filter2D``.
            center_weight: Weight of the centre pixel; the four direct
                neighbours always get -1. Defaults to 30, the value this
                project has always used.

        Returns:
            The filtered image, with the same depth as the input.
        """
        # NOTE(review): the weights sum to ``center_weight - 4`` (26 by
        # default), so besides sharpening this also scales intensities up and
        # relies on saturation. A unit-sum kernel would use a centre of 5.
        kernel = np.array([[0, -1, 0], [-1, center_weight, -1], [0, -1, 0]])
        return cv2.filter2D(image, -1, kernel)



# In [None]:
class File_Ops:
    """File-level helpers: loading inputs, splitting page columns, saving text."""

    def __init__(self) -> None:
        pass

    def image_reader(self, file_path):
        """Read an image file, delegating to ``IM_Ops.image_reader``.

        Returns:
            The loaded image array, or ``None`` when the file cannot be read.
        """
        return IM_Ops().image_reader(file_path)

    def PDF_reader(self, file_path, output_dir=None, zoom=2.0):
        """Render every page of a PDF to a BGR image array.

        Args:
            file_path: Path of the PDF to open with PyMuPDF.
            output_dir: When given, each page is also written there as
                ``page_<n>.jpg`` (1-based); the directory is created if needed.
            zoom: Scale factor applied to both axes when rasterising a page.

        Returns:
            A list with one BGR ``uint8`` array per page, in page order.
            Errors raised by ``fitz.open`` propagate to the caller.
        """
        if output_dir is not None:
            os.makedirs(output_dir, exist_ok=True)

        pages = []
        with fitz.open(file_path) as document:
            for page_number, page in enumerate(document, start=1):
                pixmap = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
                rgb = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
                    pixmap.height, pixmap.width, pixmap.n
                )
                # PyMuPDF renders RGB; the rest of the pipeline uses OpenCV's BGR.
                bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
                if output_dir is not None:
                    cv2.imwrite(os.path.join(output_dir, f"page_{page_number}.jpg"), bgr)
                pages.append(bgr)
        return pages

    def file_check(self, file_path):
        """Load ``file_path`` as a PDF or as an image, based on its extension.

        Returns:
            For a ``.pdf`` path (case-insensitive), the list of page images
            from ``PDF_reader``; otherwise the single image from
            ``image_reader`` (``None`` when it cannot be read).
        """
        if file_path.lower().endswith('.pdf'):
            return self.PDF_reader(file_path)
        return self.image_reader(file_path)

    def _find_column_gap(self, image, half_window=50):
        """Return the x index of the emptiest pixel column near the image centre."""
        # Inverting makes dark pixels (ink) large, so the gap is the minimum.
        ink = 255 - image
        # Collapse rows (and colour channels, if any) to one value per x position.
        reduce_axes = (0,) if ink.ndim == 2 else (0, 2)
        vertical_projection = ink.sum(axis=reduce_axes)

        midpoint = len(vertical_projection) // 2
        # Clamp the search window to the image: a negative slice start would
        # wrap around and make the offset added below wrong on narrow images.
        window_start = max(midpoint - half_window, 0)
        window_end = min(midpoint + half_window, len(vertical_projection))
        return int(np.argmin(vertical_projection[window_start:window_end])) + window_start

    def split_and_save_columns(self, image, output_dir, page_number):
        """Split a two-column page at the gap nearest its centre and save both halves.

        Args:
            image: Page image; light background with dark text is assumed,
                since the gap is taken to be the column with the least "ink".
            output_dir: Directory for the two JPEG files; created if missing.
            page_number: Used in the output file names
                ``page_<page_number>_column_1.jpeg`` / ``..._column_2.jpeg``.

        Returns:
            ``(column1, column2)``: the left and right parts of ``image``.
        """
        column_gap = self._find_column_gap(image)

        # Split the image at the detected column gap
        column1 = image[:, :column_gap]
        column2 = image[:, column_gap:]

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_dir, exist_ok=True)

        # Save the columns as separate image files
        cv2.imwrite(f'{output_dir}/page_{page_number}_column_1.jpeg', column1)
        cv2.imwrite(f'{output_dir}/page_{page_number}_column_2.jpeg', column2)

        return column1, column2

    def save_as_txt(self, text: str, saving_path: str, append: bool = False):
        """Write ``text`` plus a trailing newline to a UTF-8 text file.

        Args:
            text: Content to write.
            saving_path: Destination file path.
            append: When true, add to the end of an existing file instead of
                replacing its content (the default, as before).

        Failures are reported with ``print`` rather than raised, so a failed
        save does not abort the caller.
        """
        mode = "a" if append else "w"
        try:
            with open(saving_path, mode, encoding="utf8") as file:
                file.write(text + "\n")
            print(f"Text successfully saved to {saving_path}")
        except Exception as e:
            print(f"Error {e} while writing to TXT file")


# In [None]:
class KannadaOCR:
    """Kannada OCR pipeline: image preprocessing plus Tesseract text extraction."""

    def __init__(self):
        """Create the helper objects and make sure the output directories exist."""
        self.im_ops = IM_Ops()
        self.file_ops = File_Ops()
        self.ocr_engine = OCREngine.Tessaract
        self.im_crop_dir = "C:/Kannada OCR/Outputs/Crop"
        self.ocr_text_out = "C:/Kannada OCR/Outputs/OCR Text"

        # exist_ok avoids the check-then-create race of a separate exists() test
        for directory in (self.im_crop_dir, self.ocr_text_out):
            os.makedirs(directory, exist_ok=True)

    def image_to_ocr_text(self, image_path):
        """Run Tesseract with the Kannada model (``lang="kan"``) on an image.

        Args:
            image_path: Path of the image file to read, or an already-loaded
                image array (for example the output of ``preprocessor``).

        Returns:
            The recognised text, or ``""`` when the image file cannot be read.
        """
        if isinstance(image_path, np.ndarray):
            # Already in memory: skip the disk read so callers can OCR a
            # preprocessed image.
            image = image_path
        else:
            image = self.im_ops.image_reader(image_path)
        if image is None:
            return ""

        # Tesseract options: OCR engine mode 3, page segmentation mode 6.
        custom_config = r"--oem 3 --psm 6"
        text = pytesseract.image_to_string(image, lang="kan", config=custom_config)

        return text

    def preprocessor(self, file_name):
        """Load an image and run the preprocessing chain on it.

        The steps are, in order: grayscale conversion, morphological
        operation, Otsu thresholding and sharpening.

        Args:
            file_name: Path of the image file to preprocess.

        Returns:
            The processed image, or ``None`` when the file cannot be read.
        """
        image = self.im_ops.image_reader(file_name)
        if image is None:
            return None

        # Convert to grayscale
        gray_image = self.im_ops.image_grayscale(image)

        # Apply morphological operations
        morphed_image = self.im_ops.morpher(gray_image)

        # Apply thresholding
        binary_image = self.im_ops.image_thresholding(morphed_image)

        # Sharpen the image
        final_image = self.im_ops.image_sharpen(binary_image)

        return final_image

# In [None]:
import os

class OCRDriver:
    """Runs the Kannada OCR pipeline on one input file (image or PDF)."""

    def __init__(self, input_file):
        """Store the input path and make sure the page-image directory exists.

        Args:
            input_file: Path of the image or PDF to process.
        """
        self.kannada_ocr = KannadaOCR()
        self.file_ops = File_Ops()
        self.input_file = input_file
        self.image_output_dir = "C:/Kannada OCR/Outputs/pdf_images"

        # Ensure directory for PDF-to-image conversion exists
        os.makedirs(self.image_output_dir, exist_ok=True)

    def run_ocr(self):
        """Dispatch to PDF or image processing based on the file extension."""
        if self.input_file.lower().endswith('.pdf'):
            print(f"Processing PDF: {self.input_file}")
            self.process_pdf()
        else:
            print(f"Processing Image: {self.input_file}")
            self.process_image()

    def process_image(self):
        """OCR the input image and save the text as ``<file name>.txt``."""
        # Step 1: Preprocess the image
        # NOTE(review): the preprocessed image is only used as a readability
        # check here; the OCR below runs on the original file, not on it.
        preprocessed_image = self.kannada_ocr.preprocessor(self.input_file)
        if preprocessed_image is None:
            print(f"Error processing image: {self.input_file}")
            return

        # Step 2: OCR the image
        ocr_text = self.kannada_ocr.image_to_ocr_text(self.input_file)

        # Step 3: Save the OCR text to the OCR Text directory
        base_name = os.path.basename(self.input_file).replace(' ', '_')
        text_file_path = os.path.join(self.kannada_ocr.ocr_text_out, f"{base_name}.txt")

        self.file_ops.save_as_txt(ocr_text, text_file_path)

    def _pdf_to_images(self, zoom=2.0):
        """Render each page of the input PDF to a .jpg and return the paths in page order."""
        stem = os.path.splitext(os.path.basename(self.input_file))[0].replace(' ', '_')
        image_paths = []
        with fitz.open(self.input_file) as document:
            for page_number, page in enumerate(document, start=1):
                pixmap = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
                rgb = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
                    pixmap.height, pixmap.width, pixmap.n
                )
                # PyMuPDF renders RGB; OpenCV writes and reads BGR.
                bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
                image_path = os.path.join(
                    self.image_output_dir, f"{stem}_page_{page_number:04d}.jpg"
                )
                if cv2.imwrite(image_path, bgr):
                    image_paths.append(image_path)
                else:
                    print(f"Error: Could not write page image {image_path}")
        return image_paths

    def process_pdf(self):
        """Convert the PDF's pages to images, OCR each one and save all text.

        The text of every page, in page order, is written to
        ``<pdf name>_ocr.txt`` in the OCR text directory.
        """
        # Step 1: Render the PDF pages to images in the output directory
        image_paths = self._pdf_to_images()

        # splitext strips the extension whatever its case, and only at the end
        pdf_base_name = os.path.splitext(os.path.basename(self.input_file))[0].replace(' ', '_')
        text_file_path = os.path.join(self.kannada_ocr.ocr_text_out, f"{pdf_base_name}_ocr.txt")

        # Step 2: OCR only the pages rendered above. Listing the directory
        # would also pick up images left over from earlier runs and would
        # order page 10 before page 2.
        page_texts = []
        for image_path in image_paths:
            print(f"Processing extracted PDF image: {image_path}")

            # Step 3: Preprocess the image
            # NOTE(review): as in process_image, this result only gates the
            # OCR call; the OCR itself reads the rendered page file.
            preprocessed_image = self.kannada_ocr.preprocessor(image_path)
            if preprocessed_image is None:
                print(f"Error processing image: {image_path}")
                continue

            # Step 4: OCR the page image
            page_texts.append(self.kannada_ocr.image_to_ocr_text(image_path))

        # Step 5: Write all pages in one go; save_as_txt replaces the file's
        # content, so saving page by page would keep only the last page.
        if page_texts:
            self.file_ops.save_as_txt("\n".join(page_texts), text_file_path)
        else:
            print(f"No pages could be processed for {self.input_file}")


# Script entry point: run the OCR pipeline on one hard-coded input file.
if __name__ == "__main__":
    # Placeholder path of the file to process; replace with a real file.
    # The extension selects the pipeline: ".pdf" is processed as a PDF,
    # anything else as a single image.
    input_file = "C:/path_to_your_file/input_file.pdf"  # Or .jpg, .png, etc.

    # Instantiate the OCRDriver and run the OCR process
    ocr_driver = OCRDriver(input_file)
    ocr_driver.run_ocr()
