In [1]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def analyze_layout(image_path):
    # Read the image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text detected at: (721, 954) Size: (43, 8)
Text detected at: (662, 954) Size: (56, 8)
Text detected at: (603, 954) Size: (41, 6)
Text detected at: (540, 954) Size: (40, 6)
Text detected at: (231, 954) Size: (44, 8)
Text detected at: (155, 954) Size: (41, 8)
Text detected at: (18, 21) Size: (894, 926)
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Text detected at: (22, 196) Size: (187, 15)
Text detected at: (21, 138) Size: (188, 15)
Text detected at: (23, 126) Size: (34, 9)
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Text detected at: (193, 64) Size: (89, 58)
Text detected at: (72, 64) Size: (94, 58)
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Text detected at: (125, 191) Size: (30, 14)
Text detected at: (14, 188) Size: (73, 17)
Text detected at: (169, 115) Size: (37, 6)
Text detected at: (21, 62) Size: (681, 322)
Text detected at: (245, 21) Size: (453, 37)
Page 5 converted to C

In [2]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Enhance image quality
        enhance_image_quality(image_path)
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def enhance_image_quality(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply denoising
    denoised_image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    
    # Apply sharpening
    kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
    sharpened_image = cv2.filter2D(denoised_image, -1, kernel)
    
    # Apply contrast adjustment (optional)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_adjusted_image = clahe.apply(sharpened_image)
    
    # Save the enhanced image
    cv2.imwrite(image_path, contrast_adjusted_image)
    
def analyze_layout(image_path):
    # Read the image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png


error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\clahe.cpp:353: error: (-215:Assertion failed) _src.type() == CV_8UC1 || _src.type() == CV_16UC1 in function '`anonymous-namespace'::CLAHE_Impl::apply'


In [3]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Enhance image quality
        enhance_image_quality(image_path)
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def enhance_image_quality(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply denoising
    denoised_image = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
    
    # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_adjusted_image = clahe.apply(denoised_image)
    
    # Save the enhanced image
    cv2.imwrite(image_path, contrast_adjusted_image)
    
def analyze_layout(image_path):
    # Read the image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text detected at: (24, 966) Size: (34, 7)
Text detected at: (721, 954) Size: (44, 8)
Text detected at: (662, 954) Size: (56, 8)
Text detected at: (603, 954) Size: (41, 7)
Text detected at: (540, 954) Size: (40, 6)
Text detected at: (434, 954) Size: (41, 6)
Text detected at: (344, 954) Size: (44, 7)
Text detected at: (231, 954) Size: (44, 8)
Text detected at: (155, 954) Size: (41, 8)
Text detected at: (18, 21) Size: (894, 929)
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Text detected at: (21, 196) Size: (188, 15)
Text detected at: (42, 138) Size: (167, 15)
Text detected at: (23, 126) Size: (34, 9)
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Text detected at: (193, 64) Size: (89, 58)
Text detected at: (72, 64) Size: (94, 58)
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Text detected at: (14, 188) Size: (73, 17)
Text detected at: (168, 115) Size: (38, 6)
Text detected at: (21, 60

In [4]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Enhance image quality
        enhance_image_quality(image_path)
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def enhance_image_quality(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply denoising
    denoised_image = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
    
    # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_adjusted_image = clahe.apply(denoised_image)
    
    # Save the enhanced image
    cv2.imwrite(image_path, contrast_adjusted_image)
    
def analyze_layout(image_path):
    # Read the image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text detected at: (24, 966) Size: (34, 7)
Text detected at: (721, 954) Size: (44, 8)
Text detected at: (662, 954) Size: (56, 8)
Text detected at: (603, 954) Size: (41, 7)
Text detected at: (540, 954) Size: (40, 6)
Text detected at: (434, 954) Size: (41, 6)
Text detected at: (344, 954) Size: (44, 7)
Text detected at: (231, 954) Size: (44, 8)
Text detected at: (155, 954) Size: (41, 8)
Text detected at: (18, 21) Size: (894, 929)
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Text detected at: (21, 196) Size: (188, 15)
Text detected at: (42, 138) Size: (167, 15)
Text detected at: (23, 126) Size: (34, 9)
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Text detected at: (193, 64) Size: (89, 58)
Text detected at: (72, 64) Size: (94, 58)
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Text detected at: (14, 188) Size: (73, 17)
Text detected at: (168, 115) Size: (38, 6)
Text detected at: (21, 60

In [5]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Enhance image quality
        enhance_image_quality(image_path)
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def enhance_image_quality(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply bilateral filtering for noise reduction
    filtered_image = cv2.bilateralFilter(image, 9, 75, 75)
    
    # Apply sharpening
    kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
    sharpened_image = cv2.filter2D(filtered_image, -1, kernel)
    
    # Save the enhanced image
    cv2.imwrite(image_path, sharpened_image)
    
def analyze_layout(image_path):
    # Read the image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text detected at: (18, 21) Size: (894, 926)
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Text detected at: (22, 196) Size: (187, 15)
Text detected at: (22, 138) Size: (187, 15)
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Text detected at: (193, 64) Size: (89, 58)
Text detected at: (73, 64) Size: (93, 58)
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Text detected at: (86, 188) Size: (69, 17)
Text detected at: (41, 188) Size: (46, 17)
Text detected at: (15, 188) Size: (27, 17)
Text detected at: (21, 62) Size: (681, 322)
Text detected at: (348, 21) Size: (350, 37)
Text detected at: (248, 21) Size: (226, 36)
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Text detected at: (46, 50) Size: (252, 285)
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Text detected at: (21, 21) Size: (1014, 564)
Page 7 converted to C:/Users/Shreshtha/Downloads//page_7.png


In [None]:
import cv2
import numpy as np

# Function to preprocess the image
def preprocess_image(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    return blurred

# Function to detect text regions
def detect_text_regions(image):
    # Use text detection algorithms to identify text regions
    # For simplicity, let's assume text regions are already known
    
    # Example: Assuming text regions are provided as a list of bounding boxes
    text_regions = [(100, 100, 200, 50), (150, 200, 180, 40)]  # Format: (x, y, width, height)
    
    return text_regions

# Function to detect lines and borders
def detect_lines_and_borders(image):
    # Use line and border detection algorithms
    # For simplicity, let's assume lines and borders are already known
    
    # Example: Assuming lines and borders are provided as a list of bounding boxes
    lines_and_borders = [(50, 50, 5, 200), (100, 100, 200, 5)]  # Format: (x, y, width, height)
    
    return lines_and_borders

# Function to segment tables
def segment_tables(image, lines_and_borders):
    # Segment the image into individual tables
    # For simplicity, let's assume tables are already known
    
    # Example: Assuming tables are provided as a list of bounding boxes
    tables = [(100, 100, 300, 200)]  # Format: (x, y, width, height)
    
    return tables

# Function to identify columns and rows
def identify_columns_and_rows(table_image, lines_and_borders):
    # Identify columns and rows within a table
    # For simplicity, let's assume columns and rows are already known
    
    # Example: Assuming columns are provided as a list of bounding boxes
    columns = [(100, 100, 50, 200)]  # Format: (x, y, width, height)
    
    return columns

# Function to draw boxes for columns
def draw_boxes_for_columns(image, columns):
    # Draw bounding boxes around identified columns
    for column in columns:
        x, y, w, h = column
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Function to identify form fields
def identify_form_fields(image, text_regions):
    # Use contour detection and template matching to identify form fields
    # For simplicity, let's assume form fields are already known
    
    # Example: Assuming form fields are provided as a list of bounding boxes
    form_fields = [(150, 300, 100, 30)]  # Format: (x, y, width, height)
    
    return form_fields

# Function to draw boxes for key-value pairs
def draw_boxes_for_key_value_pairs(image, key_value_pairs):
    # Draw bounding boxes around identified form fields
    for key_value_pair in key_value_pairs:
        x, y, w, h = key_value_pair
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)

# Load the image
image = cv2.imread("your_image.jpg")

# Preprocess the image
processed_image = preprocess_image(image)

# Detect text regions
text_regions = detect_text_regions(processed_image)

# Detect lines and borders
lines_and_borders = detect_lines_and_borders(processed_image)

# Segment tables
tables = segment_tables(processed_image, lines_and_borders)

# Analyze tables
for table in tables:
    # Identify columns and rows
    columns = identify_columns_and_rows(processed_image, lines_and_borders)
    
    # Draw bounding boxes for columns
    draw_boxes_for_columns(processed_image, columns)

# Identify form fields
form_fields = identify_form_fields(processed_image, text_regions)

# Draw bounding boxes for key-value pairs
draw_boxes_for_key_value_pairs(processed_image, form_fields)

# Display the image with drawn boxes
cv2.imshow("Image with Boxes", processed_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [None]:
import cv2
import os

# Function to preprocess the image
def preprocess_image(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    return blurred

# Function to detect text regions
def detect_text_regions(image):
    # Use text detection algorithms to identify text regions
    # For simplicity, let's assume text regions are already known
    
    # Example: Assuming text regions are provided as a list of bounding boxes
    text_regions = [(100, 100, 200, 50), (150, 200, 180, 40)]  # Format: (x, y, width, height)
    
    return text_regions

# Function to detect lines and borders
def detect_lines_and_borders(image):
    # Use line and border detection algorithms
    # For simplicity, let's assume lines and borders are already known
    
    # Example: Assuming lines and borders are provided as a list of bounding boxes
    lines_and_borders = [(50, 50, 5, 200), (100, 100, 200, 5)]  # Format: (x, y, width, height)
    
    return lines_and_borders

# Function to segment tables
def segment_tables(image, lines_and_borders):
    # Segment the image into individual tables
    # For simplicity, let's assume tables are already known
    
    # Example: Assuming tables are provided as a list of bounding boxes
    tables = [(100, 100, 300, 200)]  # Format: (x, y, width, height)
    
    return tables

# Function to identify columns and rows
def identify_columns_and_rows(table_image, lines_and_borders):
    # Identify columns and rows within a table
    # For simplicity, let's assume columns and rows are already known
    
    # Example: Assuming columns are provided as a list of bounding boxes
    columns = [(100, 100, 50, 200)]  # Format: (x, y, width, height)
    
    return columns

# Function to draw boxes for columns
def draw_boxes_for_columns(image, columns):
    # Draw bounding boxes around identified columns
    for column in columns:
        x, y, w, h = column
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Function to identify form fields
def identify_form_fields(image, text_regions):
    # Use contour detection and template matching to identify form fields
    # For simplicity, let's assume form fields are already known
    
    # Example: Assuming form fields are provided as a list of bounding boxes
    form_fields = [(150, 300, 100, 30)]  # Format: (x, y, width, height)
    
    return form_fields

# Function to draw boxes for key-value pairs
def draw_boxes_for_key_value_pairs(image, key_value_pairs):
    # Draw bounding boxes around identified form fields
    for key_value_pair in key_value_pairs:
        x, y, w, h = key_value_pair
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)

# Directory containing input images
input_dir = "input_images"

# Directory to save output images
output_dir = "output_images"
os.makedirs(output_dir, exist_ok=True)

# Process each image in the input directory
for filename in os.listdir(input_dir):
    # Load the image
    image = cv2.imread(os.path.join(input_dir, filename))
    
    # Preprocess the image
    processed_image = preprocess_image(image)

    # Detect text regions
    text_regions = detect_text_regions(processed_image)

    # Detect lines and borders
    lines_and_borders = detect_lines_and_borders(processed_image)

    # Segment tables
    tables = segment_tables(processed_image, lines_and_borders)

    # Analyze tables
    for table in tables:
        # Identify columns and rows
        columns = identify_columns_and_rows(processed_image, lines_and_borders)
        
        # Draw bounding boxes for columns
        draw_boxes_for_columns(processed_image, columns)

    # Identify form fields
    form_fields = identify_form_fields(processed_image, text_regions)

    # Draw bounding boxes for key-value pairs
    draw_boxes_for_key_value_pairs(processed_image, form_fields)

    # Save the processed image
    output_path = os.path.join(output_dir, filename)
    cv2.imwrite(output_path, processed_image)

    print(f"Processed image saved: {output_path}")


In [6]:
import fitz

def pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)

    # Iterate through each page
    for page_number in range(len(pdf_document)):
        # Get the page
        page = pdf_document.load_page(page_number)

        # Convert the page to a Pixmap with higher DPI
        pixmap = page.get_pixmap(matrix=fitz.Matrix(600/72, 600/72))

        # Save the Pixmap as an image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        pixmap.writePNG(image_path)

    # Close the PDF
    pdf_document.close()

# Example usage
pdf_to_images("C:/Users/Shreshtha/Labelled_MNS_Sample.pdf", "C:/Users/Shreshtha/Downloads/")


AttributeError: 'Pixmap' object has no attribute 'writePNG'

In [7]:
import fitz
import os

def pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Iterate through each page
    for page_number in range(len(pdf_document)):
        # Get the page
        page = pdf_document.load_page(page_number)

        # Convert the page to a Pixmap with higher DPI
        pixmap = page.get_pixmap(matrix=fitz.Matrix(600/72, 600/72))

        # Save the Pixmap as an image
        image_path = os.path.join(output_folder, f"page_{page_number + 1}.png")
        pixmap.write_image(image_path)

    # Close the PDF
    pdf_document.close()

# Example usage
pdf_to_images("C:/Users/Shreshtha/Labelled_MNS_Sample.pdf", "C:/Users/Shreshtha/Downloads/")


AttributeError: 'Pixmap' object has no attribute 'write_image'

In [9]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image with higher DPI
        zoom = 2.0  # Adjust the zoom factor as needed for higher resolution
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        pix.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Enhance image quality
        enhance_image_quality(image_path)
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def enhance_image_quality(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply Gaussian blur for noise reduction and smoother edges
    blurred_image = cv2.GaussianBlur(image, (5, 5), 0)
    
    # Apply unsharp masking for enhancing details
    unsharp_image = cv2.addWeighted(image, 1.5, blurred_image, -0.5, 0)
    
    # Save the enhanced image
    cv2.imwrite(image_path, unsharp_image)
    
def analyze_layout(image_path):
    # Read the image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text detected at: (18, 21) Size: (894, 926)
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Text detected at: (21, 196) Size: (188, 15)
Text detected at: (21, 138) Size: (188, 15)
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Text detected at: (193, 64) Size: (89, 58)
Text detected at: (72, 64) Size: (94, 58)
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Text detected at: (14, 188) Size: (73, 17)
Text detected at: (169, 115) Size: (37, 6)
Text detected at: (21, 62) Size: (681, 322)
Text detected at: (245, 21) Size: (453, 37)
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Text detected at: (46, 168) Size: (248, 167)
Text detected at: (46, 50) Size: (248, 124)
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Text detected at: (162, 491) Size: (32, 8)
Text detected at: (244, 484) Size: (89, 22)
Text detected at: (904, 442) Size: (115, 14)
Text detected at: (758