In [4]:
import cv2
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding to binarize the image
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
    
    # Apply morphological operations to remove noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    
    # Perform layout analysis to identify text, tables, and forms
    analyze_layout(morphed)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, morphed)
    
def analyze_layout(image):
    # Find contours
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        
        # Check aspect ratio to identify potential text blocks
        aspect_ratio = w / h
        if aspect_ratio > 0.5 and aspect_ratio < 20:
            print("Text block found at:", (x, y), "Size:", (w, h))
        else:
            # Check if the contour area is larger than a certain threshold
            contour_area = cv2.contourArea(contour)
            if contour_area > 1000:
                # Check if the contour is roughly rectangular
                perimeter = cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
                if len(approx) == 4:
                    # This contour may represent a table or form
                    print("Potential table or form found at:", (x, y), "Size:", (w, h))

# Example usage
pdf_path = "C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text block found at: (858, 956) Size: (18, 5)
Text block found at: (767, 956) Size: (5, 5)
Text block found at: (647, 956) Size: (8, 6)
Text block found at: (877, 955) Size: (11, 7)
Text block found at: (843, 955) Size: (13, 8)
Text block found at: (810, 955) Size: (32, 8)
Text block found at: (782, 955) Size: (26, 7)
Text block found at: (773, 955) Size: (7, 6)
Text block found at: (722, 955) Size: (43, 8)
Text block found at: (658, 955) Size: (61, 8)
Text block found at: (604, 955) Size: (41, 6)
Text block found at: (583, 955) Size: (19, 6)
Text block found at: (536, 955) Size: (45, 6)
Text block found at: (512, 955) Size: (22, 7)
Text block found at: (479, 955) Size: (30, 8)
Text block found at: (435, 955) Size: (41, 6)
Text block found at: (414, 955) Size: (19, 6)
Text block found at: (391, 955) Size: (20, 6)
Text block found at: (345, 955) Size: (44, 6)
Text block found at: (320, 955) Size: (22, 7)
Text block found at: (

In [6]:
import cv2
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, gray)
    
# Example usage
pdf_path ="C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Page 7 converted to C:/Users/Shreshtha/Downloads//page_7.png


In [7]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply contrast enhancement
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_gray = clahe.apply(gray)
    
    # Apply edge enhancement
    edges = cv2.Canny(enhanced_gray, 50, 150)
    
    # Combine original grayscale image with edges
    enhanced_image = cv2.addWeighted(gray, 0.5, edges, 0.5, 0)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, enhanced_image)
    
# Example usage
pdf_path ="C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Page 7 converted to C:/Users/Shreshtha/Downloads//page_7.png


In [8]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply contrast enhancement
    enhanced_image = enhance_contrast(image)
    
    # Apply edge enhancement
    enhanced_image = enhance_edges(enhanced_image)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, enhanced_image)
    
def enhance_contrast(image):
    # Convert the image to LAB color space
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    
    # Split the LAB image into channels
    l, a, b = cv2.split(lab)
    
    # Apply CLAHE to the L channel
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_l = clahe.apply(l)
    
    # Merge the enhanced L channel with the original A and B channels
    enhanced_lab = cv2.merge((enhanced_l, a, b))
    
    # Convert the LAB image back to BGR color space
    enhanced_image = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
    
    return enhanced_image
    
def enhance_edges(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply edge enhancement using the Canny edge detector
    edges = cv2.Canny(gray, 50, 150)
    
    # Create a mask to retain only the edges
    mask = np.zeros_like(image)
    mask[edges != 0] = image[edges != 0]
    
    # Combine the original image with the edges
    enhanced_image = cv2.addWeighted(image, 0.5, mask, 0.5, 0)
    
    return enhanced_image
    
# Example usage
pdf_path ="C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Page 7 converted to C:/Users/Shreshtha/Downloads//page_7.png


In [9]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply contrast enhancement
    enhanced_image = enhance_contrast(image)
    
    # Apply edge enhancement
    enhanced_image = enhance_edges(enhanced_image)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, enhanced_image)
    
def enhance_contrast(image):
    # Convert the image to LAB color space
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    
    # Split the LAB image into channels
    l, a, b = cv2.split(lab)
    
    # Apply CLAHE to the L channel
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))  # Adjust clipLimit for more contrast
    enhanced_l = clahe.apply(l)
    
    # Merge the enhanced L channel with the original A and B channels
    enhanced_lab = cv2.merge((enhanced_l, a, b))
    
    # Convert the LAB image back to BGR color space
    enhanced_image = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
    
    return enhanced_image
    
def enhance_edges(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply edge enhancement using the Canny edge detector
    edges = cv2.Canny(gray, 100, 200)  # Adjust threshold values for more/less edges
    
    # Create a mask to retain only the edges
    mask = np.zeros_like(image)
    mask[edges != 0] = image[edges != 0]
    
    # Combine the original image with the edges
    enhanced_image = cv2.addWeighted(image, 0.7, mask, 0.3, 0)  # Adjust weights for better visibility
    
    return enhanced_image
    
# Example usage
pdf_path ="C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Page 7 converted to C:/Users/Shreshtha/Downloads//page_7.png


In [10]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply contrast enhancement
    enhanced_image = enhance_contrast(image)
    
    # Apply edge enhancement
    enhanced_image = enhance_edges(enhanced_image)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, enhanced_image)
    
def enhance_contrast(image):
    # Convert the image to LAB color space
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    
    # Split the LAB image into channels
    l, a, b = cv2.split(lab)
    
    # Apply CLAHE to the L channel
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_l = clahe.apply(l)
    
    # Merge the enhanced L channel with the original A and B channels
    enhanced_lab = cv2.merge((enhanced_l, a, b))
    
    # Convert the LAB image back to BGR color space
    enhanced_image = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
    
    # Ensure that white areas remain white
    mask = cv2.threshold(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 240, 255, cv2.THRESH_BINARY)[1]
    enhanced_image[np.where(mask == 255)] = image[np.where(mask == 255)]
    
    return enhanced_image
    
def enhance_edges(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply edge enhancement using the Canny edge detector
    edges = cv2.Canny(gray, 100, 200)
    
    # Create a mask to retain only the edges
    mask = np.zeros_like(image)
    mask[edges != 0] = image[edges != 0]
    
    # Combine the original image with the edges
    enhanced_image = cv2.addWeighted(image, 0.7, mask, 0.3, 0)
    
    return enhanced_image
    
# Example usage
pdf_path ="C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Page 2 converted to C:/Users/Shreshtha/Downloads//page_2.png
Page 3 converted to C:/Users/Shreshtha/Downloads//page_3.png
Page 4 converted to C:/Users/Shreshtha/Downloads//page_4.png
Page 5 converted to C:/Users/Shreshtha/Downloads//page_5.png
Page 6 converted to C:/Users/Shreshtha/Downloads//page_6.png
Page 7 converted to C:/Users/Shreshtha/Downloads//page_7.png


In [None]:
import cv2
import numpy as np
import fitz

def convert_pdf_to_images(pdf_path, output_folder):
    # Open the PDF
    pdf_document = fitz.open(pdf_path)
    
    # Iterate through each page
    for page_number in range(pdf_document.page_count):
        # Get the page
        page = pdf_document.load_page(page_number)
        
        # Convert the page to a PIL image
        image = page.get_pixmap()
        
        # Save the image
        image_path = f"{output_folder}/page_{page_number + 1}.png"
        image.save(image_path)
        
        print(f"Page {page_number + 1} converted to {image_path}")
        
        # Preprocess the image
        preprocess_image(image_path)
        
        # Perform layout analysis
        analyze_layout(image_path)
        
def preprocess_image(image_path):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Apply contrast enhancement
    enhanced_image = enhance_contrast(image)
    
    # Apply edge enhancement
    enhanced_image = enhance_edges(enhanced_image)
    
    # Save the preprocessed image
    cv2.imwrite(image_path, enhanced_image)
    
def enhance_contrast(image):
    # Convert the image to LAB color space
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    
    # Split the LAB image into channels
    l, a, b = cv2.split(lab)
    
    # Apply CLAHE to the L channel
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_l = clahe.apply(l)
    
    # Merge the enhanced L channel with the original A and B channels
    enhanced_lab = cv2.merge((enhanced_l, a, b))
    
    # Convert the LAB image back to BGR color space
    enhanced_image = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
    
    # Ensure that white areas remain white
    mask = cv2.threshold(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 240, 255, cv2.THRESH_BINARY)[1]
    enhanced_image[np.where(mask == 255)] = image[np.where(mask == 255)]
    
    return enhanced_image
    
def enhance_edges(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply edge enhancement using the Canny edge detector
    edges = cv2.Canny(gray, 100, 200)
    
    # Create a mask to retain only the edges
    mask = np.zeros_like(image)
    mask[edges != 0] = image[edges != 0]
    
    # Combine the original image with the edges
    enhanced_image = cv2.addWeighted(image, 0.7, mask, 0.3, 0)
    
    return enhanced_image
    
def analyze_layout(image_path):
    # Read the preprocessed image
    image = cv2.imread(image_path)
    
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply thresholding to binarize the image
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    
    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Iterate through contours
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)
        
        # If contour area is small, skip
        if area < 100:
            continue
        
        # Calculate contour perimeter
        perimeter = cv2.arcLength(contour, True)
        
        # Approximate polygonal curves
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        
        # Get bounding rectangle
        x, y, w, h = cv2.boundingRect(approx)
        
        # Draw bounding rectangle (for visualization)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        
        # Classify contour based on aspect ratio
        aspect_ratio = w / h
        
        if 0.5 < aspect_ratio < 20:
            print("Text detected at:", (x, y), "Size:", (w, h))
            # You can further process the text region here
        elif 0.1 < aspect_ratio < 5:
            print("Potential table or form detected at:", (x, y), "Size:", (w, h))
            # You can further process the table or form region here
    
    # Display the image with contours
    cv2.imshow("Layout Analysis", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
# Example usage
pdf_path ="C:/Users/Shreshtha/Labelled_MNS_Sample.pdf"
output_folder = "C:/Users/Shreshtha/Downloads/"
convert_pdf_to_images(pdf_path, output_folder)


Page 1 converted to C:/Users/Shreshtha/Downloads//page_1.png
Text detected at: (21, 21) Size: (891, 926)
