In [36]:
from PIL import Image
import os
import cv2
import os
import numpy as np

In [40]:
# Bring every document image to the WA image dimensions mentioned in the paper

# Folder holding the original images, and the folder receiving the resized copies
input_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/doc_images"
output_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/WA_images_rgb"

# Create the destination folder if it is not there yet
os.makedirs(output_folder, exist_ok=True)

# Size handed to Image.resize (value specified in the paper)
target_size = (780, 1040)

for filename in os.listdir(input_folder):
    # Ignore anything that does not carry a known image extension
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        continue

    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)

    try:
        # The context manager closes the source file once the resized copy is written
        with Image.open(input_path) as img:
            img.resize(target_size).save(output_path)
            print(f"Resized and saved: {filename}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next file
        print(f"Error processing {filename}: {e}")

print("Image resizing completed to match WA image dimensions!")


Resized and saved: doc1.jpeg
Resized and saved: doc2.jpeg
Resized and saved: doc3.jpeg
Image resizing completed to match WA image dimensions!


In [44]:
# Convert images to grayscale as mentioned in the paper
import cv2
import os

# Define the input and output folders
input_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/WA_images_rgb"  # Path to the resized RGB images
output_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/WA_images_grayscale"  # Path to grayscale images

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Process each image in the input folder
for filename in os.listdir(input_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):  # Check for valid image extensions
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        try:
            # cv2.imread returns None (it does not raise) when the file cannot be decoded
            img = cv2.imread(input_path)
            if img is None:
                print(f"Skipping invalid image: {filename}")
                continue

            # Convert the image to grayscale (cv2.imread delivers BGR channel order)
            grayscale_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # cv2.imwrite reports a failed write through its return value rather than
            # an exception, so check it before claiming success
            if not cv2.imwrite(output_path, grayscale_img):
                print(f"Error processing {filename}: could not write {output_path}")
                continue
            print(f"Converted to grayscale and saved: {filename}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next file
            print(f"Error processing {filename}: {e}")

print("Image conversion to grayscale completed!")


Converted to grayscale and saved: doc1.jpeg
Converted to grayscale and saved: doc2.jpeg
Converted to grayscale and saved: doc3.jpeg
Image conversion to grayscale completed!


In [58]:
# Apply the guided filter to the grayscale images as mentioned in the paper


def _box_sum_zero_border(values, radius):
    """Sum `values` over each pixel's (2*radius+1) x (2*radius+1) window; positions outside the array count as 0."""
    h, w = values.shape
    size = 2 * radius + 1
    padded = np.pad(values, radius, mode="constant")

    # Separable box sum: accumulate the window rows first, then the window columns
    row_sums = np.zeros((h, w + 2 * radius), dtype=values.dtype)
    for offset in range(size):
        row_sums += padded[offset:offset + h, :]
    window_sums = np.zeros((h, w), dtype=values.dtype)
    for offset in range(size):
        window_sums += row_sums[:, offset:offset + w]
    return window_sums


def _mean_over_full_windows(coeff, radius):
    """Average `coeff` over the windows that contain each pixel and lie entirely inside the image.

    Requires both image sides to be at least 2*radius+1 so every pixel is covered by at least one such window.
    """
    h, w = coeff.shape
    # Centres of the windows that do not touch the border
    inner = (slice(radius, h - radius), slice(radius, w - radius))

    kept = np.zeros((h, w), dtype=np.float32)
    kept[inner] = coeff[inner]
    is_full = np.zeros((h, w), dtype=np.float32)
    is_full[inner] = 1.0

    # Sum of the kept coefficients divided by how many full windows cover each pixel
    return _box_sum_zero_border(kept, radius) / _box_sum_zero_border(is_full, radius)


def guided_filter(image, radius=1, epsilon=1e-6):
    """
    Apply a guided filter to the input image, using its Otsu-binarised version as the guide.

    Args:
        image: Grayscale image (2D numpy array) with values on the 0-255 scale.
        radius: Integer radius of the local window (default: 1 for 3x3 neighborhood).
        epsilon: Regularization parameter to stabilize `a_k` (default: 1e-6).

    Returns:
        Grayscale filtered image (guided filtered version) as a uint8 array with the
        same height and width as `image`.

    Raises:
        ValueError: If `image` is not 2-dimensional or `radius` is negative.
    """
    image = np.asarray(image)
    if image.ndim != 2:
        raise ValueError(f"guided_filter expects a 2D grayscale image, got shape {image.shape}")
    if radius < 0:
        raise ValueError(f"radius must be non-negative, got {radius}")

    # Otsu needs an 8-bit image. Threshold the original pixel values instead of a
    # float round trip (x / 255 * 255 then truncation), which can shift a level by one.
    if image.dtype == np.uint8:
        image_u8 = image
    else:
        image_u8 = np.clip(np.rint(image), 0, 255).astype(np.uint8)

    # Convert to float for calculations
    image = image.astype(np.float32) / 255.0
    h, w = image.shape  # height and width of the image

    # Create the ideal binary image using Otsu's threshold
    _, I_ideal = cv2.threshold(np.ascontiguousarray(image_u8), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    I_ideal = I_ideal.astype(np.float32) / 255.0

    # Define kernel for filtering
    kernel = np.ones((2 * radius + 1, 2 * radius + 1), dtype=np.float32)

    # Compute mean and variance in the local window
    mean_I = cv2.filter2D(image, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()
    mean_I_ideal = cv2.filter2D(I_ideal, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()

    mean_I_ideal_squared = cv2.filter2D(I_ideal * I_ideal, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()
    var_I_ideal = mean_I_ideal_squared - mean_I_ideal**2

    cov_I_Ideal = cv2.filter2D(image * I_ideal, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum() - mean_I * mean_I_ideal

    # Compute coefficients a_k and b_k
    a_k = cov_I_Ideal / (var_I_ideal + epsilon)
    b_k = mean_I - a_k * mean_I_ideal

    # Compute mean of a_k and b_k
    window = 2 * radius + 1
    if h >= window and w >= window:
        # Average only over windows lying fully inside the image, so border pixels are
        # not influenced by coefficients estimated from reflected (padded) data.
        # This follows `radius` and is vectorised instead of a per-pixel Python loop.
        mean_a_k = _mean_over_full_windows(a_k, radius)
        mean_b_k = _mean_over_full_windows(b_k, radius)
    else:
        # The image is smaller than one window, so no window fits inside it;
        # fall back to the plain reflect-border box mean.
        mean_a_k = cv2.filter2D(a_k, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()
        mean_b_k = cv2.filter2D(b_k, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()

    # Compute final guided filtered image
    I_GF = mean_a_k * I_ideal + mean_b_k

    # Scale back to 8-bit range for saving; round and clip so tiny float errors or
    # overshoot cannot truncate a level or wrap around in the uint8 cast
    return np.clip(np.rint(I_GF * 255.0), 0, 255).astype(np.uint8)


In [63]:
def process_images(input_folder, output_folder, radius=1, epsilon=1e-6):
    """
    Process all images in the input folder, apply guided filtering, and save to the output folder.

    Files are picked by extension (.png, .jpg, .jpeg, .bmp, .tiff) and written to
    `output_folder` under the same file name. Files that cannot be decoded or
    written are reported on stdout and skipped.

    Args:
        input_folder: Path to the folder containing input grayscale images.
        output_folder: Path to the folder where filtered images will be saved.
        radius: Radius of the local window for guided filtering.
        epsilon: Regularization parameter for guided filtering.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Process each image in the input folder
    for filename in os.listdir(input_folder):
        # Check for valid image files
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
            continue

        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # Load the image; cv2.imread returns None (it does not raise) on undecodable files
        image = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Skipping invalid image: {filename}")
            continue

        # Apply guided filter (cv2.imread already yields a numpy array)
        filtered_image = guided_filter(image, radius=radius, epsilon=epsilon)

        # cv2.imwrite reports a failed write through its return value rather than
        # an exception, so check it before claiming success
        if not cv2.imwrite(output_path, filtered_image):
            print(f"Failed to save: {filename}")
            continue
        print(f"Processed and saved: {filename}")

In [64]:
# Input and output directories for this guided-filter run
input_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/data/train/doc1/original"
output_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/data/train/doc1/guided_patch_ideal_corrected"
# Alternative pair of directories (kept disabled):
# input_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/WA_images_grayscale"
# output_folder = "/Users/hemantpanchariya/Desktop/IIT_RPR/khanna_implementation/guided_images"

process_images(
    input_folder=input_folder,
    output_folder=output_folder,
    radius=1,
    epsilon=1e-6,
)

(18, 18)


ValueError: operands could not be broadcast together with shapes (5,5) (18,18) 

In [35]:
# just for testing purpose
# Has nothing to do with main code

import numpy as np
import cv2

# Fixed 5x5 float32 sample holding whole numbers in [0, 5]
# (random alternative: np.random.randint(0, 6, size=(5, 5)).astype(np.float32))
image = np.array([[5., 3., 1., 2., 4.],
                  [4., 2., 2., 1., 2.],
                  [4., 4., 2., 5., 1.],
                  [0., 4., 2., 0., 4.],
                  [3., 0., 4., 3., 5.]], dtype=np.float32)

# Copy of the sample with a one-pixel reflected border on every side
ref_image = cv2.copyMakeBorder(image, 1, 1, 1, 1, cv2.BORDER_REFLECT)

h, w = image.shape

# 3x3 box kernel (radius 1)
radius = 1
kernel = np.ones((2 * radius + 1, 2 * radius + 1), dtype=np.float32)

# First pass: local mean of the sample; second pass: local mean of that mean
mean_1 = cv2.filter2D(image, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()
mean_2 = cv2.filter2D(mean_1, -1, kernel, borderType=cv2.BORDER_REFLECT) / kernel.sum()

print(image.shape, mean_1.shape, mean_2.shape, sep="\n")
print("Original Image:")
print(image)

print("\nMean1:")
print(mean_1)

print("\nMean2:")
print(mean_2)

# Per-axis divisors used for each pixel: 1 and 2 next to the borders, 3 in between
num_elements_w = [1, 2] + [3] * (w - 4) + [2, 1]
num_elements_h = [1, 2] + [3] * (h - 4) + [2, 1]

# Recompute mean_2 in place from mean_1, using only 3x3 windows that lie fully
# inside the image (window top-left corner in 0..h-3 / 0..w-3)
for i in range(h):
    rows = range(max(i - 2, 0), min(i, h - 3) + 1)
    for j in range(w):
        cols = range(max(j - 2, 0), min(j, w - 3) + 1)
        # Add up mean_1 at the centre of every such window that covers (i, j)
        sum1 = sum(mean_1[_i + 1][_j + 1] for _i in rows for _j in cols)
        mean_2[i][j] = sum1 / (num_elements_h[i] * num_elements_w[j])

print("\nCorrected Mean2:")
print(mean_2)



(5, 5)
(5, 5)
(5, 5)
Original Image:
[[5. 3. 1. 2. 4.]
 [4. 2. 2. 1. 2.]
 [4. 4. 2. 5. 1.]
 [0. 4. 2. 0. 4.]
 [3. 0. 4. 3. 5.]]

Mean1:
[[4.        2.8888888 1.8888888 2.1111112 2.7777777]
 [3.8888888 3.        2.4444444 2.2222223 2.4444444]
 [2.8888888 2.6666667 2.4444444 2.1111112 2.2222223]
 [2.4444444 2.5555556 2.6666667 2.8888888 3.1111112]
 [1.7777778 2.2222223 2.2222223 3.3333333 3.7777777]]

Mean2:
[[3.617284  2.9876544 2.382716  2.2962961 2.493827 ]
 [3.3456793 2.9012346 2.419753  2.2962964 2.3703701]
 [2.9629629 2.7777777 2.5555556 2.5061731 2.5308642]
 [2.4074075 2.4320989 2.5679014 2.7530866 2.9506176]
 [2.1111112 2.2345679 2.6296296 3.037037  3.4320989]]

Corrected Mean2:
[[3.        2.7222223 2.5555556 2.3333335 2.2222223]
 [2.8333335 2.638889  2.4814816 2.3055558 2.1666667]
 [2.7407408 2.6296299 2.5555556 2.462963  2.4074075]
 [2.6111112 2.5833335 2.5555556 2.5277777 2.5      ]
 [2.5555556 2.6111112 2.7037036 2.7777777 2.8888888]]
