In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

# Load the image
# NOTE(review): cv2.imread returns None instead of raising when the file
# cannot be read; the cvtColor call below would then fail — confirm "2.png" exists.
img = cv2.imread("2.png")

# Convert the image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)

# Apply Canny edge detection (hysteresis thresholds 100 and 200)
edges = cv2.Canny(gray, 100, 200)

# Display the original image and the edge image
cv2.imshow("Original Image", img)
cv2.imshow("Edge Image", edges)

# Wait for a key press and close the windows
# (waitKey(0) waits indefinitely, so this cell does not finish until a key is pressed)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [2]:
import cv2
import numpy as np

def inpaint_text(img_path, remove_list, pipeline):
    """Erase selected OCR-detected words from an image by inpainting.

    Args:
        img_path: Path of the image file to process.
        remove_list: Collection of words to erase. A detection is erased only
            when its recognised text is a member of this collection.
        pipeline: Object whose ``recognize([image])`` returns, per image, a
            list of ``(word, box)`` tuples where ``box`` holds four (x, y)
            corners (presumably a ``keras_ocr.pipeline.Pipeline``).

    Returns:
        The inpainted image in the channel order produced by ``cv2.imread``.
        When no detection matches ``remove_list`` the image is returned
        unchanged.

    Raises:
        FileNotFoundError: If ``img_path`` cannot be read by OpenCV.
    """
    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # prediction_groups[0] is the list of (word, box) tuples for our one image
    # NOTE(review): the image is passed in cv2.imread channel order (BGR);
    # confirm whether the pipeline expects RGB input.
    prediction_groups = pipeline.recognize([img])
    # draw the image with annotations and boxes
    keras_ocr.tools.drawAnnotations(image=img, predictions=prediction_groups[0])

    # Accumulate every matching box in ONE mask and inpaint once afterwards.
    # Rebuilding the mask and inpainting the original image per box (as the
    # previous version did) keeps only the last matching word erased.
    mask = np.zeros(img.shape[:2], dtype="uint8")
    matched = False
    for word, box in prediction_groups[0]:
        if word not in remove_list:
            continue
        (x0, y0), (x1, y1), (x2, y2), (x3, y3) = box
        # Midpoints of the sides joining corners 1-2 and corners 0-3: a thick
        # line drawn between them covers the whole box.
        start = (int((x1 + x2) / 2), int((y1 + y2) / 2))
        end = (int((x0 + x3) / 2), int((y0 + y3) / 2))
        # Line thickness = length of side 1-2; cv2.line rejects thickness 0.
        thickness = max(1, int(np.hypot(x2 - x1, y2 - y1)))
        cv2.line(mask, start, end, 255, thickness)
        matched = True

    if not matched:
        # Nothing to erase: previously this path raised UnboundLocalError.
        return img

    # inpaint the image
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

In [3]:
import skimage.feature

def find_edges(img_path):
    """Run scikit-image Canny edge detection on an image and report the result.

    The grayscale source and the boolean edge map are shown side by side, the
    number of edge pixels is printed, and the same number is returned.

    Args:
        img_path: Path of the image file to analyse.

    Returns:
        int: Sum of the boolean edge map, i.e. the number of edge pixels.

    Raises:
        FileNotFoundError: If ``img_path`` cannot be read by OpenCV.
    """
    # load the image as grayscale; cv2.imread returns None on failure
    image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # apply Canny edge detection
    # NOTE(review): `image` is uint8 here; check the scikit-image docs for how
    # low_threshold/high_threshold are scaled for integer input — 0.1 / 0.5
    # may be far lower than intended on a 0-255 image.
    edges = skimage.feature.canny(
        image=image,
        sigma=2,
        low_threshold=0.1,
        high_threshold=0.5
    )

    # display source and edges on separate axes; drawing both on the same
    # axes (as before) hid the source image behind the edge map
    _, (ax_source, ax_edges) = plt.subplots(1, 2)
    ax_source.imshow(image, cmap='gray')
    ax_edges.imshow(edges, cmap='gray')
    plt.show()

    # count edges (each True pixel of the edge map counts as one)
    edge_count = int(np.sum(edges))
    print(f"Number of edges: {edge_count}")
    return edge_count

ModuleNotFoundError: No module named 'skimage'

In [5]:
import keras_ocr

# Words to erase: the ten single-digit strings.
# NOTE(review): the three-argument inpaint_text compares whole recognised words
# against this list, so multi-digit numbers such as "12" would not match — confirm intent.
remove_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# define pipeline
pipeline = keras_ocr.pipeline.Pipeline()

# read image
# NOTE(review): `img` is not used below; inpaint_text is given the path instead.
img = cv2.imread('2.png')

# remove numbers from the image
img_inpainted = inpaint_text('2.png', remove_list, pipeline)

# save the image without numbers
cv2.imwrite('2_no_numbers.png', img_inpainted)

In [None]:
# find edges in the image written by the number-removal cell
# (requires that cell to have run first so '2_no_numbers.png' exists)
find_edges('2_no_numbers.png')

In [8]:
import cv2
import keras_ocr
import numpy as np

# Define the list of numbers to remove
# NOTE(review): matching is on the whole recognised word, so only single-digit
# words are removed — confirm whether multi-digit numbers should match too.
remove_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# Define the image path
image_path = 'image.png'

# Define the pipeline
pipeline = keras_ocr.pipeline.Pipeline()

# Recognize text in the image; element [0] is the list of (word, box)
# predictions for our single input image
prediction_groups = pipeline.recognize([image_path])[0]

# Load the image that will be inpainted (cv2.imread returns None on failure)
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
height, width, _ = image.shape

# cv2.inpaint needs an 8-bit single-channel mask whose non-zero pixels mark
# the area to repair
mask = np.zeros((height, width), dtype=np.uint8)
# Copy of the input used only to visualise which boxes were selected
annotated_image = image.copy()

# Loop through the recognized words and mark the matching boxes
for text, box in prediction_groups:
    if text in remove_list:
        # Each box is four (x, y) corners, not (x, y, w, h); the OpenCV
        # drawing functions need integer coordinates
        polygon = np.round(np.asarray(box)).astype(np.int32)
        # Draw a red outline on the annotated copy
        cv2.polylines(annotated_image, [polygon], True, (0, 0, 255), 1)
        # Mark the box area for inpainting
        cv2.fillPoly(mask, [polygon], 255)

# Inpaint the masked regions of the input image
inpainted_image = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)

# Save the inpainted image
cv2.imwrite('inpainted_image.png', inpainted_image)

In [9]:
import cv2
import keras_ocr
import numpy as np
from keras_ocr.tools import read, drawAnnotations

# Define the list of numbers to remove
# NOTE(review): matching is on the whole recognised word, so only single-digit
# words are removed — confirm whether multi-digit numbers should match too.
remove_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# Define the image path
image_path = '2.png'

# Define the pipeline
pipeline = keras_ocr.pipeline.Pipeline()

# Recognize text in the image; element [0] is the list of (word, box)
# predictions for our single input image
prediction_groups = pipeline.recognize([image_path])[0]

# Load the image that will be inpainted (cv2.imread returns None on failure)
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
height, width, _ = image.shape

# cv2.inpaint needs an 8-bit single-channel mask whose non-zero pixels mark
# the area to repair; the previous all-zero 3-channel mask on a blank canvas
# could not remove anything
mask = np.zeros((height, width), dtype=np.uint8)

# Loop through the recognized words and mark the matching boxes
for text, box in prediction_groups:
    if text in remove_list:
        # Each box is four (x, y) corners, not (x, y, w, h); fillPoly needs
        # integer coordinates
        polygon = np.round(np.asarray(box)).astype(np.int32)
        cv2.fillPoly(mask, [polygon], 255)

# Inpaint the masked regions of the input image
inpainted_image = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)

# Save the inpainted image
cv2.imwrite('2_no_numbers.png', inpainted_image)

In [11]:
# Another approach

In [12]:
import matplotlib.pyplot as plt
import keras_ocr
import cv2
import math
import numpy as np

#General Approach.....
#Use keras OCR to detect text, define a mask around the text, and inpaint the
#masked regions to remove the text.
#To apply the mask we need to provide the coordinates of the starting and 
#the ending points of the line, and the thickness of the line

#The start point will be the mid-point between the top-left corner and 
#the bottom-left corner of the box. 
#the end point will be the mid-point between the top-right corner and the bottom-right corner.
#The following function does exactly that.
def midpoint(x1, y1, x2, y2):
    """Return the midpoint of (x1, y1)-(x2, y2) as a tuple of ints.

    Each coordinate is averaged and then truncated toward zero by ``int``.
    """
    return tuple(int(total / 2) for total in (x1 + x2, y1 + y2))

#Main function that detects text and inpaints.
#Inputs are the image path and keras_ocr pipeline
def inpaint_text(img_path, pipeline):
    """Detect all text in an image and erase it by inpainting.

    Args:
        img_path: Path of the image, loaded with ``keras_ocr.tools.read``.
        pipeline: Object whose ``recognize([image])`` returns, per image, a
            list of ``(word, box)`` tuples where ``box`` holds four (x, y)
            corners (presumably a ``keras_ocr.pipeline.Pipeline``).

    Returns:
        The inpainted image, or the loaded image unchanged when no text was
        detected.
    """
    # read the image
    img = keras_ocr.tools.read(img_path)

    # Recognize text (and corresponding regions)
    # Each list of predictions in prediction_groups is a list of
    # (word, box) tuples.
    prediction_groups = pipeline.recognize([img])
    predictions = prediction_groups[0]

    # Nothing detected: previously this path raised UnboundLocalError
    # because the result was only assigned inside the loop.
    if not predictions:
        return img

    #Define the mask for inpainting
    mask = np.zeros(img.shape[:2], dtype="uint8")
    for box in predictions:
        x0, y0 = box[1][0]
        x1, y1 = box[1][1]
        x2, y2 = box[1][2]
        x3, y3 = box[1][3]

        # End points of the mask line: midpoints of the sides joining
        # corners 1-2 and corners 0-3
        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)

        # Line thickness is the length of the side joining corners 1 and 2,
        # so the line covers the whole box; cv2.line rejects thickness 0.
        thickness = max(1, int(math.sqrt((x2 - x1)**2 + (y2 - y1)**2)))

        #Add this box to the mask
        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)

    # Inpaint once, after the mask holds every box. Inpainting inside the loop
    # repeated the work per box and discarded all but the last result.
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

# keras-ocr will automatically download pretrained
# weights for the detector and recognizer.
pipeline = keras_ocr.pipeline.Pipeline()

img_text_removed = inpaint_text('2.png', pipeline)

plt.imshow(img_text_removed)

# WARNING: this writes to '2.png', the same file that was read above, so the
# original input is overwritten and re-running the cell processes the
# already-inpainted image.
# NOTE(review): the conversion swaps the first and third channels before
# saving; presumably the image is RGB here and is being put into OpenCV's
# BGR order (COLOR_RGB2BGR would state that more clearly) — confirm.
cv2.imwrite('2.png', cv2.cvtColor(img_text_removed, cv2.COLOR_BGR2RGB))

In [20]:
import cv2
import numpy as np

# Load the image as single-channel grayscale (flag 0)
image = cv2.imread('2.png', 0)

# Inverse binary threshold: pixels above 150 become 0, all others become 255
_, thresh = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY_INV)

# Find contours (outermost only, via RETR_EXTERNAL)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Create an all-white image
output = np.ones_like(image) * 255

for contour in contours:
    # Calculate the area of the contour to identify numbers and lines separately based on their size
    area = cv2.contourArea(contour)
    
    # Small contours are assumed to be numbers and are skipped
    if area < 500:  # This threshold value may need adjustment depending on the specific image and text size.
        continue
    
    # Draw detected contours (lines) on the white background as 1-pixel black outlines
    cv2.drawContours(output, [contour], -1, (0), 1)

# Show the result; waitKey(0) waits until a key is pressed
cv2.imshow('Processed Image', output)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [15]:
# Save `output` as left in the kernel by whichever contour cell ran last
cv2.imwrite('processed_image.png', output)

In [56]:
# Getting an output below

In [9]:
import cv2
import numpy as np

# Load the image as single-channel grayscale (flag 0)
image = cv2.imread('3.png', 0)

# Thresholding to get a binary image
# NOTE(review): maxval is 30 here, so "on" pixels are 30 rather than 255 —
# confirm this is intentional; the other cells use 255.
_, thresh = cv2.threshold(image, 20, 30, cv2.THRESH_BINARY_INV)
# Threshold values that worked per image — 4.png:46-49, 3.png :20, 2.png:25

# Find contours (outermost only, via RETR_EXTERNAL)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


# # Perform erosion and dilation to remove noise and fill gaps
# kernel = np.ones((100,10), np.uint8)
# thresh = cv2.erode(thresh, kernel, iterations=50)
# thresh = cv2.dilate(thresh, kernel, iterations=50)


# Create an all-white image
output = np.ones_like(image) * 255

for contour in contours:
    # Calculate the area of the contour to identify numbers and lines separately based on their size
    area = cv2.contourArea(contour)
    
    # Small contours are assumed to be numbers and are skipped
    if area < 30:  # This threshold value may need adjustment depending on the specific image and text size.
        continue
    
    # Draw detected contours (lines) on the white background as 1-pixel black outlines
    cv2.drawContours(output, [contour], -1, (0), 1)

# Show the result; waitKey(0) waits until a key is pressed
cv2.imshow('Processed Image', output)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [57]:
import cv2
import numpy as np

# Load the image as single-channel grayscale
image = cv2.imread('3.png', cv2.IMREAD_GRAYSCALE)

# Threshold the image to create a binary mask
# (inverse threshold: pixels above 10 become 0, all others become 255)
_, binary_mask = cv2.threshold(image, 10, 255, cv2.THRESH_BINARY_INV)

# #adaptive threshold
# adaptive_thresh = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 17, 7)

# Find contours in the binary mask (outermost only, via RETR_EXTERNAL)
contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Create an all-white image
output = np.ones_like(image) * 255

# Draw the detected contours (lines) on the output image
# (every contour is drawn, in black with thickness 2; no size filtering here)
for contour in contours:
    cv2.drawContours(output, [contour], -1, (0), 2)

# Display the output image; waitKey(0) waits until a key is pressed
cv2.imshow('Extracted Lines', output)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [35]:
import matplotlib.pyplot as plt
import keras_ocr

# Load the Keras-OCR pipeline
pipeline = keras_ocr.pipeline.Pipeline()

# Read the image
image_path = '2.png'
image = keras_ocr.tools.read(image_path)

# Perform OCR on the image; recognize() returns one list of (word, box)
# predictions per input image, so result[0] belongs to our single image
result = pipeline.recognize([image])
predictions = result[0]

# Extract the recognized text: every detected word, in detection order.
# (result[0][0] was a single (word, box) tuple and raised IndexError when
# nothing was detected.)
recognized_text = [word for word, _ in predictions]

# Display the recognized text
print("Recognized Text:")
print(recognized_text)

# Display the image with bounding boxes around detected text.
# drawAnnotations expects the (word, box) list for ONE image, not the outer
# per-image list.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
keras_ocr.tools.drawAnnotations(image=image, predictions=predictions, ax=ax)
plt.show()


In [36]:
import matplotlib.pyplot as plt
import keras_ocr
import cv2
import math
import numpy as np

#General Approach.....
#Use keras OCR to detect text, define a mask around the text, and inpaint the
#masked regions to remove the text.
#To apply the mask we need to provide the coordinates of the starting and 
#the ending points of the line, and the thickness of the line

#The start point will be the mid-point between the top-left corner and 
#the bottom-left corner of the box. 
#the end point will be the mid-point between the top-right corner and the bottom-right corner.
#The following function does exactly that.
def midpoint(x1, y1, x2, y2):
    """Average two points coordinate-wise and truncate each result to int.

    Returns the pair ``(x_mid, y_mid)`` as a tuple.
    """
    centre_x = (x1 + x2) / 2
    centre_y = (y1 + y2) / 2
    return int(centre_x), int(centre_y)

#Main function that detects text and inpaints.
#Inputs are the image path and keras_ocr pipeline
def inpaint_text(img_path, pipeline):
    """Detect all text in an image and erase it by inpainting.

    Args:
        img_path: Path of the image, loaded with ``keras_ocr.tools.read``.
        pipeline: Object whose ``recognize([image])`` returns, per image, a
            list of ``(word, box)`` tuples where ``box`` holds four (x, y)
            corners (presumably a ``keras_ocr.pipeline.Pipeline``).

    Returns:
        The inpainted image, or the loaded image unchanged when no text was
        detected.
    """
    # read the image
    img = keras_ocr.tools.read(img_path)

    # Recognize text (and corresponding regions)
    # Each list of predictions in prediction_groups is a list of
    # (word, box) tuples.
    prediction_groups = pipeline.recognize([img])
    predictions = prediction_groups[0]

    # Nothing detected: previously this path raised UnboundLocalError
    # because the result was only assigned inside the loop.
    if not predictions:
        return img

    #Define the mask for inpainting
    mask = np.zeros(img.shape[:2], dtype="uint8")
    for box in predictions:
        x0, y0 = box[1][0]
        x1, y1 = box[1][1]
        x2, y2 = box[1][2]
        x3, y3 = box[1][3]

        # End points of the mask line: midpoints of the sides joining
        # corners 1-2 and corners 0-3
        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)

        # Line thickness is the length of the side joining corners 1 and 2,
        # so the line covers the whole box; cv2.line rejects thickness 0.
        thickness = max(1, int(math.sqrt((x2 - x1)**2 + (y2 - y1)**2)))

        #Add this box to the mask
        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)

    # Inpaint once, after the mask holds every box. Inpainting inside the loop
    # repeated the work per box and discarded all but the last result.
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

# keras-ocr will automatically download pretrained
# weights for the detector and recognizer.
pipeline = keras_ocr.pipeline.Pipeline()

img_text_removed = inpaint_text('2.png', pipeline)

plt.imshow(img_text_removed)

# Save the text-free image under a new name so the input '2.png' is kept.
# NOTE(review): the conversion swaps the first and third channels before
# saving; presumably the image is RGB here and is being put into OpenCV's
# BGR order (COLOR_RGB2BGR would state that more clearly) — confirm.
cv2.imwrite('no_nums2.png', cv2.cvtColor(img_text_removed, cv2.COLOR_BGR2RGB))

###### codedebugger

import cv2
import numpy as np

# Read image
img = cv2.imread('2-transformed.png')

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Gaussian Blur
# Blur this cell's own grayscale image. The previous version blurred `image`,
# which this cell never defines, so it depended on whatever an earlier cell
# had left in the kernel.
# NOTE(review): the threshold below still reads the unblurred `gray`, exactly
# as before; switch it to `image` if the blur was meant to take effect.
image = cv2.GaussianBlur(gray, (5, 5), 0)

# Threshold the image
# NOTE(review): threshold_value is not used; the call below hard-codes 5.
threshold_value = 25
thresh = cv2.threshold(gray, 5, 255, cv2.THRESH_BINARY_INV)[1]
#3.png : 23, 2.png : 155 - 160 , 

#removing noise (morphological opening with a 5x5 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Invert the image
thresh = cv2.bitwise_not(thresh)

# Find contours of the white regions
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Create a mask image with only the black regions
mask = np.zeros_like(gray)

# NOTE(review): the mask starts all-zero and the small contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `img`. Confirm the intended colour.
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area < 200: # Change this threshold value according to your image
        cv2.drawContours(mask, [cnt], -1, 0, -1)

# Invert the mask
mask = cv2.bitwise_not(mask)

# Apply the mask to the original image
result = cv2.bitwise_and(img, img, mask=mask)

# The window shows the inverted threshold image, not `result`
cv2.imshow('Extracted Lines', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save the image
# cv2.imwrite('new_image2.png', result)


## Getting a Hollow Image but smooth Internal Lines

In [60]:
# Applying gaussian blur to improve clarity

import cv2
import numpy as np

# Load the image as single-channel grayscale (flag 0)
image = cv2.imread('2.png', 0)

# Apply Gaussian blurring to reduce noise
image = cv2.GaussianBlur(image, (5, 5), 0)

# Applying Bilateral Filtering
# gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# gray = cv2.bilateralFilter(gray, 20, 15, 18)
# cv2.imshow('Post-Blur', gray)

# canny_image = cv2.Canny(gray, 23, 55)
# cv2.imshow('canny-image', canny_image)

# Apply a fixed (global) inverse threshold to get a binary image:
# pixels above 142 become 0, all others become 255
_, thresh = cv2.threshold(image, 142, 255, cv2.THRESH_BINARY_INV)
# Threshold values that worked per image — 3.png : 75, 2.png : 142-150

# Morphological opening with a 5x5 rectangular kernel to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Find contours (outermost only, via RETR_EXTERNAL)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Create an all-white image
output = np.ones_like(image) * 255

for contour in contours:
    # Calculate the area of the contour to identify numbers and lines separately based on their size
    area = cv2.contourArea(contour)
    
    # Small contours are assumed to be numbers and are skipped
    if area < 255:  # This threshold value may need adjustment depending on the specific image and text size.
        continue
    
    # Draw detected contours (lines) on the white background as 1-pixel black outlines
    cv2.drawContours(output, [contour], -1, (0), 1)

# Display the processed image; waitKey(0) waits until a key is pressed
cv2.imshow('Processed Image', output)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [623]:
# Output

In [55]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

# Load the image as single-channel grayscale (flag 0)
image = cv2.imread('3.png', 0)

# Apply Gaussian blurring to reduce noise
image = cv2.GaussianBlur(image, (5, 5), 0)

# Apply a fixed (global) inverse threshold to get a binary image:
# pixels above 50 become 0, all others become 255
_, thresh = cv2.threshold(image, 50 , 255, cv2.THRESH_BINARY_INV)

# Morphological opening with a 5x5 rectangular kernel to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Find contours (outermost only, via RETR_EXTERNAL)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Create an all-white image
output = np.ones_like(image) * 255

for contour in contours:
    # Calculate the area of the contour to identify numbers and lines separately based on their size
    area = cv2.contourArea(contour)
    
    # Small contours are assumed to be numbers and are skipped
    if area < 255:  # This threshold value may need adjustment depending on the specific image and text size.
        continue
    
    # Draw detected contours (lines) on the white background as 1-pixel black outlines
    cv2.drawContours(output, [contour], -1, 0, 1)

# Display the processed image; waitKey(0) waits until a key is pressed
cv2.imshow('Processed Image', output)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [54]:
import cv2 
import numpy as np

# Read image
img = cv2.imread('3.png')

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Gaussian Blur
# Blur this cell's own grayscale image. The previous version blurred `image`,
# which this cell never defines, so it depended on whatever an earlier cell
# had left in the kernel.
# NOTE(review): the threshold below still reads the unblurred `gray`, exactly
# as before; switch it to `image` if the blur was meant to take effect.
image = cv2.GaussianBlur(gray, (9, 7), 0)

# Threshold the image
# NOTE(review): threshold_value is not used; the call below hard-codes 18.
threshold_value = 25
thresh = cv2.threshold(gray, 18, 255, cv2.THRESH_BINARY_INV)[1]
#3.png : 23, 2.png : 155 - 160 , 

#removing noise
# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 2)) 
# thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

#Invert the image
thresh = cv2.bitwise_not(thresh)

#Find contours of the white regions
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#Create a mask image with only the black regions
mask = np.zeros_like(gray)

# NOTE(review): the mask starts all-zero and the small contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `img`. Confirm the intended colour.
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area < 200: # Change this threshold value according to your image
        cv2.drawContours(mask, [cnt], -1, 0, -1)

#Invert the mask
mask = cv2.bitwise_not(mask)

#Apply the mask to the original image
result = cv2.bitwise_and(img, img, mask=mask)

# The window shows the inverted threshold image, not `result`
cv2.imshow('Extracted Lines', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

#Save the image
# cv2.imwrite('new_image2.png', result)

In [53]:
import cv2 
import numpy as np

#Read image
img = cv2.imread('3.png')
cv2.imshow('Original', img)

#Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

#Gaussian Blur
# NOTE(review): the blurred colour image `image` is not used below; the
# threshold reads the unblurred `gray`.
image = cv2.GaussianBlur(img, (5, 5), 0)

#Threshold the image
# NOTE(review): threshold_value is not used; the call below hard-codes 23.
threshold_value = 25
thresh = cv2.threshold(gray, 23, 255, cv2.THRESH_BINARY_INV)[1] 
#3.png : 23, 2.png : 155 - 160 ,

#Find contours of the white regions
# (taken from the raw threshold image, before the opening and inversion below)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#cv2.imshow('Contours', contours)

#removing noise (morphological opening with a 5x5 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) 
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
cv2.imshow('NoNoise', thresh)

#Invert the image
thresh = cv2.bitwise_not(thresh)

#Create a mask image with only the black regions
mask = np.zeros_like(gray)

# NOTE(review): the mask starts all-zero and the small contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `img`. Confirm the intended colour.
for cnt in contours: 
    area = cv2.contourArea(cnt) 
    if area < 200: # Change this threshold value according to your image 
        cv2.drawContours(mask, [cnt], -1, 0, -1)

#Invert the mask
mask = cv2.bitwise_not(mask)

#Apply the mask to the original image
result = cv2.bitwise_and(img, img, mask=mask)

# The window shows the inverted, opened threshold image, not `result`
cv2.imshow('Extracted Lines', thresh) 
cv2.waitKey(0) 
cv2.destroyAllWindows()

# Save the image
# cv2.imwrite('new_image2.png', result)

In [52]:
#Applying Canny Edge detection

import cv2 
import numpy as np

#Read image
img = cv2.imread('2-transformed.png')

#Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

#Gaussian Blur
# NOTE(review): the blurred `image` is not used below; Canny and the
# threshold both read the unblurred `gray`.
image = cv2.GaussianBlur(gray, (5, 5), 0)

# Apply Canny edge detection
canny_image = cv2.Canny(gray, 100, 100)

#Find contours of the Canny edges.
# The previous version passed `thresh`, which this cell only assigns further
# down, so it read a stale image from another cell (or failed on a fresh
# kernel). The edge map computed just above is used instead.
contours, hierarchy = cv2.findContours(canny_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#Threshold the image
# NOTE(review): threshold_value is not used; the call below hard-codes 23.
threshold_value = 30
thresh = cv2.threshold(gray, 23, 255, cv2.THRESH_BINARY_INV)[1]
#3.png : 23, 2.png : 155 - 160 ,

#removing noise (morphological opening with a 3x1 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

#Invert the image
thresh = cv2.bitwise_not(thresh)

#Create a mask image with only the black regions
mask = np.zeros_like(gray)

# NOTE(review): the mask starts all-zero and the small contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `img`. Confirm the intended colour.
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area < 500: # Change this threshold value according to your image
        cv2.drawContours(mask, [cnt], -1, 0, -1)

#Invert the mask
mask = cv2.bitwise_not(mask)

#Apply the mask to the original image
result = cv2.bitwise_and(img, img, mask=mask)

# The window shows the inverted, opened threshold image, not `result`
cv2.imshow('Extracted Lines', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save the image
# cv2.imwrite('new_image2.png', result)

In [None]:
# The code below computes the area of each contour. Because we set an area threshold, any contour
# whose area is below that threshold is treated as a potential number; contours with larger areas are drawn onto the white image (output).

In [51]:
import cv2 
import numpy as np

# Read image
img = cv2.imread('3.png')
cv2.imshow('Original', img)

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Canny edge detection (both hysteresis thresholds set to 100)
canny_image = cv2.Canny(gray, 100, 100)
cv2.imshow('canny-image', canny_image)

# Find contours of the white regions (outermost contours of the Canny edge map)
contours, hierarchy = cv2.findContours(canny_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Gaussian Blur (applied after the edge detection above, so it only affects the threshold)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
cv2.imshow('Post-Blur', gray)

# Threshold the image
threshold_value = 50
_, thresh = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)

# Remove noise (morphological opening with a 5x5 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) 
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
cv2.imshow('Smooth', thresh)

# Invert the image
thresh = cv2.bitwise_not(thresh)

# Create a mask image with only the black regions
mask = np.zeros_like(thresh)

# NOTE(review): the mask starts all-zero and the small contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `thresh`. Confirm the intended colour.
for cnt in contours: 
    area = cv2.contourArea(cnt) 
    if area < 60: # Change this threshold value according to your image 
        cv2.drawContours(mask, [cnt], -1, 0, -1)

# Invert the mask
mask = cv2.bitwise_not(mask)

# Apply the mask to the inverted threshold image
result = cv2.bitwise_and(thresh, thresh, mask=mask)

# Display the extracted lines
cv2.imshow('Extracted Lines', result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save the image
# cv2.imwrite('new_image2.png', result)

In [1]:
import cv2 
import numpy as np
import matplotlib.pyplot as plt

# Read image
img = cv2.imread('3.png')

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Canny edge detection (both hysteresis thresholds set to 100)
canny_image = cv2.Canny(gray, 100, 100)

# Find contours of the white regions (outermost contours of the Canny edge map)
contours, hierarchy = cv2.findContours(canny_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# `contours` is a sequence of point arrays, not an image, so it cannot be
# passed to cv2.imshow directly. Draw the contours in red on a copy of the
# input and show that instead.
contour_preview = img.copy()
cv2.drawContours(contour_preview, contours, -1, (0, 0, 255), 1)
cv2.imshow('contours', contour_preview)

# Gaussian Blur (applied after the edge detection above, so it only affects the threshold)
gray = cv2.GaussianBlur(gray, (5, 5), 0)

# Threshold the image
threshold_value = 50
_, thresh = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)

# Remove noise (morphological opening with a 5x5 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Invert the image
thresh = cv2.bitwise_not(thresh)

# Create a mask image with only the black regions
mask = np.zeros_like(thresh)

# NOTE(review): the mask starts all-zero and the small contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `thresh`. Confirm the intended colour.
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area < 60: # Change this threshold value according to your image
        cv2.drawContours(mask, [cnt], -1, 0, -1)

# Invert the mask
mask = cv2.bitwise_not(mask)

# Apply the mask to the inverted threshold image
result = cv2.bitwise_and(thresh, thresh, mask=mask)

# Display the extracted lines
cv2.imshow('Extracted Lines', result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# # Save the image
# cv2.imwrite('new_image2.png', result)

## No Numbers but jagged internal Lines

In [50]:
import cv2 
import numpy as np

# Read image
img = cv2.imread('3.png')
cv2.imshow('Original', img)

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Canny edge detection (hysteresis thresholds 23 and 55)
canny_image = cv2.Canny(gray, 23, 55)
cv2.imshow('canny-image', canny_image)

# Find contours of the white regions (full hierarchy, via RETR_TREE)
contours, hierarchy = cv2.findContours(canny_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Gaussian Blur
#gray = cv2.GaussianBlur(gray, (3, 3), 0)

# Getting Better results with bilateral filtering
# (gray is recomputed from img, then filtered)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 20, 15, 18)
cv2.imshow('Post-Blur', gray)

# Threshold the image
threshold_value = 20
_, thresh = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)

# Remove noise (morphological opening with a 4x4 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4)) 
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
cv2.imshow('Smooth', thresh)

# Invert the image
thresh = cv2.bitwise_not(thresh)

# Create a mask image with only the black regions
mask = np.zeros_like(thresh)

# NOTE(review): `area < 0` presumably never holds (contourArea is expected to
# return a non-negative value with its default arguments — confirm), and the
# fill colour is 0 on an all-zero mask anyway, so the mask stays all-zero;
# after the inversion below `result` equals `thresh`.
for cnt in contours: 
    area = cv2.contourArea(cnt) 
    if area < 0: # Change this threshold value according to your image 
        cv2.drawContours(mask, [cnt], -1, 0, -1)

# Invert the mask
mask = cv2.bitwise_not(mask)

# Apply the mask to the inverted threshold image
result = cv2.bitwise_and(thresh, thresh, mask=mask)

# Display the extracted lines
cv2.imshow('Extracted Lines', result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# # Save the image
# cv2.imwrite('new_image2.png', result)

In [30]:
import cv2
import numpy as np

# Read image
img = cv2.imread('2.png')
cv2.imshow('Original', img)

# Preprocessing
# 1. Convert to grayscale and apply bilateral filtering (improves edge preservation)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)  # Adjust parameters as needed

# 2. Adaptive thresholding (better handles uneven lighting)
# Note: despite its name, thresh_value holds the thresholded binary image.
thresh_value = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2)

# Morphological operations for noise removal (3x3 opening followed by closing)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
thresh = cv2.morphologyEx(thresh_value, cv2.MORPH_OPEN, kernel)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# Invert the image
# NOTE(review): the threshold above was already THRESH_BINARY_INV, so this
# second inversion flips it back — confirm which polarity findContours should see.
thresh = cv2.bitwise_not(thresh)
cv2.imshow('thresh', thresh)

# Find contours of potential number regions (outermost only, via RETR_EXTERNAL)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Improved number detection logic
def is_number_contour(cnt):
  """Return True when a contour looks like a digit.

  A contour qualifies when its area is strictly between 10 and 30, its
  bounding-box aspect ratio (width / height) is strictly between 0.2 and 1.0,
  and its solidity (contour area / convex-hull area) exceeds 0.7.

  Args:
    cnt: A contour as returned by ``cv2.findContours``.

  Returns:
    bool: Whether the contour passes all three filters.
  """
  area = cv2.contourArea(cnt)
  # Reject on area first: this is the cheapest test and also keeps degenerate
  # contours away from the divisions below.
  if not 10 < area < 30:
    return False

  x, y, w, h = cv2.boundingRect(cnt)
  if h == 0:
    return False
  aspect_ratio = float(w) / h

  hull_area = cv2.contourArea(cv2.convexHull(cnt))
  # A zero hull area used to raise ZeroDivisionError; treat it as "not a number".
  if hull_area == 0:
    return False
  solidity = area / hull_area

  return 0.2 < aspect_ratio < 1.0 and solidity > 0.7

# Create a mask image with only black regions (potential numbers)
# NOTE(review): the mask starts all-zero and matching contours are filled with
# colour 0, so this loop leaves it unchanged; after the inversion below the
# mask is all 255 and `result` equals `img`. Confirm the intended colour.
mask = np.zeros_like(thresh)
for cnt in contours:
  if is_number_contour(cnt):
    cv2.drawContours(mask, [cnt], -1, 0, -1)

# Invert the mask
mask = cv2.bitwise_not(mask)

# Apply the mask to the original image
result = cv2.bitwise_and(img, img, mask=mask)

# Display results; waitKey(0) waits until a key is pressed
cv2.imshow('Extracted Lines', result)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [37]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

# Read original image
img = cv2.imread("3.png")

# Preprocessing to detect the numbers better
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 17, 15, 18)

# Canny Edge Detection
canny_image = cv2.Canny(gray, 30, 20)

#Contours (outermost contours of the Canny edge map)
contours, hierarchy = cv2.findContours(canny_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# `contours` is a sequence of point arrays, not an image, so the former
# cv2.imshow('Contours', contours) call could not work and has been removed;
# the edge map the contours come from is plotted below.

# NOTE(review): filtered_contours is not used; the mask below is built from
# every contour. Confirm which set was intended.
filtered_contours = [cnt for cnt in contours if cv2.arcLength(cnt, True) > 100]

# Adaptive Thresholding
binary_image_adaptive = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)


# Create a mask to remove the numbers: every contour is filled with 255
mask = np.zeros_like(gray)
for cnt in contours:
    cv2.drawContours(mask, [cnt], -1, (255), thickness=cv2.FILLED)

#Applying Mask (~mask is zero inside the filled contours, so those pixels are blacked out)
result_without_numbers = cv2.bitwise_and(img, img, mask=~mask)

# Convert the result without numbers to grayscale
gray_result_without_numbers = cv2.cvtColor(result_without_numbers, cv2.COLOR_BGR2GRAY)

#plotting outputs
plt.figure(figsize=(12, 6))

# Plot the original image; OpenCV loads BGR while matplotlib expects RGB,
# so convert before plotting to avoid swapped colours
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('Original Image')

# Plot the Canny edge image
plt.subplot(1, 3, 2)
plt.imshow(canny_image, cmap='gray')
plt.title('Canny Image')

# Plot the adaptive-threshold image
plt.subplot(1,3,3)
plt.imshow(binary_image_adaptive, cmap='gray')
plt.title('Adaptive Binary')

# Adjust spacing between subplots
plt.subplots_adjust(wspace=0.2)

# Show the plots
plt.show()




In [41]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

# Read original image
img = cv2.imread("3.png")

# Preprocessing for better number detection
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 17, 15, 18)

# Adaptive Thresholding
binary_image_adaptive = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 3)

# Find contours of the numbers (full hierarchy, via RETR_TREE)
contours, hierarchies = cv2.findContours(binary_image_adaptive, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Create a mask to remove the numbers: every contour is filled with 255
mask = np.zeros_like(gray)
for cnt in contours:
    cv2.drawContours(mask, [cnt], -1, (255), thickness=cv2.FILLED)

# Apply the mask (~mask is zero inside the filled contours, so those pixels are blacked out)
result_without_numbers = cv2.bitwise_and(img, img, mask=~mask)

# Convert the result without numbers to grayscale
gray_result_without_numbers = cv2.cvtColor(result_without_numbers, cv2.COLOR_BGR2GRAY)

# Find contours of the lines
contours_lines, _ = cv2.findContours(gray_result_without_numbers, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw the contours of the lines on a blank image
# NOTE(review): lines_image is built but never displayed in this cell.
lines_image = np.zeros_like(gray)
cv2.drawContours(lines_image, contours_lines, -1, 255, thickness=1)

# Plotting outputs
plt.figure(figsize=(12, 6))

# Plot the original image
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('Original Image')

# Plot the Adaptive Binary image
plt.subplot(1, 3, 2)
plt.imshow(binary_image_adaptive, cmap='gray')
plt.title('Adaptive Binary Image')

# Plot the filtered grayscale image. `gray` has a single channel, so the
# previous COLOR_BGR2RGB conversion (which needs a 3-channel input) failed;
# render it directly with a gray colormap instead.
plt.subplot(1, 3, 3)
plt.imshow(gray, cmap='gray')
plt.title('Gray')

# Show the plots
plt.show()


In [55]:
import cv2
import numpy as np
import os

# Read original image
img = cv2.imread("3.png")

# Preprocessing for better number detection
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 17, 15, 20)

# Adaptive Thresholding
binary_image_adaptive = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)

# Find contours of the numbers (outermost only, via RETR_EXTERNAL)
contours, _ = cv2.findContours(binary_image_adaptive, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Create a mask to remove the numbers: every contour is filled with 255
mask = np.zeros_like(gray)
for cnt in contours:
    cv2.drawContours(mask, [cnt], -1, (255), thickness=cv2.FILLED)

# Apply the mask (~mask is zero inside the filled contours, so those pixels are blacked out)
result_without_numbers = cv2.bitwise_and(img, img, mask=~mask)

# Convert the result without numbers to grayscale
gray_result_without_numbers = cv2.cvtColor(result_without_numbers, cv2.COLOR_BGR2GRAY)

# Find contours of the lines
contours_lines, _ = cv2.findContours(gray_result_without_numbers, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw the contours of the lines on a blank image
lines_image = np.zeros_like(gray)
cv2.drawContours(lines_image, contours_lines, -1, (255), thickness=2)

# Invert the image
# NOTE(review): this replaces the contour drawing made just above with the
# inverse of result_without_numbers, so the drawn lines are discarded —
# presumably bitwise_not(lines_image) was intended; confirm.
lines_image = cv2.bitwise_not(result_without_numbers)
cv2.imshow('thresh', lines_image)

# Display the outputs ('thresh' and "Image without Numbers" show the same array)
cv2.imshow("Original Image", img)
cv2.imshow("Adaptive Binary Image", binary_image_adaptive)
cv2.imshow("Image without Numbers", lines_image)

# Wait for user input
cv2.waitKey(0)

# Close all windows
cv2.destroyAllWindows()

In [4]:
import cv2 
import numpy as np

# Read image
img = cv2.imread('3.png')
cv2.imshow('Original', img)

# Convert to grayscale, then smooth with a bilateral filter
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 17, 15, 10)

# Apply Canny edge detection (both hysteresis thresholds set to 20)
canny_image = cv2.Canny(gray, 20, 20)
cv2.imshow('canny-image', canny_image)

# Find contours of the white regions (all contours, no hierarchy, via RETR_LIST)
contours, hierarchy = cv2.findContours(canny_image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

# Gaussian Blur (applied after the edge detection above, so it only affects the threshold)
gray = cv2.GaussianBlur(gray, (3, 3), 0)
cv2.imshow('Post-Blur', gray)

# Threshold the image
threshold_value = 120
_, thresh = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)


# Remove noise (morphological opening with a 4x2 rectangular kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 2)) 
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
cv2.imshow('Smooth', thresh)

# Invert the image
thresh = cv2.bitwise_not(thresh)

# Create a mask image with only the black regions
mask = np.zeros_like(thresh)

# Select contours by closed perimeter length rather than area.
# NOTE(review): the mask starts all-zero and the selected contours are filled
# with colour 0, so this loop leaves it unchanged; after the inversion below
# the mask is all 255 and `result` equals `thresh`. Confirm the intended colour.
for cnt in contours: 
    perimeter = cv2.arcLength(cnt, True)
    if perimeter > 100: # Change this threshold value according to your image 
        cv2.drawContours(mask, [cnt], -1, 0, -1)

# Invert the mask
mask = cv2.bitwise_not(mask)

# Apply the mask to the inverted threshold image
result = cv2.bitwise_and(thresh, thresh, mask=mask)

# Display the extracted lines
cv2.imshow('Extracted Lines', result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# # Save the image
# cv2.imwrite('new_image2.png', result)

In [6]:
# Reattempting fitz

In [7]:
import fitz
from PIL import Image

In [13]:
# Open the source PDF and take its first page
doc = fitz.open("3.pdf")
page = doc[0]
paths = page.get_drawings()  # extract existing drawings

In [15]:
# define some output page with the same dimensions
# (a new, empty PDF whose single page copies the source page's width and height)
outpdf = fitz.open()
outpage = outpdf.new_page(width=page.rect.width, height=page.rect.height)
shape = outpage.new_shape()  # make a drawing canvas for the output page

In [16]:
# --------------------------------------
# loop through the paths and draw them
# Only the drawing commands are replayed onto the new page; nothing else from
# the source page is copied in this loop.
# --------------------------------------
for path in paths:
    # ------------------------------------
    # draw each entry of the 'items' list
    # item[0] is the command code; the remaining entries are its arguments
    # ------------------------------------
    for item in path["items"]:  # these are the draw commands
        if item[0] == "l":  # line
            shape.draw_line(item[1], item[2])
        elif item[0] == "re":  # rectangle
            shape.draw_rect(item[1])
        elif item[0] == "qu":  # quad
            shape.draw_quad(item[1])
        elif item[0] == "c":  # curve
            shape.draw_bezier(item[1], item[2], item[3], item[4])
        else:
            # fail loudly on any command code not handled above
            raise ValueError("unhandled drawing", item)
    # ------------------------------------------------------
    # all items are drawn, now apply the common properties
    # to finish the path
    # NOTE(review): finish() is called without arguments, so the source path's
    # own colour/width/fill settings are not passed on — confirm that the
    # defaults are what is wanted.
    # ------------------------------------------------------
    shape.finish()

In [19]:
# all paths processed - commit the shape to its page
shape.commit()
# write the redrawn page out as a new PDF file
outpdf.save("nums_removed.pdf")

In [21]:
import PIL

In [22]:
# Re-open the PDF written by the redraw step (rebinds `doc` to the new file)
doc = fitz.open('nums_removed.pdf')

In [23]:
# Load the first page (index 0) of the re-opened PDF
page = doc.load_page(0)

In [24]:
# Render the page to a pixmap using get_pixmap's default settings
pix = page.get_pixmap()

In [25]:
# Write the rendered page to a JPEG file.
# NOTE(review): `img` receives whatever pix.save returns — presumably None,
# not image data; confirm before using `img` afterwards.
img = pix.save('no_nums.jpg')