In [2]:
import cv2
#For Image Manipulation

In [3]:
import numpy
#For Numerical Manipulation

In [4]:
import matplotlib.pyplot as plt
#For Visual Representation

In [5]:
def load_image(image_path, grayscale=True):
    """
    Read an image file into memory with OpenCV.
    Args:
        image_path (str): location of the image file on disk.
        grayscale (bool): when True (the default) the file is decoded with
            cv2.IMREAD_GRAYSCALE, otherwise with cv2.IMREAD_COLOR.
    Returns:
        image (numpy.ndarray): the decoded image array.
    Raises:
        FileNotFoundError: if cv2.imread returns None for image_path.
    """
    read_flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR
    loaded = cv2.imread(image_path, read_flag)
    # cv2.imread reports failure by returning None instead of raising.
    if loaded is not None:
        return loaded
    raise FileNotFoundError(f"Could not load image at {image_path}")

In [6]:
def save_image(image, out_path):
    """
    Save the image to disk.
    Args:
        image (numpy.ndarray): pixel data to write.
        out_path (str): destination file path, handed to cv2.imwrite.
    Raises:
        OSError: if cv2.imwrite reports that the file was not written.
    """
    # cv2.imwrite signals a failed write through its boolean return value;
    # surface it instead of silently dropping the image.
    if not cv2.imwrite(out_path, image):
        raise OSError(f"Could not save image at {out_path}")

In [7]:
def display(im_path):
    """
    Read the image at im_path with matplotlib and show it in a figure
    that has no axes decorations.
    Args:
        im_path (str): path of the image file to show.
    """
    dpi = 80
    pixels = plt.imread(im_path)
    rows, cols = pixels.shape[:2]

    # Figure size in inches, derived from the pixel dimensions at `dpi`.
    inches = cols / float(dpi), rows / float(dpi)

    # A single axes stretched over the whole figure, with spines and
    # ticks switched off so only the image is visible.
    canvas = plt.figure(figsize=inches)
    axes = canvas.add_axes([0, 0, 1, 1])
    axes.axis('off')

    axes.imshow(pixels, cmap='gray')
    plt.show()

In [8]:
def inverted_img(img):
    """
    Return the bitwise complement of every pixel value in img
    (computed with cv2.bitwise_not).
    """
    return cv2.bitwise_not(img)

In [9]:
def to_grayscale(img):
    """
    Converts Color Image to shades of Gray
    Args:
        img (numpy.ndarray): colour image; converted with
            cv2.COLOR_BGR2GRAY, so BGR channel order is expected.
    Returns:
        numpy.ndarray: the grayscale version of img.
    """
    # Convert the argument itself (the previous body read an unrelated
    # name `image` instead of the `img` parameter).
    grayscaled_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return grayscaled_img

In [10]:
def binarisation(img, threshold=210, max_value=230):
    """
    Converts Grayscaled_Image to Two colors, i.e., Black and White
    Args:
        img (numpy.ndarray): grayscale image to threshold.
        threshold (int): pixels strictly above this value are set to
            max_value, all others to 0 (cv2.THRESH_BINARY).
        max_value (int): value written for pixels above the threshold.
            The default of 230 is kept for backward compatibility; pass
            255 for pure white.
    Returns:
        numpy.ndarray: the two-level image.
    """
    # Threshold the argument itself (the previous body read an unrelated
    # name `gray_image` instead of the `img` parameter).
    _, im_bw = cv2.threshold(img, threshold, max_value, cv2.THRESH_BINARY)
    return im_bw

In [11]:
def noise_removal(image):
    """
    Removes Noise(dots that interfere OCR) from Binarised Image
    Args:
        image (numpy.ndarray): binarised image.
    Returns:
        numpy.ndarray: the image after dilate, erode, morphological close
        and a 3x3 median blur.
    """
    # The file imports the package as `numpy` (no `np` alias), so use the
    # full module name. One kernel is shared by all morphology steps.
    # NOTE(review): with a 1x1 kernel the morphology steps leave pixels
    # unchanged, so the median blur does the actual denoising; enlarge the
    # kernel if stronger cleanup is wanted.
    kernel = numpy.ones((1, 1), numpy.uint8)
    image = cv2.dilate(image, kernel, iterations=1)
    image = cv2.erode(image, kernel, iterations=1)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
    image = cv2.medianBlur(image, 3)
    return image

Changing font width

In [12]:
def thin_font(image):
    """
    Makes the font thin
    Args:
        image (numpy.ndarray): image to process.
    Returns:
        numpy.ndarray: image with the strokes eroded by a 2x2 kernel.
    """
    # Invert before eroding and invert back afterwards, so the erosion
    # acts on the complement of the input.
    # presumably the input is dark text on a light background -- confirm
    image = cv2.bitwise_not(image)
    # The file imports the package as `numpy` (no `np` alias).
    kernel = numpy.ones((2, 2), numpy.uint8)
    image = cv2.erode(image, kernel, iterations=1)
    image = cv2.bitwise_not(image)
    return image

def thick_font(image):
    """
    Makes the font thick
    Args:
        image (numpy.ndarray): image to process.
    Returns:
        numpy.ndarray: image with the strokes dilated by a 2x2 kernel.
    """
    # Invert before dilating and invert back afterwards, so the dilation
    # acts on the complement of the input.
    # presumably the input is dark text on a light background -- confirm
    image = cv2.bitwise_not(image)
    # The file imports the package as `numpy` (no `np` alias).
    kernel = numpy.ones((2, 2), numpy.uint8)
    image = cv2.dilate(image, kernel, iterations=1)
    image = cv2.bitwise_not(image)
    return image

In [13]:
def getSkewAngle(cvImage, debug_path=None):
    """
    Helps to get skew angle which tells us how much the text is distorted from horizontal one
    Args:
        cvImage (numpy.ndarray): input image; grayscale (2-D), BGR or BGRA.
        debug_path (str | None): when given, an annotated copy of the
            image with a green bounding box around every detected text
            block is written to this path. Nothing is written by default.
    Returns:
        float: the negated angle of the minimum-area rectangle around the
        largest text block, normalised into the [-45, 45] degree range.
        0.0 when no text block is detected.
    """
    # Reduce to a single channel; input that is already grayscale (the
    # default output of load_image) is used as-is instead of crashing in
    # cvtColor.
    if cvImage.ndim == 2:
        gray = cvImage
    elif cvImage.shape[2] == 4:
        gray = cv2.cvtColor(cvImage, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilate to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    # Find all contours
    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing detected (e.g. a blank page): report "no skew".
        return 0.0

    if debug_path is not None:
        # Draw on a copy so the caller's image is never modified.
        annotated = cvImage.copy()
        for c in contours:
            x, y, w, h = cv2.boundingRect(c)
            cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imwrite(debug_path, annotated)

    # Find largest contour and surround in min area box
    largestContour = max(contours, key=cv2.contourArea)
    angle = cv2.minAreaRect(largestContour)[-1]

    # Determine the angle. Convert it to the value that was originally used to obtain skewed image.
    # Older OpenCV releases report the angle in [-90, 0), newer ones in
    # (0, 90]; folding both into [-45, 45] gives the same result on either.
    if angle < -45:
        angle = 90 + angle
    elif angle > 45:
        angle = angle - 90
    return -1.0 * angle
# Rotate the image around its center
def rotateImage(cvImage, angle: float):
    """
    Rotate a copy of cvImage by `angle` degrees about its centre pixel.

    The output keeps the input's width and height; pixels are resampled
    with cubic interpolation and the border mode is cv2.BORDER_REPLICATE.
    """
    source = cvImage.copy()
    height, width = source.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        source,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

In [14]:
def deskew(cvImage):
    """
    Straighten cvImage: measure its skew with getSkewAngle and rotate
    the image by the negated value of that measurement.
    """
    skew = getSkewAngle(cvImage)
    correction = -1.0 * skew
    return rotateImage(cvImage, correction)

In [16]:
def remove_borders(image):
    """
    Remove the borders
    Args:
        image (numpy.ndarray): single-channel image to crop.
    Returns:
        numpy.ndarray: the region of `image` inside the bounding box of
        its largest external contour, or `image` itself when no contour
        is found.
    """
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing to crop to (e.g. an all-black image).
        return image
    # Largest contour by area; scanning in reverse keeps the last one on
    # ties, matching the earlier sort-ascending-and-take-last behaviour.
    cnt = max(reversed(contours), key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)
    return image[y:y + h, x:x + w]

In [18]:
# Fill value and per-side thickness (in pixels) of the border that
# give_borders adds.
color = [255, 255, 255]
top = bottom = left = right = 150

def give_borders(img):
    """
    Return img padded on every side with a constant-colour border,
    using the module-level `color` and `top`/`bottom`/`left`/`right`.
    """
    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )