In [4]:
import os
# Point the Google Cloud client libraries at a service-account key file.
# setdefault() only writes the variable when it is not already present in the
# environment, so a GOOGLE_APPLICATION_CREDENTIALS value set outside the
# notebook takes precedence over this path.
# NOTE(review): this is an absolute, machine-specific path — consider moving it
# to a configurable location before sharing the notebook.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS',r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\OCRProject.json")

'C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\OCRProject.json'

In [6]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm_notebook as tqdm
import os 
import torch
import torch.nn as nn
from torch import optim
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Function, Variable
from pathlib import Path
from itertools import groupby
import json
import glob
import pytesseract
import time
import math

In [7]:
# Make sure the "TT-Images" output directory exists (relative to the current
# working directory). exist_ok=True makes the cell idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("TT-Images", exist_ok=True)

In [8]:
# --- Paths -------------------------------------------------------------------
# NOTE(review): both variables point at the same "annotations" directory;
# confirm that train_img_dir is not meant to be an image directory instead.
input_dir = r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\COCO\DIR\annotations"
train_img_dir = r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\COCO\DIR\annotations"

# --- Network input size (pixels) ---------------------------------------------
WIDTH = 512
HEIGHT = 256
# Number of output channels of the segmentation network.
category_num = 5 + 1

ratio = 8

# --- Training hyper-parameters -----------------------------------------------
epoch_num = 20
batch_size = 4

# Use the first CUDA device when one is available, otherwise fall back to the
# CPU so the notebook still runs on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [9]:
class double_conv(nn.Module):
    """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

    The first stage maps ``in_ch`` to ``out_ch`` channels, the second keeps
    ``out_ch`` channels. ``padding=1`` with a 3x3 kernel preserves the spatial
    size. The layers live in ``self.conv`` (an ``nn.Sequential`` with six
    entries) so parameter names stay ``conv.0.*``, ``conv.1.*``, ``conv.3.*``,
    ``conv.4.*``.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = []
        # Same layer order as conv/BN/ReLU written out twice.
        for stage_in in (in_ch, out_ch):
            stages.append(nn.Conv2d(stage_in, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)


class inconv(nn.Module):
    """Input stage: a single ``double_conv`` from ``in_ch`` to ``out_ch`` channels."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        return self.conv(x)


class down(nn.Module):
    """Encoder stage: 2x2 max-pool followed by a ``double_conv``.

    The two steps are stored in ``self.mpconv`` (pool at index 0, convolution
    block at index 1).
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        pool = nn.MaxPool2d(2)
        convs = double_conv(in_ch, out_ch)
        self.mpconv = nn.Sequential(pool, convs)

    def forward(self, x):
        return self.mpconv(x)


class up(nn.Module):
    """Decoder stage: upsample, align with the skip connection, concat, double_conv.

    Args:
        in_ch: channel count of the concatenated tensor fed to ``double_conv``
            (skip-connection channels + upsampled channels).
        out_ch: channel count produced by this stage.
        bilinear: when True use parameter-free bilinear upsampling, otherwise
            a learned ``ConvTranspose2d`` on ``in_ch // 2`` channels.
    """

    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()

        # Learned upsampling would be nicer, but it costs extra weights and
        # memory, so bilinear interpolation is the default.
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad/crop ``x2`` to the same H x W, then fuse them.

        Args:
            x1: feature map from the deeper layer (indexed as N, C, H, W).
            x2: skip-connection feature map (indexed as N, C, H, W).

        Returns:
            Output of ``double_conv`` applied to ``cat([x2, x1], dim=1)``.
        """
        x1 = self.up(x1)
        diff_h = x1.size(2) - x2.size(2)
        diff_w = x1.size(3) - x2.size(3)
        # F.pad takes (left, right, top, bottom): the LAST dimension comes
        # first, so the width difference must lead. Splitting as
        # (d // 2, d - d // 2) makes the two sides always sum to d, also for
        # odd or negative differences (negative padding crops).
        x2 = F.pad(x2, (diff_w // 2, diff_w - diff_w // 2,
                        diff_h // 2, diff_h - diff_h // 2))
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class outconv(nn.Module):
    """Output head: a 1x1 convolution mapping ``in_ch`` to ``out_ch`` channels."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, 1)

    def forward(self, x):
        return self.conv(x)

    
class UNet(nn.Module):
    """U-Net built from ``inconv``, four ``down`` stages, four ``up`` stages and ``outconv``.

    Args:
        n_channels: number of channels of the input image tensor.
        n_classes: number of output channels (one score map per class).
    """

    def __init__(self, n_channels, n_classes):
        super().__init__()
        # Encoder
        self.inc = inconv(n_channels, 64)
        self.down1 = down(64, 128)
        self.down2 = down(128, 256)
        self.down3 = down(256, 512)
        self.down4 = down(512, 512)
        # Decoder: the first argument is the channel count after concatenating
        # the skip connection with the upsampled feature map.
        self.up1 = up(1024, 256)
        self.up2 = up(512, 128)
        self.up3 = up(256, 64)
        self.up4 = up(128, 64)
        # Per-pixel class scores
        self.outc = outconv(64, n_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class score maps for ``x``."""
        enc1 = self.inc(x)
        enc2 = self.down1(enc1)
        enc3 = self.down2(enc2)
        enc4 = self.down3(enc3)
        features = self.down4(enc4)

        # Walk back up, fusing each decoder stage with its matching skip.
        decoder = (self.up1, self.up2, self.up3, self.up4)
        skips = (enc4, enc3, enc2, enc1)
        for stage, skip in zip(decoder, skips):
            features = stage(features, skip)

        return self.outc(features)

In [19]:
# Build the network on the configured device and restore the trained weights.
net = UNet(n_channels=3, n_classes=category_num).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU can also be loaded on a CPU-only machine (or a different GPU index).
net.load_state_dict(torch.load(r'models\newdata-50-4-0.01.pth', map_location=device))

<All keys matched successfully>

In [20]:
def test_generator(location):
    """Yield network-ready inputs for every .jpg/.jpeg/.png image in ``location``.

    Files that OpenCV cannot decode (``cv2.imread`` returns ``None``) are
    reported and skipped instead of crashing inside ``cv2.resize``.

    Args:
        location: directory that is searched (non-recursively) for images.

    Yields:
        Tuples ``(img_name, batch, resized, original)`` where
        ``batch`` is a float32 array of shape (1, C, HEIGHT, WIDTH) with values
        divided by 255, ``resized`` is the HEIGHT x WIDTH image as returned by
        ``cv2.resize`` and ``original`` is the image as read from disk.
    """
    img_names = []
    for extension in ("jpg", "jpeg", "png"):
        img_names.extend(glob.glob(f"{location}/*.{extension}"))

    for img_name in img_names:
        img = cv2.imread(img_name)
        if img is None:
            # imread signals failure with None rather than an exception.
            print(f"Skipping unreadable image: {img_name}")
            continue
        img1 = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
        # HWC -> CHW
        img2 = img1.transpose((2, 0, 1))
        yield img_name, np.asarray([img2], dtype=np.float32) / 255, img1, img

In [21]:
from pytesseract import Output

def get_all_ROIs(img):
    """Return bounding boxes of all Tesseract detections with confidence > 60.

    Args:
        img: image passed straight to ``pytesseract.image_to_data``.

    Returns:
        List of ``(x1, y1, x2, y2)`` tuples, one per accepted detection.
    """
    all_rois = []
    d = pytesseract.image_to_data(img, output_type=Output.DICT)
    n_boxes = len(d['text'])
    for i in range(n_boxes):
        # Confidences may arrive as ints, floats or strings such as '96.06';
        # int('96.06') raises ValueError, float() accepts every variant.
        if float(d['conf'][i]) > 60:
            (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
            all_rois.append((x, y, x + w, y + h))
    return all_rois
    

In [22]:
def get_iou(bb1, bb2):
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        bb1, bb2: boxes indexed as ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and
            ``y1 <= y2``.

    Returns:
        IoU as a float in ``[0.0, 1.0]``. Disjoint boxes and pairs whose union
        has zero area (e.g. two degenerate boxes) yield ``0.0``.
    """
    # Coordinates of the intersection rectangle.
    x_left = max(bb1[0], bb2[0])
    y_top = max(bb1[1], bb2[1])
    x_right = min(bb1[2], bb2[2])
    y_bottom = min(bb1[3], bb2[3])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # Areas of both boxes.
    bb1_area = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1])
    bb2_area = (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])

    union_area = float(bb1_area + bb2_area - intersection_area)
    if union_area <= 0:
        # Both boxes are degenerate (zero area): avoid dividing by zero.
        return 0.0

    iou = intersection_area / union_area
    assert 0.0 <= iou <= 1.0
    return iou

In [23]:
def reference_MICR_Codes(img, reference_path='micr_chars.png'):
    """Build the MICR character templates from a reference image.

    The reference image is binarised, its contours are sorted left to right
    and grouped into characters with ``extract_digits_and_symbols``; every
    character crop is resized to 36x36 and stored under its name.

    Args:
        img: unused; kept so existing calls keep working.
        reference_path: path of the reference image containing the characters
            listed in ``charNames`` in left-to-right order.

    Returns:
        Dict mapping character name to its 36x36 template image.

    Raises:
        FileNotFoundError: if the reference image cannot be read.
    """
    charNames = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0","T", "U", "A", "D"]

    ref = cv2.imread(reference_path)
    if ref is None:
        # cv2.imread returns None instead of raising on a missing/bad file.
        raise FileNotFoundError(
            f"MICR reference image not found or unreadable: {reference_path}")
    ref = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY)
    ref = imutils.resize(ref, width=400)
    ref = cv2.threshold(ref, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    refCnts = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    refCnts = imutils.grab_contours(refCnts)
    refCnts = contours.sort_contours(refCnts, method="left-to-right")[0]

    refROIs = extract_digits_and_symbols(ref, refCnts, minW=10, minH=20)[0]
    chars = {}
    # Map every character name to its fixed-size template.
    for (name, roi) in zip(charNames, refROIs):
        chars[name] = cv2.resize(roi, (36, 36))
    return chars

In [24]:
from skimage.segmentation import clear_border
from imutils import contours
import numpy as np
import argparse
import imutils
import cv2

def extract_digits_and_symbols(image, charCnts, minW=5, minH=15):
    """Group contours into character ROIs.

    A contour whose bounding box is at least ``minW`` wide and ``minH`` tall is
    taken as one character. Any smaller contour is treated as the first of
    three parts of one symbol: it is merged with the next two contours and the
    enclosing box of all three is used. If fewer than two contours remain to
    complete such a symbol, processing stops and the incomplete symbol is
    dropped.

    Args:
        image: 2-D image the ROIs are sliced from.
        charCnts: iterable of contours, expected in reading order.
        minW, minH: minimum bounding-box size of a stand-alone character.

    Returns:
        Tuple ``(rois, locs)``: the cropped images and their
        ``(x1, y1, x2, y2)`` boxes, in matching order.
    """
    pending = iter(charCnts)
    rois, locs = [], []

    for contour in pending:
        (cX, cY, cW, cH) = cv2.boundingRect(contour)

        if cW >= minW and cH >= minH:
            # Large enough to be a character on its own.
            box = (cX, cY, cX + cW, cY + cH)
        else:
            # Part of a three-piece symbol: pull the other two pieces.
            try:
                pieces = [contour, next(pending), next(pending)]
            except StopIteration:
                # Ran out of contours mid-symbol; stop gracefully.
                break
            rects = [cv2.boundingRect(piece) for piece in pieces]
            box = (
                min(pX for (pX, _, _, _) in rects),
                min(pY for (_, pY, _, _) in rects),
                max(pX + pW for (pX, _, pW, _) in rects),
                max(pY + pH for (_, pY, _, pH) in rects),
            )

        (x1, y1, x2, y2) = box
        rois.append(image[y1:y2, x1:x2])
        locs.append(box)

    return (rois, locs)

# Names of the reference characters, in the left-to-right order in which they
# are expected to appear in the reference image.
charNames = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
             "T", "U", "A", "D"]

reference = 'micr_chars.png'

# Load the reference image; cv2.imread returns None (no exception) when the
# file is missing or unreadable, so fail early with a clear message.
ref = cv2.imread(reference)
if ref is None:
    raise FileNotFoundError(
        f"MICR reference image not found or unreadable: {reference}")
ref = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY)
ref = imutils.resize(ref, width=400)
ref = cv2.threshold(ref, 0, 255, cv2.THRESH_BINARY_INV |
                    cv2.THRESH_OTSU)[1]

# Find contours in the reference image (i.e. the outlines of the characters)
# and sort them from left to right.
refCnts = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL,
                           cv2.CHAIN_APPROX_SIMPLE)
refCnts = imutils.grab_contours(refCnts)
refCnts = contours.sort_contours(refCnts, method="left-to-right")[0]
# Clone of the binarised image (3 channels) so boxes can be drawn in colour.
clone = np.dstack([ref.copy()] * 3)
# Draw the bounding box of every raw contour.
for c in refCnts:
    (x, y, w, h) = cv2.boundingRect(c)
    cv2.rectangle(clone, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Group the contours into characters/symbols, then build the dictionary that
# maps each character name to its template image.
(refROIs, refLocs) = extract_digits_and_symbols(ref, refCnts,
                                                minW=10, minH=20)
chars = {}
# Re-initialise the clone so the grouped boxes are drawn on a clean image.
clone = np.dstack([ref.copy()] * 3)
for (name, roi, loc) in zip(charNames, refROIs, refLocs):
    # Draw the box surrounding the grouped character.
    (xA, yA, xB, yB) = loc
    cv2.rectangle(clone, (xA, yA), (xB, yB), (0, 255, 0), 2)
    # Templates are stored at a fixed 36x36 size for template matching.
    roi = cv2.resize(roi, (36, 36))
    chars[name] = roi

In [37]:
def detect_text(path):
    """Detect text in an image file with the Google Cloud Vision API.

    Prints every detected text fragment together with the vertices of its
    bounding polygon.

    Args:
        path: path of the image file to send to the API.

    Returns:
        The ``text_annotations`` of the API response.

    Raises:
        Exception: if the API response carries an error message.
    """
    from google.cloud import vision
    import io
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    # google-cloud-vision >= 2.0 exposes Image directly on the package and no
    # longer ships `vision.types`; older releases only have vision.types.Image.
    image_cls = getattr(vision, 'Image', None) or vision.types.Image
    image = image_cls(content=content)

    response = client.text_detection(image=image)

    # Check for an API error before touching the (possibly empty) results.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    texts = response.text_annotations
    print('Texts:')

    for text in texts:
        print('\n"{}"'.format(text.description))

        vertices = (['({},{})'.format(vertex.x, vertex.y)
                    for vertex in text.bounding_poly.vertices])

        print('bounds: {}'.format(','.join(vertices)))

    return texts
        
def get_google_text(img):
    """Run Google Vision text detection on an in-memory image.

    The image is written to ``temp/temp.jpg`` (relative to the working
    directory) because ``detect_text`` works on file paths.

    Args:
        img: image array accepted by ``cv2.imwrite``.

    Returns:
        Whatever ``detect_text`` returns for the written file.

    Raises:
        IOError: if the temporary image cannot be written.
    """
    # The directory may not exist yet; cv2.imwrite would then fail silently.
    os.makedirs("temp", exist_ok=True)
    # os.path.join yields "temp\\temp.jpg" on Windows and a valid path elsewhere.
    temp_path = os.path.join("temp", "temp.jpg")
    if not cv2.imwrite(temp_path, img):
        raise IOError(f"Could not write temporary image to {temp_path}")

    return detect_text(temp_path)

def get_text(img):
    """OCR an image region with Tesseract after simple preprocessing.

    The image is converted to grayscale, binarised with Otsu's threshold and
    upscaled by a factor of two before being handed to Tesseract.

    Args:
        img: BGR image array.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)

    # The page-segmentation option must be a separate argument; previously
    # "-psm 6" was glued to the whitelist, so "-", "p", "s", "m" were
    # whitelisted and the mode was never applied.
    config = ('--psm 6 -c tessedit_char_whitelist='
              '/0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz')

    # pytesseract accepts NumPy arrays directly, so no lossy JPEG round-trip
    # through a temp directory (which might not exist) is needed.
    text = pytesseract.image_to_string(gray, lang='eng', config=config)

    return text
    
def get_MICR_text(img):
    """Recognise MICR characters in an image by template matching.

    Steps:
      1. Locate candidate character groups (blackhat -> horizontal Sobel
         gradient -> closing -> Otsu threshold -> ``clear_border``).
      2. For every group, binarise the crop, split it into characters with
         ``extract_digits_and_symbols`` and match each 36x36 character against
         the templates in the module-level ``chars`` dictionary.

    Unlike the earlier version this function does not open OpenCV windows
    (``cv2.imshow`` / ``cv2.waitKey(0)`` block a notebook) and no longer refers
    to the undefined names ``image``, ``delta`` and ``output``.

    Args:
        img: BGR image array containing the MICR line.

    Returns:
        The recognised groups joined by single spaces ("" when nothing was
        found).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7))
    blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, rectKernel)
    gradX = cv2.Sobel(blackhat, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1)
    gradX = np.absolute(gradX)
    (minVal, maxVal) = (np.min(gradX), np.max(gradX))
    if maxVal == minVal:
        # Flat gradient: nothing to find and rescaling would divide by zero.
        return ""
    gradX = (255 * ((gradX - minVal) / (maxVal - minVal)))
    gradX = gradX.astype("uint8")
    gradX = cv2.morphologyEx(gradX, cv2.MORPH_CLOSE, rectKernel)
    thresh = cv2.threshold(gradX, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    thresh = clear_border(thresh)

    output = []
    for (gX, gY, gW, gH) in get_contours(thresh):
        # Clamp the 5 px margin at 0: a negative start index would wrap
        # around and yield an empty or wrong crop.
        group = gray[max(gY - 5, 0):gY + gH + 5, max(gX - 5, 0):gX + gW + 5]
        if group.size == 0:
            continue
        group = cv2.threshold(group, 0, 255,
                              cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

        charCnts = cv2.findContours(group.copy(), cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)
        charCnts = imutils.grab_contours(charCnts)
        if len(charCnts) == 0:
            # sort_contours cannot handle an empty contour list.
            continue
        charCnts = contours.sort_contours(charCnts,
                                          method="left-to-right")[0]
        (rois, locs) = extract_digits_and_symbols(group, charCnts)

        # Classify every character of THIS group (previously the loop ran
        # once after all groups and only saw the last group's ROIs).
        groupOutput = []
        for roi in rois:
            roi = cv2.resize(roi, (36, 36))
            scores = []
            for charName in charNames:
                result = cv2.matchTemplate(roi, chars[charName], cv2.TM_CCOEFF)
                (_, score, _, _) = cv2.minMaxLoc(result)
                scores.append(score)
            groupOutput.append(charNames[np.argmax(scores)])

        if groupOutput:
            output.append("".join(groupOutput))

    text = " ".join(output)
    print("Check OCR: {}".format(text))
    return text

def get_contours(gray):
    """Return bounding boxes of the external contours found in ``gray``.

    Only boxes wider than 5 and taller than 2 are kept. The result is a list
    of ``(x, y, w, h)`` tuples sorted by ``x`` (left to right).
    """
    found = cv2.findContours(gray.copy(), cv2.RETR_EXTERNAL,
                             cv2.CHAIN_APPROX_SIMPLE)
    found = imutils.grab_contours(found)

    boxes = [cv2.boundingRect(contour) for contour in found]
    kept = [(x, y, w, h) for (x, y, w, h) in boxes if w > 5 and h > 2]
    kept.sort(key=lambda box: box[0])

    return kept

def class_x_processing(img,mask_prob,class_id, resized_image, original_image):
    """Extract the regions predicted as ``class_id`` and OCR the MICR class.

    A binary mask of all pixels labelled ``class_id`` is written to disk, its
    connected regions are located with ``get_contours`` and each region is
    cropped from ``resized_image`` and saved. Only regions of class 4 are
    passed on to ``get_MICR_text``; every other class is skipped after the
    crop has been saved.

    Args:
        img: network input batch (unused here; kept for call compatibility).
        mask_prob: per-pixel class ids; the first two axes are indexed as
            (row, column) of the HEIGHT x WIDTH network output.
        class_id: class whose regions are processed.
        resized_image: the HEIGHT x WIDTH image the mask corresponds to.
        original_image: the full-resolution image.
    """
    ori_x = original_image.shape[1]
    ori_y = original_image.shape[0]
    # Scale factors from mask coordinates to original-image coordinates:
    # x scales with the width ratio, y with the height ratio.
    Sx = ori_x / WIDTH
    Sy = ori_y / HEIGHT

    mask = np.zeros((HEIGHT, WIDTH),np.float32)
    indices = np.where(mask_prob==class_id)
    mask[indices[0],indices[1]] = 255
    curr_time = time.time()
    cv2.imwrite(f"C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\TT-Images\\{class_id}-mask-{curr_time}.jpg",mask)

    gray = cv2.cvtColor(np.repeat(mask[:, :, np.newaxis],3,axis = 2), cv2.COLOR_BGR2GRAY).astype("uint8")

    groupLocs = get_contours(gray)

    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs("test_temp", exist_ok=True)

    for (gX, gY, gW, gH) in groupLocs:
        scaled_gX = int(gX * Sx)
        scaled_gW = int(gW * Sx)
        scaled_gY = int(gY * Sy)
        scaled_gH = int(gH * Sy)

        # Diagnostic only: the same region in full-resolution coordinates.
        # Start indices are clamped at 0 because a negative start would wrap.
        top, bottom = max(scaled_gY - 5, 0), scaled_gY + scaled_gH + 5
        left, right = max(scaled_gX - 5, 0), scaled_gX + scaled_gW + 5
        print (top, bottom)
        print (original_image[top:bottom, left:right].shape)

        # The crop that is saved and OCR'd comes from the resized image.
        group = resized_image[max(gY - 5, 0):gY + gH + 5, max(gX - 5, 0):gX + gW + 5]
        # NOTE(review): every region of this call writes to the same file
        # name, so only the last crop survives on disk.
        cv2.imwrite(f"test_temp\\{curr_time}.jpg",group)
        if class_id != 4:
            continue
        else:
            text = get_MICR_text(group)
        print (text)



In [38]:
def plot_all_possible_ROIs(img):
    """Save a copy of ``img`` with a box around every confident Tesseract hit.

    Detections with confidence above 60 are outlined in green. Drawing is done
    on a copy, so the caller's image stays untouched and can still be used for
    OCR afterwards.

    Args:
        img: image array passed to ``pytesseract.image_to_data``.
    """
    d = pytesseract.image_to_data(img, output_type=Output.DICT)
    annotated = img.copy()
    n_boxes = len(d['text'])
    for i in range(n_boxes):
        # Confidences may be strings such as '96.06'; float() parses them all.
        if float(d['conf'][i]) > 60:
            (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
            annotated = cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imwrite(f"C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\TT-Images\\ROI-{time.time()}.jpg",annotated)

In [39]:
import imutils,cv2
from skimage.segmentation import clear_border
from imutils import contours
from PIL import Image
# Tell pytesseract which Tesseract executable to launch.
# NOTE(review): hard-coded Windows install path — adjust (or remove, if the
# binary is on PATH) when running on another machine.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def predict(location):
    """Segment every image in ``location`` and post-process each class mask.

    For each image produced by ``test_generator`` the network predicts a
    per-pixel class map; the Tesseract ROI overlay is saved, and
    ``class_x_processing`` is run for class ids 0-4.

    Args:
        location: directory containing the images to process.
    """
    net.eval()
    for img_name, img, resized_image, original_image in test_generator(location):
        print (img_name)
        X = torch.tensor(img, dtype=torch.float32).to(device)
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            mask_pred = net(X)
        mask_prob = np.argmax(mask_pred.cpu().numpy(), axis=1)

        # Hand over a copy so any drawing done for the overlay cannot leak
        # into the image used for OCR below.
        plot_all_possible_ROIs(original_image.copy())
        # NOTE(review): the OCR result is currently discarded.
        get_text(original_image)

        # (1, H, W) -> (H, W, 1) so the first two axes index rows and columns.
        mask_prob = mask_prob.transpose((1, 2, 0))
        for i in range(5):
            class_x_processing(img,mask_prob,i, resized_image, original_image)
    

In [None]:
# Run the full pipeline on every image found in the "single_inference"
# directory (resolved relative to the current working directory).
test_img_dir = r"single_inference"
predict(test_img_dir)

In [None]:

    def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
        """Resize ``image`` to a target width or height, keeping the aspect ratio.

        Args:
            image: image array whose first two axes are (height, width).
            width: target width in pixels; takes precedence when both
                ``width`` and ``height`` are given.
            height: target height in pixels, used only when ``width`` is None.
            inter: OpenCV interpolation flag passed to ``cv2.resize``.

        Returns:
            The resized image, or ``image`` itself when neither ``width`` nor
            ``height`` is given.
        """
        (h, w) = image.shape[:2]

        if width is None and height is None:
            return image

        if width is None:
            # Scale by the height ratio.
            r = height / float(h)
            dim = (max(1, int(w * r)), height)
        else:
            # Scale by the width ratio.
            r = width / float(w)
            dim = (width, max(1, int(h * r)))

        # max(1, ...) above keeps the derived side from collapsing to 0,
        # which cv2.resize rejects.
        resized = cv2.resize(image, dim, interpolation = inter)

        return resized