In [1]:
!set GOOGLE_APPLICATION_CREDENTIALS=["C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\OCRProject.json"]

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
from torch import optim
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Function, Variable
from pathlib import Path
from itertools import groupby
import json
import glob
import pytesseract

In [3]:
# Dataset locations (absolute, machine-specific paths; adjust when running elsewhere).
input_dir = r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\COCO\DIR\annotations"
train_img_dir = r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\COCO\DIR\annotations"
test_img_dir = r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\COCO\DIR\annotations\val\single_inference"

# Size every image is resized to before it is fed to the network.
WIDTH = 256
HEIGHT = 128
# Number of output channels of the segmentation network.
# NOTE(review): presumably 5 field classes + 1 background (a later cell treats label 5
# as "not a field") -- confirm against the training notebook.
category_num = 5 + 1

# The three settings below are not referenced by any cell visible in this notebook.
ratio = 8

epoch_num = 20
batch_size = 4

# Fall back to the CPU when CUDA is not available instead of failing on `.to(device)`.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [4]:
class double_conv(nn.Module):
    '''Two consecutive 3x3 conv -> batch norm -> ReLU stages.

    The first stage maps in_ch to out_ch channels, the second keeps out_ch.
    padding=1 keeps the spatial size unchanged.
    '''
    def __init__(self, in_ch, out_ch):
        super().__init__()
        # The position of each layer inside the Sequential determines its
        # state_dict key, so the conv/BN/ReLU order must stay exactly like this.
        stages = []
        for stage_in in (in_ch, out_ch):
            stages.append(nn.Conv2d(stage_in, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)


class inconv(nn.Module):
    '''Thin wrapper that applies a single double_conv block (in_ch -> out_ch).'''
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        return self.conv(x)


class down(nn.Module):
    '''Encoder step: 2x2 max pooling followed by a double_conv (in_ch -> out_ch).'''
    def __init__(self, in_ch, out_ch):
        super().__init__()
        pool = nn.MaxPool2d(2)
        convs = double_conv(in_ch, out_ch)
        # Index 0 = pooling, index 1 = convolutions (fixes the state_dict keys).
        self.mpconv = nn.Sequential(pool, convs)

    def forward(self, x):
        return self.mpconv(x)


class up(nn.Module):
    '''Decoder step: upsample x1 by 2, align the skip tensor x2 to it, concatenate
    both along the channel axis and run a double_conv (in_ch -> out_ch).

    in_ch is the channel count AFTER concatenation (x1 channels + x2 channels).
    With bilinear=False a transposed convolution on in_ch//2 channels is used
    for the upsampling instead of parameter-free bilinear interpolation.
    '''
    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()

        #  Learned upsampling (ConvTranspose2d) would be nice, but it adds weights;
        #  bilinear interpolation is the memory-friendly default.
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # Size mismatch between the upsampled tensor and the skip connection
        # (non-zero when a pooled dimension was odd). Negative values crop x2.
        diff_h = x1.size()[2] - x2.size()[2]
        diff_w = x1.size()[3] - x2.size()[3]
        # F.pad pads the LAST dimension first: (left, right, top, bottom).
        # The width difference therefore comes first, and `diff - diff // 2`
        # makes the two sides always add up to the full difference.
        x2 = F.pad(x2, (diff_w // 2, diff_w - diff_w // 2,
                        diff_h // 2, diff_h - diff_h // 2))
        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)
        return x


class outconv(nn.Module):
    '''Output head: a 1x1 convolution mapping in_ch feature channels to out_ch.'''
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

    
class UNet(nn.Module):
    '''U-shaped network: one input block, four down steps, four up steps that each
    consume the matching encoder output as skip connection, and a 1x1 output head.

    n_channels is the number of input channels, n_classes the number of output
    channels. Sub-module names (inc, down1..down4, up1..up4, outc) are part of the
    checkpoint format and must not change.
    '''
    def __init__(self, n_channels, n_classes):
        super().__init__()
        self.inc = inconv(n_channels, 64)
        # (in, out) channels of each encoder step
        encoder_plan = ((64, 128), (128, 256), (256, 512), (512, 512))
        for idx, (c_in, c_out) in enumerate(encoder_plan, start=1):
            setattr(self, f"down{idx}", down(c_in, c_out))
        # in = channels of the upsampled tensor + channels of the skip tensor
        decoder_plan = ((1024, 256), (512, 128), (256, 64), (128, 64))
        for idx, (c_in, c_out) in enumerate(decoder_plan, start=1):
            setattr(self, f"up{idx}", up(c_in, c_out))
        self.outc = outconv(64, n_classes)

    def forward(self, x):
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        out = self.down4(skip4)
        # Walk back up, pairing every decoder step with the encoder output
        # of the same resolution.
        decoder = (
            (self.up1, skip4),
            (self.up2, skip3),
            (self.up3, skip2),
            (self.up4, skip1),
        )
        for stage, skip in decoder:
            out = stage(out, skip)
        return self.outc(out)

In [5]:
# Build the segmentation network and restore the trained weights.
net = UNet(n_channels=3, n_classes=category_num).to(device)
# map_location remaps tensors that were saved on another device (e.g. a checkpoint
# written on a GPU) onto `device`, so loading also works where that device is absent.
net.load_state_dict(torch.load(r'C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\models\baseline_newdata.pth', map_location=device))

<All keys matched successfully>

In [6]:
def test_generator(location):
    """Yield one network-ready batch per image file found directly in `location`.

    Files matching *.jpg, *.jpeg and *.png are visited in that order. Each image
    is resized to (WIDTH, HEIGHT), converted from HWC to CHW, wrapped in a batch
    of one and scaled to [0, 1].

    Yields:
        (img_name, batch): the file path and a float32 array of shape
        (1, channels, HEIGHT, WIDTH).

    Files that OpenCV cannot decode are reported and skipped.
    """
    img_names = []
    for ext in ("jpg", "jpeg", "png"):
        img_names.extend(glob.glob(f"{location}/*.{ext}"))
    for img_name in img_names:
        img = cv2.imread(img_name)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None;
            # passing that on would only fail later inside cv2.resize.
            print(f"Skipping unreadable image: {img_name}")
            continue
        img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
        # HWC -> CHW
        img = img.transpose((2, 0, 1))
        yield img_name, np.asarray([img], dtype=np.float32) / 255

In [7]:
from pytesseract import Output

def get_all_ROIs(img):
    """Return the boxes of all Tesseract detections with a confidence above 60.

    Args:
        img: image passed straight to pytesseract.image_to_data.

    Returns:
        List of (x1, y1, x2, y2) tuples built from Tesseract's left/top/width/height.
    """
    d = pytesseract.image_to_data(img, output_type=Output.DICT)
    all_rois = []
    for conf, x, y, w, h in zip(d['conf'], d['left'], d['top'], d['width'], d['height']):
        # The confidence can arrive as an int, a float or a numeric string such as
        # '96.5' depending on the tesseract/pytesseract version; int('96.5') raises,
        # so go through float first. Truncation keeps the old integer comparison.
        if int(float(conf)) > 60:
            all_rois.append((x, y, x + w, y + h))
    return all_rois
    

In [8]:
def get_iou(bb1, bb2):
    """Intersection over union of two axis-aligned boxes.

    Args:
        bb1, bb2: boxes as (x1, y1, x2, y2) with x1 <= x2 and y1 <= y2.

    Returns:
        IoU as a float in [0, 1]; 0.0 when the boxes do not overlap or when both
        boxes are degenerate (zero area), where the ratio is undefined.
    """
    # determine the coordinates of the intersection rectangle
    x_left = max(bb1[0], bb2[0])
    y_top = max(bb1[1], bb2[1])
    x_right = min(bb1[2], bb2[2])
    y_bottom = min(bb1[3], bb2[3])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1])
    bb2_area = (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])

    union_area = bb1_area + bb2_area - intersection_area
    if union_area == 0:
        # Two zero-area boxes at the same spot: avoid dividing by zero.
        return 0.0

    iou = intersection_area / float(union_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [9]:
def reference_MICR_Codes(img):
    """Build the MICR character templates from a reference image.

    The reference image is binarised, its external contours are sorted left to
    right, and every extracted character ROI is resized to 36x36 and stored
    under its character name.

    Args:
        img: not used by the current implementation.

    Returns:
        dict mapping each name in charNames to its 36x36 template ROI.

    NOTE(review): this function relies on names that are not defined in the
    visible notebook -- `args["reference"]` (path of the reference image) and
    `extract_digits_and_symbols` -- and on `imutils` / `contours`, which are
    imported in a later cell. Confirm where they are supposed to come from.
    """
    charNames = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0","T", "U", "A", "D"]

    ref = cv2.imread(args["reference"])
    ref = cv2.cvtColor(ref, cv2.COLOR_BGR2GRAY)
    ref = imutils.resize(ref, width=400)
    # Inverted Otsu threshold: characters become white on a black background.
    ref = cv2.threshold(ref, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    refCnts = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
    refCnts = imutils.grab_contours(refCnts)
    refCnts = contours.sort_contours(refCnts, method="left-to-right")[0]

    refROIs = extract_digits_and_symbols(ref, refCnts,minW=10, minH=20)[0]
    chars = {}
    # loop over the reference ROIs
    for (name, roi) in zip(charNames, refROIs):
        roi = cv2.resize(roi, (36, 36))
        chars[name] = roi

    # Previously the templates were built and then thrown away.
    return chars

In [10]:
def detect_text(path):
    """Run Google Cloud Vision text detection on an image file.

    Prints every annotation with its bounding polygon and returns the text of
    the first annotation (an empty string when nothing was detected).

    Args:
        path: path of the image file to send.

    Returns:
        str: description of the first text annotation, or '' if there is none.

    Raises:
        Exception: when the API response carries an error message.
    """
    from google.cloud import vision
    import io
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    # google-cloud-vision 2.x exposes Image on the package itself; 1.x only
    # has it under vision.types. Support both.
    image_cls = vision.Image if hasattr(vision, 'Image') else vision.types.Image
    image = image_cls(content=content)

    response = client.text_detection(image=image)

    # Check for an API error before touching the (then meaningless) annotations.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    texts = response.text_annotations
    print('Texts:')

    for text in texts:
        print('\n"{}"'.format(text.description))

        vertices = (['({},{})'.format(vertex.x, vertex.y)
                    for vertex in text.bounding_poly.vertices])

        print('bounds: {}'.format(','.join(vertices)))

    # NOTE(review): per the Vision API docs the first annotation holds the whole
    # detected text and the following ones the individual words -- confirm.
    return texts[0].description if texts else ''
        
def get_google_text(img):
    """Save `img` to the scratch JPEG and hand that file to detect_text.

    Returns whatever detect_text returns for the written file.
    """
    scratch_path = "C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\temp.jpg"
    cv2.imwrite(scratch_path, img)
    return detect_text(scratch_path)

def get_text(img):
    """OCR `img` with Tesseract and return the recognised string.

    The image is first written to the scratch JPEG, then re-opened with PIL and
    passed to pytesseract with the options '-l eng --oem 1 --psm 3'.
    """
    scratch_path = "C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\temp.jpg"
    cv2.imwrite(scratch_path, img)
    tess_options = '-l eng --oem 1 --psm 3'
    return pytesseract.image_to_string(Image.open(scratch_path), config=tess_options)
    
def get_MICR_text(img):
    """Recognise a group of MICR characters by template matching.

    The external contours of `img` are sorted left to right, each character ROI
    is resized to 36x36 and compared against every template in `chars`; the name
    of the best-scoring template is taken as the character.

    Args:
        img: image of one character group (passed to cv2.findContours).

    Returns:
        str: the recognised characters joined in left-to-right order.

    NOTE(review): relies on the module-level names `charNames`, `chars` and
    `extract_digits_and_symbols`, none of which is defined in the visible
    notebook -- confirm where they are set up.
    """
    groupOutput = []

    charCnts = cv2.findContours(img.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
    charCnts = imutils.grab_contours(charCnts)
    charCnts = contours.sort_contours(charCnts,method="left-to-right")[0]

    # Use the image that was passed in; the old code referenced an undefined
    # (or leaked global) `group` here.
    (rois, locs) = extract_digits_and_symbols(img, charCnts)

    for roi in rois:
        scores = []
        roi = cv2.resize(roi, (36, 36))

        for charName in charNames:
            result = cv2.matchTemplate(roi, chars[charName],cv2.TM_CCOEFF)
            (_, score, _, _) = cv2.minMaxLoc(result)
            scores.append(score)
        # highest correlation wins
        groupOutput.append(charNames[np.argmax(scores)])
    return "".join(groupOutput)

def get_contours(gray):
    """Return the bounding boxes of the external contours found in `gray`.

    Only boxes wider AND taller than 15 pixels are kept. The result is a list of
    (x, y, w, h) tuples ordered by x (left to right).
    """
    found = cv2.findContours(gray.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = [cv2.boundingRect(cnt) for cnt in imutils.grab_contours(found)]
    # drop regions too small to be a group of characters
    kept = [(x, y, w, h) for (x, y, w, h) in boxes if w > 15 and h > 15]
    kept.sort(key=lambda box: box[0])
    return kept

def class_x_processing(img,mask_prob,class_id):
    """OCR every region of `img` that the segmentation labelled as `class_id`.

    A binary mask of the pixels equal to class_id is built, its large enough
    contour boxes are taken from get_contours, each box is cropped from `img`
    with a 5-pixel margin, shown, and read with get_text.

    Args:
        img: image the crops are taken from; indexed as img[rows, cols].
        mask_prob: per-pixel class labels; its first two axes are indexed as
            (row, col). NOTE(review): callers pass shape (HEIGHT, WIDTH, 1).
        class_id: label value to extract.

    Returns:
        list of the OCR strings, one per processed region, ordered left to right.

    Side effects: opens OpenCV windows and blocks on cv2.waitKey(0) for the mask
    and for every crop; prints each recognised text.
    """
    mask = np.zeros((HEIGHT, WIDTH),np.float32)
    indices = np.where(mask_prob==class_id)
    mask[indices[0],indices[1]] = 255

    # The mask only holds 0 and 255, so a plain cast yields the single-channel
    # 8-bit image that contour detection needs.
    gray = mask.astype("uint8")

    groupLocs = get_contours(gray)

    cv2.imshow('img',mask)
    cv2.waitKey(0)

    texts = []
    for (gX, gY, gW, gH) in groupLocs:
        # Clamp the margin at the image border: a negative start index would wrap
        # around and produce an empty or wrong crop.
        top = max(gY - 5, 0)
        left = max(gX - 5, 0)
        group = img[top:gY + gH + 5, left:gX + gW + 5]
        if group.size == 0:
            continue

        cv2.imshow('img',group)
        cv2.waitKey(0)
        text = get_text(group)
        print (text)
        texts.append(text)
    return texts

In [11]:
import imutils,cv2
from skimage.segmentation import clear_border
from imutils import contours
from PIL import Image
# Point pytesseract at the local Tesseract executable (machine-specific install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def predict(location):
    """Segment every image in `location` and OCR the regions of classes 0-4.

    For each batch from test_generator the network output is reduced to a
    per-pixel class map (argmax over the channel axis); the image is then
    re-read, resized to (WIDTH, HEIGHT) and handed to class_x_processing once
    per class id.

    Args:
        location: directory that test_generator scans for images.

    Returns:
        None; results are printed/shown by class_x_processing.
    """
    net.eval()
    for img_name, img in test_generator(location):
        X = torch.tensor(img, dtype=torch.float32).to(device)
        # Inference only: no autograd graph is needed, which saves memory.
        with torch.no_grad():
            mask_pred = net(X)
        class_scores = mask_pred.cpu().numpy()
        mask_prob = np.argmax(class_scores, axis=1)

        img = cv2.imread(img_name)
        img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)

        # (batch=1, H, W) -> (H, W, 1)
        mask_prob = mask_prob.transpose((1, 2, 0))

        # The full-image Tesseract pass (get_all_ROIs) was dropped here: its
        # result was never used.
        # NOTE(review): label 5 is skipped -- presumably background; confirm.
        for i in range(5):
            class_x_processing(img,mask_prob,i)
    

In [None]:
# Run the segmentation + OCR pipeline over every image found in test_img_dir.
predict(test_img_dir)

In [24]:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize `image` to a target width or height while keeping its aspect ratio.

    Args:
        image: array whose first two shape entries are (height, width).
        width: target width; takes precedence when both width and height are given.
        height: target height, used only when width is None.
        inter: OpenCV interpolation flag passed to cv2.resize.

    Returns:
        The resized image, or `image` itself when neither width nor height is given.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            # nothing requested
            return image
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    else:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))

    # cv2.resize expects the size as (width, height)
    return cv2.resize(image, target, interpolation = inter)

In [25]:
import imutils,cv2
from skimage.segmentation import clear_border
from imutils import contours
from PIL import Image
# Point pytesseract at the local Tesseract executable (machine-specific install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Inline variant of the pipeline: segment each image, treat every pixel whose
# label is not 5 as "field", crop each field region and OCR it with Tesseract.
sub_list = []
net.eval()
count = 0  # running index used to name the saved crops
for img_name, img in test_generator(os.path.join(test_img_dir,"downloaded")):
    X = torch.tensor(img, dtype=torch.float32).to(device)
    # Inference only: no autograd graph is needed, which saves memory.
    with torch.no_grad():
        mask_pred = net(X)
    mask_pred1 = mask_pred.cpu().detach().numpy()
    mask_prob = np.argmax(mask_pred1, axis=1)

    img = cv2.imread(img_name)
    img = cv2.resize(img, (WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)

    # Only needed by the commented-out IoU filter further down.
    all_ROIs = get_all_ROIs(img)

    # (batch=1, H, W) -> (H, W, 1)
    mask_prob = mask_prob.transpose((1, 2, 0))
    print (np.unique(mask_prob))

    mask = np.zeros((HEIGHT, WIDTH),np.float32)
    # NOTE(review): label 5 is excluded -- presumably the background class; confirm.
    indices = np.where(mask_prob!=5)
    mask[indices[0],indices[1]] = 255

    rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7))

    # The mask only holds 0 and 255, so a plain cast yields the single-channel
    # 8-bit image that contour detection needs.
    gray = mask.astype("uint8")


    groupCnts = cv2.findContours(gray.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)
    groupCnts = imutils.grab_contours(groupCnts)
    groupLocs = []

    for (i, c) in enumerate(groupCnts):
        (x, y, w, h) = cv2.boundingRect(c)
        # only accept the contour region as a grouping of characters if
        # the ROI is sufficiently large
        if w > 15 and h > 2:
            groupLocs.append((x, y, w, h))

#         if w > 15 and h > 15:
#             if len(all_ROIs)>0:
#                 for bbox in all_ROIs:
#                     val = get_iou((x, y, x+w, y+h), bbox)
#                     if val>0.1:
#                         groupLocs.append(bbox)

    # sort the region locations from left-to-right
    groupLocs = sorted(groupLocs, key=lambda x:x[0])


    for (gX, gY, gW, gH) in groupLocs:

        # Crop the region with a 5-pixel margin. The start offsets are clamped at
        # the image border: a negative start index would wrap around and give an
        # empty crop, which then fails with a division by zero in image_resize.
        top = max(gY - 5, 0)
        left = max(gX - 5, 0)
        group = img[top:gY + gH + 5, left:gX + gW + 5]
        if group.size == 0:
            continue
        group = image_resize(group,height=100)
        crop_path = f"C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\{count}.jpg"
        cv2.imwrite(crop_path,group)

        config = ('-l eng --oem 1 --psm 3')
        text = pytesseract.image_to_string(Image.open(crop_path),config=config)
        print (text,count)
        cv2.imshow('img',group)
        cv2.waitKey(0)
        count+=1

In [25]:
# JavaScript snippet for the `cheque-ocr` Node package, assembled as a Python string.
# The `+` after `path` was missing, which made this cell a SyntaxError.
# NOTE(review): `path` is not defined in any visible cell -- it must be set before
# this cell runs.
"""var chequeOCR = require('cheque-ocr'),
    fs = require('fs');

var image = fs.readFileSync("""+path+""");
chequeOCR(image, function(err, result) {
  console.log(err, result);
});"""

In [79]:
import os

# Show which credentials path the kernel process currently has in its environment
# (os.environ.get yields None when the variable is not set).
print('Credendtials from environ: {}'.format(os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')))

Credendtials from environ: C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\OCRProject.json


In [78]:
# setdefault assigns the path only when the variable is not already set; an existing
# value is left untouched. The value in effect is returned and echoed as the cell output.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS',r"C:\Users\masan\Downloads\IDRBT Cheque Image Dataset\OCRProject.json")

'C:\\Users\\masan\\Downloads\\IDRBT Cheque Image Dataset\\OCRProject.json'