In [1]:
# Required Libraries

import os
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import easyocr
import cv2 as cv
import torch 
import torch.nn as nn
import torchvision
from torch.nn import init
from torch.optim import lr_scheduler
import functools
import torchvision.transforms as transforms
from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from math import sqrt
import time

  from .collection import imread_collection_wrapper


In [2]:
# Create the EasyOCR reader for English once and reuse it for every image;
# gpu=False requests CPU inference.
reader = easyocr.Reader(['en'],  gpu=False)

Using CPU. Note: This module is much faster with a GPU.


In [3]:
# def get_model_instance_segmentation(num_keypoints, weights_path=None):
       
#     model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=False,
#                                                                    pretrained_backbone=True,
#                                                                    num_keypoints=num_keypoints,
#                                                                    num_classes = 6) 
                                              

#     if weights_path:
#         state_dict = torch.load(weights_path)
#         model.load_state_dict(state_dict)        
        
#     return model

def get_model_instance_segmentation(num_classes, pretrained=True, hidden_layer=256):
    """Build a torchvision Mask R-CNN (ResNet-50 FPN) with heads resized for ``num_classes``.

    The box classifier and the mask predictor of the stock model are replaced
    by freshly initialised heads, so the returned model must be fine-tuned or
    have a matching checkpoint loaded before it is useful.

    Args:
        num_classes: Number of classes passed to both replacement heads.
            NOTE(review): torchvision detection heads normally count the
            background as one of the classes - confirm against the training code.
        pretrained: Forwarded to ``maskrcnn_resnet50_fpn``. The default (True)
            keeps the original behaviour. Pass False when a full checkpoint is
            loaded right after construction, so the stock weights are not
            fetched only to be overwritten.
        hidden_layer: Channel width of the replacement mask head (default 256,
            the value that used to be hard-coded).

    Returns:
        The Mask R-CNN model with both predictors replaced.
    """
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=pretrained)

    # Swap the box classification/regression head, keeping its input width.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Swap the mask head, keeping the channel count fed into it.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

In [5]:
# Build the detector with 6 output classes, then restore its weights from
# 'ocr_mask_rcnn.pth', remapping all tensors onto the CPU.
model = get_model_instance_segmentation(6)
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
model.load_state_dict(torch.load('ocr_mask_rcnn.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

In [6]:
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The point with the smallest x + y is taken as top-left and the one with
    the largest x + y as bottom-right; the smallest y - x gives top-right and
    the largest y - x gives bottom-left.

    Args:
        pts: NumPy array with one (x, y) point per row (intended for 4 rows).

    Returns:
        A (4, 2) float32 array holding the selected points in the order above.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)  # y - x for every point

    # Row indices listed in output order: tl, tr, br, bl.
    picks = [
        np.argmin(coord_sum),
        np.argmin(coord_diff),
        np.argmax(coord_sum),
        np.argmax(coord_diff),
    ]

    ordered = np.zeros((4, 2), dtype="float32")
    ordered[:] = pts[picks]
    return ordered

In [7]:
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    Args:
        image: Image array accepted by ``cv.warpPerspective``.
        pts: Corner points, in any order; they are sorted by ``order_points``.

    Returns:
        The perspective-corrected crop. Its width and height are the longer of
        the two opposing edge lengths, truncated to int.
    """
    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    src = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = src

    out_w = max(int(_edge_length(bottom_right, bottom_left)),
                int(_edge_length(top_right, top_left)))
    out_h = max(int(_edge_length(top_right, bottom_right)),
                int(_edge_length(top_left, bottom_left)))

    # Destination corners, in the same tl, tr, br, bl order as ``src``.
    right, bottom = out_w - 1, out_h - 1
    dst = np.array([[0, 0], [right, 0], [right, bottom], [0, bottom]], dtype="float32")

    transform = cv.getPerspectiveTransform(src, dst)
    return cv.warpPerspective(image, transform, (out_w, out_h))

In [8]:
def Alignment(bboxes, keypoints, image):
    """Rectify ``image`` using corner keypoints given as a flat coordinate list.

    Args:
        bboxes: Unused. Kept only so existing call sites keep working; the
            previous implementation converted it to ints and then discarded
            the result.
        keypoints: Flat iterable ``[x0, y0, x1, y1, ...]`` of corner
            coordinates. Values are truncated to int and negatives are
            clamped to 0.
        image: Image array passed through to ``four_point_transform``.

    Returns:
        The perspective-corrected image produced by ``four_point_transform``.
    """
    coords = [max(int(value), 0) for value in keypoints]

    # Even positions hold x values, odd positions hold y values.
    pts = np.array(list(zip(coords[0::2], coords[1::2])))

    return four_point_transform(image, pts)

In [9]:
def __make_power_2(img, base, method=Image.BICUBIC):
    """Resize ``img`` so that both sides are the nearest multiple of ``base``.

    Despite the name, the sides are rounded to multiples of ``base``, not to
    powers of two.

    Args:
        img: Object exposing ``size`` as (width, height) and a ``resize`` method.
        base: The multiple each side is rounded to.
        method: Resampling filter handed to ``img.resize``.

    Returns:
        ``img`` itself when it already has the target size, otherwise the
        resized image.
    """
    orig_w, orig_h = img.size
    new_w, new_h = (int(round(side / base) * base) for side in (orig_w, orig_h))

    if (new_w, new_h) != (orig_w, orig_h):
        return img.resize((new_w, new_h), method)

    return img

In [10]:
def get_transform(method):
    """Compose the preprocessing applied to an image before the generator.

    Steps, in order: snap both sides to a multiple of 4 using ``method`` as
    the resampling filter, convert to a tensor, then normalise each of the
    three channels with mean 0.5 and std 0.5.

    Args:
        method: Resampling filter forwarded to ``__make_power_2``.

    Returns:
        A ``transforms.Compose`` pipeline.
    """
    def _snap_to_multiple_of_4(img):
        return __make_power_2(img, base=4, method=method)

    return transforms.Compose([
        transforms.Lambda(_snap_to_multiple_of_4),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

In [11]:
def init_weights(net, init_type='normal', init_gain=0.02):
    """Initialise the weights of ``net`` in place and return it.

    Conv*/Linear* layers get their weights drawn according to ``init_type``
    and their biases zeroed. BatchNorm2d layers get weights from
    N(1, init_gain) and zero biases.

    Args:
        net: Module whose sub-modules are visited with ``net.apply``.
        init_type: One of 'normal', 'xavier', 'kaiming' or 'orthogonal'.
            Previously this argument was ignored and normal initialisation
            was always used; the default is unchanged.
        init_gain: Std for 'normal' (and for BatchNorm weights), gain for
            'xavier' and 'orthogonal'. Not used by 'kaiming'.

    Returns:
        The same ``net`` object.

    Raises:
        NotImplementedError: If ``init_type`` is not one of the supported names.
    """
    supported = ('normal', 'xavier', 'kaiming', 'orthogonal')
    if init_type not in supported:
        raise NotImplementedError('initialization method [%s] is not implemented' % init_type)

    def init_func(m):
        classname = m.__class__.__name__
        weight = getattr(m, 'weight', None)
        bias = getattr(m, 'bias', None)
        is_conv_or_linear = classname.find('Conv') != -1 or classname.find('Linear') != -1

        if weight is not None and is_conv_or_linear:
            if init_type == 'normal':
                init.normal_(weight.data, 0.0, init_gain)
            elif init_type == 'xavier':
                init.xavier_normal_(weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                init.kaiming_normal_(weight.data, a=0, mode='fan_in')
            else:  # 'orthogonal'
                init.orthogonal_(weight.data, gain=init_gain)

            if bias is not None:
                init.constant_(bias.data, 0.0)

        elif classname.find('BatchNorm2d') != -1:
            # With affine=False both parameters are None; nothing to initialise.
            if weight is not None:
                init.normal_(weight.data, 1.0, init_gain)
            if bias is not None:
                init.constant_(bias.data, 0.0)

    net.apply(init_func)

    return net

In [12]:
class ResnetBlock(nn.Module):
    """Residual block: two reflection-padded 3x3 convolutions with an identity skip.

    ``padding_type`` is accepted for signature compatibility, but the block
    always pads with ``nn.ReflectionPad2d(1)``.
    """

    def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Create the block for ``dim`` input/output channels."""
        super().__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)

    def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Assemble pad -> conv -> norm -> ReLU [-> Dropout(0.5)] -> pad -> conv -> norm.

        Args:
            dim: Channel count of both convolutions.
            padding_type: Ignored (reflection padding is always used).
            norm_layer: Callable returning a normalisation layer for ``dim`` channels.
            use_dropout: Insert ``nn.Dropout(0.5)`` after the first ReLU when true.
            use_bias: Whether the convolutions carry a bias term.

        Returns:
            An ``nn.Sequential`` holding the layers in the order above.
        """
        def padded_conv():
            # The explicit reflection pad supplies the border, so the conv itself uses padding=0.
            return [
                nn.ReflectionPad2d(1),
                nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=use_bias),
                norm_layer(dim),
            ]

        layers = padded_conv()
        layers.append(nn.ReLU(True))
        if use_dropout:
            layers.append(nn.Dropout(0.5))
        layers.extend(padded_conv())

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the output of the convolutional branch."""
        return x + self.conv_block(x)

In [13]:
class ResnetGenerator(nn.Module):
    """ResNet-style image-to-image generator.

    Layout: 7x7 stem, two stride-2 downsampling convolutions, ``n_blocks``
    residual blocks, two transposed-conv upsampling stages, and a 7x7 output
    convolution followed by Tanh.
    """

    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=True, n_blocks=9, padding_type='reflect'):
        """Build the generator.

        Args:
            input_nc: Channels of the input image.
            output_nc: Channels of the generated image.
            ngf: Filter count of the stem convolution.
            norm_layer: Callable returning a normalisation layer for a channel count.
            use_dropout: Forwarded to every ``ResnetBlock``.
            n_blocks: Number of residual blocks (must be >= 0).
            padding_type: Forwarded to every ``ResnetBlock``.
        """
        assert(n_blocks >= 0)
        super().__init__()

        # Every convolution followed by a norm layer is built without bias;
        # only the final output convolution keeps the default bias.
        use_bias = False
        n_downsampling = 2

        layers = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
            norm_layer(ngf),
            nn.ReLU(True),
        ]

        # Downsampling: each stage halves the resolution and doubles the channels.
        for stage in range(n_downsampling):
            ch_in = ngf * 2 ** stage
            ch_out = ch_in * 2
            layers.extend([
                nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=2, padding=1, bias=use_bias),
                norm_layer(ch_out),
                nn.ReLU(True),
            ])

        # Residual trunk at the lowest resolution.
        mult = 2 ** n_downsampling
        layers.extend(
            ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                        use_dropout=use_dropout, use_bias=use_bias)
            for _ in range(n_blocks)
        )

        # Upsampling: mirror of the downsampling stages.
        for stage in range(n_downsampling):
            mult = 2 ** (n_downsampling - stage)
            ch_out = int(ngf * mult / 2)
            layers.extend([
                nn.ConvTranspose2d(ngf * mult, ch_out,
                                   kernel_size=3, stride=2,
                                   padding=1, output_padding=1,
                                   bias=use_bias),
                norm_layer(ch_out),
                nn.ReLU(True),
            ])

        layers.extend([
            nn.ReflectionPad2d(3),
            nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
            nn.Tanh(),
        ])

        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the sequential generator."""
        return self.model(input)

In [14]:
class TestModel:
    """Inference wrapper around the pix2pix ``ResnetGenerator``."""

    def __init__(self):
        """Create a 3-to-3 channel generator and give it normal(0, 0.02) initial weights."""
        generator = ResnetGenerator(
            input_nc=3,
            output_nc=3,
            ngf=64,
            norm_layer=nn.BatchNorm2d,
            use_dropout=True,
            n_blocks=9,
            padding_type='reflect',
        )
        # init_weights modifies the module in place and hands the same object back.
        self.netG = init_weights(generator, 'normal', 0.02)

    def set_input(self, input):
        """Preprocess ``input`` and store it as a CPU batch of one in ``self.real``.

        Args:
            input: Image accepted by the pipeline from ``get_transform``.
        """
        self.transform_real = get_transform(Image.BICUBIC)
        tensor = self.transform_real(input).to(torch.device('cpu'))
        # Add the leading batch dimension the generator expects.
        self.real = tensor.unsqueeze(0)

    def forward(self):
        """Run the generator on ``self.real``; store and return the result as ``self.fake``."""
        generated = self.netG(self.real)
        self.fake = generated
        return generated

In [15]:
# Instantiate the generator wrapper; its weights are randomly initialised
# until a checkpoint is loaded into pix2pix.netG.
pix2pix = TestModel()

In [16]:
# Restore the generator weights from 'latest_net_G.pth' onto the CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
pix2pix.netG.load_state_dict(torch.load('latest_net_G.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

In [17]:
# Switch the Mask R-CNN to evaluation mode before running inference.
model.eval()

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [18]:
# Switch the generator to evaluation mode (it contains Dropout and BatchNorm2d
# layers, which behave differently in training mode).
pix2pix.netG.eval()

ResnetGenerator(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), bias=False)
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): ResnetBlock(
      (conv_block): Sequential(
        (0): ReflectionPad2d((1, 1, 1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), bias=False)
        (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): ReLU(inplace=True)
        (4): Dropout

In [19]:
# directory = 'C:/Users/muhammad.ispahani/Desktop/Data Science Project/CNIC OCR/Segmentation/All Labeled/images/sample2.jpg'

# final_img = cv.imread(directory)
# trans = transforms.Compose([transforms.ToTensor()])
# image = Image.open(directory).convert('RGB') 
# img = trans(image)

# Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())

In [20]:
# start_time_key = time.time()

# with torch.no_grad():
#     prediction = model([img.to(torch.device('cpu'))])
    
# keypoint_time = time.time() - start_time_key

In [21]:
# prediction

In [22]:
# cats = ['CNIC Front','CNIC Back','Not CNIC or SNIC','SNIC Front ','SNIC Back']
# category =  cats[int(prediction[0]['labels'][0])-1]

In [23]:
# category

In [24]:
# prob = round (int(prediction[0]['scores'][0]* 100),2)    
# print(category,str(prob)+"%")

In [25]:
# plt.plot(prediction[0]['boxes'][0][0], prediction[0]['boxes'][0][1], marker='v', color="red")
# plt.plot(prediction[0]['boxes'][0][2], prediction[0]['boxes'][0][3], marker='v', color="blue")
# plt.plot(prediction[0]['keypoints'][0][0][0], prediction[0]['keypoints'][0][0][1], marker='v', color="yellow")
# plt.plot(prediction[0]['keypoints'][0][1][0], prediction[0]['keypoints'][0][1][1], marker='v', color="purple")
# plt.plot(prediction[0]['keypoints'][0][2][0], prediction[0]['keypoints'][0][2][1], marker='v', color="pink")
# plt.plot(prediction[0]['keypoints'][0][3][0], prediction[0]['keypoints'][0][3][1], marker='v', color="black")
# plt.imshow(Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy()))
# plt.show()

In [26]:
# keypoints = [prediction[0]['keypoints'][0][0][0].numpy(), prediction[0]['keypoints'][0][0][1].numpy(), prediction[0]['keypoints'][0][1][0].numpy(), prediction[0]['keypoints'][0][1][1].numpy(), prediction[0]['keypoints'][0][2][0].numpy(), prediction[0]['keypoints'][0][2][1].numpy(), prediction[0]['keypoints'][0][3][0].numpy(), prediction[0]['keypoints'][0][3][1].numpy()]
# bboxes = prediction[0]['boxes'][0].numpy()

# start_time_align = time.time()
# warped = Alignment(bboxes, keypoints, final_img)
# alignment_time = time.time() - start_time_align

In [27]:
# plt.imshow(warped)

In [28]:
# # Opencv to PIL conversion for GAN model 

# color_coverted = cv.cvtColor(warped, cv.COLOR_BGR2RGB)
# pil_image = Image.fromarray(color_coverted).convert('RGB')

In [29]:
# plt.imshow(pil_image)

In [30]:
# pix2pix.set_input(pil_image)

In [31]:
# start_time_gan = time.time()

# with torch.no_grad():
#     pred = pix2pix.forward() 
    
# gan_time = time.time() - start_time_gan

In [32]:
# image_tensor = pred.data
# image_numpy = image_tensor[0].cpu().float().numpy()
# image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
# image_numpy = image_numpy.astype(np.uint8)
# image_pil = Image.fromarray(image_numpy)

In [33]:
# image_pil

In [34]:
# start_time_ocr = time.time()

# result = reader.readtext(image_numpy, detail=0, adjust_contrast=0.7, rotation_info=[90,180,270])

# ocr_time = time.time() - start_time_ocr

In [35]:
# result

In [36]:
# print('Total OCR Time End-to-End: ', keypoint_time + alignment_time + gan_time + ocr_time)

In [37]:
# Batch run over every entry in `directory`: Mask R-CNN detection, pix2pix
# clean-up of the full frame, then EasyOCR. Prints the recognised text and the
# summed stage timings for each image.
# NOTE(review): hard-coded absolute local path - point this at your own image folder.
directory = "C:/Users/muhammad.ispahani/Desktop/Test Images/Localization"

# Loop-invariant, so it is built once instead of on every iteration.
trans = transforms.Compose([transforms.ToTensor()])

for index, images in enumerate(os.listdir(directory)):

    image_path = os.path.join(directory, images)

    # cv.imread returns None instead of raising when the entry is not a
    # readable image (sub-folder, non-image file, ...). Skip it so one bad
    # entry does not abort the whole batch.
    final_img = cv.imread(image_path)
    if final_img is None:
        print('Index: ', index, 'Path: ', images)
        print('Skipped: not a readable image')
        print('--------------------------------------------------------------------------')
        continue

    image = Image.open(image_path).convert('RGB')
    img = trans(image)

    # --- Stage 1: detection -------------------------------------------------
    start_time_key = time.time()

    with torch.no_grad():
        prediction = model([img.to(torch.device('cpu'))])

    keypoint_time = time.time() - start_time_key

#     keypoints = [prediction[0]['keypoints'][0][0][0].numpy(), prediction[0]['keypoints'][0][0][1].numpy(), prediction[0]['keypoints'][0][1][0].numpy(), prediction[0]['keypoints'][0][1][1].numpy(), prediction[0]['keypoints'][0][2][0].numpy(), prediction[0]['keypoints'][0][2][1].numpy(), prediction[0]['keypoints'][0][3][0].numpy(), prediction[0]['keypoints'][0][3][1].numpy()]
    # The detector can return zero boxes; indexing [0] would then raise
    # IndexError. The box is only needed by the disabled alignment step below.
    boxes = prediction[0]['boxes']
    bboxes = boxes[0].numpy() if boxes.shape[0] > 0 else None

#     start_time_align = time.time()
#     warped = Alignment(bboxes, keypoints, final_img)
#     alignment_time = time.time() - start_time_align

    # --- Stage 2: GAN clean-up (alignment is disabled, so the full frame is used)
    color_coverted = cv.cvtColor(final_img, cv.COLOR_BGR2RGB)
    pil_image = Image.fromarray(color_coverted).convert('RGB')

    pix2pix.set_input(pil_image)

    start_time_gan = time.time()

    with torch.no_grad():
        pred = pix2pix.forward()

    gan_time = time.time() - start_time_gan

    # Map the generator output from [-1, 1] (Tanh) back to 0..255 HWC uint8.
    image_tensor = pred.data
    image_numpy = image_tensor[0].cpu().float().numpy()
    image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
    image_numpy = image_numpy.astype(np.uint8)
    image_pil = Image.fromarray(image_numpy)

    # --- Stage 3: OCR ---------------------------------------------------------
    start_time_ocr = time.time()

    result = reader.readtext(image_numpy, detail=0, adjust_contrast=0.7, rotation_info=[90,180,270])

    ocr_time = time.time() - start_time_ocr

    print('Index: ', index, 'Path: ', images)
    print(result)
    print('Total OCR Time End-to-End: ', keypoint_time + gan_time + ocr_time)
    print('--------------------------------------------------------------------------')

#     result = reader.readtext(os.path.join(directory, image), detail=0, rotation_info=[90, 180 ,270])

Index:  0 Path:  046223e4-2174521_3210169094134.JPG
['32101-6909413-4', '<', '4^', '3', '1', '4', 'T $"', '1', '1', '3', '{', '11/06/2012', '3', '1/06/2022', '1', 'I']
Total OCR Time End-to-End:  32.60904407501221
--------------------------------------------------------------------------
Index:  1 Path:  05550f4e-2172944_3210290255434.JPG
['130988', '8', '32102-9025543-4', '1', '1', ';', '16/04/2023', '16/04/2013', '8', '6']
Total OCR Time End-to-End:  34.804020404815674
--------------------------------------------------------------------------
Index:  2 Path:  06e72665-2177613_3320378236338.JPG
['PAKISTAN', 'National Identity', 'Card', '|', '1', 'Anaiat Bibi', '3', 'Husband Nani?', 'Ameer', '7', 'Giender', 'Pakistan', 'Wlentry NVumber', '1', '33203-7823633-8', '01.01.1966', '1', '1', '27.02.2017', '27.02.2027', '1', 'F']
Total OCR Time End-to-End:  44.87978720664978
--------------------------------------------------------------------------
Index:  3 Path:  0f9231ee-2171078_33303206668

Index:  28 Path:  6b4a69c5-2177344_3610399794874.JPG
['PAKISTAN', '1', '1']
Total OCR Time End-to-End:  27.22291111946106
--------------------------------------------------------------------------
Index:  29 Path:  6d01a7d8-2170978_5220193769374.JPG
['6', '3', '52201-9376937-4', '{', 'j :', '4', '{', '1955', '{']
Total OCR Time End-to-End:  30.058295011520386
--------------------------------------------------------------------------
Index:  30 Path:  7ae7ac68-2176850_3240213615426.JPG
['PAKISTAN', 'National idenity Card', '1', 'Name', 'Shamo Mai', 'Husband Nan', 'Muhammad Ramzan', '8', 'Eountry &fSiay', 'Gendef', '~uetsed', '1', 'Identity Numhei', '01.01.1961', '32402-1361542-6', '6', 'Date of Expiry', '1', 'Lifetime', '12.12.2017', '1', '6']
Total OCR Time End-to-End:  45.34150266647339
--------------------------------------------------------------------------
Index:  31 Path:  7d710c61-2171296_4410805767776.JPG
['1', "'", ';', '44108-0576777-6', '3', '1', '01/01/1938']
Total OCR Time