In [19]:
import cv2
import math
from scipy import ndimage
import numpy as np
import os
import pytesseract
import re
import pandas as pd


In [2]:
CONFIDENCE_THRESHOLD= 0.2
NMS_THRESHOLD= 0.4

classFile = './obj.names'
classNames = []
with open(classFile, 'rt') as f:
    classNames = f.read().splitlines()
print(classNames) 

['nameline1', 'nameline2', 'addressline1', 'addressline2', 'UID', 'photo', 'date', 'ID']


In [3]:
configPath='./yolov4-obj.cfg'
weightsPath='./yolov4-obj_last.weights'

In [4]:
#net=cv2.dnn.readNet(weightsPath, configPath)

net=cv2.dnn.readNetFromDarknet(configPath,weightsPath)

net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


In [5]:
model= cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

In [10]:
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = os.path.join(folder,filename)
        if img is not None:
            images.append(img)
    return images

In [35]:
def rotated_images(list_of_images):
    rotated_images = []
    for img in f1:
        img = cv2.imread(img)
        img_gray=cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        gray_gaussian=cv2.GaussianBlur(img_gray, (3,3), 0)
    
        img_edges=cv2.Canny(gray_gaussian, 50, 150)

        lines=cv2.HoughLinesP(img_edges,1, math.pi/180.0, 100, minLineLength=100, maxLineGap=5)

        if lines is None:
            #if no angle found return the original image
            median_angle=0.0
        else:
            angles=[]
            for [[x1, y1, x2, y2]] in lines:
                #cv2.line(img_before,(x1, y1),(x2, y2), (255, 0, 0), 3)
                angle=math.degrees(math.atan2(y2-y1, x2-x1))
                angles.append(angle)

                median_angle=np.median(angles)
                #angle=angles[0]
        if median_angle == -90.0:
            img_rotated=img
        else:
            img_rotated=ndimage.rotate(img, median_angle)
        
        rotated_images.append(img_rotated)
        
    return rotated_images

In [29]:
folder="/home/webwerks/Desktop/test"
f1 = load_images_from_folder(folder)
f1

['/home/webwerks/Desktop/test/CustNID_1910050530046.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110040027.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110040026.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110050030.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110040019.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110040023.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110030041.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110050031.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110050027.jpg',
 '/home/webwerks/Desktop/test/CustNID_19110040022.jpg']

In [30]:
var = rotated_images(f1)

from else
[[[ 985  743 1091  742]]

 [[ 779  165  932  165]]

 [[1164  351 1186  604]]]
[-0.5405101871306666, 0.0, 85.0302592718897]
from else
[[[  8 586   8 484]]

 [[  8 292   8 186]]

 [[724 274 726 409]]

 [[553 424 677 422]]

 [[  8 173   8  11]]]
[-90.0, -90.0, 89.15123572844642, -0.9240453527727062, -90.0]
from else
[[[  8 587   8 485]]

 [[  8 122   8  11]]

 [[505 193 747 193]]

 [[373 191 569 191]]

 [[523 442 721 442]]

 [[354 399 354 205]]

 [[570 190 740 190]]

 [[367 443 542 443]]

 [[369 194 514 194]]

 [[752 319 752 202]]

 [[753 424 753 320]]]
[-90.0, -90.0, 0.0, 0.0, 0.0, -90.0, 0.0, 0.0, 0.0, -90.0, -90.0]
from else
[[[ 876  834 1012  836]]]
[0.8425242607404145]
from else
[[[  8 562   8 422]]

 [[  8 153   8  20]]

 [[275 251 278 140]]]
[-90.0, -90.0, -88.45184230102204]
from else
[[[  8 290   8 168]]

 [[391 403 610 400]]

 [[448 150 561 149]]

 [[  8 144   8  11]]

 [[427 284 626 281]]]
[-90.0, -0.7848246029918882, -0.5070290609147756, -90.0, -0.863690044599587]
fr

In [31]:
def img_to_text(x):
    
    data_from_image=[]
    
    for img_rotated in x:
        text_from_id=[]
        
        image=img_rotated.copy()
        classes, scores, boxes = model.detect(img_rotated, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

        list_label=['nameline1', 'nameline2', 'addressline1','addressline2','UID']
        for (box, classid) in zip(boxes, classes):
            if classNames[classid[0]] in list_label:
                label=classNames[classid[0]]
                x,y,l,b= box
                cropped=image[y:b+y, x:x+l]
                # cropped=cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
                text=pytesseract.image_to_string(cropped, lang='ara+en')
                text=re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff-\n]','',text)
                text_from_id.append(text)
        data = dict(zip(list_label, text_from_id))
        data_from_image.append(data)
    return data_from_image

In [32]:
var1 = img_to_text(var)
var1

[{'nameline1': '',
  'nameline2': '',
  'addressline1': '',
  'addressline2': 'مركز صان الحَجرَ  الشرقية',
  'UID': 'و" و رربو زرز.و”'},
 {'nameline1': 'إصحمود',
  'nameline2': 'محمد محمود عبدالفتاح',
  'addressline1': '٠ش\u200f ابوعاظف_بيرام سلطاء',
  'addressline2': 'البساتين  القاهره',
  'UID': '77 ااام 9'},
 {'nameline1': '',
  'nameline2': 'على محمد احمد السيد',
  'addressline1': '؟ اش \u200e٠\u200f الفززدى.',
  'addressline2': 'الخليفه  القاهره',
  'UID': '1 ل لل 11'},
 {'nameline1': '',
  'nameline2': 'محمود هدفى محمود',
  'addressline1': '> شن الزن الزاشيد',
  'addressline2': 'روض القرج  القاهرء',
  'UID': 'موا و موا'},
 {'nameline1': '',
  'nameline2': 'عباس محمد اسماعيل',
  'addressline1': '5 ش_ خيرت  لاظو غلى',
  'addressline2': ' ',
  'UID': 'ال 5511"'},
 {'nameline1': '',
  'nameline2': '',
  'addressline1': 'مساكن الزاوية الحمراءبلوك5مدخل ؛',
  'addressline2': 'الزاويه الحمزاء  القاهره',
  'UID': 'ا ا 7'},
 {'nameline1': '',
  'nameline2': '',
  'addressline1': '',
  'add

In [33]:
data = pd.DataFrame(var1)
data

Unnamed: 0,nameline1,nameline2,addressline1,addressline2,UID
0,,,,مركز صان الحَجرَ الشرقية,"و"" و رربو زرز.و”"
1,إصحمود,محمد محمود عبدالفتاح,٠ش‏ ابوعاظف_بيرام سلطاء,البساتين القاهره,77 ااام 9
2,,على محمد احمد السيد,؟ اش ‎٠‏ الفززدى.,الخليفه القاهره,1 ل لل 11
3,,محمود هدفى محمود,> شن الزن الزاشيد,روض القرج القاهرء,موا و موا
4,,عباس محمد اسماعيل,5 ش_ خيرت لاظو غلى,,"ال 5511"""
5,,,مساكن الزاوية الحمراءبلوك5مدخل ؛,الزاويه الحمزاء القاهره,ا ا 7
6,,,,باب الشعريه القاهره,8 ٠. ‏اه‎ال5.١ +0 941
7,,لسيد لح سيد العربىممدوااا,,بولاق الذكرور الجيزه,لاا وى زوع ولي نهل لا
8,,,3 ‏ش كعستجدتش النضص الطوايقفيصل‎ ١,| الهرم الجيزه,#و فى 7# وم رب ؟
9,,,,الخليفة القاهرة],11


In [34]:
data.to_csv('file.csv')