In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import cv2
import os
import glob

In [2]:

# Load every image in `img_dir`, resized to IMAGE_SIZE x IMAGE_SIZE, into the list X.
IMAGE_SIZE = 224  # side length, in pixels, that every image is resized to
img_dir = "archives/images/" # Enter Directory of all images
data_path = os.path.join(img_dir,'*g')  # any file whose name ends in "g" (e.g. .png, .jpg, .jpeg)
files = glob.glob(data_path)
# Sort alphabetically so the i-th image lines up with the i-th (also sorted)
# XML annotation file that holds its bounding box.
files.sort()
if not files:
    # Fail here rather than much later, when the empty dataset reaches the split/fit cells.
    raise FileNotFoundError(f"No image files matched {data_path!r}")

X=[]
for f1 in files:
    img = cv2.imread(f1)
    if img is None:
        # cv2.imread reports an unreadable/unsupported file by returning None, not by raising.
        # Skipping the file would shift every later image against its annotation, so stop instead.
        raise ValueError(f"Could not read image file {f1!r}")
    img = cv2.resize(img, (IMAGE_SIZE,IMAGE_SIZE))
    X.append(img)

In [3]:
from lxml import etree
def resizeannotation(f):
    """Read one XML annotation file and rescale its bounding box to the resized image.

    The file is expected to contain a ``size`` element (with ``width`` and
    ``height`` children) and at least one ``object/bndbox`` element (with
    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` children). When several ``size``
    or ``bndbox`` elements are present, the last one of each is used.

    Args:
        f: Path (or file-like object) accepted by ``etree.parse``.

    Returns:
        ``[xmax, ymax, xmin, ymin]`` as ints, expressed in the coordinate
        system of an ``IMAGE_SIZE`` x ``IMAGE_SIZE`` image. Note the order:
        the maximum corner comes first.

    Raises:
        ValueError: If the file has no ``size`` or no ``object/bndbox`` element.
    """
    tree = etree.parse(f)

    sizes = tree.xpath("size")
    boxes = tree.xpath("object/bndbox")
    if not sizes:
        raise ValueError("Annotation {!r} has no <size> element".format(f))
    if not boxes:
        raise ValueError("Annotation {!r} has no <object>/<bndbox> element".format(f))

    # Use the last match of each, which is what iterating over all matches ends up with.
    size, box = sizes[-1], boxes[-1]
    width = int(size.xpath("width")[0].text)
    height = int(size.xpath("height")[0].text)

    # Original pixels per resized pixel along each axis.
    x_ratio = width/IMAGE_SIZE
    y_ratio = height/IMAGE_SIZE

    xmin = int(box.xpath("xmin")[0].text)/x_ratio
    ymin = int(box.xpath("ymin")[0].text)/y_ratio
    xmax = int(box.xpath("xmax")[0].text)/x_ratio
    ymax = int(box.xpath("ymax")[0].text)/y_ratio
    return [int(xmax), int(ymax), int(xmin), int(ymin)]

In [4]:
# Build the target list y: one rescaled bounding box per annotation file,
# visited in sorted file-name order.
path = 'archives/annotations'
text_files = ['archives/annotations/' + xml_name for xml_name in sorted(os.listdir(path))]
y = [resizeannotation(xml_path) for xml_path in text_files]

In [5]:
# Stack the Python lists into arrays and scale both by 1/255.
# The boxes are divided by 255 as well (not by IMAGE_SIZE); the inference cell
# multiplies predictions by 255 to undo this.
X = np.array(X) / 255
y = np.array(y) / 255

In [6]:
from sklearn.model_selection import train_test_split

# First hold out 20% of the samples as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# ... then carve 10% of what is left into a validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=1,
)

In [7]:
from keras.models import Sequential

from keras.layers import Dense, Flatten

from keras.applications.vgg16 import VGG16

In [8]:
# Create the model: ImageNet-pretrained VGG16 convolutional base followed by a
# small dense head that outputs the 4 bounding-box values in [0, 1].
model = Sequential([
    VGG16(weights="imagenet", include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(4, activation="sigmoid"),
])

# The model has six layers, so the first one is the VGG16 base; freeze it so
# only the dense head is trained.
model.layers[0].trainable = False

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               3211392   
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 260       
Total params: 17,951,108
Trainable params: 3,236,420
Non-trainable params: 14,714,688
____________________________________

In [9]:
# The network regresses 4 continuous box values, so track mean absolute error.
# 'accuracy' is a classification metric and carries no meaning for this output.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])


In [None]:
# Train on the training split and report loss/metrics on the validation split
# after every epoch; `train` holds the object returned by fit().
train = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=50,
    verbose=1,
)

Epoch 1/50


In [None]:
# Write the model to 'my_model_ocr_.h5' in the current working directory.
model.save('my_model_ocr_.h5') 

In [None]:
# NOTE: this cell previously held a stray line with unbalanced parentheses (a SyntaxError).
# It has been cleared so the notebook can run from top to bottom.

In [None]:
# Reload the model from 'my_model_ocr_.h5' (the file written by the save cell),
# replacing the in-memory `model`.
from keras.models import load_model
model = load_model('my_model_ocr_.h5')

In [None]:
# Prepare one image for inference, with the same resize and 1/255 scaling as the training images.
image1 = cv2.imread('archives/images/cars0.png')
if image1 is None:
    # cv2.imread returns None (it does not raise) when the file is missing or unreadable.
    raise FileNotFoundError("Could not read 'archives/images/cars0.png'")
# Use the shared IMAGE_SIZE constant so the input always matches the model's input_shape.
image1 = cv2.resize(image1, (IMAGE_SIZE, IMAGE_SIZE))
# Batch of one: shape (1, IMAGE_SIZE, IMAGE_SIZE, 3).
Xs = np.array([image1])
Xs = Xs/255

In [None]:
# OpenCV loads colour images in BGR channel order while Matplotlib expects RGB;
# convert before displaying so the colours are not swapped.
plt.imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))

In [None]:
# Run the network on the one-image batch; the final Dense(4) layer yields one
# row of 4 sigmoid outputs per input image.
y_cnn = model.predict(Xs)

In [None]:
# Undo the 1/255 target scaling here so this cell does not depend on a variable
# that is only assigned further down the notebook.
# Values follow resizeannotation's order: xmax, ymax, xmin, ymin.
ny = y_cnn[0]*255
print(int(ny[0]),int(ny[1]),int(ny[2]),int(ny[3]))

In [None]:
# Draw the predicted bounding box on the image and display it.
plt.figure(figsize=(20,40))

plt.axis('off')
# Undo the 1/255 target scaling; order is xmax, ymax, xmin, ymin.
ny = y_cnn[0]*255
# cv2.rectangle draws in place, so draw on a copy to keep `image1` unmodified
# and this cell safe to re-run.
imagesdi = cv2.rectangle(image1.copy(),(int(ny[0]),int(ny[1])),(int(ny[2]),int(ny[3])),(0, 255, 0))
# Show the annotated image, converted from OpenCV's BGR order to the RGB order Matplotlib expects.
plt.imshow(cv2.cvtColor(imagesdi, cv2.COLOR_BGR2RGB))

In [None]:
import pytesseract

# Save the annotated image, then read it back as a single-channel grayscale
# image (imread flag 0). Note that `image1` is rebound to that grayscale image.
cv2.imwrite("output.png", imagesdi)
image1 = cv2.imread('output.png', 0)

# Otsu picks the threshold automatically; the inverted binary mask is flipped back with 255 - mask.
otsu_inverted_flags = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
inverted_mask = cv2.threshold(image1, 0, 255, otsu_inverted_flags)[1]
thresh1 = 255 - inverted_mask
print(thresh1)


In [None]:
# Crop the predicted box out of the thresholded image and OCR it.
# `ny` is ordered xmax, ymax, xmin, ymin. Sorting each axis keeps the slice
# non-empty even if the network predicts the two corners the other way round.
row_start, row_stop = sorted((int(ny[3]), int(ny[1])))
col_start, col_stop = sorted((int(ny[2]), int(ny[0])))
ROI = thresh1[row_start:row_stop, col_start:col_stop]

# Point pytesseract at the default Windows install only when it actually exists;
# elsewhere keep pytesseract's default, which looks for `tesseract` on the PATH.
windows_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.exists(windows_tesseract):
    pytesseract.pytesseract.tesseract_cmd = windows_tesseract
data = pytesseract.image_to_string(ROI, lang='eng',config='--psm 6')


In [None]:
from pytesseract import Output

# Run Tesseract on `image1` and keep the text of the entry at index 4.
extracted_text = []
d = pytesseract.image_to_data(image1, output_type=Output.DICT)
n_boxes = len(d['level'])  # number of entries Tesseract reported

entry_text = d['text'][4]
print(entry_text)
extracted_text.append(entry_text)
print(extracted_text)

In [None]:
from pytesseract import Output
# `image` is never defined in this notebook; the image in scope at this point is `image1`.
d = pytesseract.image_to_data(image1, output_type=Output.DICT)

In [None]:
!pip install tesseract-ocr

In [None]:
# OCR the whole annotated image saved earlier as output.png.
image_path_in_colab='output.png'
# pytesseract accepts a file path directly, which avoids the PIL `Image` name
# that this notebook never imports.
extractedInformation = pytesseract.image_to_string(image_path_in_colab)
print(extractedInformation)

In [None]:
# Print the raw pixel values of the cropped, thresholded region that was passed to Tesseract.
print(ROI)