In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import pytesseract as pt
import matplotlib.pyplot as plt
import xml.etree.ElementTree as xet

from shutil import copy
from keras.models import Model
from keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from keras.applications import InceptionResNetV2
from keras.layers import Dense, Dropout, Flatten, Input

In [2]:
col = ['filepath','xmin','xmax','ymin','ymax']
rows = []

for k in range(248):    
    try:
        xmlparse = xet.parse('images/N'+str(k+1)+'.xml')
        root = xmlparse.getroot()
        for i in root:
            if i.tag=='object':
                for j in i:
                    if j.tag=='bndbox':
                        xmin = j.find("xmin").text
                        xmax = j.find("xmax").text
                        ymin = j.find("ymin").text
                        ymax = j.find("ymax").text
                        rows.append({"filepath":"images/N"+str(k+1)+".xml",
                                    "xmin":xmin,
                                    "xmax":xmax,
                                    "ymin":ymin,
                                    "ymax":ymax})
    except FileNotFoundError:
        continue
    
df = pd.DataFrame(rows,columns=col)
df.to_csv('output.csv',index=False)
df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,images/N1.xml,1093,1396,645,727
1,images/N2.xml,1804,2493,1734,1882
2,images/N3.xml,73,260,381,462
3,images/N4.xml,1326,1830,1464,1607
4,images/N5.xml,938,1173,709,762


In [3]:
filename = df['filepath'][0]
def getFilename(filename):
    filename_image = xet.parse(filename).getroot().find('filename').text
    filepath_image = os.path.join('images',filename_image)
    return filepath_image
getFilename(filename)

img_path = list(df['filepath'].apply(getFilename))
img_path

['images\\N1.jpeg',
 'images\\N2.jpeg',
 'images\\N3.jpeg',
 'images\\N4.jpeg',
 'images\\N5.jpeg',
 'images\\N6.jpeg',
 'images\\N7.jpeg',
 'images\\N8.jpeg',
 'images\\N9.jpeg',
 'images\\N11.jpeg',
 'images\\N12.jpeg',
 'images\\N14.jpeg',
 'images\\N15.jpeg',
 'images\\N16.jpeg',
 'images\\N17.jpeg',
 'images\\N18.jpeg',
 'images\\N19.jpeg',
 'images\\N20.jpeg',
 'images\\N21.jpeg',
 'images\\N22.jpeg',
 'images\\N23.jpeg',
 'images\\N24.jpeg',
 'images\\N25.jpeg',
 'images\\N27.jpeg',
 'images\\N28.jpeg',
 'images\\N30.jpeg',
 'images\\N31.jpeg',
 'images\\N32.jpeg',
 'images\\N33.jpeg',
 'images\\N34.jpeg',
 'images\\N35.jpeg',
 'images\\N36.jpeg',
 'images\\N37.jpeg',
 'images\\N38.jpeg',
 'images\\N40.jpeg',
 'images\\N42.jpeg',
 'images\\N43.jpeg',
 'images\\N44.jpeg',
 'images\\N45.jpeg',
 'images\\N46.jpeg',
 'images\\N47.jpeg',
 'images\\N48.jpeg',
 'images\\N49.jpeg',
 'images\\N50.jpeg',
 'images\\N51.jpeg',
 'images\\N52.jpeg',
 'images\\N53.jpeg',
 'images\\N54.jpeg',
 

In [4]:
data = []
output = []
for ind in range(len(img_path)):
    image = img_path[ind]
    img_arr = cv2.imread(image)
    h,w,d = img_arr.shape
    load_image = tf.keras.utils.load_img(image,target_size=(224,224))
    load_image_arr = tf.keras.utils.img_to_array(load_image)
    norm_load_image_arr = load_image_arr/255.0 # Normalization
    xmin = float(df['xmin'][ind])
    xmax = float(df['xmax'][ind])
    ymin = float(df['ymin'][ind])
    ymax = float(df['ymax'][ind])
    nxmin,nxmax = np.divide(xmin,w),np.divide(xmax,w)
    nymin,nymax = np.divide(ymin,h),np.divide(ymax,h)
    label_norm = (nxmin,nxmax,nymin,nymax) # Normalized output
    data.append(norm_load_image_arr)
    output.append(label_norm)

In [5]:
X = np.array(data,dtype=np.float32)
y = np.array(output,dtype=np.float32)

In [6]:
train_x,test_x,train_y,test_y = train_test_split(X,y,train_size=0.8,random_state=0)
train_x.shape,test_x.shape,train_y.shape,test_y.shape

((180, 224, 224, 3), (45, 224, 224, 3), (180, 4), (45, 4))

In [7]:
incep_resnet = InceptionResNetV2(weights="imagenet",include_top=False, input_tensor=Input(shape=(224,224,3)))
# ---------------------
mainmodel = incep_resnet.output
mainmodel = Flatten()(mainmodel)
mainmodel = Dense(500,activation="relu")(mainmodel)
mainmodel = Dense(250,activation="relu")(mainmodel)
mainmodel = Dense(4,activation='sigmoid')(mainmodel)

model = Model(inputs=incep_resnet.input,outputs=mainmodel)

In [8]:
# Complie model
model.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4))

In [9]:
tfb = TensorBoard('object_detection')
history = model.fit(x=train_x,y=train_y,batch_size=10,epochs=180,
                    validation_data=(test_x,test_y),callbacks=[tfb])

Epoch 1/180
Epoch 2/180
Epoch 3/180
Epoch 4/180
Epoch 5/180
Epoch 6/180
Epoch 7/180
Epoch 8/180