In [1]:
import numpy as np
import pandas as pd
import random

import os
from glob import glob

import xml.etree.ElementTree as et
import cv2

from sklearn.model_selection import train_test_split
import tensorflow as tf




In [2]:
xml=glob('Object_detection_dataset\\annotations\\*.xml')

In [3]:
xml[0]

'Object_detection_dataset\\annotations\\road0.xml'

In [4]:
len(xml)

877

In [5]:
annotations_list=[]

for i in xml:
    
    root=et.parse(i).getroot()
    
    image_name=root.find('filename').text
    image_path=os.path.join('Object_detection_dataset\\images',image_name)
    
    label=root.find('object/name').text
    
    width=int(root.find('size/width').text)
    height=int(root.find('size/height').text)
    
    xmin=int(root.find('object/bndbox/xmin').text)
    ymin=int(root.find('object/bndbox/ymin').text)
    xmax=int(root.find('object/bndbox/xmax').text)
    ymax=int(root.find('object/bndbox/ymax').text)
    
    annotations_list.append([image_path,label,width,height,xmin,ymin,xmax,ymax])

In [6]:
cols=['image_path','label','width','height','xmin','ymin','xmax','ymax']
df=pd.DataFrame(annotations_list,columns=cols)

In [7]:
df.head()

Unnamed: 0,image_path,label,width,height,xmin,ymin,xmax,ymax
0,Object_detection_dataset\images\road0.png,trafficlight,267,400,98,62,208,232
1,Object_detection_dataset\images\road1.png,trafficlight,400,283,154,63,258,281
2,Object_detection_dataset\images\road10.png,trafficlight,400,267,106,3,244,263
3,Object_detection_dataset\images\road100.png,speedlimit,400,385,35,5,363,326
4,Object_detection_dataset\images\road101.png,speedlimit,400,200,195,7,392,194


In [8]:
df['label'].unique()

array(['trafficlight', 'speedlimit', 'crosswalk', 'stop'], dtype=object)

In [9]:
dictionary={'trafficlight':0,'speedlimit':1,'crosswalk':2,'stop':3}

df['label']=[dictionary[i] for i in df['label']]

In [10]:
df.head()

Unnamed: 0,image_path,label,width,height,xmin,ymin,xmax,ymax
0,Object_detection_dataset\images\road0.png,0,267,400,98,62,208,232
1,Object_detection_dataset\images\road1.png,0,400,283,154,63,258,281
2,Object_detection_dataset\images\road10.png,0,400,267,106,3,244,263
3,Object_detection_dataset\images\road100.png,1,400,385,35,5,363,326
4,Object_detection_dataset\images\road101.png,1,400,200,195,7,392,194


In [11]:
max(df['width'].values)

400

In [12]:
max(df['height'].values)

400

In [13]:
df['r_width']=250
df['r_height']=250

df['r_xmin']=(df['r_width']/df['width'])*df['xmin']
df['r_ymin']=(df['r_height']/df['height'])*df['ymin']
df['r_xmax']=(df['r_width']/df['width'])*df['xmax']
df['r_ymax']=(df['r_height']/df['height'])*df['ymax']

In [14]:
df.head()

Unnamed: 0,image_path,label,width,height,xmin,ymin,xmax,ymax,r_width,r_height,r_xmin,r_ymin,r_xmax,r_ymax
0,Object_detection_dataset\images\road0.png,0,267,400,98,62,208,232,250,250,91.7603,38.75,194.756554,145.0
1,Object_detection_dataset\images\road1.png,0,400,283,154,63,258,281,250,250,96.25,55.65371,161.25,248.233216
2,Object_detection_dataset\images\road10.png,0,400,267,106,3,244,263,250,250,66.25,2.808989,152.5,246.254682
3,Object_detection_dataset\images\road100.png,1,400,385,35,5,363,326,250,250,21.875,3.246753,226.875,211.688312
4,Object_detection_dataset\images\road101.png,1,400,200,195,7,392,194,250,250,121.875,8.75,245.0,242.5


In [15]:
df['r_xmin']=[int(round(i)) for i in df['r_xmin'].values]
df['r_ymin']=[int(round(i)) for i in df['r_ymin'].values]
df['r_xmax']=[int(round(i)) for i in df['r_xmax'].values]
df['r_ymax']=[int(round(i)) for i in df['r_ymax'].values]

df['new_bb_coordinates']=df[['r_xmin','r_ymin','r_xmax','r_ymax']].values.tolist()

In [16]:
df.head()

Unnamed: 0,image_path,label,width,height,xmin,ymin,xmax,ymax,r_width,r_height,r_xmin,r_ymin,r_xmax,r_ymax,new_bb_coordinates
0,Object_detection_dataset\images\road0.png,0,267,400,98,62,208,232,250,250,92,39,195,145,"[92, 39, 195, 145]"
1,Object_detection_dataset\images\road1.png,0,400,283,154,63,258,281,250,250,96,56,161,248,"[96, 56, 161, 248]"
2,Object_detection_dataset\images\road10.png,0,400,267,106,3,244,263,250,250,66,3,152,246,"[66, 3, 152, 246]"
3,Object_detection_dataset\images\road100.png,1,400,385,35,5,363,326,250,250,22,3,227,212,"[22, 3, 227, 212]"
4,Object_detection_dataset\images\road101.png,1,400,200,195,7,392,194,250,250,122,9,245,242,"[122, 9, 245, 242]"


In [17]:
df.loc[140:150]

Unnamed: 0,image_path,label,width,height,xmin,ymin,xmax,ymax,r_width,r_height,r_xmin,r_ymin,r_xmax,r_ymax,new_bb_coordinates
140,Object_detection_dataset\images\road224.png,1,300,400,146,192,159,205,250,250,122,120,132,128,"[122, 120, 132, 128]"
141,Object_detection_dataset\images\road225.png,1,300,400,162,212,172,222,250,250,135,132,143,139,"[135, 132, 143, 139]"
142,Object_detection_dataset\images\road226.png,1,300,400,134,175,156,197,250,250,112,109,130,123,"[112, 109, 130, 123]"
143,Object_detection_dataset\images\road227.png,1,300,400,141,181,153,193,250,250,118,113,128,121,"[118, 113, 128, 121]"
144,Object_detection_dataset\images\road228.png,1,300,400,156,179,172,196,250,250,130,112,143,122,"[130, 112, 143, 122]"
145,Object_detection_dataset\images\road229.png,1,300,400,120,171,140,191,250,250,100,107,117,119,"[100, 107, 117, 119]"
146,Object_detection_dataset\images\road23.png,0,266,400,216,126,242,179,250,250,203,79,227,112,"[203, 79, 227, 112]"
147,Object_detection_dataset\images\road230.png,1,300,400,124,193,138,207,250,250,103,121,115,129,"[103, 121, 115, 129]"
148,Object_detection_dataset\images\road231.png,1,300,400,128,195,149,216,250,250,107,122,124,135,"[107, 122, 124, 135]"
149,Object_detection_dataset\images\road232.png,1,300,400,127,221,142,234,250,250,106,138,118,146,"[106, 138, 118, 146]"


In [18]:
def resize_save(old_image_path):
    img=cv2.resize(cv2.imread(old_image_path),(250,250))
    new_image_path=os.path.join('Object_detection_dataset\\images_resized',os.path.split(old_image_path)[1])
    
    cv2.imwrite(new_image_path,img)
    
    return new_image_path

In [19]:
df['new_image_path']=[resize_save(i) for i in df['image_path'].values]

In [20]:
df.head()

Unnamed: 0,image_path,label,width,height,xmin,ymin,xmax,ymax,r_width,r_height,r_xmin,r_ymin,r_xmax,r_ymax,new_bb_coordinates,new_image_path
0,Object_detection_dataset\images\road0.png,0,267,400,98,62,208,232,250,250,92,39,195,145,"[92, 39, 195, 145]",Object_detection_dataset\images_resized\road0.png
1,Object_detection_dataset\images\road1.png,0,400,283,154,63,258,281,250,250,96,56,161,248,"[96, 56, 161, 248]",Object_detection_dataset\images_resized\road1.png
2,Object_detection_dataset\images\road10.png,0,400,267,106,3,244,263,250,250,66,3,152,246,"[66, 3, 152, 246]",Object_detection_dataset\images_resized\road10...
3,Object_detection_dataset\images\road100.png,1,400,385,35,5,363,326,250,250,22,3,227,212,"[22, 3, 227, 212]",Object_detection_dataset\images_resized\road10...
4,Object_detection_dataset\images\road101.png,1,400,200,195,7,392,194,250,250,122,9,245,242,"[122, 9, 245, 242]",Object_detection_dataset\images_resized\road10...


In [21]:
#flipping the image
def flip_image_bounding(image,bb,flip_code):
        
    flipped_image=cv2.flip(image,flip_code)
    
    if flip_code==1:
        bb[0],bb[2]=image.shape[1]-bb[2],image.shape[1]-bb[0]
    
    elif flip_code==0:
        bb[1],bb[3]=image.shape[0]-bb[3],image.shape[0]-bb[1]
    
    return flipped_image,bb


#rotating the image
def rotate_image_bounding(image,bb,angle):
    
    rotation_matrix=cv2.getRotationMatrix2D((image.shape[1]/2,image.shape[0]/2),angle,1)
    rotated_image=cv2.warpAffine(image,rotation_matrix,(image.shape[1],image.shape[0]))
    
    t=cv2.transform(np.array([[bb[0],bb[1]],[bb[2],bb[3]]]).reshape(-1,1,2),rotation_matrix)
    
  
    bb[0],bb[1]=t[0][0]
    bb[2],bb[3]=t[1][0]
    
    return rotated_image,bb


#performing the random crop
def random_crop(image,bb,crop_size):
    
    if image.shape[0]<crop_size[0] or image.shape[1]<crop_size[1]:
        return image,bb
    
    max_x=image.shape[1]-crop_size[1]
    max_y=image.shape[0]-crop_size[0]
    
    x=np.random.randint(0,max_x+1)
    y=np.random.randint(0,max_y+1)
    
    cropped_image=image[y:y+crop_size[0],x:x+crop_size[1]]
    
    bb[0]=max(0,bb[0]-x)
    bb[1]=max(0,bb[1]-y)
    bb[2]=min(crop_size[1],bb[2]-x)
    bb[3]=min(crop_size[0],bb[3]-x)
    
    return cropped_image,bb
              
              
#performing center crop
def center_crop(image,bb,crop_size):
    
              
    if image.shape[0]<crop_size[0] or image.shape[1]<crop_size[1]:
        return image,bb
    
    center_x=image.shape[1]//2
    center_y=image.shape[0]//2
              
    x=center_x-crop_size[1]//2
    y=center_y-crop_size[0]//2
              
    cropped_image=image[y:y+crop_size[0],x:x+crop_size[1]]
    
    bb[0]=max(0,bb[0]-x)
    bb[1]=max(0,bb[1]-y)
    bb[2]=min(crop_size[1],bb[2]-x)
    bb[3]=min(crop_size[0],bb[3]-y)
    
    return cropped_image,bb

In [22]:
def image_transformations(image_path,bb,flip=True,rotate=True,rcrop=True,ccrop=True):
    image=cv2.imread(image_path)
    w=image.shape[1]
    h=image.shape[0]
    
    if flip:
        code=np.random.choice([0, 1])
        image,bb=flip_image_bounding(image,bb,code)
    
    if rotate:
        ang=np.random.randint(1,360)
        image,bb=rotate_image_bounding(image,bb,ang)
        
    if rcrop:
        size=[224,224]
        image,bb=random_crop(image,bb,size)
        
    if ccrop:
        size=[224,224]
        image,bb=center_crop(image,bb,size)
        
    
    image=cv2.resize(image,(224,224))
    bb[0]=int(round((224/w)*bb[0]))
    bb[1]=int(round((224/h)*bb[1]))
    bb[2]=int(round((224/w)*bb[2]))
    bb[3]=int(round((224/h)*bb[3]))
    
    return image,bb

In [23]:
def normalize_image(image):
    image=tf.image.convert_image_dtype(image, tf.float32)
    n_image=image/255.0
    return n_image

In [24]:
x=df[['new_image_path','new_bb_coordinates']]
y=df[['label']]

In [25]:
df.loc[142]

image_path                  Object_detection_dataset\images\road226.png
label                                                                 1
width                                                               300
height                                                              400
xmin                                                                134
ymin                                                                175
xmax                                                                156
ymax                                                                197
r_width                                                             250
r_height                                                            250
r_xmin                                                              112
r_ymin                                                              109
r_xmax                                                              130
r_ymax                                                          

In [26]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0,stratify=y)

In [27]:
def preprocess_image(image_path,bb,label,transforms):
    image=cv2.imread(image_path)
    
    if transforms:
        image,bb=image_transformations(image_path,bb,flip=np.random.choice([True,False]),rotate=np.random.choice([True,False]),rcrop=np.random.choice([True,False]),ccrop=np.random.choice([True,False]))
    
    n_image=normalize_image(image)
    
    return n_image,bb,label

In [28]:
train=[preprocess_image(i,j,k,transforms=True) for i,j,k in zip(x_train['new_image_path'],x_train['new_bb_coordinates'],y_train['label'])]
test=[preprocess_image(i,j,k,transforms=True) for i,j,k in zip(x_test['new_image_path'],x_test['new_bb_coordinates'],y_test['label'])]

In [29]:
train_images,train_bb,train_labels=zip(*train)
test_images,test_bb,test_labels=zip(*test)

In [30]:
train_images=np.array(train_images)
train_bb=np.array(train_bb)
train_labels=np.array(train_labels)

test_images=np.array(test_images)
test_bb=np.array(test_bb)
test_labels=np.array(test_labels)

In [31]:
image_input=tf.keras.layers.Input(shape=(224,224,3),name='image_input')

feature_extraction=tf.keras.layers.Conv2D(32,(3,3),activation='relu')(image_input)
feature_extraction=tf.keras.layers.MaxPool2D((2,2))(feature_extraction)
#feature_extraction=tf.keras.layers.Dropout(0.3)(feature_extraction)

fexture_extraction=tf.keras.layers.Conv2D(64,(3,3),activation='relu')(feature_extraction)
feature_extraction=tf.keras.layers.MaxPool2D((2,2))(feature_extraction)
#feature_extraction=tf.keras.layers.Dropout(0.3)(feature_extraction)

# fexture_extraction=tf.keras.layers.Conv2D(128,(3,3),activation='relu')(feature_extraction)
# feature_extraction=tf.keras.layers.MaxPool2D((2,2))(feature_extraction)
#feature_extraction=tf.keras.layers.Dropout(0.3)(feature_extraction)

# fexture_extraction=tf.keras.layers.Conv2D(256,(3,3),activation='relu')(feature_extraction)
# feature_extraction=tf.keras.layers.MaxPool2D((2,2))(feature_extraction)

feature_extraction=tf.keras.layers.Flatten()(feature_extraction)





In [32]:
#bb_output=tf.keras.layers.Dropout(0.3)(feature_extraction)
bb_output=tf.keras.layers.Dense(32,activation='relu')(feature_extraction)
#bb_output=tf.keras.layers.Dense(128,activation='relu')(bb_output)
# bb_output=tf.keras.layers.Dense(64,activation='relu')(bb_output)
bb_output=tf.keras.layers.Dense(4,activation='linear',name='bb_output')(bb_output)

In [33]:
#label_output=tf.keras.layers.Dropout(0.5)(feature_extraction)
label_output=tf.keras.layers.Dense(32,activation='relu')(feature_extraction)
#label_output=tf.keras.layers.Dense(128,activation='relu')(label_output)
# label_output=tf.keras.layers.Dense(64,activation='relu')(label_output)
label_output=tf.keras.layers.Dense(4,activation='softmax',name='label_output')(label_output)

In [34]:
model=tf.keras.models.Model(inputs=image_input,outputs=[bb_output,label_output])

In [35]:
model.compile(optimizer='adam',
             loss={'bb_output':'mse','label_output':'sparse_categorical_crossentropy'},
              metrics={'bb_output':'mae','label_output':'accuracy'})




In [36]:
model.fit(train_images,{'bb_output':train_bb,'label_output':train_labels},
         validation_data=[test_images,{'bb_output':test_bb,'label_output':test_labels}],
          epochs=10)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1a4970f2350>

In [37]:
predictions=model.predict(test_images)



In [38]:
predicted_boxes=predictions[0]

In [39]:
predicted_labels=np.argmax(predictions[1],axis=1)

In [40]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
print("Accuracy is: ",accuracy_score(test_labels,predicted_labels))
print("Precision is: ",precision_score(test_labels,predicted_labels,average='weighted'))
print("Recall is: ",recall_score(test_labels,predicted_labels,average='weighted'))
print("F1 score is: ",f1_score(test_labels,predicted_labels,average='weighted'))

Accuracy is:  0.7443181818181818
Precision is:  0.5540095557851239
Recall is:  0.7443181818181818
F1 score is:  0.6352161681966243


  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
iou_scores=[]

def iou_score_calculator(bb1,bb2):
    
    area_1=(bb1[2]-bb1[0])*(bb1[3]-bb1[1])
    area_2=(bb2[2]-bb2[0])*(bb2[3]-bb2[1])
    
    x1=max(bb1[0],bb2[0])
    y1=max(bb1[1],bb2[1])
    x2=min(bb1[2],bb2[2])
    y2=min(bb1[3],bb2[3])
    
    if x2<x1 or y2<y1:
        iou_scores.append(0)
        return 0
    
    i=(x2-x1)*(y2-y1)
    u=area_1+area_2-i
    
    iou_scores.append(i/u)
    
    return 0

In [42]:
for bb1,bb2 in zip(test_bb,predicted_boxes):
    iou_score_calculator(bb1,bb2)

In [43]:
print("Average IOU score: ",sum(iou_scores)/len(iou_scores))

Average IOU score:  0.022397791039548334
