In [1]:
import numpy as np
import copy
import os
import matplotlib.pyplot as plt
import cv2
import xml.etree.ElementTree as ET
import matplotlib.image as mimp
import keras
from keras.models import Sequential
from keras.layers import Dense,Conv2D,Reshape,LeakyReLU,MaxPooling2D,Dropout,Input, Flatten
from keras import backend as K
import tensorflow as tf
from sklearn.model_selection import train_test_split


Using TensorFlow backend.


In [2]:
img_w, img_h=640,480
grid_size=40
width=int(img_w/grid_size)
height=int(img_h/grid_size)
B=5
lamda_coord=5
lamda_noobj=0.5
classes={'WBC':0,'RBC':1}
n_classes=len(classes)
anno_path='/Users/hit.flouxetine/datasets/BCCD_Dataset/BCCD/Annotations'
image_path='/Users/hit.flouxetine/datasets/BCCD_Dataset/BCCD/JPEGImages'

In [3]:
def parse_annotation(anno_path,image_path):
    data=[]
    for each in sorted(os.listdir(anno_path)):
        image={}
        tree=ET.parse(anno_path+'/'+each)
        Object=[]
        for elem in tree.iter():
            if 'filename' in elem.tag:
                image['filename'] = image_path+'/' + elem.text
            if 'width' in elem.tag:
                image['width'] = int(elem.text)
            if 'height' in elem.tag:
                image['height'] = int(elem.text)
            if 'object' in elem.tag or 'part' in elem.tag:
                obj = {}
    
                for attr in list(elem):
                    if 'name' in attr.tag:
                        obj['name'] = attr.text
    
                    if 'bndbox' in attr.tag:
                        for dim in list(attr):
                            if 'xmin' in dim.tag:
                                obj['xmin'] = int(round(float(dim.text)))
                            if 'ymin' in dim.tag:
                                obj['ymin'] = int(round(float(dim.text)))
                            if 'xmax' in dim.tag:
                                obj['xmax'] = int(round(float(dim.text)))
                            if 'ymax' in dim.tag:
                                obj['ymax'] = int(round(float(dim.text)))
                Object.append(obj)
        image['object']=Object
        
        data.append(image)
    return data


In [4]:
def draw_boxes(boxes,img):
    for elem in boxes:
        img=cv2.rectangle(img,(elem['xmin'],elem['ymin']),(elem['xmax'],elem['ymax']), color=(255,0,0),thickness=2)
    return img

In [5]:
data=parse_annotation(anno_path, image_path)
N= len(data)
out=[]
x=[]
for elem in data:
      
    img=mimp.imread(elem['filename'])
    x.append(img)
    truth=np.zeros((height,width,n_classes+B*5))
    cnt=np.zeros((height,width)) #to count how many object has been put in cell (i,j)

    for obj in elem['object']:
            
        bbox_x_cen, bbox_y_cen=(obj['xmax']-obj['xmin'])//2+obj['xmin'],(obj['ymax']-obj['ymin'])//2+obj['ymin']
        bbox_width,bbox_height= (obj['xmax']-obj['xmin']),(obj['ymax']-obj['ymin'])
        
        
        
        #possition of the bbox
        pos_x,pos_y=bbox_x_cen//grid_size,bbox_y_cen//grid_size
        cnt[pos_y][pos_x]+=1 # increase number of object in cell (i,j) one unit
        
        if obj['name']=='RBC':
            cell_classes=[0,1]
        else:
            cell_classes=[1,0]
        
        #normalize bbox
        bbox_x_cen,bbox_y_cen=bbox_x_cen/grid_size,bbox_y_cen/grid_size
        bbox_width,bbox_height=bbox_width/img_w, bbox_height/img_h
        bbox_c_score=1
        
        #assign value of bbox to the grib cell
        if cnt[pos_y][pos_x]==0:
            bbox=[]
            bbox=[bbox_x_cen,bbox_y_cen,bbox_width,bbox_height,bbox_c_score]*B+cell_classes
            truth[pos_y][pos_x]=bbox
            cnt[pos_y][pos_x]+=1
        
    out.append(truth)

In [6]:
def IOU(a,b):
    '''
        input: two tensor with shape (batch, h, w, number_of_box, 5) represent each box in the grid cell
                each box has exactly 5 values which are x_center, y_center, width and height of it.
        output:a tensor is iou between those boxes
    '''
    bbox= K.variable([a[...,0]+a[...,2]/2,
                   a[...,0]-a[...,2]/2,
                   a[...,1]+a[...,3]/2,
                   a[...,1]-a[...,3]/2])
    groundtruth=K.variable([b[...,0]+b[...,2]/2,
                         b[...,0]-b[...,2]/2,
                         b[...,1]+b[...,3]/2,
                         b[...,1]-b[...,3]/2])
    interval_x=K.maximum(0.0,K.minimum(groundtruth[0],bbox[0])-K.maximum(groundtruth[1],bbox[1]))
    interval_y=K.maximum(K.minimum(groundtruth[2],bbox[2])-K.maximum(groundtruth[3],bbox[3]),0.0)
    
    bbox_s=a[...,2]*a[...,3]
    groundtruth_s=b[...,2]*b[...,3]
    union=interval_x*interval_y
    s=bbox_s+groundtruth_s-union
    
    return union/s

In [7]:
def custom_loss(y_true,y_pred):
    """
        shape: (batch,h,w,number_of_box*5+classes)
    """
    grid = np.array([ [[float(x),float(y),float(0),float(0),float(0)]]*B   for y in range(height) for x in range(width)])
    grid = K.variable(grid)
    grid = K.reshape(grid,(-1,height,width,B,5))
    
    bbox=y_pred[...,B*5] #shape(batch,height,width,B*5)
    groundtruth=y_true[...,B*5]#shape(batch,height,width,B*5)
    bbox=K.reshape(bbox,shape=(-1,height,width,B,5))#shape(batch,height,width,B,5)
    bbox=bbox+grid #normalize bounding box x,y to offset coordinate
    
    groundtruth=K.reshape(groundtruth,shape=(-1,height,width,B,5))#shape(batch,height,width,B,5)

    #compute IOU between bounding box and groundtruth of all cell. shape(batch,height,width,B). 
    iou=IOU(bbox,groundtruth)
    
    #find out max iou of each cell.
    maxx=tf.reduce_max(iou,axis=3,keepdims=True)  
    
    #create a mask of 0,1 represent which bounding box is used to predict. shape(batch,height,width,B)
    object_mask=tf.cast((maxx<=iou), dtype=tf.float32)    
    
    
    #coordinate loss
    #x_center,Y_center
    bbox_xy_coord=bbox[...,:2] #shape(batch,height,width,B,2)
    groundtruth_xy_coord=groundtruth[...,:2] #shape(batch, height,width,B,2)
    bbox_wh_coord=bbox[...,2:4] #shape(batch, height,width,B,2)
    groundtruth_wh_coord=groundtruth[...,2:4] #shape(batch, height,width,B,2)
    delta=K.sum(K.square(groundtruth_xy_coord-bbox_xy_coord),axis=4)
    xy=K.sum(delta*object_mask) 
    
    #width,height
    delta=K.sum(K.square(K.sqrt(groundtruth_wh_coord)-K.sqrt(bbox_wh_coord)),axis=4)
    wh=K.sum(delta*object_mask)
    coord_score=xy+wh
    
    #bounding box confidence score
    
    groundtruth_c_score=groundtruth[...,4]
    bbox_c_score=bbox[...,4]
    noobject_mask=tf.cast((maxx>iou),dtype=tf.float32)
    
    c_score=K.sum(K.square(groundtruth_c_score*iou-bbox_c_score)*object_mask)
    c_score+=lamda_noobj*K.sum(K.square(groundtruth_c_score-bbox_c_score)*noobject_mask)
    
    #class loss
    
    class_pred=y_pred[...,B*5:]
    class_truth=y_true[...,B*5:]
    
    cell_mask=y_true[...,4]
    
    class_loss=K.sum(K.square((class_truth-class_pred)*cell_mask))
    
    return lamda_coord*coord_score+c_score+class_loss

In [8]:
print(np.max(cnt))

1.0


In [9]:
model=Sequential()
model.add(Conv2D(filters=64,kernel_size=(3,3), input_shape=(img_h,img_w,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=64,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(3,3), strides=2))
    
model.add(Conv2D(filters=128, kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=128,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(3,3),strides=2))

model.add(Conv2D(filters=256, kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=256,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(3,3),strides=2))

model.add(Conv2D(filters=512, kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=512,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=512,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(3,3),strides=2))

model.add(Conv2D(filters=512, kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=512,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(filters=512,kernel_size=(3,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(3,3),strides=2))

model.add(Flatten())
model.add(Dense(25088))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(4096))
model.add(LeakyReLU(0.1))
model.add(Dense(units=height*width*(n_classes+5*B), activation='sigmoid'))
model.add(Reshape((height,width,n_classes+5*B)))

In [10]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 478, 638, 64)      1792      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 478, 638, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 476, 636, 64)      36928     
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 476, 636, 64)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 237, 317, 64)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 235, 315, 128)     73856     
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 235, 315, 128)    

In [11]:
def IOU(a,b):
    '''
        input: two tensor with shape (batch, h, w, number_of_box, 5) represent each box in the grid cell
                each box has exactly 5 values which are x_center, y_center, width and height of it.
        output:a tensor is iou between those boxes
    '''
    bbox= K.variable([a[...,0]+a[...,2]/2,
                   a[...,0]-a[...,2]/2,
                   a[...,1]+a[...,3]/2,
                   a[...,1]-a[...,3]/2])
    groundtruth=K.variable([b[...,0]+b[...,2]/2,
                         b[...,0]-b[...,2]/2,
                         b[...,1]+b[...,3]/2,
                         b[...,1]-b[...,3]/2])
    interval_x=K.maximum(0.0,K.minimum(groundtruth[0],bbox[0])-K.maximum(groundtruth[1],bbox[1]))
    interval_y=K.maximum(K.minimum(groundtruth[2],bbox[2])-K.maximum(groundtruth[3],bbox[3]),0.0)
    
    bbox_s=a[...,2]*a[...,3]
    groundtruth_s=b[...,2]*b[...,3]
    union=interval_x*interval_y
    s=bbox_s+groundtruth_s-union
    
    return union/s

In [12]:
model.compile(loss=custom_loss,optimizer='adam',metrics=['accuracy'])

In [16]:
x_train,x_test,y_train,y_test=train_test_split(x,out,test_size=0.2)

In [17]:
model.fit(np.array(x_train),np.array(y_train),epochs=100,batch_size=16,validation_data=(np.array(x_test),np.array(y_test)))

Train on 291 samples, validate on 73 samples
Epoch 1/100


FailedPreconditionError:  Error while reading resource variable _AnonymousVar39 from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/_AnonymousVar39/N10tensorflow3VarE does not exist.
	 [[node loss/reshape_1_loss/custom_loss/ReadVariableOp_2 (defined at /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3009) ]] [Op:__inference_keras_scratch_graph_4319]

Function call stack:
keras_scratch_graph
