In [1]:
from lxml import etree as ET
import numpy as np
import numpy as np
import tensorflow as tf
import keras.backend as K
from PIL import Image


# Class names exactly as they appear in the annotation XML <name> tags, listed
# in label order; the position in this tuple is the integer class id.
_CLASS_NAMES = (
    '正常',          # normal (no prohibited item)
    '铁壳打火机',    # metal-case lighter
    '黑钉打火机',    # black-nail lighter
    '刀具',          # knives / cutting tools
    '电源和电池',    # power supplies and batteries
    '剪刀',          # scissors
)

# Lookup table: annotation class name -> integer label (0..5).
dic = {name: index for index, name in enumerate(_CLASS_NAMES)}


Using TensorFlow backend.


In [10]:
class dataset:
    """Reads a VOC2007-style directory and builds RPN training inputs.

    The constructor takes the dataset root, resolves the annotation, image and
    file-list directories under it, and loads the training sample names into
    ``self.TrainList``.  The remaining methods parse one annotation file,
    generate anchors for that image, and derive per-anchor match labels and
    box offsets.
    """

    # Sub-paths appended to the dataset root given to __init__.
    labelDir = '/VOC2007/Annotations/'
    imageDir = '/VOC2007/JPEGImages/'
    fileListDir = '/VOC2007/ImageSets/Main/'

    def __init__(self, dir):
        """Resolve the dataset directories under ``dir`` and load the train list.

        Args:
            dir: dataset root (string); the class-level sub-paths are appended
                to it by plain string concatenation.  The name shadows the
                builtin but is kept because it is part of the call signature.
        """
        self.labelDir = dir + self.labelDir
        self.imageDir = dir + self.imageDir
        self.fileListDir = dir + self.fileListDir
        self.__getTrainList()
        # Pixel distance between neighbouring anchor centres.
        self.rpn_stride = 8

    def XMLreader(self, fileName):
        """Parse ``<labelDir>/<fileName>.xml`` and cache the result in ``self.info``.

        ``self.info`` receives the keys ``filename``, ``width``, ``height``,
        ``bboxSE`` (rows of ``xmin, ymin, xmax, ymax, label``) and ``bboxCWH``
        (rows of ``centre_x, centre_y, width, height, label``).  Both box
        arrays always have shape ``(n_objects, 5)``, including ``(0, 5)`` when
        the annotation holds no ``<object>``.

        Args:
            fileName: annotation file name without directory or extension.

        Returns:
            The parsed element tree.

        Raises:
            KeyError: if an object's ``<name>`` is not a key of ``dic``.
        """
        path = self.labelDir + fileName + '.xml'
        tree = ET.parse(path)
        size_node = tree.find('size')
        info = {
            'filename': tree.find('filename').text,
            'width': float(size_node.find('width').text),
            'height': float(size_node.find('height').text),
        }
        bboxSE = []
        bboxCWH = []
        for ob in tree.findall('object'):
            box_node = ob.find('bndbox')
            xmin, ymin, xmax, ymax = (
                float(box_node.find(tag).text)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            label = dic[ob.find('name').text]
            bboxCWH.append(
                [(xmin + xmax) / 2, (ymin + ymax) / 2, xmax - xmin, ymax - ymin, label]
            )
            bboxSE.append([xmin, ymin, xmax, ymax, label])
        # reshape(-1, 5) keeps the arrays two-dimensional even with no objects,
        # so downstream `[:, :4]` slicing does not raise.
        info['bboxSE'] = np.array(bboxSE, dtype=float).reshape(-1, 5)
        info['bboxCWH'] = np.array(bboxCWH, dtype=float).reshape(-1, 5)
        self.info = info
        return tree

    def __getTrainList(self):
        """Load the sample names listed in ``<fileListDir>train.txt``.

        Stores the names in ``self.TrainList``.  Blank lines (including the one
        produced by a trailing newline) are skipped so that no empty name is
        later turned into a ``.xml`` / ``.jpg`` path.
        """
        ListDir = self.fileListDir + 'train.txt'
        with open(ListDir, mode='r') as reader:
            self.TrainList = [line.strip() for line in reader if line.strip()]

    def anchor_gen(self, SE=1):
        """Generate the anchors for the image described by ``self.info``.

        Nine anchor shapes (three scales x three ratios) are placed at every
        multiple of ``self.rpn_stride`` that fits in the image.  Both layouts
        are also stored on the instance as ``self.anchorsSE`` and
        ``self.anchorsCWH``.

        Args:
            SE: truthy -> return corner layout ``x_start, y_start, x_end, y_end``;
                falsy -> return ``centre_x, centre_y, width, height``.

        Returns:
            Array of shape ``(n_anchors, 4)`` in the requested layout.
        """
        size_X = self.info['width']
        size_Y = self.info['height']
        rpn_stride = self.rpn_stride
        a = 32
        scales = [a, a * 2, a * 4]
        rations = [0.5, 1, 2]

        scales, rations = np.meshgrid(scales, rations)
        scales, rations = scales.flatten(), rations.flatten()
        # Solving  X * Y = scale**2  and  Y / X = ratio  gives the two lines
        # below, so every ratio of a given scale covers the same area.
        scaleY = scales * np.sqrt(rations)
        scaleX = scales / np.sqrt(rations)

        shiftX = np.arange(0, int(size_X / rpn_stride)) * rpn_stride
        shiftY = np.arange(0, int(size_Y / rpn_stride)) * rpn_stride
        shiftX, shiftY = np.meshgrid(shiftX, shiftY)
        # meshgrid flattens its inputs, so each result is (9, n_positions).
        centerX, anchorX = np.meshgrid(shiftX.ravel(), scaleX)
        centerY, anchorY = np.meshgrid(shiftY.ravel(), scaleY)
        anchor_center = np.stack([centerX, centerY], axis=2).reshape(-1, 2)
        anchor_size = np.stack([anchorX, anchorY], axis=2).reshape(-1, 2)
        boxes1 = np.concatenate(
            [anchor_center - 0.5 * anchor_size, anchor_center + 0.5 * anchor_size],
            axis=1,
        )
        boxes2 = np.concatenate([anchor_center, anchor_size], axis=1)
        # Cache both layouts before returning (previously these assignments sat
        # after the return and the CWH slot was given the corner layout).
        self.anchorsSE = boxes1
        self.anchorsCWH = boxes2
        return boxes1 if SE else boxes2

    def IOU(self, bboxs, anchors):
        """Intersection-over-union of every box against every anchor.

        Args:
            bboxs: boxes in corner layout, reshapeable to ``(n_boxes, 2, 2)``
                as ``[[x_start, y_start], [x_end, y_end]]``.
            anchors: anchors in the same layout, ``(n_anchors, 2, 2)``.

        Returns:
            Float array of shape ``(n_boxes, n_anchors)``.  Pairs whose union
            area is not positive get 0.
        """
        gt = np.asarray(bboxs, dtype=float).reshape(-1, 2, 2)
        anc = np.asarray(anchors, dtype=float).reshape(-1, 2, 2)
        gt_start, gt_end = gt[:, None, 0, :], gt[:, None, 1, :]        # (B, 1, 2)
        anc_start, anc_end = anc[None, :, 0, :], anc[None, :, 1, :]    # (1, N, 2)

        # Per-axis overlap length; min/max handles shared edges and identical
        # boxes, which a purely strict-inequality case split scores as 0.
        overlap = np.minimum(gt_end, anc_end) - np.maximum(gt_start, anc_start)
        overlap = np.clip(overlap, 0, None)
        overarea = overlap[..., 0] * overlap[..., 1]                   # (B, N)

        gt_area = np.prod(gt_end - gt_start, axis=-1)                  # (B, 1)
        anc_area = np.prod(anc_end - anc_start, axis=-1)               # (1, N)
        union = gt_area + anc_area - overarea
        return np.divide(
            overarea, union, out=np.zeros_like(overarea), where=union > 0
        )

    def rpn_bbox(self, bboxs, anchors, input_rpn_match, IOU):
        """Normalised anchor-minus-box offsets for the positive anchors.

        Each anchor is paired with the box that has the highest IoU with it.
        Anchors whose match label is not 1 get an all-zero row.

        NOTE(review): the start-corner offsets are divided by the image
        width/height and the end-corner offsets by the anchor's own second row;
        this scaling is kept exactly as it was - confirm it is the intended
        parameterisation.

        Args:
            bboxs: boxes, reshapeable to ``(n_boxes, 2, 2)``.
            anchors: anchors, reshapeable to ``(n_anchors, 2, 2)``.
            input_rpn_match: per-anchor label; entries equal to 1 are positive.
            IOU: ``(n_boxes, n_anchors)`` overlap matrix.

        Returns:
            Array of shape ``(n_anchors, 4)``.
        """
        anc = np.asarray(anchors, dtype=float).reshape(-1, 2, 2)
        gt = np.asarray(bboxs, dtype=float).reshape(-1, 2, 2)
        n_anchors = len(anc)
        if len(gt) == 0:
            # No boxes to regress towards.
            return np.zeros((n_anchors, 4))

        # Index of the best-overlapping box for every anchor.
        overlaps = np.asarray(IOU, dtype=float).reshape(len(gt), n_anchors)
        best_box = np.argmax(overlaps, axis=0)
        matched = gt[best_box]                                          # (N, 2, 2)

        positive = np.equal(np.asarray(input_rpn_match).reshape(-1), 1)
        anchorsDiff = (anc - matched) * positive[:, None, None]

        scale = np.empty_like(anc)
        scale[:, 0, :] = [self.info['width'], self.info['height']]
        scale[:, 1, :] = anc[:, 1, :]
        return (anchorsDiff / scale).reshape(-1, 4)

    def rpnInputData(self, filename):
        """Build the RPN targets for one sample.

        Parses the annotation, generates corner-layout anchors and labels each
        anchor from its best IoU: 1 when >= 0.6, -1 when <= 0.3, 0 otherwise.
        With no annotated boxes every anchor is labelled -1.

        Args:
            filename: sample name without directory or extension.

        Returns:
            ``(match, offsets)`` with shapes ``(1, n_anchors, 1)`` and
            ``(1, n_anchors, 4)``.
        """
        self.XMLreader(filename)
        anchors = self.anchor_gen().reshape(-1, 2, 2)
        bboxs = np.array(self.info['bboxSE'][:, :4]).reshape(-1, 2, 2)
        IOU = self.IOU(bboxs, anchors)
        if len(bboxs):
            best_iou = IOU.max(axis=0)
        else:
            # max over an empty axis is undefined; treat as zero overlap.
            best_iou = np.zeros(len(anchors))
        input_rpn_match = (
            np.greater_equal(best_iou, 0.6) * 1
            + np.less_equal(best_iou, 0.3) * (-1)
        )
        rpn_bbox = self.rpn_bbox(bboxs, anchors, input_rpn_match, IOU)
        return input_rpn_match.reshape(1, -1, 1), rpn_bbox.reshape(1, -1, 4)

    def inputImgData(self, filename):
        """Load ``<imageDir>/<filename>.jpg`` as an array with a leading batch axis.

        The decoded image must be three-dimensional; the result is indexed as
        ``img[np.newaxis, :, :, :]``.
        """
        path = self.imageDir + filename + '.jpg'
        # Context manager releases the file handle once the pixels are copied.
        with Image.open(path) as handle:
            img = np.array(handle)
        return img[np.newaxis, :, :, :]

    def gen_batch(self,):
        """Shuffle the training list in place and return data for its first sample.

        NOTE(review): this previously wrapped an unconditional ``return`` in
        ``while 1``, which suggests a generator (``yield``) was intended.  The
        single-return behaviour is kept so existing callers are unaffected.

        Returns:
            ``([match, offsets, image], [])`` for one sample.
        """
        TL = self.TrainList
        np.random.shuffle(TL)  # in place: TL is self.TrainList itself
        sample_name = TL[0]
        input_rpn_match, rpn_bbox = self.rpnInputData(sample_name)
        img = self.inputImgData(sample_name)
        return [np.array(input_rpn_match), np.array(rpn_bbox), np.array(img)], []


In [11]:
# Root of the VOC-style dataset. This is an absolute path on one machine, so it
# must be edited before the notebook is run anywhere else.
Dir = '/home/araragiero/kuaidi/data/VOC-kd'
# Building the dataset object also reads the training file list from disk.
data = dataset(Dir)

In [12]:
# Scratch inspection of the training sample at index 4: RPN targets, then the
# boxes and anchors in centre/size layout, then the IoU matrix computed from
# the corner layout.
sample_name = data.TrainList[4]
input_rpn_match, rpn_bbox = data.rpnInputData(sample_name)

# Re-parse the annotation so data.info describes this sample.
data.XMLreader(sample_name)

gt_cwh = data.info['bboxCWH'][:, :4]
bboxs = np.array(gt_cwh).reshape(-1, 2, 2)
anchors = data.anchor_gen(SE=0).reshape(-1, 2, 2)

gt_se = np.array(data.info['bboxSE'][:, :4]).reshape(-1, 2, 2)
anchors_se = data.anchor_gen(SE=1).reshape(-1, 2, 2)
IOU = data.IOU(gt_se, anchors_se)

In [22]:
# Assemble one mini-batch by hand: shuffle the training list, then collect the
# RPN match labels (i), box offsets (r) and image (m) of the first `batchsize`
# samples.
batchsize = 20
TL = data.TrainList
np.random.shuffle(TL)  # in place: TL is the same list object as data.TrainList
i = []
r = []
m = []
for b in range(batchsize):
    sample_name = TL[b]
    input_rpn_match, rpn_bbox = data.rpnInputData(sample_name)
    img = data.inputImgData(sample_name)
    i.append(np.array(input_rpn_match))
    r.append(rpn_bbox)
    m.append(img)


In [23]:
# Display the list of per-sample RPN match arrays collected by the batch loop.
i

[array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
         ...,
         [-1],
         [-1],
         [-1]]]), array([[[-1],
         [-1],
         [-1],
 

In [None]:
# Convert the per-sample lists into arrays and bundle them as (inputs, targets).
# A bare `return` is a SyntaxError at cell level, so the bundle is assigned to
# a name instead.
# NOTE(review): np.array only produces a regular array when every sample has
# the same shape - confirm that all images in the batch share one size.
i = np.array(i)
r = np.array(r)
m = np.array(m)
batch = ([i, r, m], [])