| @@ -0,0 +1,110 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Fri Mar 23 02:25:45 2018 | ||
| @author: ubuntu | ||
| """ | ||
|
|
||
| import random | ||
| import cv2 | ||
| import numpy as np | ||
|
|
||
class DatasetFetch():
    """Loads a detection dataset description file and serves image/label batches.

    The file at ``datasetParams['path']`` holds one record per line:
    ``imagePath xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 ...``
    """

    def __init__(self, commonParams, datasetParams):
        '''
        Args:
            commonParams: dict with 'image_size', 'batch_size',
                'max_objects_per_image'
            datasetParams: dict with 'path', 'thread_num'
        '''
        self.dataPath = str(datasetParams['path'])
        self.width = int(commonParams['image_size'])
        self.height = int(commonParams['image_size'])
        self.batchSize = int(commonParams['batch_size'])
        self.threadNum = int(datasetParams['thread_num'])
        self.maxObjects = int(commonParams['max_objects_per_image'])

        self.recordList = []

        # BUGFIX: use a context manager so the file handle is always
        # released (the original opened the file and never closed it).
        with open(self.dataPath, 'r') as inputFile:
            for line in inputFile:
                line = line.strip()
                ss = line.split(' ')
                # First token is the image path; the rest are numeric
                # box coordinates and class ids.
                ss[1:] = [float(num) for num in ss[1:]]
                self.recordList.append(ss)

    def recordProcess(self, record):
        '''
        Record image info. -- path object coordinates.
        Args:
            record: imagePath xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2
        Returns:
            image: 3-D ndarray (RGB, resized to self.height x self.width)
            labels: 2-D list [self.maxObjects, 5] --> [xCenter, yCenter, w, h, classNum]
            objectNum: int of total object number
        '''
        image = cv2.imread(record[0])
        # BUGFIX: cv2.imread returns None on failure; comparing an ndarray
        # with "== None" is elementwise/ambiguous, so use an identity check.
        if image is None:
            raise Exception("Couldn't load image!")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h = image.shape[0]
        w = image.shape[1]

        widthRate = self.width * 1.0 / w
        heightRate = self.height * 1.0 / h

        # cv2.resize expects dsize as (width, height). Both are equal here
        # because image_size is square, but keep the correct order.
        image = cv2.resize(image, (self.width, self.height))

        # Independent rows (no shared sublist references).
        labels = [[0, 0, 0, 0, 0] for _ in range(self.maxObjects)]
        i = 1
        objectNum = 0

        while i < len(record):
            xmin = record[i]
            ymin = record[i + 1]
            xmax = record[i + 2]
            ymax = record[i + 3]
            classNum = record[i + 4]
            # Real position on the fixed-size (resized) image.
            xcenter = (xmin + xmax) * 1.0 / 2 * widthRate
            ycenter = (ymin + ymax) * 1.0 / 2 * heightRate

            boxW = (xmax - xmin) * widthRate
            boxH = (ymax - ymin) * heightRate

            labels[objectNum] = [xcenter, ycenter, boxW, boxH, classNum]
            objectNum += 1

            i += 5
            if objectNum >= self.maxObjects:
                break

        return [image, labels, objectNum]

    def batch(self):
        '''
        Get batch.

        NOTE(review): reads from self.imageLabelQueue, which is not created
        anywhere in this class -- presumably the thread pool that feeds it
        is set up elsewhere; confirm before use.

        Returns:
            images: 4-D ndarray [batch size, h, w, 3], scaled to [-1, 1]
            labels: 3-D ndarray [batch size, max objects, 5]
            objectsNum: 1-D ndarray [batch size]
        '''
        images = []
        labels = []
        objectsNum = []

        for i in range(self.batchSize):
            image, label, objectNum = self.imageLabelQueue.get()
            images.append(image)
            labels.append(label)
            objectsNum.append(objectNum)

        images = np.asarray(images, dtype=np.float32)
        # Normalize pixel values from [0, 255] to [-1, 1].
        images = images / 255 * 2 - 1
        labels = np.asarray(labels, dtype=np.float32)
        objectsNum = np.asarray(objectsNum, dtype=np.float32)
        return images, labels, objectsNum
| @@ -0,0 +1,10 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Mon Feb 19 16:44:03 2018 | ||
| @author: root | ||
| """ | ||
| import yolo.net.net | ||
| import yolo.net.yolo_net | ||
| import yolo.net.yolo_tiny_net |
| @@ -0,0 +1,170 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Mon Feb 19 16:44:36 2018 | ||
| @author: zsc | ||
| """ | ||
|
|
||
| from __future__ import absolute_import | ||
| from __future__ import division | ||
| from __future__ import print_function | ||
|
|
||
| import tensorflow as tf | ||
| import numpy as np | ||
| import re | ||
|
|
||
| class Net(object): | ||
| ''' | ||
| Basic yolo component. | ||
| ''' | ||
|
|
||
| def __init__(self, commonParams, netParams): | ||
| ''' | ||
| Init the object. | ||
| Args: | ||
| commonParams: a dict of pretrained parameters | ||
| netParams: a dict of trainable parameters | ||
| ''' | ||
| self.pretrainedCollection = [] | ||
| self.trainableCollection = [] | ||
|
|
||
| def _variableInit(self, name, shape, initializer, pretrain=True, train=True): | ||
| var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32) | ||
| if pretrain: | ||
| self.pretrainedCollection.append(var) | ||
| if train: | ||
| self.trainableCollection.append(var) | ||
| return var | ||
|
|
||
| def _variableInitDecay(self, name, shape, stddev, wd, pretrain = True, train = True): | ||
| ''' | ||
| Initialize weights and biases. | ||
| Args: | ||
| name: variable nama | ||
| shape: variable shape | ||
| stddev: standard devision of your variables | ||
| wd: L2 loss lambda | ||
| Returns: | ||
| a tensor of variables | ||
| ''' | ||
|
|
||
| initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32) | ||
| var = self._variableInit(name, | ||
| shape, | ||
| initializer=initializer, | ||
| pretrain=pretrain, | ||
| train=train) | ||
| if wd is not None: | ||
| weightDecay = tf.multiply(tf.nn.l2_loss(var), wd, name='weightLoss') | ||
| tf.add_to_collection('losses', weightDecay) | ||
| return var | ||
|
|
||
| def conv2d(self, scope, input, kernel_size, stride = 1, pretrain = True, train = True): | ||
| ''' | ||
| Convolutional layer. | ||
| Args: | ||
| scope: tensorflow scope name | ||
| input: a 4-D tensor [batch size, height, width, channels] | ||
| kernel_size: [height, width, #input channel, #output channel] | ||
| stride: a stride value, int | ||
| pretrain: True or False | ||
| train: True or False | ||
| Return: | ||
| a 4-D tensor by [batch size, height, width, #outpuut channel] | ||
| ''' | ||
| with tf.variable_scope(scope) as scope: | ||
| # initialize kernel for a specified layer | ||
| kernel = self._variableInitDecay('weights', | ||
| kernel_size, | ||
| stddev=5e-2, | ||
| wd = self.wd, | ||
| pretrain=pretrain, | ||
| train = train ) | ||
|
|
||
| conv = tf.nn.conv2d(input, | ||
| kernel, | ||
| strides=[1, stride, stride, 1], | ||
| padding='SAME') | ||
|
|
||
| biases = self._variableInit('biases', | ||
| kernel_size[3:], | ||
| tf.constant_initializer(0.0), | ||
| pretrain=pretrain, | ||
| train=train) | ||
|
|
||
| convout = self.leakyRelu(tf.nn.bias_add(conv, biases)) | ||
| return convout | ||
|
|
||
| def maxPool(self, input, kernel_size, stride): | ||
| ''' | ||
| Max pooling layer. | ||
| Args: | ||
| input: a 4-D tensor | ||
| kernel_size: [height, width] | ||
| stride: a int32 number | ||
| Return: | ||
| output: 4-D tensor | ||
| ''' | ||
| return tf.nn.max_pool(input, | ||
| ksize=[1, kernel_size[0], kernel_size[1], 1], | ||
| strides=[1, stride, stride, 1], | ||
| padding='SAME') | ||
|
|
||
| def fullyConnectLayer(self, scope, input, inDim, outDim, leaky = True, pretrain = True, train = True): | ||
| ''' | ||
| Fully connected layer. | ||
| Args: | ||
| scope: variable scope name | ||
| input: input tensor | ||
| inDim: int32 | ||
| outDim: int 32 | ||
| Return: | ||
| output: a 2-D tensor [batch size, outDim] | ||
| ''' | ||
| with tf.variable_scope(scope) as scope: | ||
| reshape = tf.reshape(input, | ||
| [tf.shape(input)[0],-1]) | ||
| weights = self._variableInitDecay('weights', | ||
| [inDim, outDim], | ||
| stddev= 0.04, | ||
| wd = self.wd, | ||
| pretrain=pretrain, | ||
| train=train) | ||
|
|
||
| biases = self._variableInit('biases', | ||
| [outDim], | ||
| tf.constant_initializer(0.0), | ||
| pretrain, | ||
| train) | ||
|
|
||
| fcnOut = tf.matmul(reshape, weights) + biases | ||
|
|
||
| return fcnOut | ||
|
|
||
| def leakyRelu(self, x, alpha = 0.1, dtype = tf.float32): | ||
| ''' | ||
| Leaky relu. | ||
| If x > 0, return x; else, 0.1x. | ||
| Args: | ||
| x: a tensor | ||
| alpha: the specified slope | ||
| Return: | ||
| y: a tensor | ||
| ''' | ||
| x = tf.cast(x, dtype = dtype) | ||
| boolMask = (x > 0) | ||
| mask = tf.cast(boolMask, dtype=dtype) | ||
| return 1.0 * mask * x + alpha * (1 - mask) * x |
| @@ -0,0 +1,348 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Thu Feb 22 19:12:15 2018 | ||
| @author: root | ||
| """ | ||
| import tensorflow as tf | ||
| import numpy as np | ||
| import re | ||
|
|
||
| from yolo.net.net import Net | ||
|
|
||
class YoloNet(Net):
    '''
    Full YOLO (v1) network: graph construction plus the training loss.
    '''

    def __init__(self, commonParams, netParams, test=False):
        '''
        Init the object.
        Args:
            commonParams: dict with 'image_size', 'num_classes', 'batch_size'
            netParams: dict with 'cell_size', 'boxes_per_cell',
                'weight_decay' and (when training) the four *_scale weights
            test: when True the loss-scale parameters are not read
        '''
        super(YoloNet, self).__init__(commonParams, netParams)

        # Process the parameters.
        self.imageSize = int(commonParams['image_size'])
        self.numClasses = int(commonParams['num_classes'])
        self.cellSize = int(netParams['cell_size'])
        self.boxesPerCell = int(netParams['boxes_per_cell'])
        self.batchSize = int(commonParams['batch_size'])
        self.wd = float(netParams['weight_decay'])

        if not test:
            self.objectScale = float(netParams['object_scale'])
            self.noobjectScale = float(netParams['noobject_scale'])
            self.classScale = float(netParams['class_scale'])
            self.coordScale = float(netParams['coord_scale'])

    def yoloModel(self, images):
        '''
        Build the 24-conv YOLO v1 model.
        Args:
            images: 4-D tensor [batch size, height, width, channels]
        Return:
            predicts: [batch size, cell size, cell size, #class + 5*boxes per cell]
        '''
        convNum = 1
        temp = self.conv2d("conv" + str(convNum), images, [7, 7, 3, 64], stride=2)
        convNum += 1

        temp = self.maxPool(temp, [2, 2], 2)

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 64, 192], stride=1)
        convNum += 1

        temp = self.maxPool(temp, [2, 2], 2)

        temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 192, 128], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 128, 256], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 256, 256], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 256, 512], stride=1)
        convNum += 1

        temp = self.maxPool(temp, [2, 2], 2)

        # Four alternating 1x1 bottleneck / 3x3 expansion pairs.
        for i in range(4):
            temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 512, 256], stride=1)
            convNum += 1

            temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 256, 512], stride=1)
            convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 512, 512], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 512, 1024], stride=1)
        convNum += 1

        temp = self.maxPool(temp, [2, 2], 2)

        # Two more bottleneck/expansion pairs at 1024 channels.
        for i in range(2):
            temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 1024, 512], stride=1)
            convNum += 1

            temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 512, 1024], stride=1)
            convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=2)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=1)
        convNum += 1

        f1 = self.fullyConnectLayer('f1', temp, 49 * 1024, 4096)
        # NOTE(review): dropout is applied unconditionally; presumably this
        # graph is only built for training -- confirm before test-time reuse.
        f1 = tf.nn.dropout(f1, keep_prob=0.5)

        # BUGFIX: output width per cell is numClasses + 5*boxesPerCell
        # (was '*', which disagreed with the reshape just below).
        f2 = self.fullyConnectLayer('f2',
                                    f1,
                                    4096,
                                    self.cellSize * self.cellSize * (self.numClasses + 5 * self.boxesPerCell),
                                    leaky=False)

        f2 = tf.reshape(f2, [tf.shape(f2)[0], self.cellSize, self.cellSize, self.numClasses + 5 * self.boxesPerCell])

        return f2

    def iou(self, boxes1, boxes2):
        '''
        Compute IOU between predicted boxes and one ground-truth box.
        Args:
            boxes1: 4-D tensor [cell size, cell size, boxes per cell, 4]
                4 means [x center, y center, width, height]
            boxes2: 1-D tensor [4] -- [x center, y center, width, height]
        Return:
            iou: 3-D tensor [cell size, cell size, boxes per cell]
        '''
        # Convert both box sets to [xmin, ymin, xmax, ymax].
        boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2,
                           boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2,
                           boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2,
                           boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2])
        boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])

        boxes2 = tf.stack([boxes2[0] - boxes2[2] / 2,
                           boxes2[1] - boxes2[3] / 2,
                           boxes2[0] + boxes2[2] / 2,
                           boxes2[1] + boxes2[3] / 2])

        # Left-up and right-down corners of the intersection rectangle.
        lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
        rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])

        # BUGFIX: intersection width/height is rd - lu (was lu - rd, which
        # is negative for any genuine overlap, so the >0 mask below zeroed
        # out every overlapping pair and IOU was always 0).
        intersection = rd - lu

        interSquare = intersection[:, :, :, 0] * intersection[:, :, :, 1]

        # Zero out pairs with no real overlap.
        mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)

        interSquare = interSquare * mask

        s1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
        s2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])

        # Epsilon guards against division by zero on degenerate boxes.
        return interSquare / (s1 + s2 - interSquare + 1e-6)

    def cond1(self, num, objectNum, loss, predict, labels, nilboy):
        '''
        while_loop condition: keep iterating while objects remain.
        '''
        return num < objectNum

    def body1(self, num, objectNum, loss, predict, labels, nilboy):
        '''
        Accumulate the YOLO loss contributed by one ground-truth object.
        Args:
            num: index of the object currently being processed
            objectNum: #objects in the image
            loss: [class loss, object loss, no object loss, coord loss]
            predict: 3-D tensor [cell_size, cell_size, numClasses + 5*boxes_per_cell]
            labels: [max_objects, 5] (x_center, y_center, w, h, class)
                --- coordinates are in resized-image pixels
            nilboy: debug output; receives the responsibility mask I
        Return:
            the loop variables for the next iteration, with loss updated
        '''
        # Get this object's label row.
        label = labels[num]
        label = tf.reshape(label, [-1])

        # Object extent converted to cell units.
        minX = (label[0] - label[2] / 2) / (self.imageSize / self.cellSize)
        maxX = (label[0] + label[2] / 2) / (self.imageSize / self.cellSize)
        minY = (label[1] - label[3] / 2) / (self.imageSize / self.cellSize)
        maxY = (label[1] + label[3] / 2) / (self.imageSize / self.cellSize)

        # Determine which cells the object covers.
        minX = tf.floor(minX)
        minY = tf.floor(minY)
        maxX = tf.ceil(maxX)
        maxY = tf.ceil(maxY)

        # objects: 1 for every covered cell, then padded up to S x S.
        temp = tf.cast(tf.stack([maxY - minY, maxX - minX]), dtype=tf.int32)
        objects = tf.ones(temp, tf.float32)

        temp = tf.cast(tf.stack([minY, self.cellSize - maxY, minX, self.cellSize - maxX]),
                       dtype=tf.int32)
        temp = tf.reshape(temp, (2, 2))
        objects = tf.pad(objects, temp, 'CONSTANT')

        # response: 1 only for the cell containing the object center.
        centerX = label[0] / (self.imageSize / self.cellSize)
        centerX = tf.floor(centerX)
        centerY = label[1] / (self.imageSize / self.cellSize)
        centerY = tf.floor(centerY)
        response = tf.ones([1, 1], tf.float32)

        temp = tf.cast(tf.stack([centerY, self.cellSize - centerY - 1, centerX, self.cellSize - centerX - 1]),
                       dtype=tf.int32)
        temp = tf.reshape(temp, (2, 2))
        response = tf.pad(response, temp, 'CONSTANT')

        # Predicted boxes: [cellSize, cellSize, boxesPerCell, 4].
        predictBoxes = predict[:, :, self.numClasses + self.boxesPerCell:]
        predictBoxes = tf.reshape(predictBoxes,
                                  [self.cellSize, self.cellSize, self.boxesPerCell, 4])

        # Scale normalized predictions back to pixel units.
        predictBoxes = predictBoxes * [self.imageSize / self.cellSize,
                                       self.imageSize / self.cellSize,
                                       self.imageSize,
                                       self.imageSize]

        # Per-cell pixel offsets: predicted centers are cell-relative.
        baseBoxes = np.zeros([self.cellSize, self.cellSize, 4])
        for y in range(self.cellSize):
            for x in range(self.cellSize):
                baseBoxes[y, x, :] = [self.imageSize / self.cellSize * x,
                                      self.imageSize / self.cellSize * y,
                                      0, 0]
        baseBoxes = np.tile(np.resize(baseBoxes,
                                      [self.cellSize, self.cellSize, 1, 4]), [1, 1, self.boxesPerCell, 1])

        # Absolute predicted boxes over the whole image.
        predictBoxes = baseBoxes + predictBoxes

        iouPredictTruth = self.iou(predictBoxes, label[0:4])

        # Mask IOUs down to the responsible (center) cell.
        C = iouPredictTruth * tf.reshape(response,
                                         [self.cellSize, self.cellSize, 1])

        I = iouPredictTruth * tf.reshape(response, [self.cellSize, self.cellSize, 1])

        # Keep only the best-matching box of the responsible cell.
        # (keepdims replaces the deprecated keep_dims spelling, matching
        # the sibling yolo_tiny_net implementation.)
        maxI = tf.reduce_max(I, 2, keepdims=True)
        I = tf.cast((I >= maxI), tf.float32) * tf.reshape(response, (self.cellSize, self.cellSize, 1))

        # noI: boxes NOT responsible for this object.
        noI = tf.ones_like(I, dtype=tf.float32) - I

        # Predicted confidences, one per box.
        pC = predict[:, :, self.numClasses:self.numClasses + self.boxesPerCell]

        # Ground-truth center and sqrt-sized extents.
        x = label[0]
        y = label[1]
        sqrtW = tf.sqrt(tf.abs(label[2]))
        sqrtH = tf.sqrt(tf.abs(label[3]))

        # Predicted centers.
        pX = predictBoxes[:, :, :, 0]
        pY = predictBoxes[:, :, :, 1]

        # Clamp predicted w/h into [0, imageSize] before the sqrt.
        pSqrtW = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 2])))
        pSqrtH = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 3])))

        # One-hot ground-truth class and predicted class distribution.
        P = tf.one_hot(tf.cast(label[4], tf.int32), self.numClasses, dtype=tf.float32)
        pP = predict[:, :, 0:self.numClasses]

        # classLoss: only cells covered by the object contribute.
        classLoss = tf.nn.l2_loss(tf.reshape(objects, (self.cellSize, self.cellSize, 1)) * (pP - P)) * self.classScale

        # objectLoss: confidence of the responsible box vs its IOU.
        objectLoss = tf.nn.l2_loss(I * (pC - C)) * self.objectScale

        noObjectLoss = tf.nn.l2_loss(noI * (pC)) * self.noobjectScale

        # BUGFIX: coordLoss is weighted by coordScale via multiplication
        # (was '+ self.coordScale', which only added a constant offset).
        coordLoss = (tf.nn.l2_loss(I * (pX - x) / (self.imageSize / self.cellSize)) +
                     tf.nn.l2_loss(I * (pY - y) / (self.imageSize / self.cellSize)) +
                     tf.nn.l2_loss(I * (pSqrtW - sqrtW)) / self.imageSize +
                     tf.nn.l2_loss(I * (pSqrtH - sqrtH)) / self.imageSize) * self.coordScale
        nilboy = I

        return num + 1, objectNum, [loss[0] + classLoss, loss[1] + objectLoss, loss[2] + noObjectLoss, loss[3] + coordLoss], predict, labels, nilboy

    def loss(self, predicts, labels, objectsNum):
        '''
        Add loss to all the trainable variables.
        Args:
            predicts: 4-D tensor [batch size, cell size, cell size, numClasses + 5*boxes per cell]
            labels: 3-D tensor [batch size, max objects, 5]
            objectsNum: 1-D tensor [batch size]
        Return:
            (total loss from the 'losses' collection, nilboy debug mask)
        '''
        classLoss = tf.constant(0, tf.float32)
        objectLoss = tf.constant(0, tf.float32)
        noObjectLoss = tf.constant(0, tf.float32)
        coordLoss = tf.constant(0, tf.float32)

        loss = [0, 0, 0, 0]

        for i in range(self.batchSize):
            predict = predicts[i, :, :, :]
            # BUGFIX: labels is 3-D, so one image's labels are a 2-D slice
            # (was labels[i, :, :, :], which raises on a 3-D tensor).
            label = labels[i, :, :]
            objectNum = objectsNum[i]
            nilboy = tf.ones([7, 7, 2])
            tupleResults = tf.while_loop(self.cond1, self.body1,
                                         [tf.constant(0), objectNum, [classLoss, objectLoss, noObjectLoss, coordLoss], predict, label, nilboy])
            for j in range(4):
                loss[j] = loss[j] + tupleResults[2][j]
            nilboy = tupleResults[5]

        tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3]) / self.batchSize)

        tf.summary.scalar('class_loss', loss[0] / self.batchSize)
        tf.summary.scalar('object_loss', loss[1] / self.batchSize)
        tf.summary.scalar('noobject_loss', loss[2] / self.batchSize)
        # BUGFIX: was tf.summary.scalsr (typo) and it logged loss[2] again
        # instead of the coord component.
        tf.summary.scalar('coord_loss', loss[3] / self.batchSize)
        tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3]) / self.batchSize)

        return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
| @@ -0,0 +1,346 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Thu Feb 22 19:12:15 2018 | ||
| @author: root | ||
| """ | ||
| import tensorflow as tf | ||
| import numpy as np | ||
| import re | ||
|
|
||
| from yolo.net.net import Net | ||
|
|
||
| class YoloTinyNet(Net): | ||
| ''' | ||
| Yolo net implementation. | ||
| ''' | ||
| def __init__(self, commonParams, netParams, test = False): | ||
| ''' | ||
| Init the object. | ||
| Args: | ||
| commonParams: a dict of pretrained parameters | ||
| netParams: a dict of trainable parameters | ||
| ''' | ||
| super(YoloTinyNet, self).__init__(commonParams, netParams) | ||
|
|
||
| #Process the parameters. | ||
| self.imageSize = int(commonParams['image_size']) | ||
| self.numClasses = int(commonParams['num_classes']) | ||
| self.cellSize = int(netParams['cell_size']) | ||
| self.boxesPerCell = int(netParams['boxes_per_cell']) | ||
| self.batchSize = int(commonParams['batch_size']) | ||
| self.wd = float(netParams['weight_decay']) | ||
|
|
||
| if not test: | ||
| self.objectScale = float(netParams['object_scale']) | ||
| self.noobjectScale = float(netParams['noobject_scale']) | ||
| self.classScale = float(netParams['class_scale']) | ||
| self.coordScale = float(netParams['coord_scale']) | ||
|
|
||
| def yoloTinyModel(self, images): | ||
| ''' | ||
| Build your yolo model. | ||
| Args: | ||
| images: 4-D tensor [batch size, height, width, channels] | ||
| Return: | ||
| predicts: [batch size, cell size, cell size, #class + 5*boxes per cell] | ||
| ''' | ||
| conv_num = 1 | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_pool = self.maxPool(temp_conv, [2, 2], 2) | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 16, 32], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_pool = self.maxPool(temp_conv, [2, 2], 2) | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 32, 64], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = self.maxPool(temp_conv, [2, 2], 2) | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 64, 128], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = self.maxPool(temp_conv, [2, 2], 2) | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 128, 256], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = self.maxPool(temp_conv, [2, 2], 2) | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 256, 512], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = self.maxPool(temp_conv, [2, 2], 2) | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 512, 1024], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1) | ||
| conv_num += 1 | ||
|
|
||
| temp_conv = tf.transpose(temp_conv, (0, 3, 1, 2)) | ||
|
|
||
| #Fully connected layer | ||
| local1 = self.fullyConnectLayer('local1', temp_conv, self.cellSize * self.cellSize * 1024, 256) | ||
|
|
||
| local2 = self.fullyConnectLayer('local2', local1, 256, 4096) | ||
|
|
||
| local3 = self.fullyConnectLayer('local3', | ||
| local2, | ||
| 4096, | ||
| self.cellSize * self.cellSize * (self.numClasses + self.boxesPerCell * 5), | ||
| leaky=False, | ||
| pretrain=False, | ||
| train=True) | ||
|
|
||
| n1 = self.cellSize * self.cellSize * self.numClasses | ||
|
|
||
| n2 = n1 + self.cellSize * self.cellSize * self.boxesPerCell | ||
|
|
||
| class_probs = tf.reshape(local3[:, 0:n1], (-1, self.cellSize, self.cellSize, self.numClasses)) | ||
| scales = tf.reshape(local3[:, n1:n2], (-1, self.cellSize, self.cellSize, self.boxesPerCell)) | ||
| boxes = tf.reshape(local3[:, n2:], (-1, self.cellSize, self.cellSize, self.boxesPerCell * 4)) | ||
|
|
||
| local3 = tf.concat([class_probs, scales, boxes], 3) | ||
|
|
||
| predicts = local3 | ||
|
|
||
| return predicts | ||
|
|
||
| def iou(self, boxes1, boxes2): | ||
| ''' | ||
| Compute IOU. | ||
| Args: | ||
| boxes1: 4-D tensor [cell size, cell size, boxes per cell, 4] 4 means [x center, y center, width, height] | ||
| boxes2: 1-D tensor [4] 4 means [x center, y center, width, height] | ||
| Return: | ||
| iou: 3-D tensor [cell size, cell size, boses per cell] | ||
| ''' | ||
| #Calculate the left-up and right-bottom coordinatino of boses1 & 2. | ||
| boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2]/2, | ||
| boxes1[:, :, :, 1] - boxes1[:, :, :, 3]/2, | ||
| boxes1[:, :, :, 0] + boxes1[:, :, :, 2]/2, | ||
| boxes1[:, :, :, 1] + boxes1[:, :, :, 3]/2]) | ||
| boxes1 = tf.transpose(boxes1, [1, 2, 3, 0]) | ||
|
|
||
| boxes2 = tf.stack([boxes2[0] - boxes2[2]/2, | ||
| boxes2[1] - boxes2[3]/2, | ||
| boxes2[0] + boxes2[2]/2, | ||
| boxes2[1] + boxes2[3]/2]) | ||
|
|
||
| # left up intersection point | ||
| lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2]) | ||
| rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:]) | ||
|
|
||
| # the variable intersection contains the width and height of the intersection area. | ||
| intersection = lu - rd | ||
|
|
||
| interSquare = intersection[:, :, :, 0] * intersection[:, :, :, 1] | ||
|
|
||
| #Make sure there is intersection | ||
| mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32) | ||
|
|
||
| interSquare = interSquare * mask | ||
|
|
||
| s1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1]) | ||
| s2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1]) | ||
|
|
||
| return interSquare/(s1 + s2 - interSquare + 1e-6) | ||
|
|
||
|
|
||
| def cond1(self, num, objectNum, loss, predict, labels, nilboy): | ||
| ''' | ||
| ''' | ||
| return num < objectNum | ||
|
|
||
| def body1(self, num, objectNum, loss, predict, labels, nilboy): | ||
| ''' | ||
| Calculate loss. | ||
| Args: | ||
| num: spedify which image is to be processed | ||
| objectNum: #objects in an image | ||
| loss: [class loss, object loss, no object loss, coord loss] | ||
| predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell] | ||
| labels: [max_objects, 5] (x_center, y_center, w, h, class) | ||
| --- > class and coord | ||
| --- > x_center is the x value of resized image. the same to y_center | ||
| nilboy: has/no objects | ||
| ''' | ||
| #Get label form labels by the varibale num | ||
| label = labels[num] | ||
| label = tf.reshape(label, [-1]) | ||
|
|
||
| minX = (label[0] - label[2] / 2) / (self.imageSize / self.cellSize) | ||
| maxX = (label[0] + label[2] / 2) / (self.imageSize / self.cellSize) | ||
| minY = (label[1] - label[3] / 2) / (self.imageSize / self.cellSize) | ||
| maxY = (label[1] + label[3] / 2) / (self.imageSize / self.cellSize) | ||
|
|
||
| #Determine which cell is the object belongs to. | ||
| minX = tf.floor(minX) | ||
| minY = tf.floor(minY) | ||
| maxX = tf.ceil(maxX) | ||
| maxY = tf.ceil(maxY) | ||
|
|
||
| #temp: if a cell contains an object, temp = 1, else 0 | ||
| temp = tf.cast(tf.stack([maxY - minY, maxX - minX]), dtype=tf.int32) | ||
| objects = tf.ones(temp, tf.float32) | ||
|
|
||
| #temp: if a cell doesn't contains an object, temp = 0 | ||
| #Which means pad it to S*S scale. | ||
| temp = tf.cast(tf.stack([minY, self.cellSize - maxY, minX, self.cellSize - maxX]), | ||
| dtype=tf.int32) | ||
| temp = tf.reshape(temp, (2, 2)) | ||
| objects = tf.pad(objects, temp, 'CONSTANT') | ||
|
|
||
| #Calculate which cell contains the center point of the object. | ||
| centerX = label[0] / (self.imageSize / self.cellSize) | ||
| centerX = tf.floor(centerX) | ||
| centerY = label[1] / (self.imageSize / self.cellSize) | ||
| centerY = tf.floor(centerY) | ||
| response = tf.ones([1, 1], tf.float32) | ||
|
|
||
| # pad to S*S scale. | ||
| temp = tf.cast(tf.stack([centerY, self.cellSize - centerY - 1, centerX,self.cellSize - centerX -1]), | ||
| dtype=tf.int32) | ||
| temp = tf.reshape(temp, (2, 2)) | ||
| response = tf.pad(response, temp, 'CONSTANT') | ||
|
|
||
| #predictBoxes: predicted boxes | ||
| predictBoxes = predict[:, :, self.numClasses + self.boxesPerCell:] | ||
|
|
||
| # 7 * 7 * 2 * 4 | ||
| predictBoxes = tf.reshape(predictBoxes, | ||
| [self.cellSize, self.cellSize, self.boxesPerCell, 4]) | ||
|
|
||
| # get real size form 0-1 predicted size | ||
| predictBoxes = predictBoxes * [self.imageSize / self.cellSize, | ||
| self.imageSize / self.cellSize, | ||
| self.imageSize, | ||
| self.imageSize] | ||
|
|
||
| #grid cell coord | ||
| baseBoxes = np.zeros([self.cellSize, self.cellSize, 4]) | ||
|
|
||
| for y in range(self.cellSize): | ||
| for x in range(self.cellSize): | ||
| baseBoxes[y, x, :] = [self.imageSize / self.cellSize * x, | ||
| self.imageSize / self.cellSize * y, | ||
| 0, 0] | ||
|
|
||
| #Make the shape of baseBoxes is the same with predictedBoxes. | ||
| baseBoxes = np.tile(np.resize(baseBoxes, | ||
| [self.cellSize, self.cellSize, 1, 4]),[1, 1, self.boxesPerCell, 1]) | ||
|
|
||
| # predictBoxes is based on cell, baseBoxes is based on grid cell. Add them to get predicts based on the whole image. | ||
| predictBoxes = baseBoxes + predictBoxes | ||
|
|
||
| #iou for each cell 7 * 7 * 1 | ||
| iouPredictTruth = self.iou(predictBoxes, label[0:4]) | ||
|
|
||
| # filter out the cells that don't have objects | ||
| C = iouPredictTruth * tf.reshape(response, | ||
| [self.cellSize, self.cellSize, 1]) | ||
|
|
||
| # | ||
| I = iouPredictTruth * tf.reshape(response, [self.cellSize, self.cellSize, 1]) | ||
|
|
||
| #get the maximum iou for each cell's boxes | ||
| maxI = tf.reduce_max(I, 2, keepdims=True) | ||
|
|
||
| # the max iou for the cell contains the center point | ||
| I = tf.cast((I >= maxI), tf.float32) * tf.reshape(response, (self.cellSize, self.cellSize, 1)) | ||
|
|
||
| #noI: [cell size, cell size, boxes per cell] | ||
| noI = tf.ones_like(I, dtype=tf.float32) - I | ||
|
|
||
| # B confidences | ||
| pC = predict[:, :, self.numClasses:self.numClasses + self.boxesPerCell] | ||
|
|
||
| #real x center, y center | ||
| x = label[0] | ||
| y = label[1] | ||
|
|
||
| sqrtW = tf.sqrt(tf.abs(label[2])) | ||
| sqrtH = tf.sqrt(tf.abs(label[3])) | ||
|
|
||
| # real predicted x center and y center | ||
| pX = predictBoxes[:, :, :, 0] | ||
| pY = predictBoxes[:, :, :, 1] | ||
|
|
||
| #square root of predicted boxes' width and height | ||
| pSqrtW = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 2]))) | ||
| pSqrtH = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 3]))) | ||
|
|
||
| # one hot encoding | ||
| P = tf.one_hot(tf.cast(label[4], tf.int32), self.numClasses, dtype=tf.float32) | ||
|
|
||
| #predict classes | ||
| pP = predict[:, :, 0:self.numClasses] | ||
|
|
||
| #classLoss: only cells containing objects | ||
| classLoss = tf.nn.l2_loss(tf.reshape(objects, (self.cellSize, self.cellSize, 1)) * (pP -P)) * self.classScale | ||
|
|
||
| #objectLoss: object center location loss | ||
| objectLoss = tf.nn.l2_loss(I * (pC - C)) * self.objectScale | ||
|
|
||
| noObjectLoss = tf.nn.l2_loss(noI * (pC)) * self.noobjectScale | ||
|
|
||
| coordLoss = (tf.nn.l2_loss(I * (pX -x)/(self.imageSize/self.cellSize)) + | ||
| tf.nn.l2_loss(I * (pY -y)/(self.imageSize/self.cellSize)) + | ||
| tf.nn.l2_loss(I * (pSqrtW - sqrtW))/self.imageSize + | ||
| tf.nn.l2_loss(I * (pSqrtH - sqrtH))/self.imageSize) + self.coordScale | ||
| nilboy = I | ||
|
|
||
| return num + 1, objectNum, [loss[0] + classLoss, loss[1] + objectLoss, loss[2] + noObjectLoss, loss[3] + coordLoss],predict, labels, nilboy | ||
|
|
||
| def loss(self, predicts, labels, objectsNum): | ||
| ''' | ||
| Add loss to all the trainable variables. | ||
| Args: | ||
| predicts: 4-D tensor [batch size, cell size, cell size, 5 * boxes per cell] | ||
| labels: labels: 3-D tensor [batch size, max objects, 5] | ||
| objectNum: 1-D tensor [batch size] | ||
| ''' | ||
| classLoss = tf.constant(0, tf.float32) | ||
| objectLoss = tf.constant(0, tf.float32) | ||
| noObjectLoss = tf.constant(0, tf.float32) | ||
| coordLoss = tf.constant(0, tf.float32) | ||
|
|
||
| loss = [0, 0, 0, 0] | ||
|
|
||
| for i in range(self.batchSize): | ||
| predict = predicts[i, :, :, :] | ||
| label = labels[i, :, :] | ||
| objectNum = objectsNum[i] | ||
| nilboy = tf.ones([7, 7, 2]) | ||
| tupleResults = tf.while_loop(self.cond1, self.body1, | ||
| [tf.constant(0), objectNum, [classLoss, objectLoss, noObjectLoss, coordLoss], predict, label, nilboy]) | ||
| for j in range(4): | ||
| loss[j] = loss[j] + tupleResults[2][j] | ||
| nilboy = tupleResults[5] | ||
|
|
||
| tf.add_to_collection('losses', (loss[0] + loss[1] +loss[2] +loss[3])/self.batchSize) | ||
|
|
||
| tf.summary.scalar('class_loss', loss[0]/self.batchSize) | ||
| tf.summary.scalar('object_loss', loss[1]/self.batchSize) | ||
| tf.summary.scalar('noobject_loss', loss[2]/self.batchSize) | ||
| tf.summary.scalar('coord_loss', loss[2]/self.batchSize) | ||
| tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3])/self.batchSize) | ||
|
|
||
| return tf.add_n(tf.get_collection('losses'), name = 'total_loss'), nilboy |
| @@ -0,0 +1 @@ | ||
| import yolo.text_dataset.text_dataset |
| @@ -0,0 +1,153 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Wed Mar 7 21:09:49 2018 | ||
| @author: root | ||
| """ | ||
|
|
||
| import random | ||
| import cv2 | ||
| import numpy as np | ||
| import queue | ||
| from threading import Thread | ||
|
|
||
class TextDataSet():
    '''
    Multi-threaded producer/consumer pipeline over a plain-text dataset file.
    Each line of the file has the format:
        imagePath xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 ...
    '''

    def __init__(self, commonParams, datasetParams):
        '''
        Args:
            commonParams: dict with 'image_size', 'batch_size', 'max_objects_per_image'
            datasetParams: dict with 'path', 'thread_num'
        '''
        self.dataPath = str(datasetParams['path'])
        self.width = int(commonParams['image_size'])
        self.height = int(commonParams['image_size'])
        self.batchSize = int(commonParams['batch_size'])
        self.threadNum = int(datasetParams['thread_num'])
        self.maxObjects = int(commonParams['max_objects_per_image'])

        # Bounded queues: the producer/consumer threads block when full
        # instead of growing without limit.
        self.recordQueue = queue.Queue(10000)
        self.imageLabelQueue = queue.Queue(512)

        self.recordList = []

        # Context manager so the file handle is always released
        # (the original opened the file and never closed it).
        with open(self.dataPath, 'r') as inputFile:
            for line in inputFile:
                ss = line.strip().split(' ')
                ss[1:] = [float(num) for num in ss[1:]]
                self.recordList.append(ss)

        self.recordPoint = 0
        self.recordNumber = len(self.recordList)

        self.numBatchPerEpoch = int(self.recordNumber / self.batchSize)

        # One producer thread feeding raw records ...
        tRecordProducer = Thread(target=self.recordProducer)
        tRecordProducer.daemon = True
        tRecordProducer.start()

        # ... and threadNum consumer threads decoding them.
        for i in range(self.threadNum):
            t = Thread(target=self.recordCustomer)
            t.daemon = True
            t.start()

    def recordProducer(self):
        '''
        Endlessly feed records into recordQueue, reshuffling at each epoch.
        '''
        while True:
            if self.recordPoint % self.recordNumber == 0:
                random.shuffle(self.recordList)
                self.recordPoint = 0
            self.recordQueue.put(self.recordList[self.recordPoint])
            self.recordPoint += 1

    def recordProcess(self, record):
        '''
        Load and resize one image and convert its boxes to training labels.
        Args:
            record: [imagePath, xmin1, ymin1, xmax1, ymax1, class1, ...]
        Returns:
            image: 3-D ndarray (RGB, resized to self.height x self.width)
            labels: 2-D list [self.maxObjects, 5] --> [xCenter, yCenter, w, h, classNum]
            objectNum: int, number of valid rows in labels
        '''
        image = cv2.imread(record[0])
        # Fail loudly here instead of crashing later in cvtColor
        # (cv2.imread returns None on failure rather than raising).
        if image is None:
            raise IOError("Couldn't load image: " + record[0])

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h = image.shape[0]
        w = image.shape[1]

        widthRate = self.width * 1.0 / w
        heightRate = self.height * 1.0 / h

        # cv2.resize takes dsize as (width, height); both equal image_size here.
        image = cv2.resize(image, (self.width, self.height))

        # Build independent rows: the original used [[0]*5] * n, which makes
        # every row alias one shared inner list.
        labels = [[0, 0, 0, 0, 0] for _ in range(self.maxObjects)]
        i = 1
        objectNum = 0

        while i < len(record):
            xmin = record[i]
            ymin = record[i + 1]
            xmax = record[i + 2]
            ymax = record[i + 3]
            classNum = record[i + 4]
            # Box center/size rescaled to the fixed network input size.
            xcenter = (xmin + xmax) * 1.0 / 2 * widthRate
            ycenter = (ymin + ymax) * 1.0 / 2 * heightRate

            boxW = (xmax - xmin) * widthRate
            boxH = (ymax - ymin) * heightRate

            labels[objectNum] = [xcenter, ycenter, boxW, boxH, classNum]
            objectNum += 1

            i += 5
            if objectNum >= self.maxObjects:
                break

        return [image, labels, objectNum]

    def recordCustomer(self):
        '''
        Consumer thread: turn raw records into (image, labels, objectNum) items.
        '''
        while True:
            item = self.recordQueue.get()
            out = self.recordProcess(item)
            self.imageLabelQueue.put(out)

    def batch(self):
        '''
        Get one batch of processed samples.
        Returns:
            images: 4-D ndarray [batch size, h, w, 3], normalized to [-1, 1]
            labels: 3-D ndarray [batch size, max objects, 5]
            objectsNum: 1-D ndarray [batch size]
        '''
        images = []
        labels = []
        objectsNum = []

        for i in range(self.batchSize):
            image, label, objectNum = self.imageLabelQueue.get()
            images.append(image)
            labels.append(label)
            objectsNum.append(objectNum)

        images = np.asarray(images, dtype=np.float32)
        # Scale pixel values from [0, 255] to [-1, 1].
        images = images / 255 * 2 - 1
        labels = np.asarray(labels, dtype=np.float32)
        objectsNum = np.asarray(objectsNum, dtype=np.float32)
        return images, labels, objectsNum
| @@ -0,0 +1,45 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Thu Mar 8 20:09:41 2018 | ||
| @author: root | ||
| """ | ||
|
|
||
| import configparser | ||
|
|
||
def processConfig(configFile):
    '''
    Process configure file.
    Args:
        configFile: path to the configure file
    Return:
        commonParams, datasetParams, netParams, solverParams
    '''
    # One destination dict per recognized section; any other section
    # in the file is silently ignored, as before.
    paramsBySection = {
        'Common': {},
        'DataSet': {},
        'Net': {},
        'Solver': {},
    }

    config = configparser.ConfigParser()
    config.read(configFile)

    for section in config.sections():
        target = paramsBySection.get(section)
        if target is not None:
            for option in config.options(section):
                target[option] = config.get(section, option)

    return (paramsBySection['Common'], paramsBySection['DataSet'],
            paramsBySection['Net'], paramsBySection['Solver'])
| @@ -0,0 +1 @@ | ||
| import yolo.yolo_solver.yolo_solver |
| @@ -0,0 +1,102 @@ | ||
| #!/usr/bin/env python3 | ||
| # -*- coding: utf-8 -*- | ||
| """ | ||
| Created on Fri Mar 9 09:05:37 2018 | ||
| @author: root | ||
| """ | ||
| import tensorflow as tf | ||
| import sys | ||
| import time | ||
| import numpy as np | ||
| import os | ||
| from datetime import datetime | ||
|
|
||
|
|
||
class YoloSolver:
    """Trainer: builds the TF1 graph, restores pretrained weights, and runs
    a momentum-SGD training loop over batches from the dataset."""

    def __init__(self, dataset, net, commonParams, solverParams):
        '''
        Store hyper-parameters and build the computation graph.
        Args:
            dataset: data source exposing batch() and batchSize
            net: network exposing yoloTinyModel(), loss(), and the
                pretrainedCollection / trainableCollection variable lists
            commonParams: dict -- 'batch_size', 'image_size', 'max_objects_per_image'
            solverParams: dict -- 'moment', 'learning_rate', 'pretrain_model_path',
                'train_dir', 'max_iterators'
        '''
        self.moment = float(solverParams['moment'])
        self.learningRate = float(solverParams['learning_rate'])
        self.batchSize = int(commonParams['batch_size'])
        self.height = int(commonParams['image_size'])
        self.width = int(commonParams['image_size'])
        self.maxObjects = int(commonParams['max_objects_per_image'])
        self.pretrainPath = str(solverParams['pretrain_model_path'])
        self.trainDir = str(solverParams['train_dir'])
        self.maxIterators = int(solverParams['max_iterators'])

        self.dataset = dataset
        self.net = net
        self.constructGraph()

    def _train(self):
        '''
        Create the training op: momentum optimizer applied to the total loss.
        Returns:
            the apply-gradients op (also increments globalStep).
        '''
        opt = tf.train.MomentumOptimizer(self.learningRate, self.moment)
        grads = opt.compute_gradients(self.totalLoss)
        applyGradientOp = opt.apply_gradients(grads, self.globalStep)

        return applyGradientOp

    def constructGraph(self):
        '''
        Construct the graph: placeholders, forward pass, loss, and train op.
        '''
        self.globalStep = tf.Variable(0, trainable=False)
        # NHWC float input; each label row is [xCenter, yCenter, w, h, class].
        self.images = tf.placeholder(tf.float32, (self.batchSize, self.height, self.width, 3))
        self.labels = tf.placeholder(tf.float32, (self.batchSize, self.maxObjects, 5))
        self.objectsNum = tf.placeholder(tf.int32, (self.batchSize))

        self.predicts = self.net.yoloTinyModel(self.images)
        self.totalLoss, self.nilboy = self.net.loss(self.predicts, self.labels, self.objectsNum)
        tf.summary.scalar('loss', self.totalLoss)
        self.trainOp = self._train()

    def solve(self):
        '''
        Run the training loop: restore pretrained weights, then iterate
        maxIterators steps -- log progress every 10 steps, write summaries
        every 100 steps, checkpoint trainable variables every 5000 steps.
        '''
        # Two savers: saver1 restores the pretrained subset of variables,
        # saver2 checkpoints the trainable subset during training.
        saver1 = tf.train.Saver(self.net.pretrainedCollection, write_version=1)
        saver2 = tf.train.Saver(self.net.trainableCollection, write_version=1)

        init = tf.global_variables_initializer()
        summaryOp = tf.summary.merge_all()

        os.environ["CUDA_VISIBLE_DEVICES"] = '1'
        config = tf.ConfigProto()
        # Allocate GPU memory on demand instead of grabbing it all upfront.
        config.gpu_options.allow_growth = True

        sess = tf.Session(config = config)
        sess.run(init)

        # Restore happens after init so pretrained values overwrite the
        # freshly initialized ones.
        saver1.restore(sess, self.pretrainPath)
        summaryWriter = tf.summary.FileWriter(self.trainDir, sess.graph)

        for step in range(self.maxIterators):
            startTime = time.time()
            npImages, npLabels, npObjectsNum = self.dataset.batch()

            _, lossValue, nilboy = sess.run([self.trainOp, self.totalLoss, self.nilboy],
                feed_dict={self.images: npImages, self.labels: npLabels, self.objectsNum: npObjectsNum})

            duration = time.time() - startTime
            # Abort early if training has blown up.
            assert not np.isnan(lossValue), 'Model deverged with loss = NaN'
            if step%10 == 0:
                numExamplesPerStep = self.dataset.batchSize
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)

                print('time: '+str(datetime.now())+', step: '+str(step)+', loss: '+str(lossValue)+', examplePerSec: '+str(examplesPerSec)+', secPerBatch: '+str(secPerBatch))
                sys.stdout.flush()

            if step % 100 == 0:
                # Re-runs the summary op on the same batch (an extra forward
                # pass) purely for TensorBoard logging.
                summaryStr = sess.run(summaryOp,
                    feed_dict={self.images: npImages, self.labels:npLabels, self.objectsNum: npObjectsNum})

                summaryWriter.add_summary(summaryStr, step)

            if step % 5000 == 0:
                # NOTE(review): checkpoint path is trainDir + 'model.ckpt'
                # with no path separator -- confirm trainDir ends with '/'.
                saver2.save(sess, self.trainDir + 'model.ckpt', global_step=step)
        sess.close()
| @@ -0,0 +1,203 @@ | ||
| import sys | ||
|
|
||
| sys.path.append('./') | ||
|
|
||
| import time | ||
| from yolo.net.yolo_tiny_net import YoloTinyNet | ||
| import tensorflow as tf | ||
| import cv2 | ||
| import numpy as np | ||
| import os | ||
|
|
||
| classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] | ||
|
|
||
|
|
||
class Timer(object):
    """A simple timer."""

    def __init__(self):
        # Statistics accumulated across tic/toc cycles.
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Start (or restart) the timer."""
        # time.time() instead of time.clock(): clock() does not
        # normalize for multi-threading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop the timer and return the average elapsed time over all
        calls, or just the last interval when average is False."""
        now = time.time()
        self.diff = now - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
|
|
||
|
|
||
|
|
||
def process_predicts(resized_img, predicts, thresh=0.12):
    """
    process the predicts of object detection with one image input.
    Args:
        resized_img: resized source image (expected 448 x 448).
        predicts: output of the model, shape (1, 7, 7, 30).
        thresh: threshold on class-specific box confidence.
    Return:
        predicts_dict: {"cat": [[x1, y1, x2, y2, scores1], [...]]}.
    """
    p_classes = predicts[0, :, :, 0:20]   # 20 class probabilities per cell.
    C = predicts[0, :, :, 20:22]          # two bounding-box confidences per cell.
    coordinate = predicts[0, :, :, 22:]   # two boxes (x, y, w, h) per cell.

    p_classes = np.reshape(p_classes, (7, 7, 1, 20))
    C = np.reshape(C, (7, 7, 2, 1))
    # (cell_size, cell_size, bbox_num_per_cell, 4) -- hoisted out of the loop;
    # the original re-reshaped this inside the loop on every detection.
    coordinate = np.reshape(coordinate, (7, 7, 2, 4))

    # Class-specific confidence for every box: (7, 7, 2, 20).
    P = C * p_classes

    predicts_dict = {}
    for i in range(7):
        for j in range(7):
            # Pick the best (box, class) pair within cell (i, j).
            temp_data = np.zeros_like(P, np.float32)
            temp_data[i, j, :, :] = P[i, j, :, :]
            position = np.argmax(temp_data)
            index = np.unravel_index(position, P.shape)

            if P[index] > thresh:
                class_num = index[-1]
                max_coordinate = coordinate[index[0], index[1], index[2], :]

                xcenter = max_coordinate[0]
                ycenter = max_coordinate[1]
                w = max_coordinate[2]
                h = max_coordinate[3]

                # Cell-relative center -> absolute pixel coordinates.
                xcenter = (index[1] + xcenter) * (448/7.0)
                ycenter = (index[0] + ycenter) * (448/7.0)

                w = w * 448
                h = h * 448
                # Clamp the box to the image bounds.  Bug fixes vs. the
                # original: ymin was computed from xcenter/w (copy-paste),
                # and the x/y limits used swapped axes (shape[0] is height,
                # shape[1] is width).
                xmin = max(0, xcenter - w/2.0)
                ymin = max(0, ycenter - h/2.0)
                xmax = min(resized_img.shape[1], xmin + w)
                ymax = min(resized_img.shape[0], ymin + h)

                class_name = classes_name[class_num]
                predicts_dict.setdefault(class_name, [])
                predicts_dict[class_name].append([int(xmin), int(ymin), int(xmax), int(ymax), P[index]])

    return predicts_dict
|
|
||
|
|
||
def non_max_suppress(predicts_dict, threshold=0.25):
    """
    implement non-maximum supression on predict bounding boxes, per class.
    Args:
        predicts_dict: {"cat": [[x1, y1, x2, y2, scores1], [...]]}.
        threshold: IoU threshold above which the lower-scoring box is dropped.
    Return:
        predicts_dict processed by non-maximum suppression (values become
        lists of Python floats).
    """
    for object_name, bbox in predicts_dict.items():
        # np.float was removed from NumPy (>= 1.24); the builtin float is
        # the documented equivalent.
        bbox_array = np.array(bbox, dtype=float)
        x1, y1, x2, y2, scores = (bbox_array[:, 0], bbox_array[:, 1],
                                  bbox_array[:, 2], bbox_array[:, 3], bbox_array[:, 4])
        areas = (x2-x1+1) * (y2-y1+1)
        order = scores.argsort()[::-1]  # indices sorted by descending score
        keep = []

        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Intersection of the current best box with every remaining box.
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            inter = np.maximum(0.0, xx2-xx1+1) * np.maximum(0.0, yy2-yy1+1)
            iou = inter/(areas[i]+areas[order[1:]]-inter)
            # Keep only the boxes whose overlap with the chosen box is
            # at or below the threshold; +1 re-bases into `order`.
            indexs = np.where(iou<=threshold)[0]
            order = order[indexs+1]

        # (Removed a dead `predicts_dict = predicts_dict` self-assignment.)
        predicts_dict[object_name] = bbox_array[keep].tolist()
    return predicts_dict
|
|
||
|
|
||
def plot_result(src_img, predicts_dict):
    """
    Draw the predicted bounding boxes and class labels on the source image
    and save the annotated image to "result.jpg".
    Args:
        src_img: source image at its original resolution.
        predicts_dict: {"cat": [[x1, y1, x2, y2, scores1], [...]]} with
            coordinates in the 448x448 resized-image space.
    """
    # Boxes were predicted on the 448x448 resized image; scale them back
    # to the source image's resolution.
    height_ratio = src_img.shape[0]/448.0
    width_ratio = src_img.shape[1]/448.0
    for object_name, bbox in predicts_dict.items():
        for xmin, ymin, xmax, ymax, score in bbox:
            left = int(xmin * width_ratio)
            top = int(ymin * height_ratio)
            right = int(xmax * width_ratio)
            bottom = int(ymax * height_ratio)
            score = float("%.3f" %score)

            cv2.rectangle(src_img, (left, top), (right, bottom), (0, 0, 255))
            cv2.putText(src_img, object_name + str(score), (left, top), 1, 2, (0, 0, 255))

    cv2.imwrite("result.jpg", src_img)
|
|
||
|
|
||
if __name__ == '__main__':
    # Detection demo: run tiny-YOLO on one image and save the annotated result.
    # Graph is built in test mode with batch size 1 and 448x448 input.
    common_params = {'image_size': 448, 'num_classes': 20, 'batch_size': 1}
    net_params = {'cell_size': 7, 'boxes_per_cell': 2, 'weight_decay': 0.0005}

    net = YoloTinyNet(common_params, net_params, test=True)

    image = tf.placeholder(tf.float32, (1, 448, 448, 3))
    predicts = net.yoloTinyModel(image)

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    config = tf.ConfigProto()
    # Allocate GPU memory on demand instead of grabbing it all upfront.
    config.gpu_options.allow_growth = True

    sess = tf.Session(config=config)
    src_img = cv2.imread("./test2.jpg")
    #src_img = cv2.imread("./data/VOCdevkit2007/VOC2007/JPEGImages/000058.jpg")
    resized_img = cv2.resize(src_img, (448, 448))
    #height_ratio = src_img.shape[0]/448.0
    #width_ratio = src_img.shape[1]/448.0

    # convert to rgb image (cv2 loads BGR; the network was trained on RGB)
    np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
    # convert data type used in tf
    np_img = np_img.astype(np.float32)
    # data normalization to [-1, 1] and reshape to input tensor
    np_img = np_img / 255.0 * 2 - 1
    np_img = np.reshape(np_img, (1, 448, 448, 3))

    # Restore pretrained tiny-YOLO weights into the trainable variables.
    saver = tf.train.Saver(net.trainableCollection)
    saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')

    timer = Timer()
    timer.tic()

    print('Procession detection...')
    np_predict = sess.run(predicts, feed_dict={image: np_img})
    timer.toc()
    print('One detection took {:.3f}s in average'.format(timer.total_time))
    # Decode raw network output into per-class boxes, then suppress overlaps.
    predicts_dict = process_predicts(resized_img, np_predict)
    print ("predict dict: ", predicts_dict)
    predicts_dict = non_max_suppress(predicts_dict)
    print ("predict dict after non-maximum suppression: ", predicts_dict)

    plot_result(src_img, predicts_dict)

    #cv2.waitKey(0)
    sess.close()
| @@ -0,0 +1,38 @@ | ||
| <?xml version="1.0" encoding="utf-8"?> | ||
| <Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
| <PropertyGroup> | ||
| <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | ||
| <SchemaVersion>2.0</SchemaVersion> | ||
| <ProjectGuid>9a2d606a-3c97-4945-a5fc-6a8c5bfc2a85</ProjectGuid> | ||
| <ProjectHome>.</ProjectHome> | ||
| <ProjectTypeGuids>{D22814C2-A430-4A53-8052-A3A64BFB2240};{888888A0-9F3D-457C-B088-3A5042F75D52}</ProjectTypeGuids> | ||
| <StartupFile>yoloTF.py</StartupFile> | ||
| <SearchPath> | ||
| </SearchPath> | ||
| <WorkingDirectory>.</WorkingDirectory> | ||
| <OutputPath>.</OutputPath> | ||
| <CommandLineArguments>--input_dir . --output_dir .</CommandLineArguments> | ||
| <Name>yoloTF</Name> | ||
| <RootNamespace>yoloTF</RootNamespace> | ||
| </PropertyGroup> | ||
| <PropertyGroup Condition=" '$(Configuration)' == 'Debug' "> | ||
| <DebugSymbols>true</DebugSymbols> | ||
| <EnableUnmanagedDebugging>false</EnableUnmanagedDebugging> | ||
| </PropertyGroup> | ||
| <PropertyGroup Condition=" '$(Configuration)' == 'Release' "> | ||
| <DebugSymbols>true</DebugSymbols> | ||
| <EnableUnmanagedDebugging>false</EnableUnmanagedDebugging> | ||
| </PropertyGroup> | ||
| <ItemGroup> | ||
| <Compile Include="yoloTF.py" /> | ||
| </ItemGroup> | ||
| <Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" /> | ||
| <!-- Uncomment the CoreCompile target to enable the Build command in | ||
| Visual Studio and specify your pre- and post-build commands in | ||
| the BeforeBuild and AfterBuild targets below. --> | ||
| <!--<Target Name="CoreCompile" />--> | ||
| <Target Name="BeforeBuild"> | ||
| </Target> | ||
| <Target Name="AfterBuild"> | ||
| </Target> | ||
| </Project> |