In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

# Fetch Fashion-MNIST as (image, label) pairs together with its metadata object.
dataset, metadata = tfds.load(
    'fashion_mnist',
    as_supervised=True,
    with_info=True,
)
print(metadata)

train_dataset = dataset['train']
test_dataset = dataset['test']

# Shuffle with a 100-element buffer, group into batches of 12, and repeat
# so the training stream never runs dry.
train_dataset = train_dataset.shuffle(100)
train_dataset = train_dataset.batch(12)
train_dataset = train_dataset.repeat()

# Peek at the first batch to see what the pipeline produces.
for img, label in train_dataset.take(1):
    img = img.numpy()
    print(img.shape)
    print(img)

In [None]:
import numpy as np

# Build a (2, 3, 4) block of the integers 0..23, then move the leading axis
# to the end so the result has shape (3, 4, 2).
a = np.transpose(np.arange(24).reshape((2, 3, 4)), axes=(1, 2, 0))
print(a)
print(a.shape)

### 选择性搜索算法 - Selective Search

In [2]:
import sys
import cv2
 
# Load the image; adjust the path to match your environment.
im = cv2.imread("./dog_cat.jpg")
if im is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image './dog_cat.jpg'")

# Resize to a fixed height of 400 px while keeping the aspect ratio.
newHeight = 400
newWidth = int(im.shape[1] * newHeight / im.shape[0])
im = cv2.resize(im, (newWidth, newHeight))

# Create the Selective Search segmentation object (cv2.ximgproc module).
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

# Register the image to be processed.
ss.setBaseImage(im)

# Fast mode trades recall for speed. "Recall" here means how many of the
# regions that should be proposed actually are; higher is better.
# ss.switchToSelectiveSearchFast()

# Quality mode is slower but has higher recall.
ss.switchToSelectiveSearchQuality()

# Run the region proposal step and report how many regions were found.
rects = ss.process()
print('Total Number of Region Proposals: {}'.format(len(rects)))

# Number of proposals drawn initially.
numShowRects = 100

# Step by which a key press grows or shrinks the number of drawn proposals.
increment = 10

try:
    while True:
        # Draw on a copy so the base image stays clean between redraws.
        imOut = im.copy()

        # Only the first `numShowRects` proposals are drawn.
        for rect in rects[:numShowRects]:
            # (x, y) is the top-left corner; w and h are the box width and height.
            # Cast to plain ints so cv2.rectangle always gets Python integers.
            x, y, w, h = (int(v) for v in rect)
            cv2.rectangle(imOut, (x, y), (x + w, y + h), (0, 255, 0), 1)  # green, 1 px

        # Show the image with its bounding boxes.
        cv2.imshow("Output", imOut)

        # Block until a key is pressed; keep only the low byte of the key code.
        k = cv2.waitKey(0) & 0xFF

        if k == ord('m'):
            # "m": show more boxes.
            numShowRects += increment
        elif k == ord('l') and numShowRects > increment:
            # "l": show fewer boxes, never dropping to zero.
            numShowRects -= increment
        elif k == ord('q'):
            # "q": quit.
            break
finally:
    # Close the display window even if the loop is interrupted.
    cv2.destroyAllWindows()

Total Number of Region Proposals: 7049


### 基于opencv+keras和yolo2的车道检测

Darknet19 Model

In [None]:
import functools
from functools import partial

from tensorflow.keras.layers import Conv2D,MaxPool2D,LeakyReLU,BatchNormalization
from tensorflow.keras import Model,Input
from tensorflow.keras.regularizers import l2
from ..utils import compose

# Conv2D pre-configured with 'same' padding; base for every Darknet convolution.
_DarknetConv2D = partial(Conv2D, padding='same')


@functools.wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set the Darknet weight regularizer for Conv2D.

    Builds a ``Conv2D`` layer with 'same' padding and an L2 kernel
    regularizer of 5e-4. Any keyword argument supplied by the caller
    overrides these defaults.

    Args:
        *args: Positional arguments forwarded to ``Conv2D``.
        **kwargs: Keyword arguments forwarded to ``Conv2D``.

    Returns:
        The constructed ``Conv2D`` layer.
    """
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    # Caller-supplied options take precedence over the Darknet defaults.
    darknet_conv_kwargs.update(kwargs)
    return _DarknetConv2D(*args, **darknet_conv_kwargs)

def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Compose a bias-free Darknet convolution, batch norm and LeakyReLU(0.1).

    The convolution is created with ``use_bias=False`` unless the caller
    passes a different ``use_bias``; all other arguments are forwarded to
    ``DarknetConv2D`` unchanged.
    """
    conv_kwargs = dict(use_bias=False)
    conv_kwargs.update(kwargs)
    conv = DarknetConv2D(*args, **conv_kwargs)
    norm = BatchNormalization()
    act = LeakyReLU(alpha=0.1)
    return compose(conv, norm, act)

def bottleneck_block(outer_filters, bottleneck_filters):
    """Compose a 3x3 -> 1x1 -> 3x3 stack of Darknet conv/BN/LeakyReLU units.

    The two 3x3 convolutions use ``outer_filters`` channels and the 1x1
    convolution between them uses ``bottleneck_filters`` channels.
    """
    stages = [
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
    ]
    return compose(*stages)

def bottleneck_x2_block(outer_filters, bottleneck_filters):
    """Compose a 3x3 -> 1x1 -> 3x3 -> 1x1 -> 3x3 stack of Darknet conv units.

    This is a ``bottleneck_block`` followed by one more 1x1 / 3x3 pair
    with the same channel counts.
    """
    stages = [
        bottleneck_block(outer_filters, bottleneck_filters),
        DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
    ]
    return compose(*stages)

def darknet_body():
    """Generate the first 18 conv layers of Darknet-19.

    Returns:
        A callable (built with ``compose``) that applies the convolution
        and max-pooling stack to an input tensor.
    """
    return compose(
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        MaxPool2D(),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        MaxPool2D(),
        bottleneck_block(128, 64),
        MaxPool2D(),
        bottleneck_block(256, 128),
        MaxPool2D(),
        bottleneck_x2_block(512, 256),
        MaxPool2D(),
        bottleneck_x2_block(1024, 512))


# Backward-compatible alias: the function was originally defined under this
# misspelled name while `darknet19` called `darknet_body`, which raised NameError.
darkent_body = darknet_body


def darknet19(inputs):
    """Generate the Darknet-19 model for ImageNet classification.

    Args:
        inputs: Keras input tensor (e.g. from ``Input(shape=...)``).

    Returns:
        A Keras ``Model`` mapping ``inputs`` to the softmax output of a
        1x1 convolution with 1000 filters applied to the Darknet body.
    """
    body = darknet_body()(inputs)
    logits = DarknetConv2D(1000, (1, 1), activation='softmax')(body)
    return Model(inputs, logits)


# `darknet19` needs a Keras input tensor. 224x224 RGB is assumed here as the
# classification input size -- TODO confirm against the intended data.
inputs = Input(shape=(224, 224, 3))
model = darknet19(inputs)
model.summary()
model.compile(optimizer='Adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])  # metrics is passed as a list

# TODO: load pretrained weights. A Keras Model has no `load_model()` method;
# use `model.load_weights(<path>)` once a weights file is available.

# TODO: evaluate on a labelled dataset. `predict()` needs input data and
# returns predictions; `loss, accuracy = model.evaluate(x, y)` is the call
# that returns the (loss, accuracy) pair.

#### 目标检测YOLOv2-小白将

In [1]:
# config.py
"""
Yolov2 anchors and coco classes
"""

# The string below holds an alternative anchor set; it is a bare string
# expression and has no effect at runtime.
"""
anchors = [[0.738768, 0.874946],
           [2.42204, 2.65704],
           [4.30971, 7.04493],
           [10.246, 4.59428],
           [12.6868, 11.8741]]
"""

# Active anchor boxes: five pairs of prior sizes used by the detection
# decoder (presumably width, height in feature-map cells -- verify against
# the decoder that consumes them).
anchors = [
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
]

def read_coco_labels(path="./data/coco_classes.txt"):
    """Read class names from a text file, one name per line.

    Args:
        path: Location of the class-name file. Defaults to the COCO class
            list expected next to the notebook.

    Returns:
        list[str]: The lines of the file with their trailing newline removed,
        in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    # `with` guarantees the handle is closed. rstrip("\n") only removes the
    # line terminator, so a final line without a newline keeps its last
    # character (slicing with [:-1] would have chopped it).
    with open(path) as f:
        return [line.rstrip("\n") for line in f]

# Load the class names once when this cell/module runs so that other code can
# import `class_names` directly. This reads ./data/coco_classes.txt and raises
# FileNotFoundError when that file is missing (as in the recorded output).
class_names = read_coco_labels()

FileNotFoundError: [Errno 2] No such file or directory: './data/coco_classes.txt'

In [None]:
#demo.py
"""
Demo for yolov2
"""

import numpy as np
import tensorflow as tf
import cv2
from PIL import Image

from model import darknet
from detect_ops import decode
from utils import preprocess_image, postprocess, draw_detection
from config import anchors, class_names


# Network input resolution; the feature-map size below is derived from it.
input_size = (416, 416)
image_file = "./images/car.jpg"
image = cv2.imread(image_file)
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image: {}".format(image_file))
image_shape = image.shape[:2]  # (height, width) of the original image
image_cp = preprocess_image(image, input_size)
# The string below is an inert alternative preprocessing path based on PIL;
# it is a bare string expression and is never executed.
"""
image = Image.open(image_file)
image_cp = image.resize(input_size, Image.BICUBIC)
image_cp = np.array(image_cp, dtype=np.float32)/255.0
image_cp = np.expand_dims(image_cp, 0)
#print(image_cp)
"""


# Build the inference graph (TF1 graph-mode API: placeholder / Saver / Session).
images = tf.placeholder(tf.float32, [1, input_size[0], input_size[1], 3])
detection_feat = darknet(images)
# Feature-map size is the input size divided by 32.
feat_sizes = input_size[0] // 32, input_size[1] // 32
detection_results = decode(detection_feat, feat_sizes, len(class_names), anchors)

# Restore the pretrained weights and run a single forward pass.
checkpoint_path = "./checkpoint_dir/yolo2_coco.ckpt"
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    bboxes, obj_probs, class_probs = sess.run(detection_results, feed_dict={images: image_cp})

# Scale boxes to the original image size, threshold, sort and run NMS.
bboxes, scores, class_inds = postprocess(bboxes, obj_probs, class_probs,
                                         image_shape=image_shape)
img_detection = draw_detection(image, bboxes, scores, class_inds, class_names)
cv2.imwrite("detection.jpg", img_detection)
cv2.imshow("detection results", img_detection)

# Wait for a key press, then close the window so it does not linger.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
"""
Detection ops for Yolov2
"""

import tensorflow as tf
import numpy as np


def decode(detection_feat, feat_sizes=(13, 13), num_classes=80,
           anchors=None):
    """Decode the raw detection feature map into boxes and probabilities.

    Args:
        detection_feat: Network output; reshaped here to
            ``[-1, H * W, num_anchors, num_classes + 5]``.
        feat_sizes: ``(H, W)`` size of the detection feature map.
        num_classes: Number of object classes.
        anchors: Sequence of ``(width, height)`` anchor priors, one per
            anchor. Required despite the ``None`` default.

    Returns:
        Tuple ``(bboxes, obj_probs, class_probs)`` where ``bboxes`` has shape
        ``[-1, H * W, num_anchors, 4]`` holding ``(x_min, y_min, x_max, y_max)``
        normalised by the feature-map size, ``obj_probs`` has shape
        ``[-1, H * W, num_anchors]`` and ``class_probs`` has shape
        ``[-1, H * W, num_anchors, num_classes]``.

    Raises:
        ValueError: If ``anchors`` is not provided.
    """
    if anchors is None:
        raise ValueError("decode() requires `anchors` (a list of (w, h) pairs)")

    H, W = feat_sizes
    num_anchors = len(anchors)
    detection_results = tf.reshape(detection_feat, [-1, H * W, num_anchors,
                                                    num_classes + 5])

    # Per-anchor channel layout: [t_x, t_y, t_w, t_h, objectness, class scores...]
    bbox_xy = tf.nn.sigmoid(detection_results[:, :, :, 0:2])
    bbox_wh = tf.exp(detection_results[:, :, :, 2:4])
    obj_probs = tf.nn.sigmoid(detection_results[:, :, :, 4])
    class_probs = tf.nn.softmax(detection_results[:, :, :, 5:])

    anchors = tf.constant(anchors, dtype=tf.float32)

    height_ind = tf.range(H, dtype=tf.float32)
    width_ind = tf.range(W, dtype=tf.float32)
    # meshgrid(x, y) returns grids of shape [len(y), len(x)], i.e. [H, W] here,
    # which matches the row-major H * W flattening of the feature map. Passing
    # the height range first would give wrong offsets whenever H != W.
    x_offset, y_offset = tf.meshgrid(width_ind, height_ind)
    x_offset = tf.reshape(x_offset, [1, -1, 1])
    y_offset = tf.reshape(y_offset, [1, -1, 1])

    # decode: centres are cell offset + sigmoid output; bbox_w / bbox_h are
    # half-extents so the corners can be formed by +/- around the centre.
    bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W
    bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H
    bbox_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5
    bbox_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5

    bboxes = tf.stack([bbox_x - bbox_w, bbox_y - bbox_h,
                       bbox_x + bbox_w, bbox_y + bbox_h], axis=3)

    return bboxes, obj_probs, class_probs

In [None]:
#losses.py
"""
Loss function for YOLOv2
"""

import numpy as np
import tensorflow as tf

def compute_loss(predictions, targets, anchors, scales, num_classes=20, feat_sizes=(13, 13)):
    """
    Compute the loss of Yolov2 for training

    Args:
        predictions: Raw network output, reshaped here to
            ``[-1, H, W, B, 5 + C]``.
        targets: Dict with the ground truth:
            ``"coords"`` ``[-1, H*W, B, 4]``,
            ``"probs"`` ``[-1, H*W, B, C]`` (one hot),
            ``"confs"`` ``[-1, H*W, B]`` (1 for object, 0 for background).
        anchors: Sequence of ``B`` ``(width, height)`` anchor priors.
        scales: Weights ``(sprob, sconf, snoob, scoor)`` for the class,
            object-confidence, no-object-confidence and coordinate terms.
        num_classes: Number of classes ``C``.
        feat_sizes: ``(H, W)`` of the detection feature map.

    Returns:
        Scalar tensor: half of the batch mean of the weighted squared error.
    """
    H, W = feat_sizes
    C = num_classes
    B = len(anchors)
    anchors = tf.constant(anchors, dtype=tf.float32)
    anchors = tf.reshape(anchors, [1, 1, B, 2])

    sprob, sconf, snoob, scoor = scales  # the scales for different parts

    _coords = targets["coords"]  # ground truth [-1, H*W, B, 4]
    _probs = targets["probs"]    # class probability [-1, H*W, B, C] one hot
    _confs = targets["confs"]    # 1 for object, 0 for background, [-1, H*W, B]

    # decode the net output
    predictions = tf.reshape(predictions, [-1, H, W, B, (5 + C)])
    coords = predictions[:, :, :, :, 0:4]   # t_x, t_y, t_w, t_h
    coords = tf.reshape(coords, [-1, H*W, B, 4])
    coords_xy = tf.nn.sigmoid(coords[:, :, :, 0:2])  # (0, 1) relative cell top left
    coords_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchors /
                        np.reshape([W, H], [1, 1, 1, 2])) # sqrt of w, h (0, 1)
    coords = tf.concat([coords_xy, coords_wh], axis=3)  # [batch_size, H*W, B, 4]

    confs = tf.nn.sigmoid(predictions[:, :, :, :, 4])  # object confidence
    confs = tf.reshape(confs, [-1, H*W, B, 1])

    probs = tf.nn.softmax(predictions[:, :, :, :, 5:])  # class probability
    probs = tf.reshape(probs, [-1, H*W, B, C])

    preds = tf.concat([coords, confs, probs], axis=3)  # [-1, H*W, B, (4+1+C)]

    # match ground truths with anchors (predictions in fact)
    # assign ground truths to the predictions with the best IOU (select 1 among 5 anchors)
    # Squaring undoes the sqrt applied to w/h above; the result is in cell units.
    wh = tf.pow(coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
    areas = wh[:, :, :, 0] * wh[:, :, :, 1]
    centers = coords[:, :, :, 0:2]
    up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5)

    # the ground truth
    _wh = tf.pow(_coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
    _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1]
    _centers = _coords[:, :, :, 0:2]
    _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5)

    # compute IOU
    inter_upleft = tf.maximum(up_left, _up_left)
    inter_downright = tf.minimum(down_right, _down_right)
    inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0)
    intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1]
    ious = tf.truediv(intersects, areas + _areas - intersects)

    # `keepdims` is the current spelling; the old `keep_dims` keyword is
    # deprecated and no longer accepted by recent TensorFlow releases.
    best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keepdims=True))
    best_iou_mask = tf.cast(best_iou_mask, tf.float32)
    mask = best_iou_mask * _confs  # [-1, H*W, B]
    mask = tf.expand_dims(mask, -1)  # [-1, H*W, B, 1]

    # compute weight terms
    confs_w = snoob * (1 - mask) + sconf * mask
    coords_w = scoor * mask
    probs_w = sprob * mask
    # Broadcasts against preds: [coords_w | confs_w | probs_w] along the last axis.
    weights = tf.concat([coords_w, confs_w, probs_w], axis=3)

    truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3)

    loss = tf.pow(preds - truths, 2) * weights
    loss = tf.reduce_sum(loss, axis=[1, 2, 3])
    loss = 0.5 * tf.reduce_mean(loss)
    return loss

In [None]:
#model.py
"""
YOLOv2 implemented by Tensorflow, only for predicting
"""
import os

import numpy as np
import tensorflow as tf



######## basic layers #######

def leaky_relu(x):
    """Apply a leaky ReLU with negative slope 0.1 to ``x``."""
    activated = tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu")
    return activated

# Conv2d
def conv2d(x, filters, size, pad=0, stride=1, batch_normalize=1,
           activation=leaky_relu, use_bias=False, name="conv2d"):
    """Convolution with optional explicit padding, batch norm and activation.

    Args:
        x: Input tensor; padding is applied to its second and third axes.
        filters: Number of output channels.
        size: Convolution kernel size.
        pad: Zero padding added on each side of both padded axes before the
            (VALID) convolution; skipped when not positive.
        stride: Convolution stride.
        batch_normalize: When equal to 1, apply inference-mode batch
            normalization named ``name + "_bn"``.
        activation: Callable applied last, or a falsy value for none.
        use_bias: Whether the convolution has a bias term.
        name: Name of the convolution layer.

    Returns:
        The output tensor.
    """
    if pad > 0:
        border = [pad, pad]
        x = tf.pad(x, [[0, 0], border, border, [0, 0]])

    out = tf.layers.conv2d(
        x, filters, size,
        strides=stride,
        padding="VALID",
        activation=None,
        use_bias=use_bias,
        name=name,
    )

    if batch_normalize == 1:
        out = tf.layers.batch_normalization(
            out, axis=-1, momentum=0.9, training=False, name=name + "_bn")

    if not activation:
        return out
    return activation(out)

# maxpool2d
def maxpool(x, size=2, stride=2, name="maxpool"):
    """Max-pool ``x`` with the given window size and stride.

    Args:
        x: Input tensor.
        size: Pooling window size.
        stride: Pooling stride.
        name: Name given to the pooling layer. It is now forwarded to the
            layer; it used to be accepted but ignored.

    Returns:
        The pooled tensor.
    """
    return tf.layers.max_pooling2d(x, size, stride, name=name)

# reorg layer
def reorg(x, stride):
    """Rearrange non-overlapping ``stride`` x ``stride`` patches into channels.

    Each patch is extracted with VALID padding and a step equal to its size,
    so the two middle axes shrink by ``stride`` while the last axis grows.
    """
    window = [1, stride, stride, 1]
    no_dilation = [1, 1, 1, 1]
    return tf.extract_image_patches(x, window, window, no_dilation,
                                    padding="VALID")


def darknet(images, n_last_channels=425):
    """Build the Darknet19-based YOLOv2 detection network.

    Args:
        images: Input image batch tensor.
        n_last_channels: Number of output channels of the detection layer.

    Returns:
        The raw detection feature map produced by the final 1x1 convolution
        (no batch norm, no activation, with bias).
    """
    # Stages 1-2: single 3x3 convolutions, each followed by pooling.
    net = conv2d(images, 32, 3, pad=1, name="conv1")
    net = maxpool(net, name="pool1")
    net = conv2d(net, 64, 3, pad=1, name="conv2")
    net = maxpool(net, name="pool2")

    # Stage 3: 3x3 / 1x1 / 3x3 bottleneck.
    net = conv2d(net, 128, 3, pad=1, name="conv3_1")
    net = conv2d(net, 64, 1, name="conv3_2")
    net = conv2d(net, 128, 3, pad=1, name="conv3_3")
    net = maxpool(net, name="pool3")

    # Stage 4: 3x3 / 1x1 / 3x3 bottleneck.
    net = conv2d(net, 256, 3, pad=1, name="conv4_1")
    net = conv2d(net, 128, 1, name="conv4_2")
    net = conv2d(net, 256, 3, pad=1, name="conv4_3")
    net = maxpool(net, name="pool4")

    # Stage 5: five alternating 3x3 / 1x1 convolutions.
    net = conv2d(net, 512, 3, pad=1, name="conv5_1")
    net = conv2d(net, 256, 1, name="conv5_2")
    net = conv2d(net, 512, 3, pad=1, name="conv5_3")
    net = conv2d(net, 256, 1, name="conv5_4")
    net = conv2d(net, 512, 3, pad=1, name="conv5_5")
    # Keep the pre-pooling feature map for the passthrough branch.
    passthrough = net
    net = maxpool(net, name="pool5")

    # Stage 6: five alternating 3x3 / 1x1 convolutions.
    net = conv2d(net, 1024, 3, pad=1, name="conv6_1")
    net = conv2d(net, 512, 1, name="conv6_2")
    net = conv2d(net, 1024, 3, pad=1, name="conv6_3")
    net = conv2d(net, 512, 1, name="conv6_4")
    net = conv2d(net, 1024, 3, pad=1, name="conv6_5")

    # Extra 3x3 convolutions on top of the backbone.
    net = conv2d(net, 1024, 3, pad=1, name="conv7_1")
    net = conv2d(net, 1024, 3, pad=1, name="conv7_2")

    # Passthrough branch: squeeze channels, fold 2x2 patches into channels,
    # then concatenate with the main branch along the channel axis.
    passthrough = conv2d(passthrough, 64, 1, name="conv_shortcut")
    passthrough = reorg(passthrough, 2)
    net = tf.concat([passthrough, net], axis=-1)
    net = conv2d(net, 1024, 3, pad=1, name="conv8")

    # Detection layer: plain 1x1 convolution with bias.
    net = conv2d(net, n_last_channels, 1, batch_normalize=0,
                 activation=None, use_bias=True, name="conv_dec")
    return net



if __name__ == "__main__":
    # Smoke test: push one random 416x416 RGB image through the network with
    # the pretrained checkpoint restored, and print the output shape.
    x = tf.random_normal(shape=[1, 416, 416, 3])
    model = darknet(x)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, "./checkpoint_dir/yolo2_coco.ckpt")
        output = sess.run(model)
        print(output.shape)

In [None]:
#utils.py
"""
Help functions for YOLOv2
"""
import random
import colorsys

import cv2
import numpy as np



############## preprocess image ##################


def preprocess_image(image, image_size=(416, 416)):
    """Turn a BGR image into a normalized RGB batch of one for inference.

    The input is copied as float32, converted from BGR to RGB, resized to
    ``image_size``, scaled by 1/255 and given a leading batch axis.
    """
    rgb = cv2.cvtColor(np.copy(image).astype(np.float32), cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, image_size).astype(np.float32) / 255.0
    # Prepend the batch dimension.
    return scaled[np.newaxis, ...]

def postprocess(bboxes, obj_probs, class_probs, image_shape=(416, 416),
                threshold=0.5):
    """Post-process raw detection results into final boxes.

    Scales normalised boxes to pixel coordinates, clips them to the image,
    scores each box as objectness times its best class probability, keeps
    boxes scoring above ``threshold``, sorts them and applies NMS.

    Args:
        bboxes: Normalised boxes ``(x_min, y_min, x_max, y_max)``; any shape
            that reshapes to ``[-1, 4]``. Not modified.
        obj_probs: Objectness score per box.
        class_probs: Class probabilities per box.
        image_shape: ``(height, width)`` of the target image.
        threshold: Minimum score for a box to be kept.

    Returns:
        Tuple ``(bboxes, scores, class_inds)`` of the surviving detections.
    """
    # np.array() copies, so the in-place scaling below never writes through
    # to the caller's buffer (np.reshape alone would return a view).
    bboxes = np.array(bboxes).reshape(-1, 4)
    bboxes[:, 0::2] *= float(image_shape[1])  # x coordinates scale with width
    bboxes[:, 1::2] *= float(image_shape[0])  # y coordinates scale with height
    bboxes = bboxes.astype(np.int32)

    # clip the bboxes to the image bounds
    bbox_ref = [0, 0, image_shape[1] - 1, image_shape[0] - 1]
    bboxes = bboxes_clip(bbox_ref, bboxes)

    obj_probs = np.reshape(obj_probs, [-1])
    class_probs = np.reshape(class_probs, [len(obj_probs), -1])
    class_inds = np.argmax(class_probs, axis=1)
    class_probs = class_probs[np.arange(len(obj_probs)), class_inds]
    scores = obj_probs * class_probs

    # filter bboxes with scores > threshold
    keep_inds = scores > threshold
    bboxes = bboxes[keep_inds]
    scores = scores[keep_inds]
    class_inds = class_inds[keep_inds]

    # sort top K
    class_inds, scores, bboxes = bboxes_sort(class_inds, scores, bboxes)
    # nms
    class_inds, scores, bboxes = bboxes_nms(class_inds, scores, bboxes)

    return bboxes, scores, class_inds

def draw_detection(im, bboxes, scores, cls_inds, labels, thr=0.3):
    """Draw labelled detection boxes on a copy of ``im``.

    Boxes whose score is below ``thr`` are skipped. Each class gets its own
    colour; the colour assignment is the same on every call.

    Returns:
        A copy of ``im`` with the boxes and "label: score" captions drawn.
    """
    # One evenly spaced hue per label, converted to 0-255 integer triples.
    n_labels = len(labels)
    colors = []
    for idx in range(n_labels):
        c0, c1, c2 = colorsys.hsv_to_rgb(idx / float(n_labels), 1., 1.)
        colors.append((int(c0 * 255), int(c1 * 255), int(c2 * 255)))

    random.seed(10101)      # fixed seed: same colours on every run
    random.shuffle(colors)  # decorrelate the colours of adjacent classes
    random.seed(None)       # hand the global RNG back to default seeding

    imgcv = np.copy(im)
    h, w, _ = imgcv.shape
    for i, box in enumerate(bboxes):
        if scores[i] < thr:
            continue
        cls_indx = cls_inds[i]
        color = colors[cls_indx]

        # Line thickness grows with the image size.
        thick = int((h + w) / 300)
        cv2.rectangle(imgcv,
                      (box[0], box[1]), (box[2], box[3]),
                      color, thick)

        mess = '%s: %.3f' % (labels[cls_indx], scores[i])
        # Put the caption inside the box when there is no room above it.
        text_loc = (box[0] + 2, box[1] + 15) if box[1] < 20 else (box[0], box[1] - 10)
        cv2.putText(imgcv, mess, text_loc,
                    cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * h, color, thick // 3)

    return imgcv


############## process bboxes ##################
def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes so they lie inside the reference box.

    The first two coordinates are raised to at least the reference minimum
    and the last two are lowered to at most the reference maximum. The input
    array is left untouched; a clipped copy is returned.
    """
    clipped = np.transpose(np.copy(bboxes))
    ref = np.transpose(bbox_ref)
    # Rows 0-1 are lower bounds, rows 2-3 are upper bounds.
    for row, limit in ((0, np.maximum), (1, np.maximum),
                       (2, np.minimum), (3, np.minimum)):
        clipped[row] = limit(clipped[row], ref[row])
    return np.transpose(clipped)

def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Order detections by descending score and keep the first ``top_k``.

    Returns:
        Tuple ``(classes, scores, bboxes)`` reordered and truncated together.
    """
    order = np.argsort(-scores)[:top_k]
    return classes[order], scores[order], bboxes[order]

def bboxes_iou(bboxes1, bboxes2):
    """Compute the IoU between bboxes1 and bboxes2.

    Both inputs hold boxes as four corner coordinates
    ``(min_a, min_b, max_a, max_b)`` in their last axis and may be
    multi-dimensional as long as they broadcast against each other.

    Pairs whose union area is not positive (e.g. two zero-area boxes) get an
    IoU of 0 instead of NaN from a division by zero.

    Returns:
        The IoU value(s), with the broadcast shape of the inputs.
    """
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    # Negative extents mean the boxes do not overlap along that axis.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    union = vol1 + vol2 - int_vol

    # Divide by 1 where the union is degenerate, then zero those entries:
    # this avoids 0/0 -> NaN (and the RuntimeWarning that comes with it).
    valid = union > 0
    iou = int_vol / np.where(valid, union, 1) * valid
    return iou

def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5):
    """Apply non-maximum suppression to bounding boxes.

    Boxes are visited in the given order (expected: descending score). A
    later box is dropped when it has the same class as an earlier kept box
    and overlaps it with an IoU of at least ``nms_threshold``.

    Args:
        classes: Class index per box.
        scores: Score per box.
        bboxes: Box coordinates, one row per box.
        nms_threshold: IoU at or above which a same-class box is suppressed.

    Returns:
        Tuple ``(classes, scores, bboxes)`` restricted to the kept boxes.
    """
    # The builtin `bool` replaces the `np.bool` alias, which was removed
    # from NumPy and raises AttributeError on current releases.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size-1):
        if keep_bboxes[i]:
            # Compute overlap with the boxes that follow.
            overlap = bboxes_iou(bboxes[i], bboxes[(i+1):])
            # Keep a following box if it overlaps little or is another class.
            keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
            keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]