In [1]:
import torch
import torchvision
import cv2
import numpy as np
from torchvision.transforms import transforms as T
from PIL import Image, ImageDraw, ImageFont
from torch import tensor
from numpy import random
from tqdm import tqdm,trange
from scipy.optimize import linear_sum_assignment
from copy import deepcopy
torch.hub.list('facebookresearch/detr:main')
torch.cuda.empty_cache()

Using cache found in C:\Users\A/.cache\torch\hub\facebookresearch_detr_main


In [2]:
for i in trange(1000):
    torch.cuda.empty_cache()

100%|██████████| 1000/1000 [00:00<00:00, 1002942.13it/s]


In [3]:
transform = T.Compose(
    [
        T.Resize(800),  # 800
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

model = torch.hub.load("facebookresearch/detr:main", "detr_resnet101", pretrained=True)
model = model.cuda()

colors = random.randint(255, size=(101, 3))


def solve(image, context):

    # pred

    inp = transform(image).unsqueeze(0)
    inp = inp.cuda()
    oup = model(inp)

    W, H = image.size
    def cbox2box(cbox):
        x1 = cbox[:, 0] - cbox[:, 2] / 2
        y1 = cbox[:, 1] - cbox[:, 3] / 2
        x2 = cbox[:, 0] + cbox[:, 2] / 2
        y2 = cbox[:, 1] + cbox[:, 3] / 2
        box = torch.ones_like(cbox)
        box[:, 0] = x1 * W 
        box[:, 1] = y1 * H 
        box[:, 2] = x2 * W 
        box[:, 3] = y2 * H 
        return box

    oup_lgs = oup["pred_logits"].softmax(-1)
    labels = oup_lgs[..., :-1].argmax(-1)[0]
    scores = oup_lgs[..., :-1].max(-1)[0][0]
    scores_not1 = 1 - oup_lgs[..., -1][0]  # 非背景的分数

    center_boxes = oup["pred_boxes"][0]  # cbox (中心x, 中心y, box宽，box高)
    boxes = cbox2box(center_boxes)

 

    keep_i = 0
    if context:
        keep_i = range(labels.shape[0])
    else:
        keep_i = torchvision.ops.nms(boxes, scores, iou_threshold=0.5)
        keep_i = keep_i.tolist()

    # # 分类型 nms
    # keep_i = set()
    # for label in range(90):
    #     ss = oup_lgs[...,label][0]
    #     ks = torchvision.ops.nms(boxes, ss, iou_threshold=0.5)
    #     keep_i.update(ks.tolist())

    res = []
    for i in keep_i:
        label = int(labels[i])
        score = float(scores[i])
        box = boxes[i].tolist()
        # if score > 0.5:
        #     res.append([label, score, box, i])
        if score>0.3 and label in (3, 6, 5, 16):
            res.append([label, score, box, i])

    
    # i -> last_i mapping
    def solve_last_i_mapping(res, context):
        update_cnt = 0
        def IoU(box1, box2):
            x1min, y1min, x1max, y1max = box1[0], box1[1], box1[2], box1[3]
            if x1min > x1max: x1min,x1max = x1max,x1min
            if y1min > y1max: y1min,y1max = y1max,y1min
            x2min, y2min, x2max, y2max = box2[0], box2[1], box2[2], box2[3]
            if x2min > x2max: x2min,x2max = x2max,x2min
            if y2min > y2max: y2min,y2max = y2max,y2min
            s1 = (y1max - y1min + 0.1) * (x1max - x1min + 0.1)
            s2 = (y2max - y2min + 0.1) * (x2max - x2min + 0.1)
            xmin = max(x1min,x2min)
            ymin = max(y1min,y2min)
            xmax = min(x1max,x2max)
            ymax = min(y1max,y2max)
            inter_h = max(ymax - ymin + 0.1, 0)
            inter_w = max(xmax - xmin + 0.1, 0)
            intersection = inter_h * inter_w
            union = s1 + s2 - intersection
            iou = intersection / union
            return iou

        iou_matrix = np.zeros((len(res), len(context)), dtype=float)
        for i, nowIt in enumerate(res):
            for j, lastIt in enumerate(context):
                box1 = nowIt[2]
                box2 = lastIt[2]
                label1 = nowIt[0]
                label2 = lastIt[0]
                iou_matrix[i][j] = IoU(box1, box2) if label1 == label2 else 0

        # print(iou_matrix)

        r,l = linear_sum_assignment(iou_matrix, maximize=True)
        for i,j in zip(r,l):
            if iou_matrix[i][j] > 0:
                context[j][2] = res[i][2]
                update_cnt += 1

        return update_cnt

    # 用新的最接近的box更新
    update_cnt = 0
    if context:
        update_cnt = solve_last_i_mapping(res, context)
    else:
        context = res
        update_cnt = res.__len__()
    # draw

    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype("consola.ttf", 10, encoding="unic")  # 设置字

    for it in context:
        label = it[0]
        score = it[1]
        box = it[2]
        i = it[3]

        # color = tuple(random.randint(255,size=(3)))
        color = tuple(colors[i])

        draw.rectangle(box, None, color)
        # draw.text((box[0], box[1]), "(%d, %.3f, %d)"%(label, score, i), color, font)
        draw.text((box[0], box[1]), "%d" % (i), color, font)

    # 更新框少于x%后重新检测
    if context.__len__()==0 or update_cnt / context.__len__() < 0.25:
        context = None

    return image, context


Using cache found in C:\Users\A/.cache\torch\hub\facebookresearch_detr_main


In [4]:
vedio = cv2.VideoCapture("../test/1.mp4")

W = int(vedio.get(3))
H = int(vedio.get(4))
fps = vedio.get(5)
fcout = int(vedio.get(7))

# print(W,H,fps,fcout)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
oup_vedio = cv2.VideoWriter("../test/oup/1.avi", fourcc, fps, (W, H), True) 

context = None

for _ in trange(fcout):
    rval, frame = vedio.read()
    if not rval:
        break

    image = Image.fromarray(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))  

    image, context = solve(image, context)

    cvImage = np.array(image)
    cvImage = cv2.cvtColor(cvImage,cv2.COLOR_RGB2BGR)
    # print(cvImage.shape)
    # break
    # if _ == 1: break

    oup_vedio.write(cvImage)
    # cv2.imshow('oup',cvImage)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break 

vedio.release()
oup_vedio.release()
cv2.destroyAllWindows()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ..\aten\src\ATen\native\BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
100%|█████████▉| 7493/7499 [37:42<00:01,  3.31it/s]
