In [1]:
import pandas as pd
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt


In [2]:
def build_points_mask(bbox, pts):
  """Flag which points lie inside (or on the boundary of) a polygon.

  Args:
    bbox: polygon vertices in a layout accepted by ``cv.pointPolygonTest``
      (the caller in this notebook passes a float32 array reshaped to
      ``(-1, 1, 2)``).
    pts: iterable of points where each item is indexable as
      ``pt[0] == [x, y]`` (e.g. an array reshaped to ``(-1, 1, 2)``).

  Returns:
    Boolean ``np.ndarray`` with one entry per point in ``pts``: ``True`` when
    the point is inside the polygon or on its edge, ``False`` otherwise.
    An empty ``pts`` yields an empty boolean array.
  """
  bbox_mask = []
  for pt in pts:
    x, y = pt[0]  # each pt has shape [1, 2], so pt[0] is [x, y]
    # Pass plain Python floats: the point is typically a pair of NumPy
    # scalars here, and builtin floats are always accepted by the binding.
    dist = cv.pointPolygonTest(bbox, (float(x), float(y)), False)
    # With measureDist=False the result is a sign only; >= 0 means
    # "inside or on the edge".
    bbox_mask.append(dist >= 0)
  # Return the mask itself (the polygon used to be returned by mistake).
  return np.array(bbox_mask, dtype=bool)

In [4]:
def _inside_polygon_mask(polygon, keypoints):
  """Return a boolean array: True for keypoints inside or on the edge of `polygon`."""
  contour = polygon.reshape(-1, 2)
  return np.array(
      [cv.pointPolygonTest(contour, kp.pt, False) >= 0 for kp in keypoints],
      dtype=bool,
  )


def draw_bounding_box(source, filename, fps=30.0):
  """Track a box through a video with SIFT + homography and save the result.

  The first frame is used only as the reference: SIFT keypoints are detected
  on it and their bounding rectangle becomes the initial box. For every
  following frame the reference descriptors are matched against the frame's
  descriptors (FLANN kNN + 0.7 ratio test), a RANSAC homography is estimated
  and applied to the box, and the box plus an "Object" label are drawn on the
  frame, which is then written to ``filename``. The first frame itself is not
  written.

  Args:
    source: video source passed to ``cv.VideoCapture``.
    filename: output path passed to ``cv.VideoWriter`` (XVID fourcc).
    fps: frame rate of the written video. Defaults to 30.0.

  Returns:
    None. The annotated video is written to ``filename``.

  Raises:
    RuntimeError: if the first frame cannot be read from ``source``.
  """
  MIN_MATCH_COUNT = 10
  FLANN_INDEX_KDTREE = 1

  vid = cv.VideoCapture(source)
  out = None
  try:
    ret1, frame_start = vid.read()
    if not ret1:
      raise RuntimeError(
          "Could not read the first frame from {!r}".format(source))

    sift = cv.SIFT_create(contrastThreshold=0.03, edgeThreshold=10, sigma=1.6)
    gray_frame_start = cv.cvtColor(frame_start, cv.COLOR_BGR2GRAY)
    kp1, des1 = sift.detectAndCompute(gray_frame_start, None)

    # Initial box: bounding rectangle of every keypoint in the first frame.
    box_points = np.float32([kp.pt for kp in kp1]).reshape(-1, 1, 2)
    x, y, w, h = cv.boundingRect(box_points)
    # ref_bbox always lives in the same frame as kp1/des1, so a homography
    # estimated from kp1 can be applied to it directly.
    ref_bbox = np.float32(
        [[x, y], [x, y + h], [x + w, y + h], [x + w, y]]).reshape(-1, 1, 2)
    # bbox is the most recently drawn box, reused when a frame cannot be tracked.
    bbox = ref_bbox

    W = int(vid.get(cv.CAP_PROP_FRAME_WIDTH))
    H = int(vid.get(cv.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv.VideoWriter_fourcc(*"XVID")
    out = cv.VideoWriter(filename, fourcc, fps, (W, H))

    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv.FlannBasedMatcher(index_params, search_params)

    while True:
      ret2, frame_second = vid.read()
      if not ret2:
        break

      gray_frame_second = cv.cvtColor(frame_second, cv.COLOR_BGR2GRAY)
      kp2, des2 = sift.detectAndCompute(gray_frame_second, None)

      # Default: keep the previous box if this frame cannot be tracked.
      new_bbox = bbox
      can_match = (des1 is not None and des2 is not None
                   and len(kp1) >= 2 and len(kp2) >= 2)
      if can_match:
        matches = flann.knnMatch(des1, des2, k=2)
        # Ratio test; skip entries where fewer than two neighbours came back.
        good = [pair[0] for pair in matches
                if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance]

        if len(good) > MIN_MATCH_COUNT:
          src_pts = np.float32(
              [kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
          dst_pts = np.float32(
              [kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

          M, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
          if M is not None:
            new_bbox = cv.perspectiveTransform(ref_bbox, M)
            bbox = new_bbox

            mask_inside = build_points_mask(new_bbox, dst_pts)
            if np.sum(mask_inside) >= MIN_MATCH_COUNT:
              inside = _inside_polygon_mask(new_bbox, kp2)
              # Refresh the reference only when enough keypoints remain to
              # match against; keypoints, descriptors and box are swapped
              # together so they never describe different frames.
              if np.count_nonzero(inside) >= 2:
                kp1 = [kp for kp, keep in zip(kp2, inside) if keep]
                des1 = des2[inside]
                ref_bbox = new_bbox

      draw_bbox = np.int32(new_bbox)
      # Builtin ints for the text origin (NumPy scalars are not always accepted).
      x, y = int(draw_bbox[0][0][0]), int(draw_bbox[0][0][1])
      label = "Object"
      cv.putText(
          frame_second,
          label,
          (x + 15, y + 15),
          cv.FONT_HERSHEY_SIMPLEX,
          fontScale=0.6,
          color=(255, 255, 255),
          thickness=2
      )
      cv.polylines(frame_second, [draw_bbox], isClosed=True,
                   color=(0, 255, 0), thickness=2)
      out.write(frame_second)
  finally:
    # Always release the handles so the output container is finalised and the
    # input is closed, even if processing fails part-way through.
    vid.release()
    if out is not None:
      out.release()

In [8]:
# Run the tracker on the sample clip and write the annotated video.
draw_bounding_box(
    source='/content/obj.mp4',
    filename='/content/obj-output.mp4',
)