In [2]:
# Install dependencies (use the cu101 wheels because Colab ships CUDA 10.1).
# torch and torchvision are pinned to 1.5 / 0.6 and resolved from the cu101 wheel index.
!pip install -U torch==1.5 torchvision==0.6 -f https://download.pytorch.org/whl/cu101/torch_stable.html 
!pip install cython pyyaml==5.1
# pycocotools is installed from the cocoapi GitHub repository (PythonAPI subdirectory).
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
import torch, torchvision
# Report the installed torch version and whether a CUDA device is visible.
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# OpenCV is pre-installed on Colab, so it is not installed here.

Looking in links: https://download.pytorch.org/whl/cu101/torch_stable.html
Requirement already up-to-date: torch==1.5 in /usr/local/lib/python3.6/dist-packages (1.5.0+cu101)
Requirement already up-to-date: torchvision==0.6 in /usr/local/lib/python3.6/dist-packages (0.6.0+cu101)
Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-d0yg670u
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-d0yg670u
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp36-cp36m-linux_x86_64.whl size=275264 sha256=fbb8c9512f2e0a02cc59381ffea83430caf424e14df83a4270212cbff40aaecf
  Stored in directory: /tmp/pip-ephem-wheel-cache-86ajn297/wheels/90/51/41/646daf401c3bc408ff10de34ec76587a9b3ebfac8d21ca5c3a
Successfully built pycocotools
Installing

In [3]:
# Install detectron2, pinned to 0.1.2, from the prebuilt cu101 wheel index
# (the same CUDA flavour as the torch wheels installed above).
!pip install detectron2==0.1.2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html


In [0]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
from google.colab.patches import cv2_imshow
from glob import glob
import os

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.structures import pairwise_iou # used for box overlap

# Detecting Objects in Frames

In [4]:
!wget https://github.com/gkioxari/aims2020_visualrecognition/releases/download/v1.0/videoclip.zip
!unzip -qq videoclip.zip

--2020-05-15 15:37:16--  https://github.com/gkioxari/aims2020_visualrecognition/releases/download/v1.0/videoclip.zip
Resolving github.com (github.com)... 140.82.118.3
Connecting to github.com (github.com)|140.82.118.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/255177940/09ad9d80-7f47-11ea-93bc-002a89d4791c?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20200515%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20200515T153716Z&X-Amz-Expires=300&X-Amz-Signature=70575bdee229e2aafb84865ed791595b9407478d62d34b8de9744f7aa9bf087e&X-Amz-SignedHeaders=host&actor_id=0&repo_id=255177940&response-content-disposition=attachment%3B%20filename%3Dvideoclip.zip&response-content-type=application%2Foctet-stream [following]
--2020-05-15 15:37:16--  https://github-production-release-asset-2e65be.s3.amazonaws.com/255177940/09ad9d80-7f47-11ea-93bc-002a89d4791c?X-Amz-Algorithm=AWS4-HMAC-SHA2

In [0]:
def makedir(path):
    """Create directory ``path`` (including missing parents) if it does not exist.

    Calling this on an existing directory is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not os.path.exists(path): os.makedirs(path)`.
    os.makedirs(path, exist_ok=True)

In [0]:
# Build a predictor from the model-zoo config `mask_rcnn_R_50_FPN_3x`
# (COCO instance segmentation); the same zoo entry supplies the config file
# and the checkpoint URL.
model_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(model_config))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_config)
# Test-time score threshold for this model.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)

In [7]:
# Collect the clip frames in lexicographic (= frame) order and preview the first five.
# The pattern has no `**`, so a recursive glob would match exactly the same files.
image_clips = sorted(glob("./clip/*.jpg"))
image_clips[:5]

['./clip/00.jpg',
 './clip/01.jpg',
 './clip/02.jpg',
 './clip/03.jpg',
 './clip/04.jpg']

In [0]:
# Run the detector on every frame and save the visualised predictions to ./result/.
# The output directory and the dataset metadata do not change between frames,
# so both are prepared once before the loop.
makedir('./result/')
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

for img_pth in image_clips:
    im = cv2.imread(img_pth)
    outputs = predictor(im)
    # `Visualizer` is given the image with its channel axis reversed;
    # the rendered result is reversed back before it is written with OpenCV.
    v = Visualizer(im[:, :, ::-1], metadata, scale=1.2)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    final_img = v.get_image()[:, :, ::-1]
    # Keep the original file name so results line up with the input frames.
    final_pth = os.path.join('./result/', os.path.basename(img_pth))
    cv2.imwrite(final_pth, final_img)

## Visualize some segmented images

In [0]:
# Saved visualisations, in lexicographic (= frame) order.
# The pattern has no `**`, so a recursive glob would match exactly the same files.
pred_images = sorted(glob("./result/*.jpg"))

In [10]:
# Show up to three randomly chosen result images.
# `random.sample` raises ValueError when asked for more items than exist,
# so the sample size is capped at the number of available images.
for img_path in random.sample(pred_images, min(3, len(pred_images))):
    im = cv2.imread(img_path)
    cv2_imshow(im)

Output hidden; open in https://colab.research.google.com to view.

# Tracking Objects in Pairs of Frames

In [0]:
# Cache the raw predictor output of every frame, keyed by the integer parsed
# from the file name (the part before the first '.', e.g. './clip/07.jpg' -> 7).
output_dict = {}

for frame_path in image_clips:
    frame_key = int(frame_path.rsplit('/', 1)[-1].split('.', 1)[0])
    output_dict[frame_key] = predictor(cv2.imread(frame_path))

In [0]:
def pairwise_matcher(pred_a, pred_b):
    """Match each detection in ``pred_a`` to its best same-class detection in ``pred_b``.

    For every box in ``pred_a`` the box in ``pred_b`` with the highest IoU among
    those sharing the same predicted class is selected.

    Args:
        pred_a: Object exposing ``pred_boxes`` and ``pred_classes`` for the first frame.
        pred_b: Object exposing ``pred_boxes`` and ``pred_classes`` for the second frame.

    Returns:
        An integer tensor with one entry per detection in ``pred_a``: the index of
        the matched detection in ``pred_b``, or -1 when no same-class box overlaps
        it (best IoU is 0). The tensor lives on the device of the inputs.

    Note:
        Matching is independent per row, so several detections in ``pred_a`` may
        map to the same detection in ``pred_b``.
    """
    pred_a_cls = pred_a.pred_classes
    pred_b_cls = pred_b.pred_classes
    num_a, num_b = pred_a_cls.shape[0], pred_b_cls.shape[0]

    # With no detections on either side there is nothing to match; `torch.max`
    # over an empty dimension would raise, so return "no match" for every row.
    if num_a == 0 or num_b == 0:
        return torch.full((num_a,), -1, dtype=torch.long, device=pred_a_cls.device)

    # (num_a, num_b) mask that is True where the two detections share a class.
    # Broadcasting keeps the computation on whatever device the inputs use
    # instead of assuming CUDA.
    same_class = pred_a_cls.unsqueeze(1) == pred_b_cls.unsqueeze(0)

    iou = pairwise_iou(pred_a.pred_boxes, pred_b.pred_boxes)
    # Zero out the IoU of cross-class pairs so they can never be selected.
    matching_class = iou * same_class.to(iou.dtype)

    max_vals, best_match = torch.max(matching_class, dim=1)
    best_match[max_vals == 0] = -1  # no match
    return best_match

In [0]:
# Sanity-check the matcher on the first two frames (keys 0 and 1 of output_dict).
best_match = pairwise_matcher(output_dict[0]["instances"], output_dict[1]["instances"])

In [14]:
# Entry i is the index of the frame-1 detection matched to frame-0 detection i; -1 means no match.
best_match

tensor([ 0,  1,  5, -1,  3, 10, -1,  8, 12, -1, -1,  4,  7, -1, 10,  5, -1, -1],
       device='cuda:0')

In [0]:
# Track across the whole clip: entry i holds the matches from frame i to frame i + 1.
tracking_list = [
    pairwise_matcher(output_dict[i]["instances"], output_dict[i + 1]["instances"])
    for i in range(len(output_dict) - 1)
]

## Color Codes for Tracked Objects

In [0]:
def generate_colors(num_boxes):
    """Generate ``num_boxes`` RGB colors as tuples of floats in [0, 1].

    Starting from the base color (158, 39, 248), all three channels are advanced
    by ``256 / num_boxes`` per color, truncated to an integer and wrapped modulo 256.

    Args:
        num_boxes: Number of colors to generate.

    Returns:
        A list of ``num_boxes`` ``(r, g, b)`` tuples. The list is empty when
        ``num_boxes`` is zero or negative (e.g. a frame without detections).
    """
    # Guard against division by zero for frames that contain no detections.
    if num_boxes <= 0:
        return []

    step = 256 / num_boxes
    r, g, b = 158, 39, 248
    box_colors = []
    for _ in range(num_boxes):
        # Channels are re-truncated every iteration, so the effective
        # increment is floor(step).
        r = int(r + step) % 256
        g = int(g + step) % 256
        b = int(b + step) % 256
        box_colors.append((r / 255, g / 255, b / 255))
    return box_colors

# Tracking Objects in Videos

In [0]:
# Frame window to render. Frames are addressed by the same index in
# `image_clips`, `output_dict` and `tracking_list`.
start_frame = 0
num_frames = 40
# Never run past the frames that actually exist.
end_frame = min(start_frame + num_frames, len(image_clips))

makedir('./tracked_frames/')
# The dataset metadata is the same for every frame, so look it up once.
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

# Initial state comes from the first rendered frame (not unconditionally frame 0),
# so a non-zero `start_frame` stays consistent with the detections being drawn.
present_pred_classes = output_dict[start_frame]["instances"].pred_classes
# generate_colors divides by the number of boxes; skip it for empty frames.
present_colors = generate_colors(len(present_pred_classes)) if len(present_pred_classes) else []

class_names = ["track"+str(i) for i in range(len(present_pred_classes))]
global_class_name_counter = len(class_names)
tracked_output = []

for frame_idx in range(start_frame, end_frame):
    img = cv2.imread(image_clips[frame_idx])
    v = Visualizer(img[:, :, ::-1], metadata, scale=1.2)

    # Convert the boxes to plain Python floats once per frame before drawing.
    boxes = output_dict[frame_idx]["instances"].pred_boxes.tensor.tolist()
    # NOTE(review): colors are assigned per detection index within the frame,
    # so a track's color can change between frames; only its name is carried over.
    for cidx, bbox in enumerate(boxes):
        text_pos = (bbox[0], bbox[1])
        v.draw_box(bbox, edge_color=present_colors[cidx], alpha=0.9)
        v.draw_text(class_names[cidx],
                    text_pos,
                    color=present_colors[cidx],
                    font_size=25)

    img = v.get_output().get_image()[:, :, ::-1]

    # Zero-pad to two digits so the saved frames sort in frame order.
    img_name = '{:02d}.jpg'.format(frame_idx)
    final_pth = os.path.join('./tracked_frames/', img_name)
    cv2.imwrite(final_pth, img)
    tracked_output.append(img)

    # Carry track names over to the next frame (nothing to do after the last one).
    if frame_idx < end_frame - 1:
        present_pred_classes = output_dict[frame_idx + 1]["instances"].pred_classes
        num_next = len(present_pred_classes)
        present_colors = generate_colors(num_next) if num_next else []
        new_class_names = [""] * num_next
        # tracking_list[frame_idx][d] is the next-frame index matched to the
        # current detection d, or -1. A dedicated loop variable is used so the
        # frame index is not shadowed. If several current detections map to the
        # same next-frame detection, the last one wins.
        for det_idx, track_idx in enumerate(tracking_list[frame_idx].tolist()):
            if track_idx != -1:
                new_class_names[track_idx] = class_names[det_idx]
        # Detections that inherited no name start a new track.
        for i, name in enumerate(new_class_names):
            if name == "":
                new_class_names[i] = "track"+str(global_class_name_counter)
                global_class_name_counter += 1
        class_names = new_class_names

In [18]:
# Rendered tracking frames in lexicographic (= frame) order; preview the first five.
# The pattern has no `**`, so a recursive glob would match exactly the same files.
tracked_images = sorted(glob("./tracked_frames/*.jpg"))
tracked_images[:5]

['./tracked_frames/00.jpg',
 './tracked_frames/01.jpg',
 './tracked_frames/02.jpg',
 './tracked_frames/03.jpg',
 './tracked_frames/04.jpg']

In [19]:
# Archive the rendered frames into tracked_frames.zip; zip's stderr is discarded.
!zip -r tracked_frames.zip tracked_frames 2>/dev/null

  adding: tracked_frames/ (stored 0%)
  adding: tracked_frames/26.jpg (deflated 6%)
  adding: tracked_frames/14.jpg (deflated 7%)
  adding: tracked_frames/17.jpg (deflated 6%)
  adding: tracked_frames/23.jpg (deflated 7%)
  adding: tracked_frames/22.jpg (deflated 7%)
  adding: tracked_frames/31.jpg (deflated 7%)
  adding: tracked_frames/03.jpg (deflated 6%)
  adding: tracked_frames/25.jpg (deflated 7%)
  adding: tracked_frames/16.jpg (deflated 6%)
  adding: tracked_frames/39.jpg (deflated 7%)
  adding: tracked_frames/24.jpg (deflated 8%)
  adding: tracked_frames/34.jpg (deflated 7%)
  adding: tracked_frames/36.jpg (deflated 7%)
  adding: tracked_frames/02.jpg (deflated 6%)
  adding: tracked_frames/09.jpg (deflated 7%)
  adding: tracked_frames/11.jpg (deflated 7%)
  adding: tracked_frames/13.jpg (deflated 7%)
  adding: tracked_frames/10.jpg (deflated 6%)
  adding: tracked_frames/12.jpg (deflated 7%)
  adding: tracked_frames/04.jpg (deflated 6%)
  adding: tracked_frames/29.jpg (deflated 

In [0]:
def make_video(images_list, file_name='tracker', fps=5):
    """Write a sequence of frames to ``<file_name>.avi``.

    Args:
        images_list: Non-empty sequence of frames. Each item is either an image
            array (rows x columns x channels, as produced by ``cv2.imread``) or
            a path to an image file.
        file_name: Output path without extension; ``'.avi'`` is appended.
        fps: Frames per second of the output video.

    Returns:
        None.

    Raises:
        ValueError: If ``images_list`` is empty or a frame path cannot be read.
    """
    if len(images_list) == 0:
        raise ValueError("images_list is empty; nothing to write")

    def _load(item):
        # Accept both in-memory frames and file paths.
        frame = cv2.imread(item) if isinstance(item, str) else item
        if frame is None:
            raise ValueError("could not read frame: {}".format(item))
        # Frames may be negative-stride views (e.g. arr[:, :, ::-1]); hand the
        # writer a contiguous buffer.
        return np.ascontiguousarray(frame)

    file_name = file_name + '.avi'
    first_frame = _load(images_list[0])
    # VideoWriter expects (width, height).
    # NOTE(review): every frame is assumed to share the first frame's size — confirm.
    size = (first_frame.shape[1], first_frame.shape[0])
    vid = cv2.VideoWriter(file_name,
                          cv2.VideoWriter_fourcc(*'DIVX'),
                          fps,
                          size)
    try:
        # Write the frames that were passed in rather than a notebook-level global.
        for item in images_list:
            vid.write(_load(item))
    finally:
        # Release the writer even if a frame fails so the file handle is not leaked.
        vid.release()

In [0]:
# Assemble the rendered frames into tracked_vid.avi at 1 frame per second.
make_video(images_list=tracked_output, file_name='tracked_vid', fps=1)

In [0]:
!ffmpeg -i tracked_vid.avi tracked_vid.mp4 2>/dev/null

In [0]:
from IPython.display import HTML
from base64 import b64encode

# Read the converted video and encode it as a base64 data URL so it can be
# embedded directly in the notebook output. The `with` block closes the file
# handle once the bytes are read.
with open('tracked_vid.mp4', 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

In [24]:
# Embed the MP4 inline as an HTML5 <video> element sourced from the base64 data URL.
HTML("""
<video controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

Output hidden; open in https://colab.research.google.com to view.

In [0]:
# Download the results
# from google.colab import files
# files.download('tracked_vid.avi') # download video
# files.download('tracked_frames.zip') # download raw frames