# Object detection in a video

1. Sample frames from the source video at a fixed time interval.
2. Run the trained detector on each sampled frame and store the annotated result.
3. Assemble the annotated frames into a new video.

In [1]:
# Install pinned dependencies. torch 1.8.0 / torchvision 0.9.0 are the CUDA 10.1 (cu101)
# builds, matching the cu101/torch1.8 detectron2 wheel index used in the next cell.
# NOTE(review): the reason for pinning pyyaml to 5.1 is not visible here — presumably a
# detectron2 requirement; confirm before changing it.
!pip install pyyaml==5.1
!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l[K     |█▏                              | 10 kB 22.7 MB/s eta 0:00:01[K     |██▍                             | 20 kB 25.1 MB/s eta 0:00:01[K     |███▋                            | 30 kB 12.8 MB/s eta 0:00:01[K     |████▉                           | 40 kB 9.8 MB/s eta 0:00:01[K     |██████                          | 51 kB 5.4 MB/s eta 0:00:01[K     |███████▏                        | 61 kB 6.0 MB/s eta 0:00:01[K     |████████▍                       | 71 kB 5.7 MB/s eta 0:00:01[K     |█████████▋                      | 81 kB 6.4 MB/s eta 0:00:01[K     |██████████▊                     | 92 kB 4.9 MB/s eta 0:00:01[K     |████████████                    | 102 kB 5.2 MB/s eta 0:00:01[K     |█████████████▏                  | 112 kB 5.2 MB/s eta 0:00:01[K     |██████████████▍                 | 122 kB 5.2 MB/s eta 0:00:01[K     |███████████████▌                | 133 kB 5.2 MB/s eta 0:00:01[K     |███

In [2]:
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
Collecting detectron2
  Downloading https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/detectron2-0.6%2Bcu101-cp37-cp37m-linux_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 810 kB/s 
Collecting fvcore<0.1.6,>=0.1.5
  Downloading fvcore-0.1.5.post20211023.tar.gz (49 kB)
[K     |████████████████████████████████| 49 kB 2.9 MB/s 
[?25hCollecting iopath<0.1.10,>=0.1.7
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting black==21.4b2
  Downloading black-21.4b2-py3-none-any.whl (130 kB)
[K     |████████████████████████████████| 130 kB 15.8 MB/s 
Collecting omegaconf>=2.1
  Downloading omegaconf-2.1.1-py3-none-any.whl (74 kB)
[K     |████████████████████████████████| 74 kB 3.2 MB/s 
Collecting yacs>=0.1.8
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting hydra-core>=1.1
  Downloading hydra_core-1.1.1-py3-none-any.whl (145 kB)
[K     |█████

In [1]:
import torch
assert torch.__version__.startswith("1.8") 
import torchvision
import cv2
import os
import numpy as np
import json
import random
import matplotlib.pyplot as plt
%matplotlib inline

from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog

In [2]:
# Class labels considered for detection, stored in alphabetical order.
classes = sorted([
    'car',
    'autorickshaw',
    'motorbike',
    'building',
    'bridge',
    'truck',
    'person',
    'bus',
    'traffic light',
    'traffic sign',
])
print(classes)

['autorickshaw', 'bridge', 'building', 'bus', 'car', 'motorbike', 'person', 'traffic light', 'traffic sign', 'truck']


In [3]:
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer

# Locations of the config file and model weights that came out of training.
trained_config_path = "/content/drive/MyDrive/Coders++/new_model/output.yaml"
trained_weights_path = "/content/drive/MyDrive/Coders++/new_model/model_final.pth"

# Start from a fresh config and merge the training-time config file into it.
cfg = get_cfg()
cfg.merge_from_file(trained_config_path)

# Use the weights created by training.
cfg.MODEL.WEIGHTS = trained_weights_path

# Score threshold applied when detecting objects.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# Predictor built from the merged config and the retrained weights.
predictor = DefaultPredictor(cfg)

In [9]:
# Get a frame from a video at a particular second
def getFrame(video, sec):
    """Seek ``video`` to ``sec`` seconds and read one frame from that position.

    Args:
        video: Capture object exposing ``set(prop, value)`` and ``read()``
            (e.g. a ``cv2.VideoCapture``).
        sec: Position to seek to, in seconds.

    Returns:
        The ``(has_frames, image)`` pair produced by ``video.read()``.
        The pair is returned even when no frame could be read, so callers can
        always unpack the result and test ``has_frames`` instead of relying on
        an unpacking error to detect the end of the video.
    """
    # CAP_PROP_POS_MSEC is expressed in milliseconds, hence the conversion.
    video.set(cv2.CAP_PROP_POS_MSEC, sec * 1000)
    has_frames, image = video.read()
    return has_frames, image

In [33]:
import cv2
from google.colab.patches import cv2_imshow

# source video
video = cv2.VideoCapture("/content/drive/MyDrive/Coders++/videos/mumbai_traffic.mp4")
if not video.isOpened():
    raise IOError("Could not open the source video")

seconds = 0 # current position in the source video, in seconds
frame_rate = 0.1 # time step between sampled frames, in seconds (not an FPS value)
# Play the sampled frames back at the rate they were sampled (0.1 s step -> 10 FPS).
output_fps = 1.0 / frame_rate

fps = video.get(cv2.CAP_PROP_FPS)
print("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps))

# Register the class names on the training dataset's metadata so the
# visualizer can label the predictions; look the metadata up once.
MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).set(thing_classes = classes)
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

# NOTE: every annotated frame is kept in memory until the video is written,
# so memory use grows with the length of the source video.
img_array = []

try:
  # getFrame may return None or a (False, ...) pair when nothing could be read.
  frame = getFrame(video, seconds)
  if not frame or not frame[0]:
    raise RuntimeError("Could not read the first frame of the source video")
  has_frames, image = frame
  # VideoWriter expects (width, height); image.shape is (height, width, channels).
  size = (image.shape[1], image.shape[0])

  # extract the frames, predict and add them into an array
  while has_frames:
    outputs = predictor(image)
    # The channel order is reversed going into the Visualizer and reversed
    # back on the way out.
    v = Visualizer(image[:, :, ::-1],
                   metadata = metadata,
                   scale = 1,
                   instance_mode = ColorMode.SEGMENTATION)

    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # cv2_imshow(v.get_image()[:, :, ::-1])
    img_array.append(v.get_image()[:, :, ::-1])

    seconds += frame_rate
    frame = getFrame(video, seconds)
    if not frame:
      # No result at all: there is nothing left to read.
      break
    has_frames, image = frame
finally:
  # Always free the capture handle, even if prediction fails midway.
  video.release()

# make a video out of those frames
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
# destination video
out = cv2.VideoWriter("/content/mumbai_traffic.mp4", fourcc, output_fps, size)
try:
  for annotated_frame in img_array:
    out.write(annotated_frame)
finally:
  # Releasing the writer finalizes the output file.
  out.release()

Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): 29.97002997002997
