# **Part 2) Object Detection in a Video:**

In [19]:
# Mount Google Drive at /content/drive (Colab only) so that the paths under
# /content/drive/MyDrive referenced by later cells can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install imageio-ffmpeg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting imageio-ffmpeg
  Downloading imageio_ffmpeg-0.4.8-py3-none-manylinux2010_x86_64.whl (26.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.9/26.9 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: imageio-ffmpeg
Successfully installed imageio-ffmpeg-0.4.8


In [2]:
# Path of the input video that is opened with cv2.VideoCapture.
video_dir = '/content/drive/MyDrive/yolo_test.mp4'
# Path of the text file holding the class-label names.
classes_dir = '/content/drive/MyDrive/COCO_Class_Codes.txt'

In [4]:
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
from os import system
from time import sleep
from IPython.display import clear_output
import imageio
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML

In [5]:
# Read the comma-separated class names. Splitting on ',' and stripping each token
# (rather than splitting on the exact sequence ',\n') tolerates CRLF line endings,
# a trailing newline after the last entry and a trailing comma.
# The names keep whatever quoting the file uses (e.g. "'person'").
with open(classes_dir, 'rt') as f:
    labels = [name.strip() for name in f.read().split(',') if name.strip()]
print(labels)
print(len(labels))

["'person'", "'bicycle'", "'car'", "'motorcycle'", "'airplane'", "'bus'", "'train'", "'truck'", "'boat'", "'traffic light'", "'fire hydrant'", "'stop sign'", "'parking meter'", "'bench'", "'bird'", "'cat'", "'dog'", "'horse'", "'sheep'", "'cow'", "'elephant'", "'bear'", "'zebra'", "'giraffe'", "'backpack'", "'umbrella'", "'handbag'", "'tie'", "'suitcase'", "'frisbee'", "'skis'", "'snowboard'", "'sports ball'", "'kite'", "'baseball bat'", "'baseball glove'", "'skateboard'", "'surfboard'", "'tennis racket'", "'bottle'", "'wine glass'", "'cup'", "'fork'", "'knife'", "'spoon'", "'bowl'", "'banana'", "'apple'", "'sandwich'", "'orange'", "'broccoli'", "'carrot'", "'hot dog'", "'pizza'", "'donut'", "'cake'", "'chair'", "'couch'", "'potted plant'", "'bed'", "'dining table'", "'toilet'", "'tv'", "'laptop'", "'mouse'", "'remote'", "'keyboard'", "'cell phone'", "'microwave'", "'oven'", "'toaster'", "'sink'", "'refrigerator'", "'book'", "'clock'", "'vase'", "'scissors'", "'teddy bear'", "'hair dri

In [6]:
# Only detections whose label appears in this list are drawn.
# The entries contain literal single quotes because the loaded label strings
# carry them as well (see the printed label list).
desired_labels = ["'car'", "'person'", "'bus'"]

In [7]:
# Darknet network definition (.cfg) passed to cv2.dnn.readNetFromDarknet.
Architecture = '/content/drive/MyDrive/yolov3.cfg'
# Weights file (.weights) passed alongside the .cfg when the network is loaded.
Weights = '/content/drive/MyDrive/yolov3.weights'

In [8]:
# Build the network from the Darknet cfg/weights pair.
model = cv2.dnn.readNetFromDarknet(Architecture, Weights)
# Select OpenCV's default DNN backend and run inference on the CPU.
model.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
model.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [9]:
def find_objects(conf_Thresh, outputs, img):
    """Collect the detections whose best class score exceeds ``conf_Thresh``.

    Every row of every array in ``outputs`` is read as
    ``[cx, cy, w, h, <unused>, score_0, score_1, ...]``. The first four values
    are multiplied by the image width/height, i.e. they are treated as
    fractions of the image size; element 4 is not used by this function.

    Parameters
    ----------
    conf_Thresh : float
        A detection is kept only if its highest class score is strictly
        greater than this value.
    outputs : iterable of 2-D array-like
        Network outputs, one array per output layer.
    img : numpy.ndarray
        Image the detections refer to; only its first two dimensions
        (height, width) are used.

    Returns
    -------
    tuple
        ``(bounding_box, label_Id, bb_conf, height, width)`` where
        ``bounding_box`` is a list of ``[x, y, w, h]`` pixel boxes
        (``x, y`` being the top-left corner), ``label_Id`` the class index of
        each box, ``bb_conf`` the matching score as a Python float, and
        ``height``/``width`` the image size.
    """
    # Slice instead of unpacking three values: the channel count is not needed
    # and single-channel 2-D images are accepted as well.
    height, width = img.shape[:2]
    bounding_box = []
    label_Id = []
    bb_conf = []

    for output in outputs:
        for detection in output:
            scores = detection[5:]
            label = np.argmax(scores)
            confidence = scores[label]
            if confidence > conf_Thresh:
                # Scale the relative box size to pixels.
                w, h = int(detection[2] * width), int(detection[3] * height)
                # Convert the box centre to its top-left corner.
                x = int((detection[0] * width) - w / 2)
                y = int((detection[1] * height) - h / 2)
                bounding_box.append([x, y, w, h])
                label_Id.append(label)
                bb_conf.append(float(confidence))
    return bounding_box, label_Id, bb_conf, height, width

In [10]:
def show_detected_object(labels, desired_labels, bounding_box, label_Id, bb_conf, conf_Thresh, NMS_Thresh, img=None):
  """Draw the boxes that survive non-maximum suppression onto an image.

  Parameters
  ----------
  labels : sequence of str
      Class names indexed by the values in ``label_Id``.
  desired_labels : container of str
      Only boxes whose class name is in this container are drawn.
  bounding_box : list of [x, y, w, h]
      Candidate boxes in pixels (top-left corner plus size).
  label_Id : list of int
      Class index of each candidate box.
  bb_conf : list of float
      Score of each candidate box.
  conf_Thresh : float
      Score threshold forwarded to ``cv2.dnn.NMSBoxes``.
  NMS_Thresh : float
      Overlap threshold forwarded to ``cv2.dnn.NMSBoxes``.
  img : numpy.ndarray, optional
      Image to draw on (modified in place). When omitted, the module-level
      variable ``img`` is used, which is how the function behaved before this
      parameter existed.

  Returns
  -------
  numpy.ndarray
      The same image object, with rectangles and captions drawn on it.
  """
  if img is None:
    # Backward compatibility: existing callers rely on a global named `img`.
    try:
      img = globals()['img']
    except KeyError:
      raise NameError("name 'img' is not defined") from None

  indices = cv2.dnn.NMSBoxes(bounding_box, bb_conf, conf_Thresh, NMS_Thresh)
  # Depending on the OpenCV version the result is an (N, 1) array, a flat array,
  # or an empty tuple; flattening gives plain scalar indices in every case.
  for i in np.array(indices).flatten():
    i = int(i)
    name = labels[label_Id[i]]
    if name not in desired_labels:
      continue
    x, y, w, h = bounding_box[i][:4]
    cv2.rectangle(img, (x,y),(x+w,y+h),(255,0,255),2)
    # Caption: upper-cased class name and the score as a whole percentage,
    # placed slightly above the box.
    cv2.putText(img,f'{name.upper()} {int(bb_conf[i]*100)}%',
                    (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6,(255,0,255),2)
  return img

In [14]:
def display_video(video):
    """Build a matplotlib animation that shows the given frames one after another.

    Parameters
    ----------
    video : sequence of array-like
        Frames in playback order; each one is passed to ``imshow``.

    Returns
    -------
    matplotlib.animation.ArtistAnimation
        Animation with a 50 ms frame interval and a 1000 ms pause before it
        repeats. The figure is closed before returning so that no static
        figure is rendered in addition to the animation.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    # Hiding the axes is a property of the Axes, so it is set once up front
    # instead of once per frame.
    ax.axis('off')

    # ArtistAnimation expects one list of artists per animation frame.
    mov = [[ax.imshow(frame, animated=True)] for frame in video]

    anime = animation.ArtistAnimation(fig, mov, interval=50, repeat_delay=1000)

    # Close this specific figure rather than whichever one happens to be current.
    plt.close(fig)
    return anime

In [16]:
# Run the detector on every frame of the input video, write the annotated
# frames to 'output.mp4' and keep a copy of each one for inline display.
cap = cv2.VideoCapture(video_dir)
if not cap.isOpened():
    # Fail early with a clear message instead of silently producing an empty video.
    raise IOError(f'Could not open video: {video_dir}')

yolo_size = 320      # side length of the square network input
conf_Thresh = 0.5    # minimum class score for a detection to be kept
NMS_Thresh = 0.3     # overlap threshold for non-maximum suppression
video = []           # annotated frames in RGB order, for matplotlib display

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
# 'mp4v' matches the .mp4 container; 'MJPG' is a codec tag for AVI files.
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

# The output layer names do not change between frames, so look them up once.
# getUnconnectedOutLayersNames() also avoids the index arithmetic that depends
# on the shape getUnconnectedOutLayers() returns in a given OpenCV version.
outputNames = model.getUnconnectedOutLayersNames()

try:
    while True:
        # NOTE: the frame must stay bound to the global name `img`, because
        # show_detected_object is called without an image argument and draws
        # on the global `img`.
        success, img = cap.read()
        if not success:
            break
        blob = cv2.dnn.blobFromImage(img, 1/255,(yolo_size,yolo_size),[0,0,0],crop=False)
        model.setInput(blob)
        outputs = model.forward(outputNames)
        # Only the first three results are needed; the frame size is already
        # known and `width`/`height` above are left untouched.
        bounding_box, label_Id, bb_conf = find_objects(conf_Thresh, outputs, img)[:3]
        image = show_detected_object(labels, desired_labels, bounding_box, label_Id, bb_conf, conf_Thresh, NMS_Thresh)
        out.write(image)
        # OpenCV frames are BGR while matplotlib expects RGB; convert the copy
        # kept for display so the colours are not swapped.
        video.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
finally:
    # Release the reader and writer even if processing a frame fails, so the
    # output file is finalised and the input is not left open.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [17]:
# Turn the processed frames into an animation and embed it as an HTML5 video.
video_animation = display_video(video)
HTML(video_animation.to_html5_video())