In [2]:
# Download the pre-trained YOLOv3 weights (saved as yolov3.weights in the working directory).
!gdown https://pjreddie.com/media/files/yolov3.weights

Downloading...
From: https://pjreddie.com/media/files/yolov3.weights
To: /content/yolov3.weights
100% 248M/248M [00:04<00:00, 60.7MB/s]


In [3]:
# Download the Darknet network configuration file (saved as yolov3.cfg).
!gdown https://opencv-tutorial.readthedocs.io/en/latest/_downloads/10e685aad953495a95c17bfecd1649e5/yolov3.cfg

Downloading...
From: https://opencv-tutorial.readthedocs.io/en/latest/_downloads/10e685aad953495a95c17bfecd1649e5/yolov3.cfg
To: /content/yolov3.cfg
  0% 0.00/8.34k [00:00<?, ?B/s]100% 8.34k/8.34k [00:00<00:00, 5.28MB/s]


In [4]:
# Download the class-label list (saved as coco.names); it is read one label per line below.
!gdown https://opencv-tutorial.readthedocs.io/en/latest/_downloads/a9fb13cbea0745f3d11da9017d1b8467/coco.names

Downloading...
From: https://opencv-tutorial.readthedocs.io/en/latest/_downloads/a9fb13cbea0745f3d11da9017d1b8467/coco.names
To: /content/coco.names
  0% 0.00/621 [00:00<?, ?B/s]100% 621/621 [00:00<00:00, 484kB/s]


In [7]:
# Download the input video from Google Drive by file id (saved as in.avi).
!gdown --id 18RDqTp5jQfkcaDmR9gND-vuLNTe9B0BB

Downloading...
From: https://drive.google.com/uc?id=18RDqTp5jQfkcaDmR9gND-vuLNTe9B0BB
To: /content/in.avi
0.00B [00:00, ?B/s]4.72MB [00:00, 27.8MB/s]8.13MB [00:00, 36.7MB/s]


In [21]:
# YOLO object detection
import cv2 as cv
import numpy as np
from google.colab.patches import cv2_imshow

# Load the class names (one per line) and assign each class a reproducible
# random BGR color. The file is opened with a context manager so the handle
# is closed as soon as the names are read.
with open('/content/coco.names') as names_file:
    classes = names_file.read().strip().split('\n')
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')

# Give the configuration and weight files for the model and load the network.
net = cv.dnn.readNetFromDarknet('/content/yolov3.cfg', '/content/yolov3.weights')
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
#net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Determine the names of the output layers.
# getUnconnectedOutLayers() yields 1-based layer indices; depending on the
# OpenCV version they arrive as an Nx1 array or as a flat array, so flatten
# first instead of indexing each entry with [0].
ln = net.getLayerNames()
ln = [ln[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

def post_process(img, outputs, conf = 0.5, nms_thresh = None):
    """Draw the detected persons and a person counter onto ``img``.

    Args:
        img: Image array that is annotated in place; its first two shape
            entries are used as height and width.
        outputs: Iterable of detection rows. Each row holds the box as
            (center x, center y, width, height), scaled here by the image
            size, in its first four entries and per-class scores from
            index 5 onward.
        conf: Minimum class score for a row to be kept; also passed to
            ``cv.dnn.NMSBoxes`` as its score threshold.
        nms_thresh: Overlap threshold for non-maximum suppression. When
            ``None`` it defaults to ``conf - 0.1``.

    Returns:
        The same ``img`` object, with a rectangle and label per retained
        'person' detection and a 'Count: N' overlay.
    """
    H, W = img.shape[:2]
    if nms_thresh is None:
        nms_thresh = conf - 0.1

    boxes = []
    confidences = []
    classIDs = []

    for output in outputs:
        scores = output[5:]
        classID = np.argmax(scores)
        confidence = scores[classID]
        if confidence > conf:
            # Scale the box to pixel units, then convert center -> top-left corner.
            x, y, w, h = output[:4] * np.array([W, H, W, H])
            left, top = int(x - w//2), int(y - h//2)
            boxes.append([left, top, int(w), int(h)])
            confidences.append(float(confidence))
            classIDs.append(int(classID))

    indices = cv.dnn.NMSBoxes(boxes, confidences, conf, nms_thresh)
    # NMSBoxes may hand back an empty tuple, an Nx1 array or a flat array
    # depending on the OpenCV version; normalize to a flat integer array.
    kept = np.array(indices, dtype=int).reshape(-1)

    count = 0
    for i in kept:
        i = int(i)
        label = classes[classIDs[i]]
        if label != 'person':
            continue
        count += 1
        x, y, w, h = boxes[i]
        color = [int(c) for c in colors[classIDs[i]]]
        cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.4f}".format(label, confidences[i])
        cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Always draw the counter so frames without detections show 'Count: 0'.
    cv.putText(img, f'Count: {count}', (50, 50), cv.FONT_HERSHEY_SIMPLEX, 1,(0,0,200), 3)
    return img

def detect(img):
    """Run the loaded network on one image and return the annotated image.

    The image is converted to a 416x416 blob (pixel values scaled by 1/255,
    red and blue channels swapped, no cropping), forwarded through the
    output layers listed in ``ln``, and the stacked detections are handed
    to ``post_process`` with a 0.5 confidence threshold.
    """
    net.setInput(
        cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
    )
    per_layer = net.forward(ln)

    # Merge the per-layer detection groups into one 2-D array, e.g.
    # (507, 85) large + (2028, 85) medium + (8112, 85) small -> (10647, 85).
    detections = np.vstack(per_layer)

    return post_process(img, detections, 0.5)

def detect_vid(in_path, out_path, fps=15):
    """Run detection on every frame of a video and write the annotated result.

    Args:
        in_path: Path of the video to read.
        out_path: Path of the MJPG-encoded video to write. It uses the same
            frame size as the input.
        fps: Frame rate of the written video (defaults to 15).

    Raises:
        IOError: If the input video cannot be opened.
    """
    video_reader = cv.VideoCapture(in_path)
    if not video_reader.isOpened():
        video_reader.release()
        raise IOError(f'Could not open input video: {in_path}')

    video_writer = None
    try:
        # Named property ids instead of the bare 3 / 4 magic numbers.
        frame_width = int(video_reader.get(cv.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video_reader.get(cv.CAP_PROP_FRAME_HEIGHT))
        size = (frame_width, frame_height)

        video_writer = cv.VideoWriter(out_path,
                                      cv.VideoWriter_fourcc(*'MJPG'),
                                      fps, size)

        while True:
            ret, frame = video_reader.read()
            if not ret:
                break

            # Write the frame returned by detect() rather than relying on
            # the input frame having been annotated in place.
            annotated = detect(frame)
            video_writer.write(annotated)
    finally:
        # Release both handles even if detection fails part-way through,
        # so the output file is finalized and the capture is closed.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

In [22]:
# Annotate the downloaded clip and save the result next to it.
detect_vid(in_path='/content/in.avi', out_path='/content/out.avi')