In [1]:
#https://comma.ai
#https://github.com/commaai/openpilot
#https://en.wikipedia.org/wiki/Adaptive_cruise_control

#clone darknet repo
!git clone https://github.com/AlexeyAB/darknet

Cloning into 'darknet'...
remote: Enumerating objects: 15283, done.[K
remote: Counting objects: 100% (97/97), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 15283 (delta 39), reused 51 (delta 26), pack-reused 15186[K
Receiving objects: 100% (15283/15283), 13.78 MiB | 16.84 MiB/s, done.
Resolving deltas: 100% (10350/10350), done.


In [2]:
%cd darknet

/content/darknet


In [3]:
#change makefile to have OPENCV, GPU, CUDNN enabled
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile

In [None]:
#compile Darknet
!make

In [None]:
#get yolov3 pretrained coco dataset weights
!wget https://pjreddie.com/media/files/yolov3.weights

In [9]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
import IPython
import time
import math

In [10]:
min_confidence = 0.5

weight_file = 'yolov3.weights'
cfg_file = 'cfg/yolov3.cfg'
name_file = 'data/coco.names'

#https://bdd-data.berkeley.edu/ 에서 dataset download
#http://dl.yf.io/bdd100k/video_parts/bdd100k_videos_test_00.zip의 첫번째 video file
file_name = 'cabc30fc-e7726578.mp4'

In [8]:
from google.colab import files
files.upload()

Saving cabc30fc-e7726578.mp4 to cabc30fc-e7726578.mp4


In [11]:
#load yolo
net = cv2.dnn.readNet(weight_file, cfg_file)

In [12]:
classes = []
with open(name_file, 'r') as f:
  classes = [line.strip() for line in f.readlines()]
print(classes)
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0]-1] for i in net.getUnconnectedOutLayers()]

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [13]:
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0]-1] for i in net.getUnconnectedOutLayers()]

In [14]:
def writeFrame(img):
  #use global variable, writer
  global writer
  height, width  = img.shape[:2]
  if writer is None and output_name is not None:
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    writer = cv2.VideoWriter(output_name, fourcc, 24, (width, height), True)
  if writer is not None:
    writer.write(img)

In [15]:
trackerTypes = ['BOOSTING', 'MIL', 'KCF','TLD', 'MEDIANFLOW', 'GOTURN', 'MOSSE', 'CSRT']

def createTrackerByName(trackerType):
  # Create a tracker based on tracker name
  if trackerType == trackerTypes[0]:
    tracker = cv2.TrackerBoosting_create()
  elif trackerType == trackerTypes[1]:
    tracker = cv2.TrackerMIL_create()
  elif trackerType == trackerTypes[2]:
    tracker = cv2.TrackerKCF_create()
  elif trackerType == trackerTypes[3]:
    tracker = cv2.TrackerTLD_create()
  elif trackerType == trackerTypes[4]:
    tracker = cv2.TrackerMedianFlow_create()
  elif trackerType == trackerTypes[5]:
    tracker = cv2.TrackerGOTURN_create()
  elif trackerType == trackerTypes[6]:
    tracker = cv2.TrackerMOSSE_create()
  elif trackerType == trackerTypes[7]:
    tracker = cv2.TrackerCSRT_create()
  else:
    tracker = None
    print('Incorrect tracker name')
    print('Available trackers are:')
    for t in trackerTypes:
      print(t)
  return tracker


In [None]:
frame_count = 0

#initialize the video writer
writer = None
output_name = 'output_video.avi'

file_name = 'cabc30fc-e7726578.mp4'
face_cascade_name = 'haarcascade_frontalface_alt.xml'

detected = False
frame_mode = 'Tracking'
elapsed_time = 0

trackers = cv2.MultiTracker_create()


# csrt
#tracker = cv2.TrackerCSRT_create()
# kcf
#tracker = cv2.TrackerKCF_create()
#boostring
#tracker = cv2.TrackerBoosting_create()
#mil
#tracker = cv2.TrackerMIL_create()
#tld
#tracker = cv2.TrackerTLD_create()
#medianflow
#tracker = cv2.TrackerMedianFlow_create()
#mosse
#tracker = cv2.TrackerMOSSE_create()

tracker = createTrackerByName(trackerTypes[2])

detected_width = 0
margin = 70


vs = cv2.VideoCapture(file_name)
while True:
  start_time = time.time()
  frame_count += 1
  
  ret, frame = vs.read()
  if frame is None:
    print('no more frame')
    break
  
  IPython.display.clear_output(wait=True)
  height, width, channels = frame.shape
  
  class_ids = []
  confidences = []
  boxes = []

  #region of interest
  roi_left = int(0.3*width)
  roi_right = int(0.6*width)

  if detected:
    frame_mode = 'Tracking'
    (success, tracking_boxes) = trackers.update(frame)
    for box in boxes:
      (x, y, w, h) = [int(v) for v in box]
      cv2.rectangle(frame, (x,y), (x+w, y+h), (0, 255,0), 2)

    tracking_box = tracking_boxes[0]
    tx = int(tracking_box[0])
    ty = int(tracking_box[1])
    tw = int(tracking_box[2])
    th = int(tracking_box[3])

    roi = frame[ty-margin:ty+th+margin, tx-margin:tx+tw+margin]
    
    roi_width, roi_height = roi.shape[:2]

    blob = cv2.dnn.blobFromImage(roi, 0.00392, (416,416), (0,0,0), True, crop=False)

    net.setInput(blob)
    outs = net.forward(output_layers)

    for out in outs:
      for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if (confidence > min_confidence) and (class_id == 2):
          #object detected
          center_x = int(detection[0] * roi_width)
          center_y = int(detection[1] * roi_height)
          w = int(detection[2] * roi_height)
          h = int(detection[3] * roi_height)

          #ractangle coordinates
          x = int(center_x - w / 2)
          y = int(center_y - h / 2)
          boxes.append([x,y,w,h])
    if len(boxes):
      boxes.sort(key=lambda x: x[2], reverse=True)
      box = boxes[0]
      x = box[0]
      y = box[1]
      w = box[2]
      h = box[3]
      roi_x = tx-margin + x    
      roi_y = ty-margin + y
      distance_width = w - detected_width

      cv2.rectangle(frame, (roi_x, roi_y), (roi_x+w, roi_y+h), (0, 255,255),1)
      label = 'Initial width: ' + str(detected_width) + ', current width: ' + str(w) + ', distance: ' + str(w-distance_width)
      print(box, label)
      if abs(distance_width) > 5:
        if distance_width < 0:
          cv2.putText(frame, 'speed up', (30,100), cv2.FONT_HERSHEY_COMPLEX, 2, (255,0,0), 5)
        elif distance_width < 30:
          cv2.putText(frame, 'slow down', (30,100), cv2.FONT_HERSHEY_COMPLEX, 2, (0,128,128), 5)
        else:
          cv2.putText(frame, 'be careful', (30,100), cv2.FONT_HERSHEY_COMPLEX, 2, (0,0,255), 5)
      cv2.putText(frame, label, (30, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255,0), 1)
    cv2.rectangle(frame, (tx, ty), (tx+w, ty+h), (255,255,0), 1)
    cv2.rectangle(frame, (tx-margin, ty-margin), (tx+tw+margin, ty+th+margin), (255,0,0), 1)
  else:
    frame_mode = 'Detection'
    
    height, width, channels = frame.shape

    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0,0,0), True, crop=False)

    net.setInput(blob)
    outs = net.forward(output_layers)
    
    
    for out in outs:
      for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]

        #filter only car
        if (confidence > min_confidence) and class_id == 2:
          #object detected
          center_x = int(detection[0]*width)
          center_y = int(detection[1]*height)
          w = int(detection[2]*width)
          h = int(detection[3]*height)

          #rectangle coordinates
          x = int(center_x - w / 2)
          y = int(center_y - h / 2)

          boxes.append([x, y, w, h])
          confidences.append(float(confidence))
          class_ids.append(class_id)

    #Region of interest

    roi_left = int(0.3 * width)
    roi_right = int(0.6 * width)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)
    font = cv2.FONT_HERSHEY_COMPLEX

    for i in range(len(boxes)):
      if i in indexes:
        x, y, w, h = boxes[i]
        
        #eliminate small object( < 50 )
        if ( w > 50 ) and ( x > roi_left and x < roi_right):
          label = str(classes[class_ids[i]])
          print(class_ids[i], label, w)
          color = (0, 255, 0)
          selected = boxes[i]
          detected_width = w
          cv2.rectangle(frame, (x,y), (x+w, y+h), color, 2)
    
    trackers.add(tracker, frame, tuple(selected))
    detected = True

    cv2_imshow(frame)
    frame_time = time.time() - start_time
    elapsed_time += frame_time
    print("[{}] Frame {} time {}".format(frame_mode, frame_count, frame_time))

    writeFrame(frame)
  
print("elapsed time {}".format(elapsed_time))
vs.release()


[50, 83, 85, 64] Initial width: 96, current width: 85, distance: 96
[51, 83, 84, 65] Initial width: 96, current width: 84, distance: 96
[52, 84, 84, 64] Initial width: 96, current width: 84, distance: 96
[49, 82, 85, 64] Initial width: 96, current width: 85, distance: 96
[50, 78, 88, 75] Initial width: 96, current width: 88, distance: 96
[50, 78, 87, 76] Initial width: 96, current width: 87, distance: 96
[49, 78, 88, 75] Initial width: 96, current width: 88, distance: 96
[49, 78, 88, 78] Initial width: 96, current width: 88, distance: 96
[49, 79, 88, 78] Initial width: 96, current width: 88, distance: 96
[48, 77, 88, 77] Initial width: 96, current width: 88, distance: 96
[47, 80, 89, 75] Initial width: 96, current width: 89, distance: 96
[48, 78, 92, 76] Initial width: 96, current width: 92, distance: 96
[48, 78, 90, 78] Initial width: 96, current width: 90, distance: 96
[48, 78, 90, 78] Initial width: 96, current width: 90, distance: 96
[47, 78, 91, 77] Initial width: 96, current widt

In [None]:
from google.colab import files
files.download(output_name)