# Install Darknet

Run the cell below to set up the environment.

In [0]:
!apt-get update
!apt-get upgrade
!apt-get install build-essential
!apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
!apt-get install libavcodec-dev libavformat-dev libswscale-d
!apt-get -y install cmake
!apt-get install libopencv-dev
!git clone https://github.com/AlexeyAB/darknet/
!apt-get install vim
%cd darknet
!ls
!wget https://pjreddie.com/media/files/yolov3.weights
!ls
!sed -i 's/OPENCV=0/OPENCV=1/g' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/g' Makefile
!sed -i 's/GPU=0/GPU=1/g' Makefile
#!vim Makefile
!ls
%cd ../
!ls
!apt install g++-5
!apt install gcc-5

!update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 10
!update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 20
!update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 10
!update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 20
!update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
!update-alternatives --set cc /usr/bin/gcc
!update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
!update-alternatives --set c++ /usr/bin/g++
!apt update -qq;
!wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
!dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
!apt-get update -qq

!apt-get install cuda -y -qq #gcc-5 g++-5 
!apt update
!apt upgrade
!apt install cuda-8.0 -y
%cd darknet
!make
!ls
!./darknet detect cfg/yolov3.cfg yolov3.weights data/person.jpg

# Import Dependencies

In [0]:
import cv2 as cv
import numpy as np
import os.path
import matplotlib.pyplot as plt
from google.colab import files
%matplotlib inline

# Helper Functions

In [0]:
def img_show(path):
    """Display the image stored at ``path`` inline, upscaled by a factor of 3.

    Args:
        path: Location of the image file to read with OpenCV.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``path``.
    """
    image = cv.imread(path)
    # cv.imread signals failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {path}')

    height, width = image.shape[:2]
    resized_image = cv.resize(image, (3 * width, 3 * height), interpolation=cv.INTER_CUBIC)

    fig = plt.gcf()
    fig.set_size_inches(18, 10)
    plt.axis("off")
    # OpenCV stores channels as BGR while matplotlib expects RGB.
    plt.imshow(cv.cvtColor(resized_image, cv.COLOR_BGR2RGB))
    plt.show()

In [0]:
def upload():
    """Ask for files through ``files.upload()`` and write each one to disk.

    Every uploaded entry is written in binary mode to a file named after
    its key, and a confirmation line is printed per file.
    """
    received = files.upload()
    for filename, payload in received.items():
        with open(filename, 'wb') as out_file:
            out_file.write(payload)
        print('saved file', filename)

def download(path):
    """Hand the file at ``path`` to ``files.download`` (google.colab helper)."""
    files.download(path)

# Test

In [0]:
# Show predictions.jpg from the current working directory
# (presumably the image written by the darknet detect run in the install cell).
img_show('predictions.jpg')

# YOLO - OpenCV Implementation

## Hyper Params and Network Settings

In [0]:
# Thresholds used when filtering detections, and the input size of the blob
# that is fed to the network.
conf_threshold, nms_threshold = 0.5, 0.4
input_width, input_height = 416, 416


# Class labels: one name per line of the names file
classFile = 'data/coco.names'
classes = None

with open(classFile, 'rt') as names_file:
    classes = names_file.read().rstrip('\n').split('\n')

# Darknet network definition and trained weights
model_config, model_weights = 'cfg/yolov3.cfg', 'yolov3.weights'

# Build the network from those files and pick the backend/target to run on
net = cv.dnn.readNetFromDarknet(model_config, model_weights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_OPENCL)

## Get the output layer names

In [0]:
def get_output_names(net):
    """Return the names of the network's unconnected output layers.

    Args:
        net: Network object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()``.

    Returns:
        A list of layer names, in the order reported by the network.
    """
    # Grab the names of all the layers in the network
    layer_names = net.getLayerNames()
    # The indices are 1-based. Depending on the OpenCV release they arrive as
    # an (N, 1) array or a flat (N,) array; flattening accepts both layouts.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

## Drawing the Bounding Boxes

In [0]:
def draw_pred(frame, class_id, conf, left, top, right, bot):
    """Draw one detection (box plus caption) on ``frame`` and return the frame.

    Args:
        frame: Image to draw on.
        class_id: Index into the module-level ``classes`` list.
        conf: Confidence score, rendered with two decimals.
        left, top, right, bot: Corner coordinates of the bounding box.

    Returns:
        The frame with the box and its caption drawn.
    """
    font = cv.FONT_HERSHEY_SIMPLEX

    # Bounding box outline
    frame = cv.rectangle(frame, (left, top), (right, bot), (255, 178, 50), 3)

    # Caption: the score, prefixed with the class name when labels are loaded
    caption = '%.2f' % conf
    if classes:
        assert class_id < len(classes)
        caption = f'{classes[class_id]} : {caption}'

    # White backdrop behind the caption at the top of the box. The text is
    # measured at scale 0.5 but drawn at 0.75, hence the 1.5 factor.
    (text_w, text_h), base_line = cv.getTextSize(caption, font, 0.5, 1)
    top = max(top, text_h)
    backdrop_start = (left, top - round(1.5 * text_h))
    backdrop_end = (left + round(1.5 * text_w), top + base_line)
    frame = cv.rectangle(frame, backdrop_start, backdrop_end, (255, 255, 255), cv.FILLED)

    return cv.putText(frame, caption, (left, top), font, 0.75, (0, 0, 0), 1)

## Removing Bounding Boxes with Low Confidence

**PARAMS**
frame(image matrix) = The pixel data of the current frame being analyzed
outputs(ND-Array) = All the predicted bounding boxes

**OUTPUT**

The current frame with the detected bounding boxes drawn on top.

**TODO**

1.) For each Bounding Box get the highest scoring class label.

2.) Get the confidence score for the winning label.

3.) Check that the confidence score is higher than the set threshold.

4.) Get the location of the predicted box.

5.) Use Non-Maximum Suppression to get rid of redundant overlapping boxes.

6.) Call draw_pred to draw the bounding boxes.

7.) return the frame with all the bounding boxes added.

In [0]:
def postprocess(frame, outputs):
    """Filter the raw network outputs and draw the surviving boxes on ``frame``.

    Each detection row is read as: entries 0-3 hold the box centre x, centre y,
    width and height as fractions of the frame size, and entries from index 5
    onward hold the per-class scores. A detection is kept when its best class
    score exceeds ``conf_threshold``; overlapping boxes are then pruned with
    non-maximum suppression.

    Args:
        frame: Image the detections refer to; boxes are drawn onto it.
        outputs: Iterable of detection arrays, one per output layer.

    Returns:
        The frame with every retained bounding box drawn.
    """
    frame_height, frame_width = frame.shape[:2]

    class_ids = []
    confidences = []
    boxes = []

    for out in outputs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            # Checking whether the score meets our confidence threshold
            if confidence > conf_threshold:
                center_x = int(detection[0] * frame_width)
                center_y = int(detection[1] * frame_height)
                width = int(detection[2] * frame_width)
                height = int(detection[3] * frame_height)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    # Perform Non-Maximum Suppression to eliminate redundant overlapping boxes
    indices = cv.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    # NMSBoxes returns an (N, 1) array, a flat array, or an empty tuple
    # depending on the OpenCV release; flattening handles all three.
    for i in np.asarray(indices).flatten():
        i = int(i)
        left, top, width, height = boxes[i]
        frame = draw_pred(frame, class_ids[i], confidences[i],
                          left, top, left + width, top + height)

    return frame

## Preprocess Input

**PARAMS**

input_file(str) = name of the file to be processed. Location should be part of the string if file is not in the root directory.

        ex: 'dog.jpg' or 'data/cat.jpg'
        
file_type(str) = The type of the input file. It can be either 'image' or 'video'

**OUTPUT**

Returns True if processing was successful, False otherwise.

**TODO:**

1.) Check whether file_type is 'image' or 'video' and set the output to the suitable format.

2.) Get the current frame pixel data.

3.) Create a blob with the correct dimensions to feed into the network

4.) Set the input to the network using the blob

5.) Run a forward pass through the network to get the predicted boxes

6.) Run all the predicted boxes through postprocess to filter out the weak predictions

In [0]:
def yolo(input_file, file_type='video'):
    """Run the loaded network over an image or video and save the annotated result.

    The output is written next to the input as ``<name>_yolo_output.jpg`` for
    images or ``<name>_yolo_output.avi`` (MJPG, 24 fps) for videos.

    Args:
        input_file: Path of the file to process.
        file_type: Either ``'image'`` or ``'video'``.

    Returns:
        False when the input file does not exist or ``file_type`` is not
        recognised; True once the frame loop has finished.
    """
    if not os.path.isfile(input_file):
        print('Input file ', input_file, ' wasn\'t found')
        return False

    # Reject bad types before any capture is opened so nothing is left unreleased.
    if file_type not in ('image', 'video'):
        print('Invalid file type')
        return False

    # splitext copes with extensions of any length (e.g. ".jpeg"),
    # unlike slicing off a fixed four characters.
    base_name = os.path.splitext(input_file)[0]

    # capture the raw input
    cap = cv.VideoCapture(input_file)
    vid_writer = None

    try:
        # set the correct file type for the output
        if file_type == 'image':
            output_file = base_name + '_yolo_output.jpg'
        else:
            output_file = base_name + '_yolo_output.avi'
            vid_writer = cv.VideoWriter(output_file,
                                        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                        24, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
                                             round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

        # The output layers do not change between frames; look them up once.
        output_names = get_output_names(net)

        while True:
            has_frame, frame = cap.read()

            if not has_frame:
                print('Done processing:')
                print('Output file is stored as: ', output_file)
                break

            # preprocess the input so that it can be fed to the network
            blob = cv.dnn.blobFromImage(frame, 1/255, (input_width, input_height),
                                        [0, 0, 0], 1, crop=False)
            net.setInput(blob)
            # Do a forward pass through the network to get all the predictions
            outs = net.forward(output_names)
            # filter out weak and redundant predictions and draw the winning bounding boxes
            frame = postprocess(frame, outs)

            # get efficiency information
            t, _ = net.getPerfProfile()
            label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
            cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

            # Write the frame with the detection boxes
            if file_type == 'image':
                cv.imwrite(output_file, frame.astype(np.uint8))
            else:
                vid_writer.write(frame.astype(np.uint8))
    finally:
        # Always release the capture and the writer, even if a frame fails
        # midway, so the output video is properly finalised.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()

    return True

# Image Test

In [0]:
# Step into the data directory (relative to the current working directory)
# and list the files available there.
%cd data
!ls

In [0]:
# Go back up one level so the relative cfg/ and data/ paths used below resolve.
%cd ..
!ls

In [0]:
# Run the OpenCV pipeline on a single image; yolo() derives the output file name from the input path.
yolo('data/eagle.jpg', 'image')

In [0]:
# Display the annotated image written by the previous cell.
img_show('data/eagle_yolo_output.jpg')

Use the cell below to process videos with YOLO on darknet. Use the upload() helper function to upload the video you'd like to run through the network and then use download() to get the output back from colab.

In [0]:
# Run darknet's detector demo on video.mp4 with the YOLOv3 config and weights;
# -out_filename names the output video (yolo_out.avi).
!./darknet detector demo cfg/coco.data cfg/yolov3.cfg yolov3.weights -dont_show video.mp4 -i 0 -out_filename yolo_out.avi