## DarkFlow - Using YOLO in Python

### Loading our YOLO model

In [6]:
from darkflow.net.build import TFNet
import cv2
import tensorflow as tf

# TensorFlow session settings. The original note reads "set True if using GPU";
# presumably it refers to the flags below — TODO confirm which one.
config = tf.ConfigProto(log_device_placement=False)
config.gpu_options.allow_growth = False

# darkflow options: network definition, pretrained weights, detection threshold.
options = {
    'model': './cfg/yolo.cfg',
    'load': './yolov2.weights',
    'threshold': 0.6,
    # 'gpu': 1.0,  # uncomment this if using GPU
}

# NOTE(review): `sess` is never referenced inside this block — confirm whether
# TFNet actually picks up this session and its config.
with tf.Session(config=config) as sess:
    tfnet = TFNet(options)

Parsing ./cfg/yolov2.cfg
Parsing ./cfg/yolo.cfg
Loading ./yolov2.weights ...
Successfully identified 203934260 bytes
Finished in 0.04818320274353027s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  |  

### Load our image and convert from BGR to RGB

In [2]:
# Read the sample image from disk.
image_path = './sample_img/sample_horses.jpg'
img = cv2.imread(image_path)
# cv2.imread signals failure by returning None rather than raising; without this
# check the problem only shows up as an obscure error inside cvtColor.
if img is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

# Swap the channel order from BGR to RGB before running the detector.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Run detection; the printed list holds one dict per detection with 'label',
# 'confidence', 'topleft' and 'bottomright' entries.
results = tfnet.return_predict(img)
print(results)

[{'label': 'horse', 'confidence': 0.49875364, 'topleft': {'x': 4, 'y': 175}, 'bottomright': {'x': 149, 'y': 340}}, {'label': 'horse', 'confidence': 0.35554495, 'topleft': {'x': 321, 'y': 201}, 'bottomright': {'x': 451, 'y': 317}}, {'label': 'horse', 'confidence': 0.7830673, 'topleft': {'x': 245, 'y': 197}, 'bottomright': {'x': 433, 'y': 370}}, {'label': 'horse', 'confidence': 0.8938936, 'topleft': {'x': 433, 'y': 210}, 'bottomright': {'x': 599, 'y': 350}}, {'label': 'horse', 'confidence': 0.8544165, 'topleft': {'x': 0, 'y': 192}, 'bottomright': {'x': 310, 'y': 411}}]


### Display our results using OpenCV

In [3]:
# Reload the image from disk: the `img` used for prediction was converted to
# RGB, and here we want to draw on the pixels exactly as OpenCV loaded them.
image_path = './sample_img/sample_horses.jpg'
img = cv2.imread(image_path)
# cv2.imread returns None (no exception) when the file cannot be read.
if img is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

for result in results:
    # Box geometry: top-left corner plus width/height derived from the corners.
    x = result['topleft']['x']
    y = result['topleft']['y']
    w = result['bottomright']['x'] - x
    h = result['bottomright']['y'] - y
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Anchor the label at the box's horizontal midpoint, 10 px above its top
    # edge; abs() keeps the y coordinate non-negative for boxes near the top.
    label_position = (x + int(w / 2), abs(y - 10))
    cv2.putText(img, result['label'], label_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

# Show the annotated image and wait for a key press before closing the window.
cv2.imshow("Objet Detection YOLO", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Let's just encapsulate that OpenCV display method into a function

In [3]:
def displayResults(results, img, box_color=(0, 255, 0), text_color=(255, 255, 255),
                   font_scale=0.5, text_thickness=2):
    """Draw a bounding box and a text label on ``img`` for every detection.

    Args:
        results: Iterable of detection dicts. Each dict must provide a
            'label' entry and 'topleft' / 'bottomright' dicts with 'x' and
            'y' pixel coordinates.
        img: Image to annotate. It is drawn on in place.
        box_color: Colour tuple passed to ``cv2.rectangle``.
        text_color: Colour tuple passed to ``cv2.putText``.
        font_scale: Font scale passed to ``cv2.putText``.
        text_thickness: Stroke thickness passed to ``cv2.putText``.

    Returns:
        The same ``img`` object that was passed in, now annotated.
    """
    for result in results:
        left = result['topleft']['x']
        top = result['topleft']['y']
        right = result['bottomright']['x']
        bottom = result['bottomright']['y']

        cv2.rectangle(img, (left, top), (right, bottom), box_color, 2)

        # Anchor the label at the box's horizontal midpoint, 10 px above its
        # top edge; abs() keeps the y coordinate non-negative near the top.
        label_position = (left + int((right - left) / 2), abs(top - 10))
        cv2.putText(img, result['label'], label_position, cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, text_color, text_thickness)
    return img

### Running YOLO on a webcam

In [4]:
import cv2
from darkflow.net.build import TFNet
import numpy as np
import time
import tensorflow as tf

# TensorFlow session settings: device-placement logging off, GPU memory
# growth off.
config = tf.ConfigProto(log_device_placement=False)
config.gpu_options.allow_growth = False

# darkflow options: network definition, pretrained weights, detection threshold.
options = {
    'model': './cfg/yolo.cfg',
    'load': './yolov2.weights',
    'threshold': 0.5,
}

# NOTE(review): `sess` is never referenced inside this block — confirm whether
# TFNet actually picks up this session and its config.
with tf.Session(config=config) as sess:
    tfnet = TFNet(options)

#colors = [tuple(255 * np.random.rand(3)) for _ in range(10)]

# Open video device 0 as the frame source.
capture = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = capture.read()

        # A failed read used to skip the whole loop body, including waitKey,
        # so the loop would spin forever with no way to exit. Stop instead.
        if not ret:
            break

        # Detect objects on the frame and draw them with the helper defined
        # in the earlier displayResults cell.
        results = tfnet.return_predict(frame)
        image = displayResults(results, frame)
        cv2.imshow('YOLOV2 - Object Detection', image)

        if cv2.waitKey(1) == 13:  # 13 is the Enter key
            break
finally:
    # Always free the capture device and close the window, even if the loop
    # is interrupted or a frame fails to process.
    capture.release()
    cv2.destroyAllWindows()


Parsing ./cfg/yolov2.cfg
Parsing ./cfg/yolo.cfg
Loading ./yolov2.weights ...
Successfully identified 203934260 bytes
Finished in 0.015048503875732422s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  | 

### Running YOLO on a video

In [7]:
import cv2

# Open the video file with OpenCV and process it frame by frame.
cap = cv2.VideoCapture('all2.mp4')
MAX_FRAMES = 240  # stop after this many frames have been displayed
frame_number = 0

try:
    while True:
        ret, frame = cap.read()

        # No frame could be read (e.g. end of the video): stop. Previously the
        # loop kept spinning forever if this happened before frame 240.
        if not ret:
            break
        frame_number += 1

        # The detector is fed an RGB copy; boxes are drawn on the original frame.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = tfnet.return_predict(img)

        # Same green box / white label drawing as before, via the shared helper
        # from the displayResults cell (it annotates `frame` in place).
        displayResults(results, frame)

        cv2.imshow("Objet Detection YOLO", frame)
        if frame_number == MAX_FRAMES:
            break
        if cv2.waitKey(1) == 13:  # 13 is the Enter key
            break
finally:
    # Release the video handle and close the window on every exit path.
    cap.release()
    cv2.destroyAllWindows()

## Install Instructions

- git clone https://github.com/pjreddie/darknet
- cd darknet
- make
- wget https://pjreddie.com/media/files/yolov2.weights
- ./darknet detect cfg/yolov2.cfg yolov2.weights data/dog.jpg

- cd
- pip install Cython
- git clone https://github.com/thtrieu/darkflow.git
- cd darkflow
- pip install .
- cd bin
- wget https://pjreddie.com/media/files/yolov2.weights
- cd 
- cd darkflow/cfg


In [2]:
# Manual cleanup cell: release the `capture` object created in the webcam cell
# and close all OpenCV windows. Presumably kept for the case where the webcam
# loop was interrupted before reaching its own cleanup — TODO confirm.
capture.release()
cv2.destroyAllWindows()

### Loading our Trained Custom Dataset Model

In [1]:
from darkflow.net.build import TFNet
import cv2
import tensorflow as tf

# TensorFlow session settings. The original note reads "set True if using GPU";
# presumably it refers to the flags below — TODO confirm which one.
config = tf.ConfigProto(log_device_placement=False)
config.gpu_options.allow_growth = False

# darkflow options for the custom single-class model.
options = {
    'model': './cfg/yolo_1_class.cfg',
    # Number of steps/epochs used in training; tells darkflow to load the
    # last saved model.
    'load': 400,
    'threshold': 0.45,
    # 'gpu': 1.0,  # uncomment this if using GPU
}

# NOTE(review): `sess` is never referenced inside this block — confirm whether
# TFNet actually picks up this session and its config.
with tf.Session(config=config) as sess:
    tfnet = TFNet(options)

Parsing ./cfg/yolo_1_class.cfg
Loading None ...
Finished in 0.00011992454528808594s

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 288, 288, 3)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 288, 288, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 144, 144, 32)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 144, 144, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 72, 72, 64)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 72, 72, 128)
 Init  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 72, 72, 64)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 72, 72, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 36, 36, 128)
 Init  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 36, 36, 256)
 Init  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 36, 36,

### Let's test our new Object Detector!

In [5]:
# Cycle through the 11 sample images (sample_london_1.jpg ... sample_london_11.jpg),
# showing each one until a key is pressed.
for image_index in range(1, 12):
    file_name = './sample_img/sample_london_' + str(image_index) + '.jpg'
    img = cv2.imread(file_name)

    # cv2.imread returns None (no exception) for a missing/unreadable file;
    # skip it instead of crashing inside cvtColor.
    if img is None:
        print('Skipping unreadable image: ' + file_name)
        continue

    # Feed the detector an RGB copy and keep `img` untouched for drawing, which
    # avoids converting to RGB and straight back to BGR.
    results = tfnet.return_predict(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # `result` no longer shares a name with the outer loop index.
    for result in results:
        x = result['topleft']['x']
        y = result['topleft']['y']
        w = result['bottomright']['x'] - x
        h = result['bottomright']['y'] - y
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Label sits at the box's horizontal midpoint, 10 px above its top
        # edge; abs() keeps the y coordinate non-negative near the top.
        label_position = (x + int(w / 2), abs(y - 10))
        cv2.putText(img, result['label'], label_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imshow("Objet Detection YOLO", img)
    cv2.waitKey(0)
cv2.destroyAllWindows()

### Let's test this on a video

In [10]:
import cv2

# Open the video file with OpenCV and process it frame by frame.
cap = cv2.VideoCapture('tube.mp4')
MAX_FRAMES = 240  # stop after this many frames have been displayed
frame_number = 0

try:
    while True:
        ret, frame = cap.read()

        # No frame could be read (e.g. end of the video): stop. Previously the
        # loop kept spinning forever if this happened before frame 240.
        if not ret:
            break
        frame_number += 1

        # The detector is fed an RGB copy; boxes are drawn on the original frame.
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = tfnet.return_predict(img)

        for result in results:
            x = result['topleft']['x']
            y = result['topleft']['y']
            w = result['bottomright']['x'] - x
            h = result['bottomright']['y'] - y
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Label sits at the box's horizontal midpoint, 10 px above its top
            # edge; abs() keeps the y coordinate non-negative near the top.
            label_position = (x + int(w / 2), abs(y - 10))
            cv2.putText(frame, result['label'], label_position, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

        cv2.imshow("Objet Detection YOLO", frame)
        if frame_number == MAX_FRAMES:
            break
        if cv2.waitKey(1) == 13:  # 13 is the Enter key
            break
finally:
    # Release the video handle and close the window on every exit path.
    cap.release()
    cv2.destroyAllWindows()