<h1>AI Object Detection Using CNN with YOLO and COCO classes</h1>
<h3>If you'd like to use this code for a video or images please scroll down to the end of the Notebook take a look at the last two cells, follow the instructions</h3>

<h4>Please note that the network's been trained for use with FLIR and well as color spectrum photography.</h4>

<h4>* This version is not intended for commercial use and for demonstration purposes only. </h4>

<h4>* There is a much more refined version available for licensing that i've built, intended for all commercial needs. Please email me at directly at   |  haseebk73@gmail.com  |</h4>


In [6]:
#You're Welcome to reuse this code with proper citations.
#Make sure you have all the dependencies installed.
#Import Packages
import os
import numpy as np
from keras import backend as K
from keras.models import load_model

from matplotlib import pyplot as plt
import cv2
import h5py


"""
https://github.com/gbusr/YAD2K provides the base work for implementation of YOLO v2 in Keras and Tensorflow
yad2k.py converts the YOLO's Darknet weights to Keras readable format
"""
from yad2k.models.keras_yolo import yolo_eval, yolo_head

In [7]:
color = (0, 0, 255)
model_path = 'model_data/yolo.h5'
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/coco_classes.txt'

In [8]:
# Detection Class
class Detector:
    # Class initialization
    def __init__(self, model_path, classes_path, anchors_path):
        # Open Tensorflow session
        self.session = K.get_session()
        
        # Load classes
        with open(classes_path) as f:
            self.class_names = f.readlines()
            self.class_names = [c.strip() for c in self.class_names]
            
        # Load anchors
        with open(anchors_path) as f:
            self.anchors = f.readline()
            self.anchors = [float(x) for x in self.anchors.split(',')]
            self.anchors = np.array(self.anchors).reshape(-1, 2)
        
        # Load model
        self.model = load_model(model_path)
        
        # Check if model is fully convolutional, assuming channel last order
        self.model_image_size = self.model.layers[0].input_shape[1:3]
        
        # Generate output tensor targets
        self.input_image_shape = K.placeholder(shape = (2, ))
        self._boxes, self._scores, self._classes = self.generate_tensor_targets(0.2, 0.4)
        
    # Generate output tensor targets for filtered bounding boxes
    def generate_tensor_targets(self, score_threshold = 0.2, iou_threshold = 0.4):
        model_outputs = yolo_head(self.model.output, self.anchors, len(self.class_names))      
        return yolo_eval(model_outputs,
                         self.input_image_shape,
                         score_threshold = score_threshold,
                         iou_threshold = iou_threshold)        
            
    # Resize image
    def resize_image(self, image):
        # Resize image
        if self.model_image_size != (None, None):
            # Fit to the fixed input shape
            new_image_size = tuple(reversed(self.model_image_size))
        else:
            # width and height as multiples of 32.
            new_image_size = (image.width - (image.width % 32), image.height - (image.height % 32))
            
        return cv2.resize(image, new_image_size)
    
    # Preprocess image
    def process_image(self, image):
        # Resize image
        resized_image = self.resize_image(image)
        # Convert to image to array
        image_data = resized_image.astype(np.float32)
        # Normalize image data
        image_data /= 255.
        # Add batch dimension
        return np.expand_dims(image_data, 0), image.shape
    
    # Filter classes
    def filter_classes(self, boxes, scores, classes, class_filter):
        # If no filter
        if len(class_filter) is 0:
            return boxes, scores, classes
        
        # Filter the classes
        out_boxes = []; out_scores = []; out_classes = []
        for i, c in list(enumerate(classes)):
            if self.class_names[c] in class_filter:
                out_boxes.append(boxes[i])
                out_scores.append(scores[i])
                out_classes.append(classes[i])
                
        return out_boxes, out_scores, out_classes
    
    # Feed image
    def feed(self, image):
        image_data, image_shape = self.process_image(image)
        # Feed image data into tensorflow
        feed = {self.model.input: image_data,
                self.input_image_shape: image_shape[0:2],
                K.learning_phase(): 0}
        # Run tensor
        return self.session.run([self._boxes, self._scores, self._classes],
                                                  feed_dict = feed)        
            
    # Feed image for detection
    def feed_image(self, filename, class_filter):
        # Process image for feed
        image = cv2.imread(filename)
        boxes, scores, classes = self.feed(image)
        return self.filter_classes(boxes, scores, classes, class_filter)
            
    # Annotate the classes
    def annotate_image(self, image, boxes, scores, classes):
        for i, box in list(enumerate(boxes)):
            class_name = self.class_names[classes[i]]
            # Select annotation color
            if class_name == 'car':
                color = (0, 0, 255)
            elif class_name == 'person':
                color = (0,255, 0)
            else:
                color = (0, 255, 255)
            
            top, left, bottom, right = box
            cv2.rectangle(image, (left, top), (right, bottom), color, 2)
            font = cv2.FONT_HERSHEY_COMPLEX_SMALL
            annotation = class_name + ': ' + str(scores[i])
            cv2.putText(image, annotation, (left, int(top) - 5), font, 1, color, 1)
        return image
    
    # Display annotated image
    def display_image(self, image):
        img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        fig_size = (image.shape[1] * 16/1200)
        fig_size = min(fig_size, 16)
        fig_size = max(fig_size, 8)
        plt.figure(figsize = (fig_size, fig_size))
        plt.imshow(img_rgb)
        plt.show()  
        
    # Detect and Annotate
    def run_cars(self, filename):
        # Feed image
        boxes, scores, classes = self.feed_image(filename, class_filter = ['car'])
        # Annotate image
        image = cv2.imread(filename)
        image = self.annotate_image(image, boxes, scores, classes)  
        self.display_image(image)

In [9]:
# Create detector object
detector = Detector(model_path, classes_path, anchors_path)
# detector.model.summary()

In [23]:
# Feed image
filename = 'images/flir9.jpg'
image = cv2.imread(filename)
boxes, scores, classes = detector.feed_image(filename, class_filter = [])
# Annotate image
image = detector.annotate_image(image, boxes, scores, classes)
# Display image
detector.display_image(image)

error: OpenCV(4.1.0) /Users/travis/build/skvark/opencv-python/opencv/modules/imgproc/src/resize.cpp:3718: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


In [10]:
# Citation This function is inspired by 
# https://github.com/ajayaraman/CarND-VehicleDetection/blob/master/det_vehicles.py
def feed_video(file, class_filter, playback = False):
    # Capture video, extract the width and height
    video = cv2.VideoCapture(file)
    width = int(video.get(3))
    height = int(video.get(4))

    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    output_filename = os.path.join("output", "out_" + os.path.basename(file))
    output = cv2.VideoWriter(output_filename, fourcc, 30.0, (width, height))
    
    #Count to capture number of frames one is going through
    count = 1
    
    #Length of video in number of frames
    length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    print(length)
    while(count < length-50):
        print('frame ' + str(count))
        count += 1
        ret, frame = video.read()
        frameInp = cv2.COLOR_BGR2RGB
        frame = cv2.cvtColor(frame, frameInp)
        if ret == True:
            boxes, scores, classes = detector.feed(frame)
            boxes, scores, classes = detector.filter_classes(boxes, scores, classes, class_filter)
            
            outframe = detector.annotate_image(frame, boxes, scores, classes)        
            outframe = cv2.cvtColor(outframe, cv2.COLOR_RGB2BGR)
            output.write(outframe)

            if playback:
                cv2.imshow('frame', outframe)

                if cv2.waitKey(0.01) & 0xFF == ord('c'):
                    break
        else:
            break
    
    video.release()
    output.release()
    cv2.destroyAllWindows()

In [11]:
#Place your video file in the images folder and enter the filename here.
#Will return your output file to the output folder.
feed_video('images/flv1.mp4', [])

298
frame 1
frame 2
frame 3
frame 4
frame 5
frame 6
frame 7
frame 8
frame 9
frame 10
frame 11
frame 12
frame 13
frame 14
frame 15
frame 16
frame 17
frame 18
frame 19
frame 20
frame 21
frame 22
frame 23
frame 24
frame 25
frame 26
frame 27
frame 28
frame 29
frame 30
frame 31
frame 32
frame 33
frame 34
frame 35
frame 36
frame 37
frame 38
frame 39
frame 40
frame 41
frame 42
frame 43
frame 44
frame 45
frame 46
frame 47
frame 48
frame 49
frame 50
frame 51
frame 52
frame 53
frame 54
frame 55
frame 56
frame 57
frame 58
frame 59
frame 60
frame 61
frame 62
frame 63
frame 64
frame 65
frame 66
frame 67
frame 68
frame 69
frame 70
frame 71
frame 72
frame 73
frame 74
frame 75
frame 76
frame 77
frame 78
frame 79
frame 80
frame 81
frame 82
frame 83
frame 84
frame 85
frame 86
frame 87
frame 88
frame 89
frame 90
frame 91
frame 92
frame 93
frame 94
frame 95
frame 96
frame 97
frame 98
frame 99
frame 100
frame 101
frame 102
frame 103
frame 104
frame 105
frame 106
frame 107
frame 108
frame 109
frame 110
fram

In [None]:
# sess.close()