
# <center>YOLO</center>

## 1. Importing Libraries

In [1]:
import numpy as np
import cv2 as cv

## 2. Class YOLO 

In [2]:
class YOLO:
    """Object detector wrapping an OpenCV ``cv.dnn`` network.

    The instance stores the network, the detection thresholds, the class
    labels and one drawing color per class, and offers helpers to run
    detection on single images and on video streams.
    """

    def __init__(self, model, conf_threshold, iou_threshold, classes, colors,
                 input_size=(416, 416)):
        """Store the detector configuration.

        Args:
            model: Network object exposing the ``cv.dnn`` net interface
                (``setInput``, ``forward``, ``getLayerNames``, ...).
            conf_threshold: Minimum class score for a detection to be kept.
            iou_threshold: Overlap threshold handed to non-maximum suppression.
            classes: Sequence of class labels, indexed by class id.
            colors: Per-class colors, indexed by class id.
            input_size: ``(width, height)`` the image is resized to when the
                input blob is built. Defaults to the previously hard-coded
                ``(416, 416)``.
        """
        self.model = model
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.classes = classes
        self.colors = colors
        self.input_size = input_size

        # Run inference on the GPU when OpenCV reports a CUDA device.
        if cv.cuda.getCudaEnabledDeviceCount():
            self.model.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
            self.model.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)

    def get_output_layers(self):
        """Return the names of the network's unconnected output layers.

        ``getUnconnectedOutLayers`` yields 1-based layer ids, shaped either
        ``(N, 1)`` or ``(N,)`` depending on the OpenCV version; flattening
        makes both layouts work.
        """
        layer_names = self.model.getLayerNames()
        layer_ids = np.asarray(self.model.getUnconnectedOutLayers()).flatten()
        return [layer_names[int(layer_id) - 1] for layer_id in layer_ids]

    def extract(self, outputs, height, width):
        """Convert raw network outputs into boxes, scores and class ids.

        Each detection row is read as ``[cx, cy, w, h, <unused>, p0, p1, ...]``
        where the first four values are relative to the image size and the
        values from index 5 on are per-class scores.

        Args:
            outputs: Iterable of 2-D arrays of detection rows.
            height: Image height in pixels.
            width: Image width in pixels.

        Returns:
            Tuple ``(boxes, confidences, class_ids)`` where every box is
            ``[x, y, w, h]`` (top-left corner and size, in pixels).
        """
        boxes = []
        confidences = []
        class_ids = []
        scale = np.array([width, height, width, height])  # loop-invariant

        for output in outputs:
            for detection in output:
                probs = detection[5:]
                class_id = int(np.argmax(probs))
                confidence = probs[class_id]

                if confidence > self.conf_threshold:
                    center_x, center_y, w, h = (detection[:4] * scale).astype('int')

                    # Convert the box center to its top-left corner.
                    x = int(center_x - (w / 2))
                    y = int(center_y - (h / 2))

                    boxes.append([x, y, int(w), int(h)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        return boxes, confidences, class_ids

    def detect_in_image(self, image):
        """Detect objects in a single image.

        Args:
            image: Image array whose first two dimensions are height and
                width; channels are swapped (``swapRB=True``) when the blob
                is built.

        Returns:
            Object array with one row ``(x, y, w, h, confidence, class_id)``
            per detection kept by non-maximum suppression, or ``None`` (after
            printing a message) when nothing is detected.
        """
        blob = cv.dnn.blobFromImage(image, 1 / 255, self.input_size, swapRB=True, crop=False)
        self.model.setInput(blob)
        outputs = self.model.forward(self.get_output_layers())

        height, width = image.shape[:2]
        boxes, confidences, class_ids = self.extract(outputs, height, width)
        indices = cv.dnn.NMSBoxes(boxes, confidences, self.conf_threshold, self.iou_threshold)

        if len(indices) == 0:
            print("Object isn't detected")
            return None

        kept = np.asarray(indices).flatten()
        return np.array(
            [(*boxes[i], confidences[i], class_ids[i]) for i in kept],
            dtype='object',
        )

    def detect_in_video(self, cap, output=None, show=False, title='Title'):
        """Run detection on every frame read from ``cap``.

        Args:
            cap: Opened capture object providing ``isOpened``/``read``/``release``.
            output: Optional writer; each (annotated) frame is written to it.
            show: When true, display frames in a window; pressing ``q`` stops.
            title: Window title used when ``show`` is true.

        The capture, the writer and any windows are released even if
        processing a frame raises.
        """
        try:
            while cap.isOpened():
                ret, image = cap.read()
                if not ret:
                    break

                preds = self.detect_in_image(image)
                if preds is not None:
                    image = self.bounding_box(image, preds)

                if output:
                    output.write(image)
                if show:
                    cv.imshow(title, image)
                    if cv.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            cap.release()
            if output:
                output.release()
            cv.destroyAllWindows()

    def bounding_box(self, image, preds):
        """Draw labelled boxes for ``preds`` onto ``image`` (in place).

        Args:
            image: Image to draw on.
            preds: Rows of ``(x, y, w, h, confidence, class_id)`` as returned
                by ``detect_in_image``; ``None`` means "no detections".

        Returns:
            The same ``image`` object, annotated.
        """
        if preds is None:
            return image

        for x, y, w, h, prob, class_id in preds:
            # Plain Python ints keep the OpenCV drawing calls happy regardless
            # of the numeric type stored in the predictions array.
            x, y, w, h, class_id = int(x), int(y), int(w), int(h), int(class_id)
            color = [int(c) for c in self.colors[class_id]]

            cv.rectangle(image, (x, y), (x + w, y + h), color, 4)
            text = f'{self.classes[class_id]}: {prob:.3f}'
            cv.putText(image, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 2, color, 4)

        return image

    @staticmethod
    def show_video(path, title='Title'):
        """Play the video at ``path`` in a window; pressing ``q`` stops playback."""
        cap = cv.VideoCapture(path)
        try:
            while cap.isOpened():
                ret, image = cap.read()
                if not ret:
                    break

                cv.imshow(title, image)
                if cv.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            cv.destroyAllWindows()

    @staticmethod
    def save_video(cap, path):
        """Create an MJPG ``cv.VideoWriter`` matching ``cap``'s frame size and FPS.

        Args:
            cap: Capture object to read width, height and FPS from.
            path: Destination file of the writer.

        Returns:
            The ``cv.VideoWriter``; the caller is responsible for releasing it.
        """
        width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv.CAP_PROP_FPS))
        return cv.VideoWriter(path, cv.VideoWriter_fourcc(*'MJPG'), fps, (width, height))

    @staticmethod
    def show_image(image, title='Title'):
        """Display ``image`` in a window and wait for a key press before closing."""
        cv.imshow(title, image)
        cv.waitKey(0)
        cv.destroyAllWindows()

    @staticmethod
    def save_image(path, image):
        """Write ``image`` to ``path`` with ``cv.imwrite``."""
        cv.imwrite(path, image)

In [3]:
# Locations of the Darknet network definition, its weights and the class labels.
cfg_path = 'model/yolov3.cfg'
weights_path = 'model/yolov3.weights'
names_path = 'model/coco.names'

# One label per line. splitlines() avoids the phantom empty label that
# split('\n') produces for a trailing newline, and also copes with CRLF files.
with open(names_path, 'r') as f:
    classes = f.read().splitlines()
# One random color per class, used when drawing boxes.
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')

model = cv.dnn.readNetFromDarknet(cfg_path, weights_path)

## 3. Object Detection in Image

In [4]:
# Thresholds for the image run: minimum class score, and the IoU value
# handed to non-maximum suppression.
CONFIDENCE_THRESHOLD, IOU_THRESHOLD = 0.7, 0.4

yolo = YOLO(
    model=model,
    conf_threshold=CONFIDENCE_THRESHOLD,
    iou_threshold=IOU_THRESHOLD,
    classes=classes,
    colors=colors,
)

In [5]:
# Directories images are read from / written to, and the file to process.
input_images_path, output_images_path = 'inputs/images/', 'outputs/images/'
image_name = 'tesla.jpg'

In [6]:
image_path = f'{input_images_path}{image_name}'
image = cv.imread(image_path)
if image is None:
    # cv.imread reports a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError(f'Could not read image: {image_path}')

preds = yolo.detect_in_image(image)
if preds is not None:
    # detect_in_image returns None when no detection survives the thresholds;
    # in that case the image is saved without annotations.
    image = yolo.bounding_box(image, preds)
yolo.save_image(f'{output_images_path}{image_name}', image)

In [7]:
# Display the annotated image; the window stays open until a key is pressed.
YOLO.show_image(image)

## 4. Object Detection in Video

In [8]:
# Thresholds for the video run (score cut-off and NMS IoU).
CONFIDENCE_THRESHOLD, IOU_THRESHOLD = 0.7, 0.4

yolo = YOLO(
    model=model,
    conf_threshold=CONFIDENCE_THRESHOLD,
    iou_threshold=IOU_THRESHOLD,
    classes=classes,
    colors=colors,
)

In [9]:
# Directories videos are read from / written to, and the clip to process.
input_videos_path, output_videos_path = 'inputs/videos/', 'outputs/videos/'
video_name = 'gopro1.mp4'

In [10]:
cap = cv.VideoCapture(f'{input_videos_path}{video_name}')
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty output file.
    raise IOError(f'Could not open video: {input_videos_path}{video_name}')

# The annotated copy is written as MJPG, so it gets an .avi extension.
output = yolo.save_video(cap, f"{output_videos_path}{video_name.rsplit('.', 1)[0] + '.avi'}")
yolo.detect_in_video(cap, output)

In [11]:
# Play back the annotated clip, which was written to the output directory
# (the input directory only holds the original file).
yolo.show_video(f"{output_videos_path}{video_name.rsplit('.', 1)[0] + '.avi'}")

## 5. Object Detection in Real-Time

In [12]:
# Thresholds for the real-time run (score cut-off and NMS IoU).
CONFIDENCE_THRESHOLD, IOU_THRESHOLD = 0.7, 0.4

yolo = YOLO(
    model=model,
    conf_threshold=CONFIDENCE_THRESHOLD,
    iou_threshold=IOU_THRESHOLD,
    classes=classes,
    colors=colors,
)

In [13]:
# Destination directory and file name for the webcam recording.
output_videos_path, video_name = 'outputs/videos/', 'webcam.avi'

In [14]:
# Device index 0 is passed to VideoCapture to read from a camera.
cap = cv.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly instead of silently recording nothing.
    raise IOError('Could not open video capture device 0')

output = yolo.save_video(cap, f'{output_videos_path}{video_name}')
# show=True displays the annotated frames; press "q" in the window to stop.
yolo.detect_in_video(cap, output, show=True)

In [15]:
# Replay the recording that was just written; press "q" to stop.
YOLO.show_video(f'{output_videos_path}{video_name}')